In [3]:
#--------------------------
#Stock class definition
#--------------------------
class stock_class:
  """Price, dividend, fundamentals and profile data for a single ticker.

  On construction the three data sets are loaded from the CSV cache in
  ``gdfolder``.  A missing profile is scraped from Yahoo Finance (skipped for
  benchmarks) and a missing price/dividend history triggers a download from
  the API described by ``source``.  Everything that is downloaded is written
  to a local CSV and copied back into ``gdfolder``.

  The class relies on module-level names defined outside this block:
  ``gdfolder``, ``source``, ``pricedatemin``, ``pricedatemax``, ``yrs`` and
  ``ctx``.

  Attributes:
    ticker: symbol passed to the constructor.
    benchmark: when true, no profile and no fundamentals are downloaded.
    hist_price_div: frame indexed by date with ``close`` and ``adjDividend``
      columns (``cagr_price_div`` adds ``divs_reinv``).
    fundamentals: frame indexed by date holding ratios and key metrics.
    profile: one-row frame with the scraped company profile.
    n_api: number of API requests issued by this instance.
  """

  # Class-level defaults; __init__ replaces them with per-instance values.
  fundamentals = pd.DataFrame()  # fundamentals dynamics
  hist_price_div = pd.DataFrame()  # price and dividend dynamics
  profile = pd.DataFrame()  # company profile
  n_api = 0  # number of API requests

  def __init__(self, ticker, benchmark=False):
    """Load cached data for ``ticker`` and download whatever is missing.

    Args:
      ticker: symbol used in file names and request URLs.
      benchmark: set to True for index-like tickers that have no profile
        and no fundamentals.
    """
    self.ticker = ticker
    self.benchmark = benchmark
    # Read the cache once; each call copies and parses three CSV files.
    self.hist_price_div, self.fundamentals, self.profile = self.read_from_gd()

    # No cached profile: scrape it from Yahoo Finance (not for benchmarks).
    if self.profile.empty and not self.benchmark:
      self.profile = pd.DataFrame(self.read_company_info_from_yahoo())
      self._save_csv(self.profile, '_profile.csv')

    # No cached price/dividend history: request it from the API.
    if self.hist_price_div.empty:
      self._download_from_api()

  def _save_csv(self, frame, suffix):
    """Write ``frame`` to ``<ticker><suffix>`` locally and copy it to ``gdfolder``."""
    filename = self.ticker + suffix
    frame.to_csv(filename)
    shutil.copy(filename, gdfolder + filename)

  def _download_from_api(self):
    """Download prices, dividends and (for non-benchmarks) fundamentals."""
    hist_price = self.read_from_api(self.hist_price_url())
    # Stop after the first failed request to keep the API request count low.
    if hist_price.empty:
      print('Ticker not found or num of API requests has been exceeded: ' + self.ticker)
      return

    hist_divs = self.read_from_api(self.hist_divs_url())
    requests_made = 2

    # Keep only the closing price.
    hist_price = hist_price.drop(columns=hist_price.columns.difference(['close']))
    if not hist_divs.empty:
      hist_divs = hist_divs.drop(columns=hist_divs.columns.difference(['adjDividend']))
    else:
      # Fictitious zero dividend so downstream code also works for shares
      # that never paid one.
      # NOTE(review): the fixed date adds a row with a NaN close when it is
      # not part of the price index - confirm that is acceptable.
      hist_divs = pd.DataFrame([{'date': '2020-01-02', 'adjDividend': 0}])
      hist_divs['date'] = pd.to_datetime(hist_divs['date'])
      hist_divs.set_index('date', inplace=True)

    self.hist_price_div = pd.concat([hist_price, hist_divs], axis=1)
    self.hist_price_div.index = pd.to_datetime(self.hist_price_div.index)
    self.hist_price_div['adjDividend'] = self.hist_price_div['adjDividend'].fillna(0)
    self._save_csv(self.hist_price_div, '_hist_price_div.csv')

    if not self.benchmark:
      self._download_fundamentals()
      requests_made += 4
    # The fundamentals file is written for benchmarks as well (empty frame).
    self._save_csv(self.fundamentals, '_fundamentals.csv')
    # Count the requests actually issued: 2 for benchmarks, 6 otherwise.
    self.n_api = self.n_api + requests_made

  def _download_fundamentals(self):
    """Build ``self.fundamentals`` from annual ratios/key metrics plus a TTM row."""
    ratios = self.read_from_api(self.ratios_url())
    ratios_ttm = self.read_from_api(self.ratios_ttm_url())
    key_metrics = self.read_from_api(self.key_metrics_url())
    key_metrics_ttm = self.read_from_api(self.key_metrics_ttm_url())

    annual = pd.concat([ratios, key_metrics], axis=1)
    annual = annual.loc[:, ~annual.columns.duplicated()]  # removes duplicated columns

    ttm = pd.concat([ratios_ttm, key_metrics_ttm], axis=1)
    # A failed TTM request yields an empty frame; inserting the one-element
    # date column into it would raise, so the TTM row is skipped in that case.
    if not ttm.empty:
      # The TTM row is dated ``pricedatemax``.
      ttm.insert(0, "date", [pricedatemax], allow_duplicates=False)
      ttm.insert(0, "symbol", self.ticker, allow_duplicates=False)
      ttm['date'] = pd.to_datetime(ttm['date'])
      ttm.set_index('date', inplace=True)
      # Strip the 'TTM' marker so the names line up with the annual columns.
      ttm = ttm.rename(columns=lambda name: name.replace('TTM', ''))
      ttm = ttm.loc[:, ~ttm.columns.duplicated()]  # removes duplicated columns
      # The inner join keeps only the columns present in both frames.
      annual = pd.concat([annual, ttm], axis=0, join='inner')

    annual.sort_index(inplace=True)
    annual.index = pd.to_datetime(annual.index)  # conversion to timestamp
    self.fundamentals = annual

  # Defining urls
  def hist_price_url(self):
    """Return the API URL for the daily price history between the global date limits."""
    return source['site'] + "historical-price-full/"+self.ticker+"?from=" + pricedatemin + "&to=" + pricedatemax + "&apikey="+source['apikey']

  def hist_divs_url(self):
    """Return the API URL for the dividend history between the global date limits."""
    return source['site'] + "historical-price-full/stock_dividend/"+self.ticker+"?from=" + pricedatemin + "&to=" + pricedatemax + "&apikey="+source['apikey']

  def ratios_url(self):
    """Return the API URL for the ratios endpoint, limited to ``yrs`` entries."""
    return source['site'] + "ratios/"+self.ticker+"?limit=" + str(yrs) + "&apikey="+source['apikey']

  def ratios_ttm_url(self):
    """Return the API URL for the ratios-ttm endpoint."""
    return source['site'] + "ratios-ttm/"+self.ticker+"?&apikey="+source['apikey']

  def key_metrics_url(self):
    """Return the API URL for the key-metrics endpoint, limited to ``yrs`` entries."""
    return source['site'] + "key-metrics/"+self.ticker+"?limit=" + str(yrs) + "&apikey="+source['apikey']

  def key_metrics_ttm_url(self):
    """Return the API URL for the key-metrics-ttm endpoint."""
    return source['site'] + "key-metrics-ttm/"+self.ticker+"?&apikey="+source['apikey']

  def growth_url(self):
    """Return the API URL for the financial-growth endpoint, limited to ``yrs`` entries."""
    return source['site'] + "financial-growth/"+self.ticker+"?limit=" + str(yrs) + "&apikey="+source['apikey']

  def company_profile_url(self):
    """Return the Yahoo Finance profile page URL for the ticker."""
    return "https://finance.yahoo.com/quote/"+self.ticker+"/profile?p="+self.ticker

  def company_holders_url(self):
    """Return the Yahoo Finance holders page URL for the ticker."""
    return "https://finance.yahoo.com/quote/"+self.ticker+"/holders?p="+self.ticker

  def company_summary_url(self):
    """Return the Yahoo Finance summary page URL for the ticker."""
    return "https://finance.yahoo.com/quote/"+self.ticker+"?p="+self.ticker

  def read_from_api(self, url):
    """Fetch JSON from ``url`` and return it as a DataFrame.

    When ``url`` contains "historical" the records are taken from the
    ``historical`` key of the payload.  A ``date`` column, if present, is
    parsed and becomes the index.

    Returns:
      The parsed frame, or an empty DataFrame if anything goes wrong;
      callers test ``.empty`` to detect a failed request.
    """
    try:
      # The context manager closes the HTTP response deterministically.
      with urllib.request.urlopen(url, context=ctx) as response:
        payload = json.loads(response.read())
      if "historical" in url:
        frame = pd.DataFrame(payload['historical'])
      else:
        frame = pd.DataFrame(payload)
      if 'date' in frame:
        frame['date'] = pd.to_datetime(frame['date'])
        frame.set_index('date', inplace=True)
      return frame
    except Exception:
      # Deliberate best effort: network, JSON and payload-shape errors all
      # mean "no data".  KeyboardInterrupt/SystemExit are no longer swallowed.
      return pd.DataFrame()

  def _fetch_soup(self, url):
    """Download ``url`` and return it parsed with BeautifulSoup's html.parser."""
    with urllib.request.urlopen(url, context=ctx) as response:
      html = response.read()
    return BeautifulSoup(html, 'html.parser')

  def read_company_info_from_yahoo(self):
    """Scrape the company profile and holder percentages from Yahoo Finance.

    Returns:
      A one-element list holding a dict with the keys ``symbol``, ``Sector``,
      ``Industry``, ``NumEmployees``, ``InstitutionalHolders``,
      ``InsideHolders`` and ``Description``.  If the pages cannot be fetched
      or parsed every value is an empty string.
    """
    try:
      profile_page = self._fetch_soup(self.company_profile_url())
      tags = profile_page('span', {'class': 'Fw(600)'})
      # NOTE(review): the selector below is a set literal, not a
      # {'class': ...} dict like the others - kept unchanged, confirm intent.
      tagsd = profile_page('p', {'Mt(15px) Lh(1.6)'})
      holders_page = self._fetch_soup(self.company_holders_url())
      tagh = holders_page('td', {'class': 'Py(10px) Va(m) Fw(600) W(15%)'})
      return [{'symbol': self.ticker, 'Sector': tags[0].text, 'Industry': tags[1].text, 'NumEmployees': tags[2].text , 'InstitutionalHolders': tagh[1].text , 'InsideHolders': tagh[0].text , 'Description' : tagsd[0].text}]
    except Exception:
      # Best effort: a blank profile keeps the constructor working.
      return [{'symbol': '', 'Sector': '', 'Industry': '', 'NumEmployees': '' , 'InstitutionalHolders': '' , 'InsideHolders': '', 'Description' : ''}]

  def get_ttm_from_yahoo(self):
    """Read trailing-twelve-month statistics from Yahoo's key-statistics page.

    Returns:
      A one-element list holding a dict with the keys ``date`` (today),
      ``PEttm``, ``EPS``, ``DivYield`` (as a fraction) and ``RevPerShare``.
      If the page cannot be read or an expected label is missing, every
      value except ``date`` is an empty string.
    """
    try:
      tgt_website = 'https://sg.finance.yahoo.com/quote/' + self.ticker + '/key-statistics?p=' + self.ticker
      # The page consists of several two-column html tables; read_html returns
      # one frame per table.  Stack them and transpose so that each statistic
      # becomes a column and the values sit in the single row labelled 1.
      df_list = pd.read_html(tgt_website)
      # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
      result_df = pd.concat(df_list)
      result_df = result_df.set_index(0).T

      yield_col = 'Forward annual dividend yield 4'
      try:
        # Strip the percent sign so the value can be converted to a number.
        result_df.loc[1, yield_col] = result_df.loc[1, yield_col].replace('%', '')
      except AttributeError:
        # A NaN (float) yield has no .replace(); treat it as zero.
        result_df.loc[1, yield_col] = 0
      result_df.loc[1] = pd.to_numeric(result_df.loc[1], errors='coerce')
      return [ {'date': date.today(), 'PEttm': result_df.loc[1, 'Trailing P/E'], 'EPS': result_df.loc[1, 'Diluted EPS (ttm)'], 'DivYield': (result_df.loc[1, yield_col])/100, 'RevPerShare': result_df.loc[1, 'Revenue per share (ttm)']} ]
    except Exception:
      # Best effort: any download or parsing problem yields blank values.
      return [ {'date': date.today(), 'PEttm': '', 'EPS': '', 'DivYield': '', 'RevPerShare': '' } ]

  def _read_cached_csv(self, suffix, parse_index):
    """Copy ``<ticker><suffix>`` from ``gdfolder`` and load it.

    Returns an empty DataFrame when the file is missing or unreadable.
    """
    filename = self.ticker + suffix
    try:
      shutil.copy(gdfolder + filename, filename)
      frame = pd.read_csv(filename, index_col=0)
      if parse_index:
        frame.index = pd.to_datetime(frame.index)  # conversion to timestamp
      return frame
    except Exception:
      # A missing or empty cache file is the normal "not downloaded yet" case.
      return pd.DataFrame()

  def read_from_gd(self):
    """Load the cached CSV files for this ticker from ``gdfolder``.

    Returns:
      ``[hist_price_div, fundamentals, profile]``; each element is an empty
      DataFrame when its file could not be read.  The first two get a
      datetime index, the profile keeps the index stored in the file.
    """
    hist_price_div = self._read_cached_csv('_hist_price_div.csv', True)
    fundamentals = self._read_cached_csv('_fundamentals.csv', True)
    profile = self._read_cached_csv('_profile.csv', False)
    return [hist_price_div, fundamentals, profile]

  def limit_dataframes(self, datemin, datemax):
    """Restrict ``hist_price_div`` and ``fundamentals`` to ``datemin``..``datemax``.

    Each frame is sliced independently.  A frame that is empty or cannot be
    sliced is left unchanged.
    """
    for attr in ('hist_price_div', 'fundamentals'):
      frame = getattr(self, attr)
      if frame.empty:
        continue
      try:
        setattr(self, attr, frame[datemin:datemax])
      except Exception:
        # Best effort, as before: keep the unsliced frame.
        continue

  def _nearest_position(self, when):
    """Return the row position in ``hist_price_div`` whose date is nearest to ``when``."""
    target = pd.DatetimeIndex([pd.to_datetime(when)])
    # Index.get_loc(method='nearest') was removed in pandas 2.0; get_indexer
    # is the documented replacement.
    return int(self.hist_price_div.index.get_indexer(target, method='nearest')[0])

  def cagr_price(self, datemin=None, datemax=None):
    """Return the compound annual growth rate of the closing price.

    Args:
      datemin: start date; defaults to the global ``pricedatemin``.
      datemax: end date; defaults to the global ``pricedatemax``.
        The defaults are resolved at call time, so defining the class no
        longer requires the globals to exist already.

    Returns:
      The CAGR between the rows nearest to the two dates, or None (after
      printing a message) if it cannot be calculated.
    """
    try:
      if datemin is None:
        datemin = pricedatemin
      if datemax is None:
        datemax = pricedatemax
      start = self._nearest_position(datemin)
      end = self._nearest_position(datemax)
      index = self.hist_price_div.index
      close = self.hist_price_div['close']
      delta = (index[end] - index[start]).days
      return (close.iloc[end] / close.iloc[start]) ** (365 / delta) - 1
    except Exception:
      print(self.ticker + ': error with CAGR calc')

  def cagr_price_div(self, datemin=None, datemax=None):
    """Return the compound annual growth rate including dividends.

    A ``divs_reinv`` column is (re)created on ``hist_price_div``.  It equals
    ``close`` outside the requested window; inside the window it starts at
    ``close + adjDividend`` and every following row scales the previous
    value by the price change and adds that row's dividend.

    Args:
      datemin: start date; defaults to the global ``pricedatemin``.
      datemax: end date; defaults to the global ``pricedatemax``.

    Returns:
      The CAGR of ``divs_reinv`` between the rows nearest to the two dates,
      or None (after printing a message) if it cannot be calculated.
    """
    try:
      if datemin is None:
        datemin = pricedatemin
      if datemax is None:
        datemax = pricedatemax
      frame = self.hist_price_div
      close = frame['close'].to_numpy(dtype=float)
      divs = frame['adjDividend'].to_numpy(dtype=float)
      start = self._nearest_position(datemin)
      end = self._nearest_position(datemax)

      reinvested = close.copy()
      if start <= end:
        # Seed the first row explicitly instead of reading row ``start - 1``,
        # which wrapped around to the last row when ``start`` was 0.
        reinvested[start] = close[start] + divs[start]
        for pos in range(start + 1, end + 1):
          reinvested[pos] = reinvested[pos - 1] * close[pos] / close[pos - 1] + divs[pos]
      frame['divs_reinv'] = reinvested

      delta = (frame.index[end] - frame.index[start]).days
      return (reinvested[end] / reinvested[start]) ** (365 / delta) - 1
    except Exception:
      print(self.ticker + ': error with CAGR calc')

  def growth(self, param, years):
    """Return the growth rate of fundamentals column ``param``.

    Row-over-row growth rates are collected from the latest row backwards.

    Args:
      param: column name in ``self.fundamentals``.
      years: 1 returns the most recent rate; 3 returns the sum of the three
        rates preceding the most recent one, divided by 3.

    Returns:
      The growth rate, or '' for any other ``years`` value or on any error.
    """
    try:
      series = self.fundamentals[param]
      # .iloc makes the positional access explicit; integer [] on a
      # date-indexed Series relies on a deprecated fallback.
      rates = [series.iloc[pos] / series.iloc[pos - 1] - 1
               for pos in range(len(series) - 1, 0, -1)]
      if years == 3:
        # NOTE(review): skips the newest rate and always divides by 3, even
        # when fewer than three rates are available - confirm this is intended.
        return sum(rates[1:4]) / 3
      if years == 1:
        return rates[0]
      return ''
    except Exception:
      return ''

  def average(self, param, years):
    """Return the mean of the last ``years`` rows of fundamentals column ``param``.

    When fewer than ``years`` rows exist, the mean of all available rows is
    returned (previously a negative slice start silently picked the wrong
    rows).  Returns '' on any error, including an empty window.
    """
    try:
      series = self.fundamentals[param]
      count = len(series)
      window = series.iloc[max(count - years, 0):count]
      # Built-in sum keeps NaN propagation exactly as before.
      return sum(window) / len(window)
    except Exception:
      return ''

NameError: ignored