# Source data is based on this ADVFN site

https://uk.advfn.com/p.php?pid=financials&symbol=AAPL&btn=quarterly_reports

https://uk.advfn.com/p.php?pid=financials&symbol=AAPL&btn=quarterly_reports&istart_date=0

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from io import StringIO

In [2]:
class ADVFN:
    
    urlbase = 'https://uk.advfn.com/p.php?pid=financials'
    fundamental_type = ['INDICATORS', 'INCOME STATEMENT', 'INCOME STATEMENT (YEAR-TO-DATE)',
                        'BALANCE SHEET', 'ASSETS', 'EQUITY & LIABILITIES', 'CASH-FLOW STATEMENT', 
                        'OPERATING ACTIVITIES', 'INVESTING ACTIVITIES', 'FINANCING ACTIVITIES',
                        'NET CASH FLOW', 'RATIOS CALCULATIONS', 'PROFIT MARGINS', 'NORMALIZED RATIOS',
                        'SOLVENCY RATIOS', 'EFFICIENCY RATIOS', 'ACTIVITY RATIOS', 'LIQUIDITY RATIOS',
                        'CAPITAL STRUCTURE RATIOS', 'PROFITABILITY', 'AGAINST THE INDUSTRY RATIOS']

    def __init__(self):
        pass
    
    def _get_format_url(self, symbol, start_idx):
        url = self.urlbase + '&symbol={symbol}&btn=quarterly_reports'.format(symbol=symbol)
        url += '&istart_date={start_idx}'.format(start_idx=start_idx)
        return url
    
    def _get_soup_table(self, url):
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'lxml')
        tables = soup.findAll('table')
        # TODO: figure out better way than index
        soup_data_table = tables[7].find('table')  
        return soup_data_table        
    
    def _get_format_data_df(self, soup_data_table):
        df = pd.read_html(StringIO(str(soup_data_table)))[0]
        is_empty = df[1].isnull() & df[2].isnull() & df[3].isnull() & df[4].isnull() & df[5].isnull()
        empty_df = df[is_empty].copy()
        df = df[~is_empty].copy()
        empty_df = empty_df[empty_df[0].isin(self.fundamental_type)].copy().reset_index()
        quarters = df[df[0] == 'quarter end date'].values[0]
        quarters[0] = 'fundamental'
        df.columns = quarters
        df['category'] = None
        for idx in df.index:
            category = empty_df[empty_df['index'] < idx].tail(1)[0].values[0]
            df.loc[idx, 'category'] = category
        df = pd.melt(df, id_vars=['category', 'fundamental'], value_vars=quarters[1:])
        df.columns = ['category', 'fundamental', 'quarter', 'val']
        return df[~df['quarter'].isnull()]
    
    def get_stock_fundamentals(self, symbol, start_idx=0):
        stock_data = pd.DataFrame()
        url = self._get_format_url(symbol, start_idx)
        table = self._get_soup_table(url)
        if table:
            stock_data = self._get_format_data_df(table)
            df = self.get_stock_fundamentals(symbol, start_idx+1)
            stock_data = stock_data.append(df, ignore_index=True).drop_duplicates()
        return stock_data



In [3]:
stocks = ADVFN()

In [13]:
aapl = stocks.get_stock_fundamentals('COKE')
aapl.shape

(26230, 4)

In [14]:
aapl[(aapl['category'] == 'PROFITABILITY') & (aapl['fundamental'] == 'Return on Stock Equity (ROE)')].head(100)

Unnamed: 0,category,fundamental,quarter,val
252,PROFITABILITY,Return on Stock Equity (ROE),1994/03,56.0
527,PROFITABILITY,Return on Stock Equity (ROE),1994/06,50.3
802,PROFITABILITY,Return on Stock Equity (ROE),1994/09,44.0
1077,PROFITABILITY,Return on Stock Equity (ROE),1994/12,41.5
1352,PROFITABILITY,Return on Stock Equity (ROE),1995/03,43.5
2727,PROFITABILITY,Return on Stock Equity (ROE),1995/06,40.7
3002,PROFITABILITY,Return on Stock Equity (ROE),1995/09,37.6
3277,PROFITABILITY,Return on Stock Equity (ROE),1995/12,39.7
3552,PROFITABILITY,Return on Stock Equity (ROE),1996/03,38.3
3827,PROFITABILITY,Return on Stock Equity (ROE),1996/06,35.3


In [None]:
class StockList:
    #http://www.advfn.com/nasdaq/nasdaq.asp?companies=B
    #http://www.advfn.com/nyse/newyorkstockexchange.asp?companies=A
    def __init__(self):
        pass






---------------------




### In progress Testing building out the class

----------------------

In [90]:
response = requests.get('https://uk.advfn.com/p.php?pid=financials&symbol=AAPL&btn=quarterly_reports&istart_date=0')
soup = BeautifulSoup(response.content, 'lxml')
tables = soup.findAll('table')
soup_data_table = tables[7].find('table')
html_data_list = pd.read_html(StringIO(str(soup_data_table)))
datadf = html_data_list[0]
datadf

In [65]:
datadf.head()

Unnamed: 0,0,1,2,3,4,5
0,INDICATORS,,,,,
1,quarter end date,1994/03,1994/06,1994/09,1994/12,1995/03
2,date preliminary data loaded,0000-00-00,0000-00-00,0000-00-00,0000-00-00,1995-04-24
3,earnings period indicator,S,N,F,Q,S
4,quarterly indicator,2,3,4,1,2


In [77]:
df = datadf.copy()

In [123]:
is_empty = df[1].isnull() & df[2].isnull() & df[3].isnull() & df[4].isnull() & df[5].isnull()
empty_df = df[is_empty]

In [124]:
empty_df = empty_df.reset_index()

In [125]:
empty_df

Unnamed: 0,index,0,1,2,3,4,5
0,0,INDICATORS,,,,,
1,6,template indicator,,,,,
2,10,INCOME STATEMENT,,,,,
3,54,*,,,,,
4,64,*,,,,,
5,75,INCOME STATEMENT (YEAR-TO-DATE),,,,,
6,80,BALANCE SHEET,,,,,
7,81,ASSETS,,,,,
8,115,inventory valuation method,,,,,
9,116,EQUITY & LIABILITIES,,,,,
