# EDA Notebook for Analysis of Financial Ratios

### Understanding the trends of different financial ratios and metrics is a critical part of analysing a business. This notebook explores those trends to give a sense of the stability of the business, its growth history, returns and margins.

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from data.scripts.simplified_finance_stats.fin_stats import fin_stats
from data.scripts.simplified_finance_stats.fin_ratios import get_ratios
from data.scripts.simplified_finance_stats.fin_stats_2 import fin_stats_2
from report_13f import company_13f
from data.scripts.build_training_data.train_data import train_data
from data.scripts.simplified_finance_stats.stock_stats import stock_stats

# Let pandas render up to 999 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 999)

In [2]:
# Input file locations: a shared data root plus one relative CSV path per dataset.
base_path = "../data/"  # data root (relative path)
sheets_path = "combined_simplified/combined_all_us.csv"  # financial statement sheets
other_path = "combined_simplified/others_all_us.csv"  # other financial data
mkt_path = "combined_simplified/stock_stats_all_us.csv"  # stock / market statistics

Three kinds of data are used here.
1. The basic fundamental financial data from balance sheet, income statement and cash flow statement
2. Other financial data not included in the above statements
3. Market data related to the stock, such as mkt_cap, price, etc.

In [3]:
# Load the three datasets; each file path is the data root followed by the
# dataset's relative CSV path.
finances = fin_stats("".join((base_path, sheets_path)))
fin_others = fin_stats_2("".join((base_path, other_path)))
mkt_data = stock_stats("".join((base_path, mkt_path)))

Total Missing tickers: 181
Total Missing tickers: 181
Total Missing tickers: 181


In [4]:
# Ticker analysed throughout the rest of the notebook
tick = 'FB'

# Balance sheet, income statement and cash-flow statement, fetched in that order
b, i, c = [
    finances.get_sheet(tick, sheet_name)
    for sheet_name in ("balance_sheet", "income_sheet", "cashflow_sheet")
]
o = fin_others.get_sheet(tick)
mk = mkt_data.get_stock_data(tick)

# Stack all five datasets into a single frame (pd.concat default: row-wise)
all_fin_data = pd.concat([b, i, c, o, mk])

Build ratios dataframe from the above data

In [5]:
# Derive the ratios table from the five per-ticker datasets fetched above
df_ratios = get_ratios(b, i, c, o, mk)

In [6]:
# Balance sheet retrieved for the selected ticker
b

Unnamed: 0,2012,2013,2014,2015,2016
che,9626.0,11449.0,11199.0,18434.0,29449.0
rect,1170.0,1160.0,1678.0,2559.0,3993.0
invt,0.0,0.0,0.0,0.0,0.0
aco,471.0,461.0,793.0,659.0,959.0
act,11267.0,13070.0,13670.0,21652.0,34401.0
ppent,2391.0,2882.0,3967.0,5687.0,8591.0
ivaeq,0.0,0.0,0.0,0.0,0.0
ivao,0.0,0.0,0.0,0.0,0.0
intan,1388.0,1722.0,21910.0,21272.0,20657.0
ao,57.0,221.0,637.0,796.0,1312.0


In [7]:
# Income sheet retrieved for the selected ticker
i

Unnamed: 0,2012,2013,2014,2015,2016
revt,5089.0,7872.0,12466.0,17928.0,27638.0
cogs,720.0,756.0,945.0,917.0,1448.0
xsga,3187.0,3193.0,5297.0,8767.0,11356.0
oibdp,1182.0,3923.0,6224.0,8244.0,14834.0
oiadp,538.0,2921.0,4982.0,6294.0,12493.0
xint,51.0,56.0,23.0,23.0,10.0
nopi,7.0,6.0,-61.0,-8.0,101.0
spi,0.0,-117.0,12.0,-69.0,-66.0
pi,494.0,2754.0,4910.0,6194.0,12518.0
txt,441.0,1254.0,1970.0,2506.0,2301.0


#### Understand trends for various financial metrics and ratios

In [8]:
from data.scripts.simplified_finance_stats.capture_trends import get_trend_data

In [9]:
# Specify the timeline with last n years to get data from

def get_last_n_data(df, n):
    """Return a copy of ``df`` restricted to its last ``n`` columns.

    Columns are selected by position, so the original column order is kept
    and repeated column labels are not duplicated in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose trailing columns are wanted. In this notebook the
        columns are years, so this keeps the most recent ``n`` years.
    n : int
        Number of trailing columns to keep. ``0`` yields a frame with no
        columns; a value larger than the number of columns yields all of
        them.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the last ``n`` columns of ``df``.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative, got {!r}".format(n))

    # A plain ``[-n:]`` slice returns *everything* when n == 0, so compute the
    # start position explicitly and clamp it at the first column.
    start = max(df.shape[1] - n, 0)
    return df.iloc[:, start:].copy()

# Restrict the ratios to the 15 most recent columns, then compute their trends
df_ratios = get_last_n_data(df=df_ratios, n=15)
d = get_trend_data(df_ratios)

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [None]:
# Preview the first rows of the trend table returned by get_trend_data
d.head()

Currently, `get_trend_data` fits a linear regression model. In the above dataframe, the slope and constant are the outputs of that model. I believe fitting overly complex models to financial data is not very helpful, especially when we are only looking at general trends. It is better to be roughly right than precisely wrong.

In [None]:
# Preview the first rows of the ratios frame after trimming it to the last columns
df_ratios.head()

Plot regression and distribution of df_ratios dataframe

In [None]:
# Plotting the data

def plot_trends(df, df_trends, ticker=None):
    """Draw a distribution plot and a regression plot for every row of ``df``.

    For each row label (feature) of ``df`` a two-panel figure is produced:
    the left panel shows the distribution of the row's values (histogram with
    KDE and rug), the right panel a seaborn regression plot of the values
    against the column labels, with the stored R2 value in the legend.
    A feature whose plot fails is reported and skipped; the remaining
    features are still plotted.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per feature. The column labels are used as x-values.
        NOTE(review): this assumes the labels are numeric (e.g. years) --
        confirm against the loaders.
    df_trends : pandas.DataFrame
        Indexed by the same feature labels as ``df``; must provide an
        ``'r2'`` column.
    ticker : str, optional
        Text prefixed to every plot title. When omitted, the notebook-level
        variable ``tick`` is used if it exists, matching the previous
        behaviour of this function.

    Returns
    -------
    None
    """
    if ticker is None:
        # Backward-compatible default: the title used to read the global `tick`.
        ticker = globals().get('tick', '')
    prefix = '{} '.format(ticker) if ticker else ''

    x = np.asarray(df.columns.tolist())

    for feature in df.index.tolist():
        y = df.loc[feature].values
        r2_val = round(df_trends['r2'].loc[feature], 3)

        fig = None
        try:
            fig, (ax1, ax2) = plt.subplots(ncols=2)
            fig.set_size_inches(15, 5)
            # NOTE: distplot is deprecated in seaborn >= 0.11 (histplot/displot
            # replace it); kept so the notebook still runs on the older
            # seaborn release it was written against.
            sns.distplot(y, kde=1, bins=20, rug=1, norm_hist=0, color='g', ax=ax1)
            ax1.set_title('{}{} - Distribution'.format(prefix, feature))
            # Keyword arguments work on old and new seaborn; positional x/y
            # are rejected by seaborn >= 0.12.
            sns.regplot(x=x, y=y, label='R2: ' + str(r2_val), ax=ax2)
            ax2.set_title('{}{} - Regression'.format(prefix, feature))
            ax2.legend(loc='best')
            plt.show()
        except Exception as exc:
            # Best effort: one unplottable feature must not stop the others,
            # but the reader should know that it was skipped and why.
            print('Skipping plots for {!r}: {}'.format(feature, exc))
        finally:
            # Release the figure even when plotting failed half-way.
            if fig is not None:
                plt.close(fig)


In [None]:
# Distribution and regression plots for every ratio
plot_trends(df_ratios, d)

In [None]:
# Fundamental data: restrict to the 12 most recent columns, then compute trends
all_fin_data = get_last_n_data(df=all_fin_data, n=12)
d_trend_all = get_trend_data(all_fin_data)

In [None]:
# Distribution and regression plots for every fundamental data item
plot_trends(all_fin_data, d_trend_all)

In [None]:
# Display the trend table computed for the fundamental data
d_trend_all