In [1]:
"""ETL for UK stocks

Please run this script after extracting the following data
1. Fundamental data
2. Price data
3. Fama French factor data

"""
# library
import pandas as pd
from common import (data_path, date_col, missing_code, price_col,
                    processed_path, reindex, remove_missingChar,
                    remove_symbols, symbol_col)

# Raise the pandas display limits to 1000 rows and 1000 columns for this session.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

In [2]:
"""Load data"""
# Returns data: read the processed CSV, convert the date column to datetimes,
# and index the rows by [symbol_col, date_col].
price = (
    pd.read_csv(processed_path["returns"])
    .assign(**{date_col: lambda raw: pd.to_datetime(raw[date_col])})
    .set_index([symbol_col, date_col])
)
price.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,excess_returns,r12_7,r2_1
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3IN,2008-04-30,0.071916,-0.054559,0.040225
3IN,2008-05-31,0.022298,-0.056313,0.071916
3IN,2008-06-30,-0.010799,-0.021389,0.022298
3IN,2008-07-31,0.002396,0.077308,-0.010799
3IN,2008-08-31,-0.072512,0.185581,0.002396


In [3]:
# Factor data: read the processed CSV, convert the date column to datetimes,
# and index the rows by date_col only (there is one row per date, not per symbol).
factors = (
    pd.read_csv(processed_path["factor"])
    .assign(**{date_col: lambda raw: pd.to_datetime(raw[date_col])})
    .set_index(date_col)
)
factors.head()

Unnamed: 0_level_0,smb,hml,umd,rf,rm,rmrf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-10-31,3.6e-05,-0.072617,0.041297,0.011246,0.060245,0.048999
1980-11-30,0.006165,-0.018527,0.006964,0.010202,0.008133,-0.002069
1980-12-31,0.002356,-0.001671,0.015869,0.010252,-0.044098,-0.05435
1981-01-31,0.020263,0.005622,0.014245,0.009949,-0.005982,-0.015931
1981-02-28,-0.008055,0.022217,0.006087,0.009175,0.057703,0.048528


In [4]:
# Fundamental data: read the processed CSV, convert the date column to
# datetimes, and index the rows by [symbol_col, date_col].
fundamental = (
    pd.read_csv(processed_path["fundamental"])
    .assign(**{date_col: lambda raw: pd.to_datetime(raw[date_col])})
    .set_index([symbol_col, date_col])
)

fundamental.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,revenue,costAndExpenses,depreciationAndAmortization,ebitda,ebitdaratio,operatingIncome,incomeBeforeTax,incomeBeforeTaxRatio,netIncome,netIncomeRatio,eps,epsdiluted,weightedAverageShsOut,weightedAverageShsOutDil
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ABC,2010-01-31,-0.012672,0.005278,-2.611476,-0.532474,-0.519802,-0.044206,0.26598,0.278653,0.232303,0.244975,-0.256053,-0.241976,0.480895,0.475188
ABC,2010-02-28,-0.012672,0.005278,-2.611476,-0.532474,-0.519802,-0.044206,0.26598,0.278653,0.232303,0.244975,-0.256053,-0.241976,0.480895,0.475188
ABC,2010-03-31,-0.012672,0.005278,-2.611476,-0.532474,-0.519802,-0.044206,0.26598,0.278653,0.232303,0.244975,-0.256053,-0.241976,0.480895,0.475188
ABC,2010-04-30,-0.012672,0.005278,-2.611476,-0.532474,-0.519802,-0.044206,0.26598,0.278653,0.232303,0.244975,-0.256053,-0.241976,0.480895,0.475188
ABC,2010-05-31,-0.012672,0.005278,-2.611476,-0.532474,-0.519802,-0.044206,0.26598,0.278653,0.232303,0.244975,-0.256053,-0.241976,0.480895,0.475188


In [5]:
"""Join the data"""
# Fama factor data.
# combine_first keeps the values of `price` and fills the remaining cells /
# columns from `factors`; rows that still contain any NaN are dropped and
# everything is cast to float. The frame then goes through the project
# helpers imported from `common` (remove_missingChar -> reindex ->
# remove_symbols -> reindex).
# NOTE(review): the helpers' exact semantics are not visible in this notebook;
# the displayed result contains rows filled with -99.99 — confirm this is the
# intended `missing_code` padding.
fama_cleaned = (
    price.combine_first(factors)
    .dropna()
    .astype("float")
    .pipe(remove_missingChar)
    .pipe(reindex)
    .pipe(remove_symbols)
    .pipe(reindex)
)

# Put price_col first by joining it back onto the remaining columns via the index.
fama_target = fama_cleaned[[price_col]]
fama_features = fama_cleaned.drop(columns=[price_col])
fama_data = fama_target.merge(fama_features, left_index=True, right_index=True)

# Summary: first date, last date, and number of distinct symbols.
fama_dates = fama_data.index.get_level_values(date_col)
fama_symbols = fama_data.index.get_level_values(symbol_col)
print(
    fama_dates.min(),
    fama_dates.max(),
    fama_symbols.unique().shape[0]
)
fama_data.info()
fama_data.loc["ABC"]

1998-03-31 00:00:00 2017-12-31 00:00:00 844
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 200872 entries, ('3IN', Timestamp('1998-03-31 00:00:00', freq='M')) to ('ZYT', Timestamp('2017-12-31 00:00:00', freq='M'))
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   excess_returns  200872 non-null  float64
 1   hml             200872 non-null  float64
 2   r12_7           200872 non-null  float64
 3   r2_1            200872 non-null  float64
 4   rf              200872 non-null  float64
 5   rm              200872 non-null  float64
 6   rmrf            200872 non-null  float64
 7   smb             200872 non-null  float64
 8   umd             200872 non-null  float64
dtypes: float64(9)
memory usage: 18.6+ MB


Unnamed: 0_level_0,excess_returns,hml,r12_7,r2_1,rf,rm,rmrf,smb,umd
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1998-03-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-04-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-05-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-06-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-07-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-08-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-09-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-10-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-11-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-12-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99


In [6]:
# Firm characteristic data.
# combine_first keeps the values of `fama_data` and fills the remaining cells /
# columns from `fundamental`; rows that still contain any NaN are dropped and
# everything is cast to float. The frame then goes through the project
# helpers imported from `common` (remove_missingChar -> reindex ->
# remove_symbols -> reindex).
# NOTE(review): the helpers' exact semantics are not visible in this notebook;
# the displayed result contains rows filled with -99.99 — confirm this is the
# intended `missing_code` padding.
firm_cleaned = (
    fama_data.combine_first(fundamental)
    .dropna()
    .astype("float")
    .pipe(remove_missingChar)
    .pipe(reindex)
    .pipe(remove_symbols)
    .pipe(reindex)
)

# Put price_col first by joining it back onto the remaining columns via the index.
firm_target = firm_cleaned[[price_col]]
firm_features = firm_cleaned.drop(columns=[price_col])
firm_char = firm_target.merge(firm_features, left_index=True, right_index=True)

# Summary: first date, last date, and number of distinct symbols.
firm_dates = firm_char.index.get_level_values(date_col)
firm_symbols = firm_char.index.get_level_values(symbol_col)
print(
    firm_dates.min(),
    firm_dates.max(),
    firm_symbols.unique().shape[0]
)
firm_char.info()
firm_char.loc["ABC"]

1998-03-31 00:00:00 2017-12-31 00:00:00 203
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 48314 entries, ('ABC', Timestamp('1998-03-31 00:00:00', freq='M')) to ('ZTF', Timestamp('2017-12-31 00:00:00', freq='M'))
Data columns (total 23 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   excess_returns               48314 non-null  float64
 1   costAndExpenses              48314 non-null  float64
 2   depreciationAndAmortization  48314 non-null  float64
 3   ebitda                       48314 non-null  float64
 4   ebitdaratio                  48314 non-null  float64
 5   eps                          48314 non-null  float64
 6   epsdiluted                   48314 non-null  float64
 7   hml                          48314 non-null  float64
 8   incomeBeforeTax              48314 non-null  float64
 9   incomeBeforeTaxRatio         48314 non-null  float64
 10  netIncome                    48314 non-null  float

Unnamed: 0_level_0,excess_returns,costAndExpenses,depreciationAndAmortization,ebitda,ebitdaratio,eps,epsdiluted,hml,incomeBeforeTax,incomeBeforeTaxRatio,netIncome,netIncomeRatio,operatingIncome,r12_7,r2_1,revenue,rf,rm,rmrf,smb,umd,weightedAverageShsOut,weightedAverageShsOutDil
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1998-03-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-04-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-05-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-06-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-07-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-08-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-09-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-10-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-11-30,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99
1998-12-31,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99,-99.99


In [7]:
"""Date check"""
# Share and count of non-missing observations per month.
#
# For each dataset, `missing_code` is first turned into NaN, then for every
# date we compute (a) the fraction and (b) the number of symbols whose
# `price_col` value is present.
def _monthly_coverage(frame, label):
    """Return per-month coverage of `price_col` as two one-row frames.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose index has a level named `date_col` and which contains a
        `price_col` column; cells equal to `missing_code` count as missing.
    label : str
        Row label for the share row; the count row is labelled
        ``label + "_count"``.

    Returns
    -------
    list of pandas.DataFrame
        ``[share_row, count_row]``, each with one row and one column per date.
    """
    # Only `price_col` is aggregated. Selecting it *before* grouping keeps the
    # string symbol level out of the aggregation: averaging it raises a
    # TypeError on pandas >= 2.0, and summing it would concatenate strings.
    present = frame.replace(missing_code, float("NaN"))[price_col].notnull()
    by_month = present.groupby(level=date_col)
    return [
        by_month.mean().to_frame(label).T,
        by_month.sum().to_frame(label + "_count").T,
    ]


check = pd.concat(
    _monthly_coverage(fama_data, "fama")
    + _monthly_coverage(firm_char, "firm_char")
)
check

date,1998-03-31,1998-04-30,1998-05-31,1998-06-30,1998-07-31,1998-08-31,1998-09-30,1998-10-31,1998-11-30,1998-12-31,1999-01-31,1999-02-28,1999-03-31,1999-04-30,1999-05-31,1999-06-30,1999-07-31,1999-08-31,1999-09-30,1999-10-31,1999-11-30,1999-12-31,2000-01-31,2000-02-29,2000-03-31,2000-04-30,2000-05-31,2000-06-30,2000-07-31,2000-08-31,2000-09-30,2000-10-31,2000-11-30,2000-12-31,2001-01-31,2001-02-28,2001-03-31,2001-04-30,2001-05-31,2001-06-30,2001-07-31,2001-08-31,2001-09-30,2001-10-31,2001-11-30,2001-12-31,2002-01-31,2002-02-28,2002-03-31,2002-04-30,2002-05-31,2002-06-30,2002-07-31,2002-08-31,2002-09-30,2002-10-31,2002-11-30,2002-12-31,2003-01-31,2003-02-28,2003-03-31,2003-04-30,2003-05-31,2003-06-30,2003-07-31,2003-08-31,2003-09-30,2003-10-31,2003-11-30,2003-12-31,2004-01-31,2004-02-29,2004-03-31,2004-04-30,2004-05-31,2004-06-30,2004-07-31,2004-08-31,2004-09-30,2004-10-31,2004-11-30,2004-12-31,2005-01-31,2005-02-28,2005-03-31,2005-04-30,2005-05-31,2005-06-30,2005-07-31,2005-08-31,2005-09-30,2005-10-31,2005-11-30,2005-12-31,2006-01-31,2006-02-28,2006-03-31,2006-04-30,2006-05-31,2006-06-30,2006-07-31,2006-08-31,2006-09-30,2006-10-31,2006-11-30,2006-12-31,2007-01-31,2007-02-28,2007-03-31,2007-04-30,2007-05-31,2007-06-30,2007-07-31,2007-08-31,2007-09-30,2007-10-31,2007-11-30,2007-12-31,2008-01-31,2008-02-29,2008-03-31,2008-04-30,2008-05-31,2008-06-30,2008-07-31,2008-08-31,2008-09-30,2008-10-31,2008-11-30,2008-12-31,2009-01-31,2009-02-28,2009-03-31,2009-04-30,2009-05-31,2009-06-30,2009-07-31,2009-08-31,2009-09-30,2009-10-31,2009-11-30,2009-12-31,2010-01-31,2010-02-28,2010-03-31,2010-04-30,2010-05-31,2010-06-30,2010-07-31,2010-08-31,2010-09-30,2010-10-31,2010-11-30,2010-12-31,2011-01-31,2011-02-28,2011-03-31,2011-04-30,2011-05-31,2011-06-30,2011-07-31,2011-08-31,2011-09-30,2011-10-31,2011-11-30,2011-12-31,2012-01-31,2012-02-29,2012-03-31,2012-04-30,2012-05-31,2012-06-30,2012-07-31,2012-08-31,2012-09-30,2012-10-31,2012-11-30,2012-12-31,2013-01-31,2013-02-28,2013-03-31,2013
-04-30,2013-05-31,2013-06-30,2013-07-31,2013-08-31,2013-09-30,2013-10-31,2013-11-30,2013-12-31,2014-01-31,2014-02-28,2014-03-31,2014-04-30,2014-05-31,2014-06-30,2014-07-31,2014-08-31,2014-09-30,2014-10-31,2014-11-30,2014-12-31,2015-01-31,2015-02-28,2015-03-31,2015-04-30,2015-05-31,2015-06-30,2015-07-31,2015-08-31,2015-09-30,2015-10-31,2015-11-30,2015-12-31,2016-01-31,2016-02-29,2016-03-31,2016-04-30,2016-05-31,2016-06-30,2016-07-31,2016-08-31,2016-09-30,2016-10-31,2016-11-30,2016-12-31,2017-01-31,2017-02-28,2017-03-31,2017-04-30,2017-05-31,2017-06-30,2017-07-31,2017-08-31,2017-09-30,2017-10-31,2017-11-30,2017-12-31
fama,0.399289,0.399289,0.399289,0.399289,0.401659,0.402844,0.402844,0.404028,0.406398,0.408768,0.412322,0.413507,0.413507,0.414692,0.415877,0.417062,0.417062,0.420616,0.420616,0.420616,0.421801,0.422986,0.424171,0.424171,0.424171,0.425355,0.425355,0.427725,0.430095,0.43128,0.43128,0.43128,0.43128,0.433649,0.436019,0.454976,0.454976,0.457346,0.462085,0.466825,0.466825,0.472749,0.475118,0.476303,0.478673,0.481043,0.486967,0.490521,0.491706,0.492891,0.492891,0.496445,0.496445,0.49763,0.5,0.5,0.501185,0.50237,0.505924,0.505924,0.507109,0.508294,0.510664,0.511848,0.511848,0.516588,0.516588,0.516588,0.516588,0.517773,0.517773,0.518957,0.520142,0.522512,0.523697,0.522512,0.523697,0.524882,0.526066,0.528436,0.53673,0.540284,0.541469,0.543839,0.543839,0.545024,0.548578,0.553318,0.563981,0.578199,0.579384,0.584123,0.590047,0.593602,0.603081,0.603081,0.60545,0.614929,0.619668,0.624408,0.631517,0.64218,0.645735,0.646919,0.651659,0.656398,0.669431,0.668246,0.670616,0.67654,0.682464,0.684834,0.689573,0.689573,0.693128,0.695498,0.702607,0.7109,0.718009,0.720379,0.722749,0.727488,0.732227,0.732227,0.739336,0.742891,0.742891,0.745261,0.74763,0.75237,0.75237,0.754739,0.754739,0.757109,0.759479,0.765403,0.768957,0.768957,0.768957,0.770142,0.772512,0.772512,0.772512,0.772512,0.772512,0.772512,0.772512,0.772512,0.773697,0.773697,0.773697,0.774882,0.777251,0.779621,0.780806,0.781991,0.78436,0.7891,0.795024,0.797393,0.799763,0.804502,0.805687,0.806872,0.809242,0.811611,0.817536,0.81872,0.819905,0.822275,0.825829,0.827014,0.830569,0.835308,0.836493,0.838863,0.840047,0.843602,0.844787,0.844787,0.847156,0.849526,0.849526,0.850711,0.851896,0.85782,0.859005,0.86019,0.861374,0.861374,0.863744,0.866114,0.870853,0.874408,0.879147,0.885071,0.888626,0.896919,0.898104,0.904028,0.909953,0.915877,0.921801,0.921801,0.92654,0.939573,0.949052,0.958531,0.970379,0.978673,0.979858,0.982227,0.986967,0.990521,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fama_count,337.0,337.0,337.0,337.0,339.0,340.0,340.0,341.0,343.0,345.0,348.0,349.0,349.0,350.0,351.0,352.0,352.0,355.0,355.0,355.0,356.0,357.0,358.0,358.0,358.0,359.0,359.0,361.0,363.0,364.0,364.0,364.0,364.0,366.0,368.0,384.0,384.0,386.0,390.0,394.0,394.0,399.0,401.0,402.0,404.0,406.0,411.0,414.0,415.0,416.0,416.0,419.0,419.0,420.0,422.0,422.0,423.0,424.0,427.0,427.0,428.0,429.0,431.0,432.0,432.0,436.0,436.0,436.0,436.0,437.0,437.0,438.0,439.0,441.0,442.0,441.0,442.0,443.0,444.0,446.0,453.0,456.0,457.0,459.0,459.0,460.0,463.0,467.0,476.0,488.0,489.0,493.0,498.0,501.0,509.0,509.0,511.0,519.0,523.0,527.0,533.0,542.0,545.0,546.0,550.0,554.0,565.0,564.0,566.0,571.0,576.0,578.0,582.0,582.0,585.0,587.0,593.0,600.0,606.0,608.0,610.0,614.0,618.0,618.0,624.0,627.0,627.0,629.0,631.0,635.0,635.0,637.0,637.0,639.0,641.0,646.0,649.0,649.0,649.0,650.0,652.0,652.0,652.0,652.0,652.0,652.0,652.0,652.0,653.0,653.0,653.0,654.0,656.0,658.0,659.0,660.0,662.0,666.0,671.0,673.0,675.0,679.0,680.0,681.0,683.0,685.0,690.0,691.0,692.0,694.0,697.0,698.0,701.0,705.0,706.0,708.0,709.0,712.0,713.0,713.0,715.0,717.0,717.0,718.0,719.0,724.0,725.0,726.0,727.0,727.0,729.0,731.0,735.0,738.0,742.0,747.0,750.0,757.0,758.0,763.0,768.0,773.0,778.0,778.0,782.0,793.0,801.0,809.0,819.0,826.0,827.0,829.0,833.0,836.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0,844.0
firm_char,0.46798,0.46798,0.46798,0.477833,0.482759,0.487685,0.487685,0.487685,0.487685,0.492611,0.502463,0.507389,0.507389,0.507389,0.507389,0.527094,0.527094,0.527094,0.527094,0.527094,0.527094,0.527094,0.536946,0.536946,0.541872,0.541872,0.541872,0.541872,0.541872,0.541872,0.541872,0.527094,0.527094,0.53202,0.53202,0.527094,0.527094,0.527094,0.527094,0.527094,0.527094,0.53202,0.53202,0.522167,0.522167,0.522167,0.536946,0.536946,0.536946,0.536946,0.536946,0.546798,0.541872,0.541872,0.541872,0.53202,0.53202,0.536946,0.541872,0.541872,0.541872,0.541872,0.541872,0.541872,0.541872,0.551724,0.551724,0.541872,0.541872,0.541872,0.55665,0.55665,0.55665,0.55665,0.55665,0.55665,0.55665,0.55665,0.55665,0.546798,0.546798,0.546798,0.561576,0.561576,0.561576,0.561576,0.566502,0.571429,0.571429,0.571429,0.571429,0.561576,0.566502,0.571429,0.576355,0.571429,0.571429,0.571429,0.576355,0.576355,0.576355,0.576355,0.576355,0.566502,0.566502,0.566502,0.576355,0.571429,0.571429,0.566502,0.566502,0.566502,0.566502,0.566502,0.571429,0.551724,0.551724,0.55665,0.55665,0.55665,0.55665,0.561576,0.561576,0.561576,0.55665,0.55665,0.55665,0.551724,0.55665,0.561576,0.561576,0.566502,0.566502,0.566502,0.566502,0.566502,0.566502,0.566502,0.571429,0.571429,0.571429,0.571429,0.581281,0.576355,0.586207,0.586207,0.586207,0.586207,0.586207,0.586207,0.586207,0.581281,0.581281,0.586207,0.600985,0.605911,0.605911,0.615764,0.62069,0.62069,0.62069,0.62069,0.62069,0.605911,0.605911,0.605911,0.635468,0.635468,0.640394,0.64532,0.64532,0.64532,0.650246,0.655172,0.655172,0.62069,0.625616,0.630542,0.630542,0.625616,0.630542,0.625616,0.625616,0.625616,0.62069,0.630542,0.630542,0.62069,0.62069,0.630542,0.635468,0.635468,0.64532,0.640394,0.650246,0.660099,0.660099,0.660099,0.660099,0.64532,0.650246,0.655172,0.669951,0.669951,0.669951,0.674877,0.679803,0.704433,0.70936,0.719212,0.719212,0.665025,0.665025,0.669951,0.674877,0.674877,0.674877,0.674877,0.674877,0.674877,0.640394,0.640394,0.640394,0.640394,0.640394,0.6403
94,0.640394,0.635468,0.635468,0.630542,0.630542,0.630542,0.596059,0.596059,0.596059,0.596059,0.596059,0.596059
firm_char_count,95.0,95.0,95.0,97.0,98.0,99.0,99.0,99.0,99.0,100.0,102.0,103.0,103.0,103.0,103.0,107.0,107.0,107.0,107.0,107.0,107.0,107.0,109.0,109.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,107.0,107.0,108.0,108.0,107.0,107.0,107.0,107.0,107.0,107.0,108.0,108.0,106.0,106.0,106.0,109.0,109.0,109.0,109.0,109.0,111.0,110.0,110.0,110.0,108.0,108.0,109.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,112.0,112.0,110.0,110.0,110.0,113.0,113.0,113.0,113.0,113.0,113.0,113.0,113.0,113.0,111.0,111.0,111.0,114.0,114.0,114.0,114.0,115.0,116.0,116.0,116.0,116.0,114.0,115.0,116.0,117.0,116.0,116.0,116.0,117.0,117.0,117.0,117.0,117.0,115.0,115.0,115.0,117.0,116.0,116.0,115.0,115.0,115.0,115.0,115.0,116.0,112.0,112.0,113.0,113.0,113.0,113.0,114.0,114.0,114.0,113.0,113.0,113.0,112.0,113.0,114.0,114.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,116.0,116.0,116.0,116.0,118.0,117.0,119.0,119.0,119.0,119.0,119.0,119.0,119.0,118.0,118.0,119.0,122.0,123.0,123.0,125.0,126.0,126.0,126.0,126.0,126.0,123.0,123.0,123.0,129.0,129.0,130.0,131.0,131.0,131.0,132.0,133.0,133.0,126.0,127.0,128.0,128.0,127.0,128.0,127.0,127.0,127.0,126.0,128.0,128.0,126.0,126.0,128.0,129.0,129.0,131.0,130.0,132.0,134.0,134.0,134.0,134.0,131.0,132.0,133.0,136.0,136.0,136.0,137.0,138.0,143.0,144.0,146.0,146.0,135.0,135.0,136.0,137.0,137.0,137.0,137.0,137.0,137.0,130.0,130.0,130.0,130.0,130.0,130.0,130.0,129.0,129.0,128.0,128.0,128.0,121.0,121.0,121.0,121.0,121.0,121.0


In [8]:
"""Export data"""
# Write both joined frames to the CSV locations configured in `data_path`.
# NOTE(review): the frames are exported as built above, i.e. still containing
# the -99.99 filler rows shown in the cell outputs — confirm downstream readers
# expect that sentinel.
for export_frame, export_key in (
    (firm_char, "fundamental"),
    (fama_data, "factor"),
):
    export_frame.to_csv(data_path[export_key])