In [1]:
# packages
import numpy as np
import pandas as pd

In [2]:
# hide warning messages
# NOTE: with no category or module given, this filter silences every warning
# for the rest of the kernel session, including warnings raised by pandas.
import warnings
warnings.filterwarnings("ignore")

### Daterange

In [3]:
# Daily date range used as the index of the factor dataframes built below.
# It is taken from the index of the daily returns dataset; the returns
# themselves are not used in the cells shown here.
returns_path = '../../../input/returns/daily.parquet'
returns = pd.read_parquet(returns_path)
daterange = returns.index
daterange

DatetimeIndex(['2005-01-03', '2005-01-04', '2005-01-05', '2005-01-06',
               '2005-01-07', '2005-01-10', '2005-01-11', '2005-01-12',
               '2005-01-13', '2005-01-14',
               ...
               '2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-23', '2019-12-24', '2019-12-26', '2019-12-27',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', length=3773, freq=None)

### Functions

fill_df function: receives two parameters, factor and df_factor.

* factor is the financial factor ('size', 'value', ..., 'ipo').
* df_factor is the factor dataframe in daily frequency which will be filled.

This function returns df_factor filled with the factor returns at daily frequency.

In [4]:
def fill_df(factor, df_factor, dates=None):
    """Fill ``df_factor`` with daily decile returns summed from per-day files.

    For every date, the file
    ``../../../output/data/double_check/{factor}/{YYYYMMDD}.parquet`` is read,
    its rows are summed column-wise, and the totals for the columns of
    ``df_factor`` are written into the row labelled by that date.

    Parameters
    ----------
    factor : str
        Factor name, used as the sub-directory of the per-day files
        (e.g. 'size', 'value').
    df_factor : pandas.DataFrame
        Frame indexed by date with one column per decile portfolio. It is
        filled in place, one row per date.
    dates : iterable of date-like, optional
        Dates to process; each is expected to be a label of
        ``df_factor.index``. Defaults to the notebook-level ``daterange``.

    Returns
    -------
    pandas.DataFrame
        The filled frame converted to float64 (needed to compute
        correlations). Dates without a per-day file, and columns absent from
        a per-day file, are left as NaN.
    """
    if dates is None:
        dates = daterange

    for date in dates:
        day = pd.Timestamp(date).strftime('%Y%m%d')
        path = f'../../../output/data/double_check/{factor}/{day}.parquet'
        try:
            # deciles portfolio dataframe
            deciles_portfolio = pd.read_parquet(path)
        except FileNotFoundError:
            # no per-day file for this date: keep the row as NaN
            continue

        # Sum once per day, then write the whole row with a single .loc
        # assignment. Chained indexing (df[col][row] = x) may write to a
        # temporary copy and leave the frame unchanged.
        totals = deciles_portfolio.sum(axis=0).reindex(df_factor.columns)
        df_factor.loc[date, :] = totals.to_numpy()

    # converting to float type (to compute correlation)
    return df_factor.astype('float64')

## Intradaily Factors

In [5]:
# Size factor: start from an empty frame on the daily date range with one
# column per decile portfolio (p1..p10), then fill it from the per-day files.
size = fill_df(
    'size',
    pd.DataFrame(index=daterange, columns=[f'p{i}' for i in range(1, 11)]),
)
size

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10
2005-01-03,-0.010109,-0.014020,-0.016132,-0.015536,-0.017383,-0.017261,-0.019373,-0.022418,-0.021244,-0.015023
2005-01-04,-0.013028,-0.015371,-0.017263,-0.017974,-0.019939,-0.020209,-0.019879,-0.022224,-0.021613,-0.015979
2005-01-05,-0.002791,-0.005667,-0.007622,-0.007419,-0.009610,-0.011414,-0.013768,-0.012893,-0.013079,-0.006647
2005-01-06,0.001774,0.002710,0.002593,0.002261,0.000916,0.001457,0.000238,0.000014,-0.000670,0.000907
2005-01-07,-0.003371,-0.002109,-0.005479,-0.005572,-0.007488,-0.013478,-0.014525,-0.015509,-0.016021,-0.007450
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,-0.000679,0.000305,-0.000546,-0.001128,-0.001207,0.000436,0.002240,0.003909,0.005084,0.006440
2019-12-26,0.004725,0.000493,0.000106,0.000992,-0.001072,-0.001296,-0.003418,-0.004265,0.001529,0.003057
2019-12-27,-0.002526,-0.003274,-0.004280,-0.005046,-0.007554,-0.006757,-0.009006,-0.011778,-0.013119,-0.007349
2019-12-30,-0.006588,-0.004699,-0.004705,-0.002762,-0.004505,-0.004506,-0.003058,-0.005136,-0.004270,-0.001719


In [6]:
# Value factor: start from an empty frame on the daily date range with one
# column per decile portfolio (p1..p10), then fill it from the per-day files.
value = fill_df(
    'value',
    pd.DataFrame(index=daterange, columns=[f'p{i}' for i in range(1, 11)]),
)
value

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10
2005-01-03,-0.012373,-0.010467,-0.014667,-0.014894,-0.019096,-0.014457,-0.019404,-0.010784,-0.016662,-0.016814
2005-01-04,-0.017645,-0.015512,-0.014567,-0.014889,-0.019023,-0.014759,-0.017945,-0.013756,-0.010957,-0.012115
2005-01-05,-0.003260,-0.005663,-0.007161,-0.006781,-0.006425,-0.005041,-0.007681,-0.003716,-0.005137,-0.009465
2005-01-06,-0.000552,-0.001850,0.001541,0.003403,0.007615,0.001885,0.001601,0.005978,0.003544,0.006540
2005-01-07,-0.001252,-0.005031,-0.006712,-0.004440,-0.005695,-0.005054,-0.006589,-0.005810,-0.004857,-0.003677
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,-0.000227,-0.000908,-0.001413,0.000741,0.000062,0.001514,-0.000685,-0.000774,-0.000459,0.001132
2019-12-26,0.006030,0.005049,0.001308,-0.000157,0.001472,0.000455,-0.000252,0.000553,0.000048,0.000489
2019-12-27,-0.002977,-0.003638,-0.002873,-0.001522,-0.002470,-0.001537,-0.003092,-0.006428,-0.004066,-0.012573
2019-12-30,-0.008963,-0.002158,-0.008056,-0.005421,-0.002728,-0.000045,-0.003658,-0.003228,-0.004027,-0.002650


## Daily Factors

In [7]:
# Size decile returns stored as a single parquet file; this is the frame the
# per-day aggregation above is compared against.
path = '../../../output/data/double_check/size/p10.parquet'
size_daily = pd.read_parquet(path)
size_daily

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10
2005-01-03,-0.010109,-0.014020,-0.016132,-0.015536,-0.017383,-0.017261,-0.019373,-0.022418,-0.021244,-0.015023
2005-01-04,-0.013028,-0.015371,-0.017263,-0.017974,-0.019939,-0.020209,-0.019879,-0.022224,-0.021613,-0.015979
2005-01-05,-0.002791,-0.005667,-0.007622,-0.007419,-0.009610,-0.011414,-0.013768,-0.012893,-0.013079,-0.006647
2005-01-06,0.001774,0.002710,0.002593,0.002261,0.000916,0.001457,0.000238,0.000014,-0.000670,0.000907
2005-01-07,-0.003371,-0.002109,-0.005479,-0.005572,-0.007488,-0.013478,-0.014525,-0.015509,-0.016021,-0.007450
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,-0.000679,0.000305,-0.000546,-0.001128,-0.001207,0.000436,0.002240,0.003909,0.005084,0.006440
2019-12-26,0.004725,0.000493,0.000106,0.000992,-0.001072,-0.001296,-0.003418,-0.004265,0.001529,0.003057
2019-12-27,-0.002526,-0.003274,-0.004280,-0.005046,-0.007554,-0.006757,-0.009006,-0.011778,-0.013119,-0.007349
2019-12-30,-0.006588,-0.004699,-0.004705,-0.002762,-0.004505,-0.004506,-0.003058,-0.005136,-0.004270,-0.001719


In [8]:
# Value decile returns stored as a single parquet file; this is the frame the
# per-day aggregation above is compared against.
path = '../../../output/data/double_check/value/p10.parquet'
value_daily = pd.read_parquet(path)
value_daily

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10
2005-01-03,-0.012373,-0.010467,-0.014667,-0.014894,-0.019096,-0.014457,-0.019404,-0.010784,-0.016662,-0.016814
2005-01-04,-0.017645,-0.015512,-0.014567,-0.014889,-0.019023,-0.014759,-0.017945,-0.013756,-0.010957,-0.012115
2005-01-05,-0.003260,-0.005663,-0.007161,-0.006781,-0.006425,-0.005041,-0.007681,-0.003716,-0.005137,-0.009465
2005-01-06,-0.000552,-0.001850,0.001541,0.003403,0.007615,0.001885,0.001601,0.005978,0.003544,0.006540
2005-01-07,-0.001252,-0.005031,-0.006712,-0.004440,-0.005695,-0.005054,-0.006589,-0.005810,-0.004857,-0.003677
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,-0.000227,-0.000908,-0.001413,0.000741,0.000062,0.001514,-0.000685,-0.000774,-0.000459,0.001132
2019-12-26,0.006030,0.005049,0.001308,-0.000157,0.001472,0.000455,-0.000252,0.000553,0.000048,0.000489
2019-12-27,-0.002977,-0.003638,-0.002873,-0.001522,-0.002470,-0.001537,-0.003092,-0.006428,-0.004066,-0.012573
2019-12-30,-0.008963,-0.002158,-0.008056,-0.005421,-0.002728,-0.000045,-0.003658,-0.003228,-0.004027,-0.002650


### Daily vs Intradaily

### Difference Rate

In [9]:
def rate(df1, df2):
    """Return the absolute relative difference ``|df1 / df2 - 1|``.

    Rows are matched by position (both indexes are dropped before dividing),
    columns are matched by label. The result carries the index of ``df1``.
    """
    numerator = df1.reset_index(drop=True)
    denominator = df2.reset_index(drop=True)
    relative_gap = numerator.div(denominator).sub(1).abs()
    relative_gap.index = df1.index
    return relative_gap

In [10]:
# element-wise relative difference between the two size frames
rate(size, size_daily)

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10
2005-01-03,2.220446e-16,2.220446e-16,0.000000e+00,2.220446e-16,0.000000e+00,4.440892e-16,2.220446e-16,2.220446e-16,0.000000e+00,2.220446e-16
2005-01-04,1.110223e-16,0.000000e+00,0.000000e+00,2.220446e-16,4.440892e-16,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
2005-01-05,4.440892e-16,4.440892e-16,2.220446e-16,2.220446e-16,0.000000e+00,2.220446e-16,2.220446e-16,2.220446e-16,4.440892e-16,0.000000e+00
2005-01-06,1.998401e-15,1.110223e-16,8.881784e-16,4.440892e-16,0.000000e+00,8.881784e-16,2.775558e-15,2.486900e-13,6.772360e-15,0.000000e+00
2005-01-07,8.881784e-16,1.110223e-15,1.110223e-16,4.440892e-16,1.110223e-16,1.110223e-16,2.220446e-16,3.330669e-16,2.220446e-16,2.220446e-16
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,1.332268e-15,8.881784e-16,1.554312e-15,0.000000e+00,1.332268e-15,2.220446e-15,3.330669e-16,2.220446e-16,0.000000e+00,2.220446e-16
2019-12-26,2.220446e-16,8.881784e-16,1.310063e-14,2.220446e-16,0.000000e+00,2.664535e-15,1.110223e-16,7.771561e-16,4.440892e-16,2.220446e-16
2019-12-27,5.551115e-16,2.220446e-16,0.000000e+00,0.000000e+00,2.220446e-16,5.551115e-16,2.220446e-16,1.110223e-16,0.000000e+00,1.110223e-16
2019-12-30,0.000000e+00,3.330669e-16,5.551115e-16,2.220446e-16,0.000000e+00,6.661338e-16,4.440892e-16,2.220446e-16,1.110223e-15,1.554312e-15


In [11]:
# total of all relative differences as one scalar (a NaN anywhere would make it NaN)
rate(size, size_daily).values.sum()

6.436329247350159e-11

In [12]:
# element-wise relative difference between the two value frames
rate(value, value_daily)

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10
2005-01-03,0.000000e+00,1.110223e-16,2.220446e-16,2.220446e-16,0.000000e+00,2.220446e-16,2.220446e-16,0.000000e+00,2.220446e-16,0.000000e+00
2005-01-04,4.440892e-16,1.110223e-16,2.220446e-16,4.440892e-16,0.000000e+00,2.220446e-16,0.000000e+00,2.220446e-16,3.330669e-16,1.110223e-16
2005-01-05,2.664535e-15,4.440892e-16,0.000000e+00,1.110223e-16,2.220446e-16,4.440892e-16,2.220446e-16,3.330669e-16,3.330669e-16,2.220446e-16
2005-01-06,1.221245e-15,2.220446e-16,2.220446e-16,6.661338e-16,6.661338e-16,2.220446e-16,2.220446e-15,0.000000e+00,8.881784e-16,2.220446e-16
2005-01-07,1.776357e-15,1.110223e-15,8.881784e-16,2.220446e-16,7.771561e-16,4.440892e-16,4.440892e-16,2.220446e-16,0.000000e+00,2.220446e-16
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,4.551914e-15,4.440892e-16,2.220446e-16,0.000000e+00,2.220446e-15,4.440892e-16,7.771561e-16,5.551115e-16,2.220446e-16,1.554312e-15
2019-12-26,0.000000e+00,6.661338e-16,2.442491e-15,2.664535e-15,6.661338e-16,8.881784e-16,2.220446e-16,9.992007e-16,8.992806e-15,2.220446e-15
2019-12-27,4.440892e-16,1.110223e-16,4.440892e-16,4.440892e-16,4.440892e-16,2.220446e-16,0.000000e+00,2.220446e-16,4.440892e-16,2.220446e-16
2019-12-30,2.220446e-16,2.220446e-16,4.440892e-16,3.330669e-16,2.220446e-16,1.199041e-14,4.440892e-16,1.110223e-16,2.220446e-16,1.110223e-16


In [13]:
# total of all relative differences as one scalar (a NaN anywhere would make it NaN)
rate(value, value_daily).values.sum()

1.1281930945017393e-10