### FINA 4380 with Marius Popescu

### Characteristic-Based Portfolio Formation

In [1]:
import numpy as np
import pandas as pd

import datetime as dt
import pandas_datareader.data as web

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
# Read the monthly file, keeping only the date, ticker, price and return
# columns; 'date' is parsed into timestamps and becomes the index.
data = pd.read_csv(
    'month_rets.csv',
    usecols=['date', 'TICKER', 'PRC', 'RET'],
    index_col='date',
    parse_dates=True,
)
data.head()

Unnamed: 0_level_0,TICKER,PRC,RET
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-31,MSFT,27.45,0.027717
2013-02-28,MSFT,27.8,0.021129
2013-03-28,MSFT,28.605,0.028957
2013-04-30,MSFT,33.1,0.15714
2013-05-31,MSFT,34.9,0.061329


In [3]:
# Snap every date to the last calendar day of its month. MonthEnd(0) leaves a
# date that is already a month end unchanged and rolls any other date forward
# (e.g. 2013-03-28 becomes 2013-03-31), so re-running this cell is harmless.
data.index = data.index + pd.offsets.MonthEnd(0)

In [4]:
#data.info()

In [5]:
# List the distinct tickers present in the sample.
data['TICKER'].unique()

array(['MSFT', 'IBM', 'FB', 'AAPL', 'AMZN', 'GOOGL'], dtype=object)

In [6]:
# Cross-section of tickers and prices for January 2013 (partial-string
# indexing on the DatetimeIndex selects every row in that month).
# .copy() makes this an independent frame, so adding a column to it later
# does not raise pandas' SettingWithCopyWarning or write into a slice of `data`.
prices_201301 = data[['TICKER', 'PRC']].loc['2013-01'].copy()
prices_201301

Unnamed: 0_level_0,TICKER,PRC
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-31,MSFT,27.45
2013-01-31,IBM,203.07001
2013-01-31,FB,30.981
2013-01-31,AAPL,455.48999
2013-01-31,AMZN,265.5
2013-01-31,GOOGL,755.69


In [7]:
# Split the January-2013 prices into three equal-count buckets by price;
# T1 holds the lowest prices and T3 the highest.
prices_201301['Rank'] = pd.qcut(
    prices_201301['PRC'],
    q=3,
    labels=['T1', 'T2', 'T3'],
)
# Same split with integer labels 1-3 instead of strings:
# prices_201301['Rank'] = pd.qcut(prices_201301['PRC'], 3, labels=range(1, 4))

In [8]:
# Show the cross-section ordered by price so the tercile boundaries are visible.
prices_201301.sort_values(by='PRC')

Unnamed: 0_level_0,TICKER,PRC,Rank
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-31,MSFT,27.45,T1
2013-01-31,FB,30.981,T1
2013-01-31,IBM,203.07001,T2
2013-01-31,AMZN,265.5,T2
2013-01-31,AAPL,455.48999,T3
2013-01-31,GOOGL,755.69,T3


In [9]:
# Ticker and price columns for the whole sample (all dates, all tickers).
prices = data[['TICKER', 'PRC']]
prices.head()

Unnamed: 0_level_0,TICKER,PRC
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-31,MSFT,27.45
2013-02-28,MSFT,27.8
2013-03-31,MSFT,28.605
2013-04-30,MSFT,33.1
2013-05-31,MSFT,34.9


In [10]:
# Form the price terciles once a year, using only the June rows.
prices_terciles = prices.loc[prices.index.month == 6].copy()

# Within each June date, bucket the tickers into three equal-count groups by
# price (T1 = lowest prices); transform keeps the labels aligned row by row.
prices_terciles['Rank'] = (
    prices_terciles
    .groupby('date')['PRC']
    .transform(lambda prc: pd.qcut(prc, q=3, labels=['T1', 'T2', 'T3']))
)
# Variant with integer labels 1-3, ranking every month in `prices`:
# prices['Rank'] = prices.groupby('date')['PRC'].transform(lambda x: pd.qcut(x, 3, labels=range(1, 4)))

In [11]:
# Display the June rankings organised by formation date and tercile.
(
    prices_terciles
    .reset_index()
    .set_index(['date', 'Rank'])
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,TICKER,PRC
date,Rank,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-06-30,T1,MSFT,34.545
2013-06-30,T1,FB,24.88
2013-06-30,T2,IBM,191.11
2013-06-30,T2,AMZN,277.69
2013-06-30,T3,AAPL,396.53
2013-06-30,T3,GOOGL,880.37
2014-06-30,T1,MSFT,41.7
2014-06-30,T1,FB,67.29
2014-06-30,T2,IBM,181.27
2014-06-30,T2,AAPL,92.93


In [12]:
# The price was only needed to build the ranking; keep just TICKER and Rank.
# Rebinding (instead of inplace=True) together with errors='ignore' makes the
# cell safe to re-run: a second execution no longer raises KeyError because
# the 'PRC' column is already gone.
prices_terciles = prices_terciles.drop(columns='PRC', errors='ignore')

In [13]:
# A ranking formed at the end of June is applied from July onward, so move
# each formation date to the following month end (June 30 -> July 31).
# The guard makes the cell idempotent: an unconditional shift would push the
# dates one more month on every re-run, silently misaligning ranks and returns.
if (prices_terciles.index.month == 6).all():
    prices_terciles.index = prices_terciles.index + pd.offsets.MonthEnd(1)
prices_terciles.head()

Unnamed: 0_level_0,TICKER,Rank
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-07-31,MSFT,T1
2014-07-31,MSFT,T1
2015-07-31,MSFT,T1
2016-07-31,MSFT,T1
2017-07-31,MSFT,T1


In [14]:
# Look at the first ten rows of the month-end-aligned data before merging.
data.head(10)

Unnamed: 0_level_0,TICKER,PRC,RET
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-31,MSFT,27.45,0.027717
2013-02-28,MSFT,27.8,0.021129
2013-03-31,MSFT,28.605,0.028957
2013-04-30,MSFT,33.1,0.15714
2013-05-31,MSFT,34.9,0.061329
2013-06-30,MSFT,34.545,-0.010172
2013-07-31,MSFT,31.84,-0.078304
2013-08-31,MSFT,33.4,0.056219
2013-09-30,MSFT,33.28,-0.003593
2013-10-31,MSFT,35.405,0.063852


In [15]:
# Attach to every monthly return the most recent tercile label of that ticker:
# merge_asof matches each row to the latest ranking date at or before it
# (its default backward direction), separately per TICKER. Both inputs are
# sorted on the date key first, as merge_asof requires. Rows with a missing
# value in any column -- such as months before the first ranking takes
# effect -- are then dropped.
new_df = pd.merge_asof(
    data.drop(columns='PRC').sort_index(),
    prices_terciles.sort_index(),
    on='date',
    by='TICKER',
).dropna()
new_df.set_index(['date', 'Rank']).sort_index().head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,TICKER,RET
date,Rank,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-07-31,T1,MSFT,-0.078304
2013-07-31,T1,FB,0.4791
2013-07-31,T2,IBM,0.020564
2013-07-31,T2,AMZN,0.084735
2013-07-31,T3,AAPL,0.141225
2013-07-31,T3,GOOGL,0.008383
2013-08-31,T1,MSFT,0.056219
2013-08-31,T1,FB,0.12212
2013-08-31,T2,IBM,-0.060603
2013-08-31,T2,AMZN,-0.067193


In [99]:
# Equal-weighted average return of each tercile in each month, pivoted so the
# terciles become columns.
# Rank is produced by pd.qcut and is presumably still categorical after the
# merge; observed=False is stated explicitly so the result does not depend on
# the pandas default for categorical groupers (which newer versions deprecate).
# unstack('Rank') names the level instead of relying on its position.
(
    new_df
    .drop(columns='TICKER')
    .groupby(['date', 'Rank'], observed=False)
    .mean()
    .unstack('Rank')
)

Unnamed: 0_level_0,RET,RET,RET
Rank,T1,T2,T3
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2013-07-31,0.200398,0.05265,0.074804
2013-08-31,0.089169,-0.063898,0.018687
2013-09-30,0.106403,0.064321,0.006386
2013-10-31,0.031677,0.066068,0.136484
2013-11-30,0.010618,0.044604,0.048911
2013-12-31,0.071807,0.028523,0.033293
2014-01-31,0.078218,-0.079306,-0.026964
2014-02-28,0.056978,0.031468,0.043339
2014-03-31,-0.025057,-0.01577,-0.031622
2014-04-30,-0.011015,-0.037579,0.028878
