# Create January Effect Annual Returns

How to create annual returns that start accumulating on either the 1st or the 10th calendar day of the year.

### Import Packages

In [25]:
import pandas as pd
import numpy as np
import datetime as dt
import sys
sys.path.insert(0, "../")
import util
from importlib import reload
util=reload(util)

### Set Local Macro Variables

In [26]:
# startyear: rows whose calendar year is earlier than this are dropped.
# startday:  day-of-year from which the late-start annual return (annRet10)
#            begins compounding; rows with a smaller day-of-year are dropped.
startyear, startday = 1990, 10

### Read in Daily Data
#### Daily CRSP

In [27]:
# Load the daily CRSP stock file, keeping only the columns used downstream.
crsp_cols = ['permno', 'permco', 'date', 'ret', 'retx', 'shrout', 'prc']

# `usecols` must return True/False per column name. Comparing the lower-cased
# name against the wanted list makes the selection case-insensitive and avoids
# parsing columns that are discarded immediately afterwards. (A callable that
# merely returns `x.lower()` is always truthy and therefore keeps everything.)
crsp_d = pd.read_csv(
    'qcrspdsf_raw.csv.gz',
    compression='gzip',
    usecols=lambda name: name.lower() in crsp_cols,
)

# Normalise header casing, then fix the column order. The explicit copy keeps
# the column assignments below from operating on a slice of the raw frame.
crsp_d.columns = crsp_d.columns.str.lower()
crsp_d = crsp_d[crsp_cols].copy()

# Identifiers are stored as integers.
crsp_d[['permno', 'permco']] = crsp_d[['permno', 'permco']].astype(int)

# Return fields that cannot be parsed as numbers become NaN.
crsp_d['ret'] = pd.to_numeric(crsp_d['ret'], errors='coerce')
crsp_d['retx'] = pd.to_numeric(crsp_d['retx'], errors='coerce')

crsp_d['date'] = pd.to_datetime(crsp_d['date'])

crsp_d.head()

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,permno,permco,date,ret,retx,shrout,prc
0,10001,7953,1990-01-02,0.0,0.0,1022.0,10.125
1,10001,7953,1990-01-03,-0.012346,-0.012346,1022.0,10.0
2,10001,7953,1990-01-04,0.0,0.0,1022.0,10.0
3,10001,7953,1990-01-05,0.00625,0.00625,1022.0,-10.0625
4,10001,7953,1990-01-08,0.006211,0.006211,1022.0,10.125


#### CRSP Daily Stock Event - Name History

In [28]:
# Load the name-history records and reduce them to the rows/columns needed to
# tag each daily observation with its share code and exchange code.
# NOTE(review): exchcd 1-3 and shrcd 10/11 are presumably the usual CRSP
# "common stock on NYSE/AMEX/NASDAQ" screen - confirm against the CRSP docs.
qcrspdse_raw = (
    pd.read_csv('qcrspdse_raw.csv.gz', compression='gzip')
    [['permno', 'shrcd', 'exchcd', 'namedt', 'nameendt']]
    # Identifier and code columns are stored as integers.
    .astype({'permno': int, 'shrcd': int, 'exchcd': int})
    # Keep only the selected exchange codes and share codes.
    .loc[lambda names: names['exchcd'].isin([1, 2, 3])
         & names['shrcd'].isin([10, 11])]
    # Parse both ends of the validity window as datetimes.
    .assign(
        namedt=lambda names: pd.to_datetime(names['namedt']),
        nameendt=lambda names: pd.to_datetime(names['nameendt']),
    )
)

qcrspdse_raw.head()

Unnamed: 0,permno,shrcd,exchcd,namedt,nameendt
0,10000,10,3,1986-01-07,1986-12-03
1,10000,10,3,1986-12-04,1987-03-09
2,10000,10,3,1987-03-10,1987-06-11
3,10001,11,3,1986-01-09,1993-11-21
4,10001,11,3,1993-11-22,2004-06-09


##### Join daily stock data with name history

In [29]:
# Pair every daily row with all name-history records of the same permno ...
crsp_d = pd.merge(crsp_d, qcrspdse_raw, how='left', on='permno')

# ... then keep only the pairing whose [namedt, nameendt] window (inclusive on
# both ends) contains the observation date. Daily rows without any matching
# name record have missing window dates and fall out here as well.
crsp_d = crsp_d[crsp_d['date'].between(crsp_d['namedt'], crsp_d['nameendt'])]

crsp_d.head()

Unnamed: 0,permno,permco,date,ret,retx,shrout,prc,shrcd,exchcd,namedt,nameendt
0,10001,7953,1990-01-02,0.0,0.0,1022.0,10.125,11.0,3.0,1986-01-09,1993-11-21
10,10001,7953,1990-01-03,-0.012346,-0.012346,1022.0,10.0,11.0,3.0,1986-01-09,1993-11-21
20,10001,7953,1990-01-04,0.0,0.0,1022.0,10.0,11.0,3.0,1986-01-09,1993-11-21
30,10001,7953,1990-01-05,0.00625,0.00625,1022.0,-10.0625,11.0,3.0,1986-01-09,1993-11-21
40,10001,7953,1990-01-08,0.006211,0.006211,1022.0,10.125,11.0,3.0,1986-01-09,1993-11-21


#### CRSP Daily Stock Event - Delisting

In [30]:
# Load delisting returns: one record per permno with the delisting return and
# the delisting date.
dlret = (
    pd.read_csv('qdlretd_raw.csv.gz', compression='gzip')
    [['permno', 'dlret', 'dlstdt']]
    .astype({'permno': int})
    # `date` is the parsed delisting date; `dlstdt` itself is left as read.
    .assign(date=lambda events: pd.to_datetime(events['dlstdt']))
)

dlret.head()

Unnamed: 0,permno,dlret,dlstdt,date
0,10000,0.0,1987-06-11,1987-06-11
1,10001,0.0,2017-08-03,2017-08-03
2,10002,0.010906,2013-02-15,2013-02-15
3,10003,-0.003648,1995-12-15,1995-12-15
4,10004,,1986-01-17,1986-01-17


##### Format Dates

In [31]:
# Run both frames through the project helper `util.jdate` (defined in ../util,
# not visible here).
# NOTE(review): judging by the later output, this presumably turns `date`
# into the `jdate` column used from here on - confirm in util.py.
crsp_d = util.jdate(crsp_d)
dlret = util.jdate(dlret)

In [32]:
# Combine the daily returns with the delisting returns via the project helper
# `util.comebineRet` (defined in ../util, not visible here).
# NOTE(review): the result carries `dlret`, `dlstdt`, `retadj` and `retxadj`
# columns; presumably `retadj`/`retxadj` are the delisting-adjusted returns -
# confirm in util.py.
crsp = util.comebineRet(crsp_d,dlret)

crsp.head()

Unnamed: 0,permno,permco,ret,retx,shrout,prc,shrcd,exchcd,namedt,nameendt,jdate,dlret,dlstdt,retadj,retxadj
0,10001,7953,0.0,0.0,1022.0,10.125,11.0,3.0,1986-01-09,1993-11-21,1990-01-02,0.0,,0.0,0.0
1,10001,7953,-0.012346,-0.012346,1022.0,10.0,11.0,3.0,1986-01-09,1993-11-21,1990-01-03,0.0,,-0.012346,-0.012346
2,10001,7953,0.0,0.0,1022.0,10.0,11.0,3.0,1986-01-09,1993-11-21,1990-01-04,0.0,,0.0,0.0
3,10001,7953,0.00625,0.00625,1022.0,-10.0625,11.0,3.0,1986-01-09,1993-11-21,1990-01-05,0.0,,0.00625,0.00625
4,10001,7953,0.006211,0.006211,1022.0,10.125,11.0,3.0,1986-01-09,1993-11-21,1990-01-08,0.0,,0.006211,0.006211


In [33]:
# Calendar month and year of each observation, used for the filters and the
# per-year grouping further down.
jdate_parts = crsp['jdate'].dt
crsp['month'] = jdate_parts.month
crsp['year'] = jdate_parts.year

In [34]:
# Drop observations from before `startyear`. Expressed as "not earlier than"
# so that rows with a missing year are retained rather than dropped.
crsp = crsp.loc[~crsp['year'].lt(startyear)]

In [35]:
# Put the panel in chronological order before computing annual returns.
crsp = crsp.sort_values(by='jdate', ascending=True)

In [36]:
# Compute the full-year annual return via the project helper `util.annRet`
# (defined in ../util, not visible here).
# NOTE(review): presumably this adds the `1+retadj` and `annRet` columns that
# later cells rely on - confirm in util.py.
crsp = util.annRet(crsp)

In [37]:
# Calendar day-of-year of each observation (1 = January 1st); this counts
# calendar days, not trading days.
crsp['daycount'] = crsp.jdate.dt.dayofyear

In [38]:
# Work on an independent copy so the late-start filtering below leaves `crsp`
# untouched.
crsp2 = crsp.copy(deep=True)

In [39]:
# Discard each year's observations that fall before `startday`, so the
# late-start return begins compounding on that day-of-year. The negated form
# keeps rows whose day count is missing.
crsp2 = crsp2.loc[~crsp2['daycount'].lt(startday)]

In [40]:
# Late-start annual return: order each stock's rows by date, then compound the
# gross daily returns (`1+retadj`) within every (permno, year) group. Each row
# holds the running return from the first retained day of the year to that
# row's date.
crsp2 = crsp2.sort_values(by=['permno', 'jdate'], ascending=True)
gross_by_year = crsp2.groupby(['permno', 'year'])['1+retadj']
crsp2['annRet10'] = gross_by_year.cumprod().sub(1)

In [41]:
# Keep December observations only; the year-end value is picked from these.
# NOTE(review): a stock with no December rows in a given year (e.g. one that
# stops trading mid-year) loses that year entirely - confirm this is intended.
crsp2 = crsp2.loc[crsp2['month'].eq(12)]

In [42]:
# One row per (permno, year): the last remaining row, i.e. the final December
# observation given the permno/date ordering established earlier.
is_earlier_row = crsp2.duplicated(subset=['permno', 'year'], keep='last')
crsp3 = crsp2[~is_earlier_row]

In [43]:
# Eyeball the most recent year-end rows for permno 14593.
crsp3.loc[crsp3['permno'].eq(14593)].tail(5)

Unnamed: 0,permno,permco,ret,retx,shrout,prc,shrcd,exchcd,namedt,nameendt,...,dlret,dlstdt,retadj,retxadj,month,year,1+retadj,annRet,daycount,annRet10
5216065,14593,7,-0.007796,-0.007796,5257816.0,115.82,11.0,3.0,2007-01-11,2017-12-27,...,0.0,,-0.007796,-0.007796,12,2016,0.992204,0.124805,365,0.221092
5216316,14593,7,-0.010814,-0.010814,5087056.0,169.23,11.0,3.0,2017-12-28,2021-09-30,...,0.0,,-0.010814,-0.010814,12,2017,0.989186,0.484747,363,0.445194
5216567,14593,7,0.009665,0.009665,4729803.0,157.74001,11.0,3.0,2017-12-28,2021-09-30,...,0.0,,0.009665,0.009665,12,2018,1.009665,-0.053916,365,-0.081593
5216819,14593,7,0.007307,0.007307,4384959.0,293.64999,11.0,3.0,2017-12-28,2021-09-30,...,0.0,,0.007307,0.007307,12,2019,1.007307,0.889663,365,0.944266
5217072,14593,7,-0.007703,-0.007703,17001802.0,132.69,11.0,3.0,2017-12-28,2021-09-30,...,0.0,,-0.007703,-0.007703,12,2020,0.992297,0.823096,366,0.729007


### Export

In [44]:
# Summary statistics of the full-year annual return.
crsp3['annRet'].describe()

count    171032.000000
mean          0.158212
std           0.983966
min          -1.000000
25%          -0.251010
50%           0.038523
75%           0.350889
max         110.599789
Name: annRet, dtype: float64

In [45]:
# Summary statistics of the late-start annual return, for comparison.
crsp3['annRet10'].describe()

count    171032.000000
mean          0.136185
std           0.899211
min          -1.000000
25%          -0.256937
50%           0.033964
75%           0.336286
max         114.448125
Name: annRet10, dtype: float64

In [21]:
# Write the year-end panel as a gzip-compressed CSV, without the row index.
crsp3.to_csv("AnnRet.csv.gz", compression="gzip", index=False)

#### Apple Check

In [22]:
# Spot check: the 2019 year-end row for permno 14593 (Apple).
crsp3.loc[crsp3['permno'].eq(14593) & crsp3['year'].eq(2019)]

Unnamed: 0,permno,permco,ret,retx,shrout,prc,shrcd,exchcd,namedt,nameendt,...,dlret,dlstdt,retadj,retxadj,month,year,1+retadj,annRet,daycount,annRet10
4945768,14593,7,0.007307,0.007307,4384959.0,293.64999,11.0,3.0,2017-12-28,2021-09-30,...,0.0,,0.007307,0.007307,12,2019,1.007307,0.889663,365,0.944266
