# Project Code and Data Walkthrough

### Duncan Park and Kyle Parran

#### This notebook gives users a brief tour of the data that is pulled, processed, and analyzed in our project.



#




## Import Statements

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Local project modules: '../src/' is appended to the import path so the two
# star-imports below can resolve. The path is relative, so this assumes the
# kernel's working directory is the notebook's own folder — TODO confirm.
import sys
sys.path.append('../src/')
# NOTE(review): star-imports hide where helper names come from. The helpers
# called later in this notebook (fetch_wrds_contract_info,
# fetch_wrds_fut_contract, extract_prompt_and_12m_settlements) are presumably
# provided by these two modules — consider importing them explicitly.
from calc_format_futures_data import *
from pull_futures_data import *

# Suppress every warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides deprecation and data-quality
# warnings; consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings("ignore")

Loading library list...
Done


## Loading Data from WRDS

#### First, for each commodity, we look up the metadata for all of the futures contracts on that commodity.

In [2]:
# Contract code 2036 is listed in the result below as ORANGE JUICE (FCOJ-A).
# NOTE(review): the meaning of the "paper" argument is defined by the helper,
# not visible here — confirm against pull_futures_data.
oj_contrcode = 2036
info_df = fetch_wrds_contract_info(oj_contrcode, "paper")

# Display the contract metadata (one row per futures contract).
info_df

Unnamed: 0,futcode,contrcode,contrname,contrdate,startdate,lasttrddate
0,115736.0,2036.0,ORANGE JUICE (FCOJ-A),0595,1993-12-02,1995-05-17
1,115808.0,2036.0,ORANGE JUICE (FCOJ-A),1191,1991-03-04,1991-11-15
2,120392.0,2036.0,ORANGE JUICE (FCOJ-A),0902,2001-03-12,2002-09-10
3,120868.0,2036.0,ORANGE JUICE (FCOJ-A),0393,1991-10-02,1993-03-18
4,121580.0,2036.0,ORANGE JUICE (FCOJ-A),1183,1982-05-28,1983-11-16
...,...,...,...,...,...,...
201,103753.0,2036.0,ORANGE JUICE (FCOJ-A),0779,1978-02-02,1979-07-18
202,109822.0,2036.0,ORANGE JUICE (FCOJ-A),1186,1985-06-03,1986-11-14
203,110839.0,2036.0,ORANGE JUICE (FCOJ-A),0104,2002-01-11,2004-01-09
204,112575.0,2036.0,ORANGE JUICE (FCOJ-A),1102,2001-05-01,2002-11-07


#### Then, for each commodity, we query the full settlement time series for each of these futures contracts.

In [3]:
# Distinct contract identifiers, frozen into a tuple.
unique_futcodes = info_df["futcode"].unique()
futcodes = tuple(unique_futcodes)

# Lookup table: futcode -> contrdate (strings such as '0595' in the output).
contrdate_by_futcode = info_df.set_index("futcode")["contrdate"]
futcodes_contrdates = contrdate_by_futcode.to_dict()

In [4]:
# Show the futcode -> contrdate mapping built above.
# NOTE(review): this prints the whole dict (one entry per contract); consider
# displaying only a small slice to keep the notebook output short.
futcodes_contrdates

{115736.0: '0595',
 115808.0: '1191',
 120392.0: '0902',
 120868.0: '0393',
 121580.0: '1183',
 121668.0: '0500',
 122846.0: '0580',
 123139.0: '0594',
 123615.0: '0196',
 124167.0: '0788',
 124512.0: '1198',
 124571.0: '0783',
 125977.0: '0306',
 126250.0: '0390',
 127489.0: '0307',
 129546.0: '0579',
 129570.0: '0108',
 129851.0: '0191',
 129949.0: '1194',
 129952.0: '1107',
 130490.0: '0583',
 130504.0: '0990',
 132069.0: '0998',
 135076.0: '1180',
 135304.0: '0395',
 138266.0: '0795',
 138271.0: '0982',
 138403.0: '0103',
 141395.0: '0194',
 141893.0: '0180',
 141895.0: '0199',
 144960.0: '0302',
 145578.0: '1181',
 145834.0: '0797',
 147611.0: '0184',
 147911.0: '0907',
 155211.0: '0793',
 156178.0: '0781',
 156464.0: '0784',
 156790.0: '1192',
 157394.0: '0588',
 159065.0: '0992',
 160136.0: '0708',
 161827.0: '0908',
 161863.0: '0502',
 163075.0: '1187',
 163467.0: '0397',
 170028.0: '0789',
 173753.0: '0379',
 174494.0: '0790',
 176033.0: '0504',
 176399.0: '1106',
 176695.0: '

In [5]:
# Fetch the settlement time series for every contract in the mapping.
# The result shown below has columns futcode, date_, settlement and contrdate.
# NOTE(review): the meaning of the 'paper' argument is defined by the helper,
# not visible here — confirm against pull_futures_data.
data_contracts = fetch_wrds_fut_contract(futcodes_contrdates, 'paper')
data_contracts

Unnamed: 0,futcode,date_,settlement,contrdate
0,46.0,2001-10-08,93.250000,0303
1,46.0,2001-10-09,93.250000,0303
2,46.0,2001-10-10,93.549988,0303
3,46.0,2001-10-11,93.799988,0303
4,46.0,2001-10-12,98.799988,0303
...,...,...,...,...
72477,318251.0,1977-11-10,124.500000,1177
72478,318251.0,1977-11-11,125.500000,1177
72479,318251.0,1977-11-14,129.850000,1177
72480,318251.0,1977-11-15,131.100000,1177


In [6]:
# Order rows chronologically within each contract so that "last row" below
# means the latest observation.
data_contracts = data_contracts.sort_values(by=["futcode", "date_"])

# Calendar month (year-month period) of every observation.
# Assumes date_ is already a datetime column, as the .dt accessor requires.
obs_month = data_contracts["date_"].dt.to_period("M")

# Keep only the final row of each (contract, month) group, i.e. the last
# available settlement of the month for each contract.
monthly_df = data_contracts.groupby(["futcode", obs_month]).tail(1)

In [7]:
# Inspect the month-end rows; the original row index of data_contracts is kept.
monthly_df

Unnamed: 0,futcode,date_,settlement,contrdate
17,46.0,2001-10-31,99.500000,0303
36,46.0,2001-11-30,103.299990,0303
54,46.0,2001-12-28,98.049988,0303
75,46.0,2002-01-31,92.899994,0303
94,46.0,2002-02-28,91.000000,0303
...,...,...,...,...
72405,318251.0,1977-07-29,111.950000,1177
72428,318251.0,1977-08-31,109.100000,1177
72449,318251.0,1977-09-30,127.900000,1177
72470,318251.0,1977-10-31,125.750000,1177


In [8]:
# Build the per-month table shown below (columns prompt_settlement,
# 11mth_settlement and basis) from the month-end settlements.
# NOTE(review): the helper is named "..._12m_..." but the output column is
# "11mth_settlement" — confirm which horizon is intended.
res = extract_prompt_and_12m_settlements(monthly_df)

In [9]:
# Preview the first 20 months that have no missing value in any column.
# This is display only: `res` itself is not modified.
res.dropna().head(20)

Unnamed: 0,prompt_settlement,11mth_settlement,basis
1974-08,52.0,57.699997,-0.009456
1974-10,53.399994,61.349991,-0.012617
1975-02,47.5,53.899994,-0.011491
1975-04,49.25,57.25,-0.013683
1975-06,53.199997,61.149994,-0.012661
1975-08,59.899994,66.0,-0.008816
1975-10,63.549988,69.0,-0.00748
1975-12,58.0,65.049988,-0.010428
1976-02,61.449997,68.799988,-0.010271
1976-04,59.449997,64.899994,-0.007974


In [10]:
# Average basis over all of `res` (not the dropna() preview above); pandas
# skips NaN entries in mean() by default.
res['basis'].mean()

-0.002837122103595275

### TODO: Fix the futures statistics calculations. Compute the basis and the excess returns on separate DataFrames.