In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

### Combine the main contracts

1. `front.csv` lists the main (front) contract code for each trading day.
2. Read the main contract's minute data day by day:
    1. extract the previous day's and today's data,
    2. slice the data from 7:00 p.m. on the previous day to 1:20 p.m. today,
    3. compute the per-minute trading volume and store it in the "Volume" column.
3. Save the combined data to "../data/combined_data.csv".

In [2]:
def get_one_contract_data_by_date(date_str, contract_path):
    """Return one trading day's minute bars for a single contract.

    Trading hours:
        Sunday – Friday, 7:00 p.m. – 7:45 a.m. CT and
        Monday – Friday, 8:30 a.m. – 1:20 p.m. CT
    A trading day is defined as the previous calendar day's night session
    (19:00 – 23:59) followed by the requested day's rows up to 13:20.

    Args:
        date_str: str, the trading date to extract, e.g. "2016-01-04"
        contract_path: str, path of the contract's minute CSV, e.g. "../data/ZC/ZCH16.csv".
            The first column is parsed as the datetime index and a
            "TotalVolume" column is required.

    Returns:
        pandas.DataFrame indexed by timestamp with the CSV columns plus
        "Volume", the row-to-row difference of "TotalVolume". The first
        returned row's "Volume" is replaced by that row's "TotalVolume".
        The frame is empty when no row falls inside either time window.

    NOTE(review): selecting a date with `.loc[<date string>]` presumably raises
    KeyError when the file has no row at all for that calendar date — confirm
    how non-trading days should be handled by callers.
    """
    year, month, day = map(int, date_str.split('-'))
    last_date_str = (datetime(year, month, day) - timedelta(days=1)).strftime("%Y-%m-%d")

    all_data = pd.read_csv(contract_path, header=0, index_col=0, parse_dates=[0])
    # Per-minute volume is the increment of the running total.
    all_data['Volume'] = all_data['TotalVolume'].diff()

    last_day_data = all_data.loc[last_date_str].between_time("19:00", "23:59")
    today_data = all_data.loc[date_str].between_time("0:00", "13:20")

    # pd.concat replaces DataFrame.append, which newer pandas no longer provides.
    data = pd.concat([last_day_data, today_data])

    if not data.empty:
        # The first row's diff is taken against a row outside the selected
        # window, so use the running total itself as the first minute's volume
        # (presumably TotalVolume restarts at the session open — TODO confirm).
        # Columns are located by name instead of by hard-coded position.
        volume_pos = data.columns.get_loc('Volume')
        total_pos = data.columns.get_loc('TotalVolume')
        data.iloc[0, volume_pos] = data.iloc[0, total_pos]
    return data

In [3]:
def get_all_main_contract_data(main_contracts_path = "../data/ZC/front.csv", contracts_root_path = "../data/ZC/"):
    """Combine the daily main contract data into one frame.

    Args:
        main_contracts_path: str, path of the headerless CSV whose two columns
            are the date string and the main contract code for that date
        contracts_root_path: str, the root path that holds every contract's
            minute CSV; a contract with code X is read from
            contracts_root_path + "ZC" + X + ".csv"

    Returns:
        data, pd.DataFrame,columns=["Open", "High", "Low", "Close", "TotalVolume", "Volume", "Change"],
                            where "Change" == 1 indicate that the main contract changed that day

    """
    # Use the function's own arguments; the paths must not come from
    # notebook-level globals, which may not exist on a fresh kernel.
    main_contracts = pd.read_csv(main_contracts_path, header=None, names=['date_str', 'main_contract'])

    # A rollover day is one whose contract code differs from the previous row's.
    # Comparing against the shifted column avoids hashing the codes and a
    # rolling window whose element access depends on how pandas passes it.
    previous_contract = main_contracts['main_contract'].shift()
    main_contracts['Change'] = main_contracts['main_contract'].ne(previous_contract).astype(float)

    daily_frames = []
    # The first row is skipped, as before: it has no previous row to compare
    # with (and presumably no preceding night session in the data — TODO confirm).
    for row in main_contracts.iloc[1:].itertuples(index=False):
        contract_path = contracts_root_path + "ZC" + row.main_contract + ".csv"

        today_data = get_one_contract_data_by_date(row.date_str, contract_path)
        today_data['Change'] = row.Change
        daily_frames.append(today_data)

    if not daily_frames:
        # Nothing to combine: keep returning an empty frame with the documented columns.
        return pd.DataFrame(columns=["Open", "High", "Low", "Close", "TotalVolume", "Volume", "Change"])

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(daily_frames)

### Test the function

In [5]:
pd.set_option('display.max_rows', 5000)

main_contract_path = "../data/ZC/front.csv"
contract_root_path = "../data/ZC/"

# front.csv has no header row: column 0 is the date, column 1 the main contract code.
main_contracts = pd.read_csv(main_contract_path, header=None, names=['date_str', 'main_contract'])

# Change is 1.0 on days whose main contract differs from the previous row's, else 0.0.
# A shift comparison replaces the hash + rolling(2).apply lambda, whose x[0] / x[1]
# access depends on whether pandas hands the window over as an ndarray or a Series.
main_contracts['Change'] = (
    main_contracts.main_contract
    .ne(main_contracts.main_contract.shift())
    .astype(float)
)
# The first row has no predecessor, so its flag stays undefined (NaN).
main_contracts.loc[main_contracts.index[:1], 'Change'] = np.nan

main_contracts.head()

  import sys


Unnamed: 0,date_str,main_contract,Change
0,2016-01-04,H16,
1,2016-01-05,H16,0.0
2,2016-01-06,H16,0.0
3,2016-01-07,H16,0.0
4,2016-01-08,H16,0.0


### test get_one_contract_data_by_date()

In [6]:
# Smoke-test get_one_contract_data_by_date() on row 1 of main_contracts
def func_main_to_path(code):
    """Build a contract's CSV path from its code (the part after "ZC")."""
    return contract_root_path + "ZC" + code + ".csv"


sample_row = main_contracts.loc[1]
contract_path = func_main_to_path(sample_row['main_contract'])

test_data = get_one_contract_data_by_date(sample_row['date_str'], contract_path)
test_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,TotalVolume,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04 19:01:00,352.75,353.25,352.5,353.25,4243,4243.0
2016-01-04 19:02:00,353.0,353.25,353.0,353.0,4259,16.0
2016-01-04 19:03:00,353.0,353.25,352.75,352.75,4404,145.0
2016-01-04 19:04:00,353.0,353.0,353.0,353.0,4467,63.0
2016-01-04 19:05:00,353.0,353.25,353.0,353.25,4518,51.0


In [14]:
# Build the combined frame using the function's default paths; %time reports how long the call takes.
%time all_data = get_all_main_contract_data()

  


Wall time: 5min 51s


In [13]:
# Sanity check: average hours of minute rows per day (rows / days / 60).
# NOTE(review): the combining loop starts at row 1 of front.csv, so all_data may
# cover len(main_contracts) - 1 days — confirm which divisor is intended.
days = len(main_contracts)
len(all_data) / days / 60

11.547193436960276

In [15]:
# Write the combined main-contract frame to ../data/combined_data.csv
all_data.to_csv("../data/combined_data.csv")