In [11]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

import os
print(os.getcwd())

C:\Users\matze\Documents\Winton\CRISPDM


In [12]:
# Load the cleaned frames; the first CSV column is used as the row index.
X_train, y_train, weights, X_test = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('X_cleaned.csv', 'y_cleaned.csv', 'weights.csv', 'Test_cleaned.csv')
)

In [13]:
# One-line summary of the dimensions of every loaded frame.
print(*(
    'Shape of {}:: {}'.format(label, frame.shape)
    for label, frame in (
        ('X_train', X_train),
        ('X_test', X_test),
        ('y_train', y_train),
        ('Weights', weights),
    )
))

Shape of X_train:: (40000, 179) Shape of X_test:: (120000, 179) Shape of y_train:: (40000, 62) Shape of Weights:: (40000, 2)


### Helpers

In [14]:
#featureset_colnames = X_train.loc[:, 'Feature_1':'Feature_25'].columns

def _columns_between(frame, first, last):
    """Return the column labels of `frame` from `first` through `last` (label slice, both ends included)."""
    return frame.loc[:, first:last].columns


# Column groups shared by the feature builders below: past columns come from X, future ones from y.
daily_ret_past_colnames = _columns_between(X_train, 'Ret_MinusTwo', 'Ret_MinusOne')
daily_ret_fut_colnames = _columns_between(y_train, 'Ret_PlusOne', 'Ret_PlusTwo')
minute_ret_past_colnames = _columns_between(X_train, 'Ret_2', 'Ret_120')
minute_ret_fut_colnames = _columns_between(y_train, 'Ret_121', 'Ret_180')

### Features for X

In [15]:
### Stock prices

def _cumulative_prices(return_rates, colnames):
    """Compound per-period returns into a price path that starts from a base of 100.

    Parameters
    ----------
    return_rates : pandas.DataFrame
        Frame containing (at least) the return columns listed in `colnames`.
    colnames : list-like of column labels
        Return columns to compound, in chronological order.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as the selected returns; column k holds
        100 * (1 + r_1) * ... * (1 + r_k). The input frame is not modified.
    """
    growth = return_rates.loc[:, colnames] + 1
    # Scaling the first growth factor by 100 turns the cumulative product into a price level.
    growth.iloc[:, 0] = growth.iloc[:, 0] * 100
    return growth.cumprod(axis=1)


def get_prices(return_rates):
    """Price path over the past minute returns (`minute_ret_past_colnames`), base 100."""
    return _cumulative_prices(return_rates, minute_ret_past_colnames)


def get_prices_future(return_rates):
    """Price path over the future minute returns (`minute_ret_fut_colnames`), base 100."""
    return _cumulative_prices(return_rates, minute_ret_fut_colnames)

def get_smoothed_prices(return_rates, rol_window):
    """Rolling-mean smoothed price path for every row.

    Parameters
    ----------
    return_rates : pandas.DataFrame
        Frame accepted by `get_prices`.
    rol_window : int
        Number of consecutive columns averaged per output value.

    Returns
    -------
    pandas.DataFrame
        Same shape as `get_prices(return_rates)`; the first `rol_window - 1`
        columns are NaN because the window is not yet full there.
    """
    prices = get_prices(return_rates)
    # DataFrame.rolling(axis=1) is deprecated (pandas 2.1); rolling over the transposed
    # frame averages along the original columns, and transposing back restores the layout.
    return prices.T.rolling(window=rol_window).mean().T


def get_smoothed_ret(return_rates, rol_window, pct_period):
    """Percentage changes of the rolling-mean smoothed price path.

    Parameters
    ----------
    return_rates : pandas.DataFrame
        Frame accepted by `get_prices`.
    rol_window : int
        Window length of the rolling mean applied along each row.
    pct_period : int
        Number of columns between the two prices compared by `pct_change`.

    Returns
    -------
    pandas.DataFrame
        One row per input row. Every column that contains a NaN in any row is
        dropped, which removes at least the leading warm-up columns.
    """
    prices = get_prices(return_rates)
    # One vectorised rolling pass over the transposed frame replaces the per-row
    # Python-level apply, which is very slow on frames with many rows.
    smoothed = prices.T.rolling(window=rol_window).mean().T
    changes = smoothed.pct_change(periods=pct_period, axis=1)
    return changes.dropna(axis=1)

In [16]:
def get_daily_diff(df):
    """Row-wise Ret_MinusOne minus Ret_MinusTwo."""
    return df['Ret_MinusOne'].sub(df['Ret_MinusTwo'])

def get_daily_abs_diff(df):
    """Absolute value of the row-wise Ret_MinusOne minus Ret_MinusTwo."""
    gap = df['Ret_MinusOne'].sub(df['Ret_MinusTwo'])
    return gap.abs()

def get_daily_sum(df):
    """Row-wise Ret_MinusOne plus Ret_MinusTwo."""
    return df['Ret_MinusOne'].add(df['Ret_MinusTwo'])

def get_last_minute_diff(df):
    """Row-wise Ret_120 minus Ret_119."""
    return df['Ret_120'].sub(df['Ret_119'])

def get_abs_last_minute_ret(df):
    """Absolute value of the row-wise Ret_120 minus Ret_119."""
    gap = df['Ret_120'].sub(df['Ret_119'])
    return gap.abs()

def get_last_minute_sum(df):
    """Row-wise Ret_120 plus Ret_119."""
    return df['Ret_120'].add(df['Ret_119'])


def get_grouped_mean(df, colnames):
    """Broadcast each group's column means back onto the rows of `df`.

    Rows are grouped by the columns named in `colnames`; the result has one row
    per input row holding the mean of that row's group.
    """
    per_group = df.groupby(colnames, as_index=False)
    return per_group.transform('mean')

def get_grouped_mad(df, colnames):
    """Broadcast each group's mean absolute deviation (MAD) back onto the rows of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the grouping columns and the numeric value columns.
    colnames : str or list of str
        Column(s) to group by.

    Returns
    -------
    pandas.DataFrame
        One row per input row and one column per non-grouping column, holding
        mean(|x - group mean|) of the row's group.
    """
    keys = [colnames] if isinstance(colnames, str) else list(colnames)
    by = [df[key] for key in keys]
    values = df.drop(columns=keys)
    # The 'mad' aggregation was removed from pandas in 2.0, so it is spelled out as
    # two grouped means: the group mean, then the mean absolute distance to it.
    centred = values - values.groupby(by).transform('mean')
    return centred.abs().groupby(by).transform('mean')

def _grouped_dense_rank(df, colnames):
    """Dense-rank the (Feature_7, Feature_5) group means inside each Feature_7 group.

    Each selected column is first replaced by its mean over the rows sharing the
    same (Feature_7, Feature_5) pair; those means are then dense-ranked among the
    rows that share the same Feature_7 value.
    """
    colnames = list(colnames)
    # Select the wanted columns before transforming: averaging every column of df
    # only to discard most of them is wasted work.
    cell_means = df.groupby(['Feature_7', 'Feature_5'])[colnames].transform('mean')
    # Grouping by the key Series avoids writing a helper column into a sliced frame.
    return cell_means.groupby(df['Feature_7'], sort=False).rank(method='dense')


def get_grouped_rank_minute(df):
    """Grouped dense ranks for the past minute-return columns (`minute_ret_past_colnames`)."""
    return _grouped_dense_rank(df, minute_ret_past_colnames)


def get_grouped_rank_daily(df):
    """Grouped dense ranks for the past daily-return columns (`daily_ret_past_colnames`)."""
    return _grouped_dense_rank(df, daily_ret_past_colnames)

def get_interview_features(df):
    """Assemble the simple daily / last-minute / grouped features into one frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with Ret_MinusOne, Ret_MinusTwo, Ret_119, Ret_120, Feature_5 and
        Feature_7 (plus whatever `get_grouped_rank_daily` reads).

    Returns
    -------
    pandas.DataFrame
        Indexed like `df`, with twelve feature columns.
    """
    interview_features = pd.DataFrame(index=df.index)
    interview_features['daily_diff'] = get_daily_diff(df)
    interview_features['abs_daily_diff'] = get_daily_abs_diff(df)
    interview_features['daily_sum'] = get_daily_sum(df)
    interview_features['last_minute_diff'] = get_last_minute_diff(df)
    interview_features['abs_last_minute_diff'] = get_abs_last_minute_ret(df)
    interview_features['last_minute_sum'] = get_last_minute_sum(df)

    # Only Ret_MinusOne is kept from the grouped statistics, so the helpers get the
    # grouping key plus that one column instead of transforming every column of df.
    for stat, grouped_stat in (('mean', get_grouped_mean), ('mad', get_grouped_mad)):
        for key, tag in (('Feature_7', 'fet_7'), ('Feature_5', 'fet_5')):
            subset = df[[key, 'Ret_MinusOne']]
            interview_features['grouped_{}_{}'.format(stat, tag)] = grouped_stat(subset, [key]).Ret_MinusOne

    # Both rank columns come from the same frame; compute it once.
    daily_ranks = get_grouped_rank_daily(df)
    interview_features['grouped_rank_daily_1'] = daily_ranks.Ret_MinusOne
    interview_features['grouped_rank_daily_2'] = daily_ranks.Ret_MinusTwo

    return interview_features


In [17]:
#Calculated features and lag features
#X_train.loc[:,featureset_colnames].describe()
#mad std abs-dar log-dar quantiles quartiles skewness kurtosis
def _row_descriptives(minutes, prefix):
    """Row-wise descriptive statistics of `minutes`, with `prefix` prepended to every column name.

    Parameters
    ----------
    minutes : pandas.DataFrame
        Numeric frame; each row is summarised across its columns.
    prefix : str
        Text prepended to the statistic names (sum, mean, mad, median, std, var,
        sem, skew, kurt, 25_quantile, 75_quantile).

    Returns
    -------
    pandas.DataFrame
        Indexed like `minutes`, one column per statistic.
    """
    row_mean = minutes.mean(axis=1)
    descriptives = pd.DataFrame(index=minutes.index)
    descriptives[prefix + 'sum'] = minutes.sum(axis=1)
    descriptives[prefix + 'mean'] = row_mean
    # DataFrame.mad was removed in pandas 2.0; this is the same mean absolute deviation.
    descriptives[prefix + 'mad'] = minutes.sub(row_mean, axis=0).abs().mean(axis=1)
    # The median column used to be filled with .mad() (copy-paste slip), duplicating
    # the MAD column; it now holds the actual row median.
    descriptives[prefix + 'median'] = minutes.median(axis=1)
    descriptives[prefix + 'std'] = minutes.std(axis=1)
    descriptives[prefix + 'var'] = minutes.var(axis=1)
    descriptives[prefix + 'sem'] = minutes.sem(axis=1)
    descriptives[prefix + 'skew'] = minutes.skew(axis=1)
    descriptives[prefix + 'kurt'] = minutes.kurt(axis=1)
    descriptives[prefix + '25_quantile'] = minutes.quantile(q=0.25, axis=1)
    descriptives[prefix + '75_quantile'] = minutes.quantile(q=0.75, axis=1)
    return descriptives


def get_descriptives(df):
    """Row-wise statistics of the raw past minute returns (columns prefixed `minute_`)."""
    return _row_descriptives(df.loc[:, minute_ret_past_colnames], 'minute_')


def get_smoothed_descriptives(df):
    """Row-wise statistics of `get_smoothed_ret(df, 5, 1)` (columns prefixed `smoothed_minute_`)."""
    return _row_descriptives(get_smoothed_ret(df, 5, 1), 'smoothed_minute_')

#get_smoothed_descriptives(X_train)

# Whole-market features: these summarise the state of the whole market (rising or falling, calm or volatile). They were also used in our self-adaptive strategy.

def get_market_indicators_fet7(df):
    """Per-Feature_7-group statistics of the two past daily returns, broadcast to every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with Feature_7, Ret_MinusOne and Ret_MinusTwo.

    Returns
    -------
    pandas.DataFrame
        Indexed like `df`: group mean, group mean absolute deviation (MAD), their
        day-over-day differences (signed and absolute) and the group counts.
    """
    keys = df['Feature_7']

    def group_mean(values):
        return values.groupby(keys).transform('mean')

    def group_mad(values):
        # 'mad' is no longer a groupby aggregation (removed in pandas 2.0):
        # compute mean(|x - group mean|) with two grouped means instead.
        return group_mean((values - group_mean(values)).abs())

    ret_one, ret_two = df['Ret_MinusOne'], df['Ret_MinusTwo']

    market_indicators = pd.DataFrame(index=df.index)
    market_indicators['fet_7_RetMinOne_Mean'] = group_mean(ret_one)
    market_indicators['fet_7_RetMinTwo_Mean'] = group_mean(ret_two)
    market_indicators['fet_7_RetMinOne_Mad'] = group_mad(ret_one)
    market_indicators['fet_7_RetMinTwo_Mad'] = group_mad(ret_two)
    market_indicators['fet_7_diff_RetDaily_Mean'] = market_indicators['fet_7_RetMinOne_Mean'] - market_indicators['fet_7_RetMinTwo_Mean']
    market_indicators['fet_7_diff_RetDaily_Mad'] = market_indicators['fet_7_RetMinOne_Mad'] - market_indicators['fet_7_RetMinTwo_Mad']
    market_indicators['fet_7_absdiff_RetDaily_Mad'] = market_indicators['fet_7_diff_RetDaily_Mad'].abs()
    market_indicators['fet_7_absdiff_RetDaily_Mean'] = market_indicators['fet_7_diff_RetDaily_Mean'].abs()
    market_indicators['fet_7_RetMinOne_Count'] = ret_one.groupby(keys).transform('count')
    market_indicators['fet_7_RetMinTwo_Count'] = ret_two.groupby(keys).transform('count')
    return market_indicators
    


def get_market_indicators_fet7_fet5(df):
    """Rank and (Feature_7, Feature_5)-group statistics of the two past daily returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with Feature_7, Feature_5, Ret_MinusOne and Ret_MinusTwo (plus
        whatever `get_grouped_rank_daily` reads).

    Returns
    -------
    pandas.DataFrame
        Indexed like `df`: the grouped daily ranks with their difference / absolute
        difference / sum, then the per-group MAD, mean, mean difference, absolute
        mean difference, mean sum and row count.
    """
    market_indicators = pd.DataFrame(index=df.index)

    #ranks (the rank frame is computed once and both columns are read from it)
    daily_ranks = get_grouped_rank_daily(df)
    rank_one, rank_two = daily_ranks.Ret_MinusOne, daily_ranks.Ret_MinusTwo
    market_indicators['Rank_MinusOne'] = rank_one
    market_indicators['Rank_MinusTwo'] = rank_two
    market_indicators['Diff_Ranked'] = rank_two - rank_one
    market_indicators['AbsDiff_Ranked'] = (rank_two - rank_one).abs()
    market_indicators['Sum_Ranked'] = rank_two + rank_one

    #descriptives
    keys = [df['Feature_7'], df['Feature_5']]

    def group_mean(values):
        return values.groupby(keys).transform('mean')

    ret_one, ret_two = df['Ret_MinusOne'], df['Ret_MinusTwo']
    mean_one, mean_two = group_mean(ret_one), group_mean(ret_two)
    # 'mad' is no longer a groupby aggregation (removed in pandas 2.0):
    # mean absolute deviation = group mean of |x - group mean|.
    market_indicators['fet_7_fet_5_Ret_MinusOne_Mad'] = group_mean((ret_one - mean_one).abs())
    market_indicators['fet_7_fet_5_Ret_MinusTwo_Mad'] = group_mean((ret_two - mean_two).abs())
    market_indicators['fet_7_fet_5_Ret_MinusOne_Mean'] = mean_one
    market_indicators['fet_7_fet_5_Ret_MinusTwo_Mean'] = mean_two
    market_indicators['fet_7_fet_5_Diff_Daily_Mean'] = mean_one - mean_two
    market_indicators['fet_7_fet_5_AbsDiff_Daily_Mean'] = (mean_one - mean_two).abs()
    market_indicators['fet_7_fet_5_Sum_Daily_Mean'] = mean_one + mean_two

    market_indicators['fet_7_fet_5_Ret_Daily_Count'] = ret_one.groupby(keys).transform('count')

    return market_indicators

In [18]:
def max_dd(ser):
    """Most negative gap between `ser` and its running maximum (0 if it never falls below a previous peak)."""
    running_peak = ser.expanding(min_periods=1).max()
    return (ser - running_peak).min()
def get_max_dd(df):
    """Maximum drawdown of each row's cumulative past minute returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns listed in `minute_ret_past_colnames`.

    Returns
    -------
    pandas.Series
        Unnamed, indexed like `df`: the most negative gap between the running sum
        of the minute returns and its running maximum (0 when there is none).
    """
    cumulative = df.loc[:, minute_ret_past_colnames].cumsum(axis=1)
    # cummax along the row is the running peak; doing this on the whole frame
    # replaces a Python-level call per row with vectorised operations.
    return (cumulative - cumulative.cummax(axis=1)).min(axis=1)
   

In [19]:
def get_delta_ret(df):
    """Summed minute returns over several windows, plus differences and ratios between them.

    Returns a frame indexed like `df` with the running total up to Ret_120 and
    Ret_119, the totals over the named windows, and two difference / two ratio
    columns derived from them.
    """
    def window_total(first, last):
        # The last column of the running sum is the total over first..last.
        return df.loc[:, first:last].cumsum(axis=1)[last]

    running = df.loc[:, minute_ret_past_colnames].cumsum(axis=1)

    deltarets = pd.DataFrame(index=df.index)
    deltarets['Delta_120'] = running['Ret_120']
    deltarets['Delta_119'] = running['Ret_119']

    # NOTE(review): 'Delta_first_60' ends at Ret_61 and 'Delta_last_60' starts at
    # Ret_61, so that column is counted in both windows - confirm this is intended.
    for label, first, last in (
        ('Delta_last_5', 'Ret_116', 'Ret_120'),
        ('Delta_last_10', 'Ret_111', 'Ret_120'),
        ('Delta_last_60', 'Ret_61', 'Ret_120'),
        ('Delta_first_60', 'Ret_2', 'Ret_61'),
    ):
        deltarets[label] = window_total(first, last)

    last_10, last_60 = deltarets['Delta_last_10'], deltarets['Delta_last_60']
    whole, first_60 = deltarets['Delta_120'], deltarets['Delta_first_60']

    deltarets['Delta_dif_10_120'] = last_10 - whole
    deltarets['Delta_dif_60_60'] = last_60 - first_60

    # A zero denominator yields inf (or NaN for 0/0) in the ratio columns.
    deltarets['Delta_div_10_120'] = last_10 / whole
    deltarets['Delta_div_60_60'] = last_60 / first_60

    return deltarets

def get_mad(df):
    """Row-wise mean absolute deviation (MAD) of the minute returns over several windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the Ret_2 .. Ret_120 columns.

    Returns
    -------
    pandas.DataFrame
        Indexed like `df`: the MAD of each window, followed by two ratios and two
        differences between windows. A zero denominator yields inf (or NaN for 0/0)
        in the ratio columns.
    """
    def row_mad(block):
        # DataFrame.mad was removed in pandas 2.0; this is the same statistic:
        # the mean absolute distance of each value from its row mean.
        return block.sub(block.mean(axis=1), axis=0).abs().mean(axis=1)

    mads = pd.DataFrame(index=df.index)
    mads['MAD_120'] = row_mad(df.loc[:, minute_ret_past_colnames])
    mads['MAD_last_60'] = row_mad(df.loc[:, 'Ret_61':'Ret_120'])
    mads['MAD_first_60'] = row_mad(df.loc[:, 'Ret_2':'Ret_60'])
    mads['MAD_last_30'] = row_mad(df.loc[:, 'Ret_91':'Ret_120'])
    mads['MAD_last_15'] = row_mad(df.loc[:, 'Ret_106':'Ret_120'])

    # Ratios of the recent windows to the earlier / full window.
    mads['MAD_div_60_60'] = mads['MAD_last_60'] / mads['MAD_first_60']
    mads['MAD_div_15_120'] = mads['MAD_last_15'] / mads['MAD_120']

    mads['MAD_dif_60_120'] = mads['MAD_last_60'] - mads['MAD_120']
    mads['MAD_dif_15_120'] = mads['MAD_last_15'] - mads['MAD_120']
    return mads

def get_some_interaction(df):
    """Two interaction terms: a MAD column from `get_mad` times a window total from `get_delta_ret`."""
    dispersion = get_mad(df)
    drift = get_delta_ret(df)

    # TODO (carried over from the original cell): more interaction terms were planned,
    # e.g. X_2.PR_120 * interview_features.grouped_mad_fet_7
    return pd.DataFrame(
        {
            'interaction_1': dispersion['MAD_120'] * drift['Delta_last_5'],
            'interaction_2': dispersion['MAD_last_15'] * drift['Delta_last_10'],
        },
        index=df.index,
    )

In [20]:
def get_period_ret(df):
    """Relative price change up to Ret_120 from several start columns, plus dense ranks.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by `get_prices`, also containing Feature_7.

    Returns
    -------
    pandas.DataFrame
        Indexed like `df`: five `PR_*` columns holding
        (price at Ret_120 - price at start) / price at start, followed by the same
        five columns dense-ranked within each Feature_7 group (prefix `Ranked_7_`).
    """
    cumret = get_prices(df)
    final_price = cumret['Ret_120']

    #periods
    periodret = pd.DataFrame(index=df.index)
    for label, start in (
        ('PR_120', 'Ret_2'),
        ('PR_Last_5', 'Ret_116'),
        ('PR_Last_10', 'Ret_111'),
        ('PR_Last_30', 'Ret_91'),
        ('PR_Last_60', 'Ret_61'),
    ):
        periodret[label] = (final_price - cumret[start]) / cumret[start]

    #ranks
    # Grouping by the key Series ranks only the PR_* columns, so no helper columns
    # have to be added to the frame and dropped again afterwards.
    ranks_fet7 = periodret.groupby(df['Feature_7']).rank(method='dense').add_prefix('Ranked_7_')

    return pd.concat([periodret, ranks_fet7], axis=1)
#get_period_ret(X_train)

#get_period_ret(X_train).groupby('Feature_7').get_group(338)[['PR_120','Ranked_7_PR_120']].sort_values('PR_120')

In [36]:
### get grouped minute mean mad
def get_grouped_minute(df):
    """Feature_7-group mean and mean absolute deviation of each row's mean smoothed minute return.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by `get_smoothed_ret`, also containing Feature_7.

    Returns
    -------
    pandas.DataFrame
        Indexed like `df`, columns `fet_7_Minute_Mean` and `fet_7_Minute_MAD`.
    """
    # Mean of the smoothed minute returns per row (window 5, 1-step percentage change).
    row_mean = get_smoothed_ret(df, 5, 1).mean(axis=1)
    keys = df['Feature_7']

    group_mean = row_mean.groupby(keys).transform('mean')
    # 'mad' is no longer a groupby aggregation (removed in pandas 2.0):
    # mean absolute deviation = group mean of |x - group mean|.
    group_mad = (row_mean - group_mean).abs().groupby(keys).transform('mean')

    return pd.DataFrame({'fet_7_Minute_Mean': group_mean, 'fet_7_Minute_MAD': group_mad})

#get_grouped_minute(X_train)

Unnamed: 0_level_0,fet_7_Minute_Mean,fet_7_Minute_MAD,fet_7_Minute_Sum,fet_7_Minute_Sum_MAD
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,-9.951471e-06,0.000042,-0.001134,0.004810
2,2.410986e-05,0.000027,0.002749,0.003038
3,-1.365349e-05,0.000032,-0.001556,0.003596
4,5.758270e-07,0.000027,0.000066,0.003030
5,-1.085946e-05,0.000029,-0.001238,0.003261
6,3.805322e-05,0.000033,0.004338,0.003787
7,-2.445015e-07,0.000031,-0.000028,0.003553
8,-1.124410e-05,0.000024,-0.001282,0.002787
9,6.758064e-07,0.000038,0.000077,0.004341
10,5.543010e-05,0.000061,0.006319,0.006978


### Generating Featuresets

In [38]:
'''
train_features = pd.concat([X_train.loc[:,featureset_colnames], get_some_interaction(X_train), get_mad(X_train), get_delta_ret(X_train), get_market_indicators_fet7_fet5(X_train), get_market_indicators_fet7(X_train),get_smoothed_descriptives(X_train), get_descriptives(X_train), get_interview_features(X_train)], axis=1)
train_features['Ret_MinusOne']= X_train.Ret_MinusOne
train_features['Ret_MinusTwo']= X_train.Ret_MinusTwo
train_features['max_dd']= get_max_dd(X_train)
'''
def get_features(df):
    """Column-wise concatenation of `df` with the output of every feature builder.

    NOTE(review): get_max_dd returns an unnamed Series, so its column gets an
    integer label instead of a descriptive name - confirm downstream readers expect that.
    """
    builders = (
        get_some_interaction,
        get_mad,
        get_delta_ret,
        get_market_indicators_fet7_fet5,
        get_market_indicators_fet7,
        get_smoothed_descriptives,
        get_descriptives,
        get_interview_features,
        get_period_ret,
        get_grouped_minute,
        get_max_dd,
    )
    return pd.concat([df] + [build(df) for build in builders], axis=1)


# Build the full feature set for the training frame first, then for the test frame.
train_features, test_features = (get_features(frame) for frame in (X_train, X_test))

In [39]:
# Report the dimensions of both engineered feature frames, one per line.
print('\n'.join(
    '{} features.shape: {}'.format(label, frame.shape)
    for label, frame in (('Train', train_features), ('Test', test_features))
))

Train features.shape: (40000, 272)
Test features.shape: (120000, 272)


In [40]:
# Number of columns containing at least one NaN. The old label said "features.shape",
# which did not describe the value being printed.
print('Train feature columns with NaN:', train_features.isna().any().sum())
print('Test feature columns with NaN:', test_features.isna().any().sum())

Train features.shape: 0
Test features.shape: 0


### Building another featureset for daily with interactions

In [37]:
def get_features_2(df):
    """Smaller feature set: interaction and interview features plus a few selected columns.

    NOTE(review): get_max_dd returns an unnamed Series, so its column gets an
    integer label instead of a descriptive name - confirm downstream readers expect that.
    """
    parts = [
        get_some_interaction(df),
        get_interview_features(df),
        get_delta_ret(df)['Delta_120'],
        get_smoothed_descriptives(df)['smoothed_minute_mean'],
        get_descriptives(df)['minute_mad'],
        get_period_ret(df)['PR_120'],
        get_grouped_minute(df),
        get_max_dd(df),
    ]
    return pd.concat(parts, axis=1)
#, get_market_indicators_fet7_fet5(df), get_market_indicators_fet7(df)
X_train_fet_2, X_test_fet_2 = (get_features_2(frame) for frame in (X_train, X_test))

# Sanity checks: number of columns containing NaN, then the frame shapes.
display(
    X_train_fet_2.isna().any().sum(),
    X_test_fet_2.isna().any().sum(),
    X_train_fet_2.shape,
    X_test_fet_2.shape,
)

# Persist the reduced feature sets next to the notebook.
X_train_fet_2.to_csv(path_or_buf='X_features_2.csv')
X_test_fet_2.to_csv(path_or_buf='test_features_2.csv')

0

0

(40000, 23)

(120000, 23)

## Features For y

In [121]:
# Peek at the first five rows of the target frame.
y_train.head(n=5)

Unnamed: 0_level_0,Ret_121,Ret_122,Ret_123,Ret_124,Ret_125,Ret_126,Ret_127,Ret_128,Ret_129,Ret_130,...,Ret_173,Ret_174,Ret_175,Ret_176,Ret_177,Ret_178,Ret_179,Ret_180,Ret_PlusOne,Ret_PlusTwo
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.000137,-0.000565,-0.000704,-0.005605,0.000826,0.001966,0.002676,0.000422,-0.000428,-0.000539,...,0.000861,0.000544,-0.002688,0.002246,-0.000838,-0.0006953224,3e-06,-0.001974,-0.019512,0.028846
2,0.000261,0.000238,-0.000113,-0.000248,-0.000351,-3e-06,-2e-06,0.000267,0.000263,-0.00024,...,-0.000497,0.00024,-0.000129,0.000123,0.000248,3.315418e-07,3e-06,2.7e-05,-0.002939,-0.010253
3,0.000271,0.000126,0.000655,-0.000515,-0.000924,-0.000769,0.000282,-0.00012,0.000408,-0.000267,...,0.000255,0.000278,-0.000524,-0.000394,0.000116,0.0005322557,0.000274,0.000784,-0.024791,0.015711
4,0.00021,-0.000301,-0.000142,6.8e-05,-0.000508,-0.000122,0.000295,0.000297,-0.000199,-0.000217,...,-0.000161,-0.000155,0.000346,-9e-05,0.000288,-0.0001281102,7.4e-05,0.000341,-0.00568,-0.00219
5,1.1e-05,1.3e-05,0.000622,0.000612,-0.001207,0.001233,-0.001234,-3e-06,0.001226,9e-06,...,-5e-06,-7e-06,-0.001235,2.7e-05,0.002449,8.619882e-06,0.001209,-4e-06,0.036104,-0.026552


In [122]:
def get_prices_future(return_rates):
    """Price path over the future minute returns (`minute_ret_fut_colnames`), base 100.

    Column k holds 100 * (1 + r_1) * ... * (1 + r_k); the input frame is not modified.
    """
    growth = return_rates.loc[:, minute_ret_fut_colnames] + 1
    # Scaling the first growth factor by 100 turns the cumulative product into a price level.
    growth.iloc[:, 0] = growth.iloc[:, 0] * 100
    return growth.cumprod(axis=1)


def _prices_for(return_rates):
    """Pick the price path that matches the columns present in `return_rates`.

    Frames carrying the future minute columns use `get_prices_future`; any other
    frame falls back to `get_prices` (the past minutes). Without this, redefining
    the smoothed helpers in this cell would silently break every X-side feature
    builder that is (re)run after this cell.
    """
    if set(minute_ret_fut_colnames).issubset(return_rates.columns):
        return get_prices_future(return_rates)
    return get_prices(return_rates)


def get_smoothed_prices(return_rates, rol_window):
    """Rolling-mean smoothed price path; works for both past (X) and future (y) frames.

    The first `rol_window - 1` columns of the result are NaN because the window
    is not yet full there.
    """
    prices = _prices_for(return_rates)
    # DataFrame.rolling(axis=1) is deprecated (pandas 2.1); roll over the transpose instead.
    return prices.T.rolling(window=rol_window).mean().T


def get_smoothed_ret(return_rates, rol_window, pct_period):
    """Percentage changes of the smoothed price path; works for both past (X) and future (y) frames.

    Every column that contains a NaN in any row is dropped, which removes at
    least the leading warm-up columns.
    """
    prices = _prices_for(return_rates)
    # One vectorised rolling pass replaces the per-row Python-level apply.
    smoothed = prices.T.rolling(window=rol_window).mean().T
    return smoothed.pct_change(periods=pct_period, axis=1).dropna(axis=1)

In [123]:
from sklearn.preprocessing import Binarizer

def _build_y_features(y, binarizer):
    """Return a copy of `y` extended with period returns, direction flags and MAD columns.

    Parameters
    ----------
    y : pandas.DataFrame
        Target frame holding Ret_121 .. Ret_180 and Ret_PlusOne.
    binarizer : object with `fit_transform`
        Maps a (n, 1) array of returns to 0/1 flags (here sklearn's `Binarizer`).
    """
    features = y.copy()

    # Price path of the future minutes, computed once and reused for every horizon.
    future_prices = get_prices_future(y)
    start_price = future_prices['Ret_121']

    # get return over period: (end price - start price) / start price.
    # The 10/30/60 horizons used to divide by the END price while the 5-minute one
    # divided by the start price; all horizons now use the start price.
    for horizon, end_col in ((5, 'Ret_125'), (10, 'Ret_130'), (30, 'Ret_150'), (60, 'Ret_180')):
        features['Ret_Next_{}'.format(horizon)] = (future_prices[end_col] - start_price) / start_price

    #Binarizer
    for target, source in (
        ('Direction_Next_5', 'Ret_Next_5'),
        ('Direction_Next_10', 'Ret_Next_10'),
        ('Direction_Next_60', 'Ret_Next_60'),
        ('Direction_PlusOne', 'Ret_PlusOne'),
    ):
        features[target] = binarizer.fit_transform(features[source].values.reshape(-1, 1)).ravel()

    #MAD
    # DataFrame.mad was removed in pandas 2.0; this is the same statistic:
    # the mean absolute distance of each value from its row mean.
    for horizon, end_col in ((5, 'Ret_125'), (10, 'Ret_130'), (30, 'Ret_150')):
        window = y.loc[:, 'Ret_121':end_col]
        features['MAD_Next_{}'.format(horizon)] = window.sub(window.mean(axis=1), axis=0).abs().mean(axis=1)

    return features


binarizer = Binarizer()
y_features = _build_y_features(y_train, binarizer)

# TODO: trend of the time period


In [125]:
# Dimensions (rows, columns) of the engineered target frame.
y_features.shape

(40000, 72)

# Output

In [41]:
# Persist the engineered training features; the y-side export stays disabled.
train_features.to_csv(path_or_buf='X_features.csv')
#y_features.to_csv('y_features.csv')

In [42]:
# Persist the engineered test features.
test_features.to_csv(path_or_buf='test_features.csv')
