In [1]:
import pandas as pd 
import pandas_datareader as data
import sys 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
def load_financial_data(start_date,end_date,symbol,output_file):
    path = sys.path[0][:-27]+"stocks/"+symbol+"|"+output_file
    try:
        df = pd.read_pickle(path)
    except FileNotFoundError:
        df = data.DataReader(symbol,'yahoo',start_date,end_date)
        df.to_pickle(path)
    return df

In this code, the following applies:

1)The classification response variable is +1 if the close price tomorrow is higher than the close price today, and -1 if the close price tomorrow is lower than the close price today.

2)For this example, we assume that the close price tomorrow is not the same as the close price today, which we can choose to handle by creating a third categorical value, 0.

In [3]:
def create_classification_trading_condition(df):
    df['Open-Close'] = df.Open - df.Close
    df['High-Low'] = df.High - df.Low
    df['Target'] = df.close.shift(-1)-df.close
    df = df.dropna()
    X = df[['Open-Close', 'High-Low']]
    Y = np.where(df['Close'].shift(-1) > df['Close'], 1, -1)
    return (X, Y)

In this code, the following applies:

1)It is a positive value if the price goes up tomorrow, a negative value if the price goes down tomorrow, and zero if the price does not change.

2)The sign of the value indicates the direction, and the magnitude of the response variable captures the magnitude of the price move.

In [4]:
def create_regression_trading_condition(df):
    df['Open-Close'] = df.Open - df.Close
    df['High-Low'] = df.High - df.Low
    df['Target'] = df.close.shift(-1)-df.close
    df = df.dropna()
    X = df[['Open-Close', 'High-Low']]
    Y = df['Close'].shift(-1) - df['Close']
    return (X, Y)

In [5]:
def train_test_split_group(X,Y,split_ratio=0.8):
    return train_test_split(X,Y,shuffle=False,train_size=split_ratio)

In [6]:
stock_data = load_financial_data('2001-01-01','2018-01-01','ITC.NS','ITC_data_large.pkl')

In [7]:
X, Y = create_regression_trading_condition(stock_data)

AttributeError: 'DataFrame' object has no attribute 'close'

In [None]:
pd.plotting.scatter_matrix(stock_data[['Open-Close', 'High-Low','Target']], grid=True, diagonal='kde')

In [8]:
stock_data

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,Open-Close,High-Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2001-01-01,20.177776,19.719999,19.977777,19.863333,24715935.0,13.358301,0.114445,0.457777
2001-01-02,20.222221,19.906666,19.906666,20.158888,31447890.0,13.557064,-0.252222,0.315556
2001-01-03,21.216665,20.000000,20.088888,20.986666,164613915.0,14.113756,-0.897778,1.216665
2001-01-04,21.333332,20.480000,21.100000,20.566666,63363600.0,13.831298,0.533335,0.853333
2001-01-05,20.959999,20.401112,20.511110,20.808887,74853135.0,13.994199,-0.297777,0.558887
...,...,...,...,...,...,...,...,...
2017-12-26,265.000000,262.649994,263.250000,264.149994,9549857.0,236.290039,-0.899994,2.350006
2017-12-27,264.299988,261.649994,264.200012,262.149994,6091016.0,234.500961,2.050018,2.649994
2017-12-28,264.049988,261.049988,263.399994,261.850006,15889965.0,234.232620,1.549988,3.000000
2017-12-29,264.000000,262.250000,262.500000,263.250000,6565616.0,235.484970,-0.750000,1.750000
