# Logistic regression strategy implementation

In [3]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import datetime
import pandas as pd
import yfinance as yf

In [4]:
def download_data(stock, start, end):
    """Download the adjusted closing prices of ``stock`` via yfinance.

    Parameters
    ----------
    stock : ticker symbol forwarded to ``yf.download`` (e.g. ``'IBM'``).
    start, end : bounds of the download window, forwarded to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        A frame with a single column named ``'Close'`` holding the values of
        the downloaded ``'Adj Close'`` column (note the rename: downstream
        code refers to the *adjusted* close simply as ``'Close'``).

    Raises
    ------
    KeyError
        If the downloaded frame has no ``'Adj Close'`` column.
    """
    # Ask for unadjusted OHLC data explicitly. Recent yfinance releases
    # default to auto_adjust=True, which drops the 'Adj Close' column that
    # this function reads; older releases already behaved as if False.
    ticker = yf.download(stock, start=start, end=end, auto_adjust=False)
    adj_close = ticker['Adj Close']

    # Recent yfinance releases may return MultiIndex columns even for a single
    # ticker, in which case the selection above is a one-column DataFrame
    # rather than a Series. Collapse it so 'Close' is always a plain column.
    if isinstance(adj_close, pd.DataFrame) and adj_close.shape[1] == 1:
        adj_close = adj_close.iloc[:, 0]

    return pd.DataFrame({'Close': adj_close})

In [14]:
def construct_features(data, lags=2):
    """Add lagged-return features and the direction target to ``data``.

    The frame is modified in place, printed, and also returned so callers do
    not have to rely on the side effect alone.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'Close'`` column.
    lags : int, default 2
        Number of lag columns to create (``Lag1`` ... ``Lag<lags>``).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, now also holding:

        * ``Lag<k>``        - percentage change of ``'Close'`` observed k rows
          earlier.
        * ``Today Change``  - percentage change of ``'Close'`` versus the
          previous row.
        * ``Direction``     - ``1`` where ``Today Change`` is positive,
          ``-1`` otherwise.

    Notes
    -----
    Rows whose ``Today Change`` is NaN (at least the first row) are labelled
    ``-1`` because ``NaN > 0`` evaluates to False. The leading rows of the
    lag columns are NaN as well. Drop those rows before fitting a model.
    """
    # Percentage change of the closing price. Every lag feature is this same
    # series shifted down, so compute it once instead of shifting raw prices
    # and re-deriving the change for each lag.
    today_change = data['Close'].pct_change() * 100

    # Create the lag columns before 'Today Change' so that a fresh frame ends
    # up with the column order Close, Lag1..LagN, Today Change, Direction.
    for lag in range(1, lags + 1):
        data['Lag%d' % lag] = today_change.shift(lag)

    data['Today Change'] = today_change

    # direction - the target variable (+1 for an up move, -1 otherwise)
    data['Direction'] = np.where(data['Today Change'] > 0, 1, -1)

    print(data)
    return data

In [15]:
if __name__ == '__main__':
    # Sample window: 1 Jan 2017 up to 1 Jan 2018.
    start_date = datetime.datetime(year=2017, month=1, day=1)
    end_date = datetime.datetime(year=2018, month=1, day=1)

    # Fetch the IBM prices, then derive the features; construct_features
    # modifies stock_data in place and prints the resulting frame.
    stock_data = download_data(stock='IBM', start=start_date, end=end_date)
    construct_features(stock_data)

[*********************100%***********************]  1 of 1 completed
                 Close      Lag1      Lag2  Today Change  Direction
Date                                                               
2017-01-03  136.249847       NaN       NaN           NaN         -1
2017-01-04  137.936768       NaN       NaN      1.238108          1
2017-01-05  137.480408  1.238108       NaN     -0.330847         -1
2017-01-06  138.156799 -0.330847  1.238108      0.491991          1
2017-01-09  136.624695  0.491991 -0.330847     -1.108961         -1
...                ...       ...       ...           ...        ...
2017-12-22  129.098679 -0.948044 -0.182706      0.660073          1
2017-12-26  129.378067  0.660073 -0.948044      0.216415          1
2017-12-27  129.632019  0.216415  0.660073      0.196287          1
2017-12-28  130.402390  0.196287  0.216415      0.594275          1
2017-12-29  129.877502  0.594275  0.196287     -0.402513         -1

[251 rows x 5 columns]
