## Getting the training and testing data

In [44]:
import yfinance
import pandas

In [45]:
def get_stock_data(ticker: str, start_date: str, end_date: str) -> pandas.DataFrame:
    data: pandas.DataFrame = yfinance.download(ticker, start=start_date, end=end_date, multi_level_index=False)
    return data

In [46]:
training_start_date: str = "2012-01-01"
training_stop_date: str = "2017-01-01"

In [47]:
training_data: pandas.DataFrame = get_stock_data("AMZN", training_start_date, training_stop_date)
training_data.to_csv("training_data/AMZN.csv")

[*********************100%***********************]  1 of 1 completed




In [48]:
test_start_date: str = "2017-01-01"
test_stop_date: str = "2020-01-01"

In [49]:
testing_data: pandas.DataFrame = get_stock_data("AMZN", test_start_date, test_stop_date)
testing_data.to_csv("test_data/AMZN.csv")

[*********************100%***********************]  1 of 1 completed


## Generating the training inputs and outputs

In [50]:
import csv

In [51]:
training_input_datas = []
training_output_datas = []

with open("training_data/AMZN.csv", newline="") as training_file:
    reader =  csv.reader(training_file)
    prices = tuple(reader)[1:]
    for idx in range(5, len(prices[5:-1])):
        input_data = []
        for i in range(-5, 0):
            input_data.extend(list(map(float, prices[idx+i][1:])))
        training_input_datas.append(input_data)
        training_output_datas.append(list(map(float, prices[idx][1:])))

print(training_input_datas[:5])
print(training_output_datas[:5])

[[8.951499938964844, 8.973999977111816, 8.77750015258789, 8.794500350952148, 102216000.0, 8.875499725341797, 9.024999618530273, 8.803500175476074, 8.96049976348877, 84104000.0, 8.880499839782715, 8.912500381469727, 8.702500343322754, 8.79699993133545, 76182000.0, 9.130499839782715, 9.232500076293945, 8.875, 8.903499603271484, 140168000.0, 8.928000450134277, 9.218500137329102, 8.850000381469727, 9.137999534606934, 101138000.0], [8.875499725341797, 9.024999618530273, 8.803500175476074, 8.96049976348877, 84104000.0, 8.880499839782715, 8.912500381469727, 8.702500343322754, 8.79699993133545, 76182000.0, 9.130499839782715, 9.232500076293945, 8.875, 8.903499603271484, 140168000.0, 8.928000450134277, 9.218500137329102, 8.850000381469727, 9.137999534606934, 101138000.0, 8.967000007629395, 9.119999885559082, 8.854999542236328, 9.055000305175781, 79716000.0], [8.880499839782715, 8.912500381469727, 8.702500343322754, 8.79699993133545, 76182000.0, 9.130499839782715, 9.232500076293945, 8.875, 8.9034

In [52]:
testing_input_datas = []
testing_output_datas = []

with open("test_data/AMZN.csv", newline="") as training_file:
    reader =  csv.reader(training_file)
    prices = tuple(reader)[1:]
    for idx in range(5, len(prices[5:-1])):
        input_data = []
        for i in range(-5, 0):
            input_data.extend(list(map(float, prices[idx+i][1:])))
        testing_input_datas.append(input_data)
        testing_output_datas.append(list(map(float, prices[idx][1:])))

print(testing_input_datas[:5])
print(testing_output_datas[:5])

[[37.68349838256836, 37.9379997253418, 37.3849983215332, 37.895999908447266, 70422000.0, 37.85900115966797, 37.98400115966797, 37.709999084472656, 37.919498443603516, 50210000.0, 39.022499084472656, 39.119998931884766, 38.01300048828125, 38.07749938964844, 116602000.0, 39.79949951171875, 39.97200012207031, 38.92399978637695, 39.11800003051758, 119724000.0, 39.84600067138672, 40.0885009765625, 39.5885009765625, 39.900001525878906, 68922000.0], [37.85900115966797, 37.98400115966797, 37.709999084472656, 37.919498443603516, 50210000.0, 39.022499084472656, 39.119998931884766, 38.01300048828125, 38.07749938964844, 116602000.0, 39.79949951171875, 39.97200012207031, 38.92399978637695, 39.11800003051758, 119724000.0, 39.84600067138672, 40.0885009765625, 39.5885009765625, 39.900001525878906, 68922000.0, 39.79499816894531, 39.900001525878906, 39.47700119018555, 39.83000183105469, 51168000.0], [39.022499084472656, 39.119998931884766, 38.01300048828125, 38.07749938964844, 116602000.0, 39.7994995117

## Training using scikit-learn

In [53]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [None]:
model = Pipeline([
    ("scaler", StandardScaler()),
    ("regressor", MLPRegressor(hidden_layer_sizes=(75, 75), max_iter=1000000, learning_rate="adaptive"))
])

In [67]:
model.fit(training_input_datas, training_output_datas)

In [68]:
preds = model.predict(testing_input_datas)

In [69]:
r2 = r2_score(testing_output_datas, preds)
mse = mean_squared_error(testing_output_datas, preds)

print(f"R2 score is {r2}")
print(f"MSE score is {mse}")

R2 score is -4.428110422611911
MSE score is 1905560881250656.8
