In [4]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.linear_model import LinearRegression

# Read sphist.csv, convert the "Date" column to pandas timestamps, and order
# the rows by date ascending (oldest first) so that row-based shift/rolling
# operations applied afterwards look backwards in time.
sphist = (
    pd.read_csv("sphist.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .sort_values("Date")
)

# Generating indicators
#
# Every indicator is built from the Close series shifted down by one row, so
# the value stored on a row is computed only from rows strictly before it;
# the row's own Close never leaks into its features.
# NOTE(review): the windows count rows, not calendar days. If the file holds
# one row per trading day, a 365-row window spans more than a calendar year --
# confirm this is the intended horizon.
prev_close = sphist["Close"].shift()

# avg prev 5 days
sphist["prev 5-d avg"] = prev_close.rolling(5).mean()

# avg prev 30 days
sphist["prev 30-d avg"] = prev_close.rolling(30).mean()

# avg prev 365 days
sphist["prev 365-d avg"] = prev_close.rolling(365).mean()

# ratio of avg prev 5 days : avg prev 365 days
sphist["ratio 5d/365d avg"] = sphist["prev 5-d avg"] / sphist["prev 365-d avg"]

# stdev of prev 5 days
sphist["prev 5-d stdev"] = prev_close.rolling(5).std()

# stdev of prev 365 days
sphist["prev 365-d stdev"] = prev_close.rolling(365).std()

# stdev of prev 5 days : stdev of prev 365 days
sphist["ratio 5d/365d stdev"] = sphist["prev 5-d stdev"] / sphist["prev 365-d stdev"]

# Drop every row with a missing value in any column. This removes the leading
# rows whose rolling windows are not yet full (rolling() yields NaN until the
# window has the required number of observations).
sphist = sphist.dropna(axis=0)

# Chronological hold-out split: rows dated before 2013-01-01 train the model,
# every remaining row is the test set. A single boolean mask and its
# complement are used so the split does not depend on index labels being
# unique (dropping by train.index would also remove test rows that happen to
# share a label with a training row).
is_train = sphist["Date"] < datetime(2013, 1, 1)
train = sphist[is_train].sort_values("Date")
test = sphist[~is_train].sort_values("Date")

lin_r = LinearRegression()
# Columns excluded from the feature matrix: the raw same-row price/volume
# columns, the date, and the target itself. Whatever columns remain are used
# as predictors.
dropped_feat = ["High","Low","Open","Volume","Adj Close","Date", "Close"]

# Build each feature matrix once so fit and predict are guaranteed to see the
# same column set in the same order.
X_train = train.drop(columns=dropped_feat)
X_test = test.drop(columns=dropped_feat)

lin_r.fit(X_train, train["Close"])
pred = lin_r.predict(X_test)

# Mean absolute error over the test rows, computed with a vectorised reduction
# instead of iterating the Series with Python's builtin sum().
MAE = np.mean(np.abs(pred - test["Close"].to_numpy()))
print("Mean Absolute Error:", MAE, '\n')

# Attach predictions and the signed error (Predicted - Close) to the test
# frame; assign() returns a new frame rather than writing into a selection.
test = test.assign(
    Predicted=pred,
    Error=lambda frame: frame["Predicted"] - frame["Close"],
)
print(test[["Date", "Close", "Predicted", "Error"]])

Mean Absolute Error: 16.145140609743716 

          Date        Close    Predicted      Error
738 2013-01-02  1462.420044  1419.354407 -43.065637
737 2013-01-03  1459.369995  1425.505780 -33.864215
736 2013-01-04  1466.469971  1433.379736 -33.090235
735 2013-01-07  1461.890015  1443.360695 -18.529320
734 2013-01-08  1457.150024  1457.055697  -0.094327
733 2013-01-09  1461.020020  1464.869450   3.849430
732 2013-01-10  1472.119995  1464.460455  -7.659540
731 2013-01-11  1472.050049  1466.795298  -5.254751
730 2013-01-14  1470.680054  1467.743572  -2.936482
729 2013-01-15  1472.339966  1469.485739  -2.854227
728 2013-01-16  1472.630005  1472.825472   0.195467
727 2013-01-17  1480.939941  1475.584981  -5.354960
726 2013-01-18  1485.979980  1476.966922  -9.013058
725 2013-01-22  1492.560059  1479.515931 -13.044128
724 2013-01-23  1494.810059  1483.794688 -11.015371
723 2013-01-24  1494.819946  1488.391453  -6.428493
722 2013-01-25  1502.959961  1493.240665  -9.719296
721 2013-01-28  1500.1