In [2]:

# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "end_to_end_project"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)




In [3]:
import pandas as pd

test= pd.read_csv(r"C:\Users\alesi\Documents\group_project_ml\test.csv")
train= pd.read_csv(r"C:\Users\alesi\Documents\group_project_ml\train.csv")

In [7]:
print(test)
print(train)

             0         1         2         3         4         5         6  \
0    -0.670595 -0.839068 -0.734415 -0.587261 -0.788800 -0.975857 -0.774088   
1     0.188165  0.166410  0.321011  0.318078  0.641710  0.951932  1.170069   
2     0.886510  0.760716  0.751800  0.052198 -0.050958 -0.140734 -0.173480   
3    -0.187722  0.030199 -0.072558 -0.098400 -0.110795 -0.127632 -0.241193   
4     0.248822  0.168815  0.260804  0.505885  0.471486  1.018661  0.971406   
...        ...       ...       ...       ...       ...       ...       ...   
3136  1.033977  1.043510  1.029088  1.038866  1.044243  1.032999  1.034221   
3137  0.998393  0.997933  0.989993  1.001384  0.995516  0.983090  0.989993   
3138  1.288652  1.279170  1.271384  1.280467  1.276575  1.245434  1.208504   
3139  0.804169  0.821925  0.824755  0.836336  0.840196  0.827586  0.834277   
3140  1.063461  1.063461  1.065692  1.081557  1.058999  1.056272  1.066931   

             7         8         9  ...        40        41    

In [8]:
train.head()

Unnamed: 0,w,y,0,1,2,3,4,5,6,7,...,40,41,42,43,44,45,46,47,48,49
0,1,0.48313,0.79098,0.702555,0.52822,0.298746,0.025488,-0.17348,-0.24529,-0.405057,...,0.954288,1.143901,1.359252,1.081061,1.364409,1.449354,1.195431,1.195992,1.165327,0.77111
1,1,1.135624,0.765286,0.604512,0.414197,0.241638,0.181862,-0.03192,-0.070617,-0.18598,...,0.561545,0.678086,0.84895,1.133852,1.041396,1.242806,1.248121,1.331348,1.267123,1.292718
2,1,0.686081,0.702834,0.637708,0.798416,0.755065,0.705225,0.535391,0.613129,0.549732,...,1.328694,1.324254,1.272889,1.074786,0.75395,0.539693,0.402041,0.442759,0.487557,0.699007
3,1,-1.342005,0.018378,-0.097297,-0.02085,-0.083325,-0.268512,-0.486335,-0.73113,-0.924458,...,-0.386676,-0.357946,-0.612069,-0.698063,-0.891789,-1.127624,-1.535678,-1.490786,-1.85684,-1.441472
4,1,-0.241418,0.46312,0.665307,0.446953,0.48078,0.3925,0.309231,0.158462,0.190963,...,-0.174944,-0.529541,-0.191921,-0.258093,-0.542403,-0.414866,-0.48558,-0.76882,-0.662573,-0.211837


In [9]:
test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,-0.670595,-0.839068,-0.734415,-0.587261,-0.7888,-0.975857,-0.774088,-1.021334,-1.110286,-0.893337,...,-0.997627,-1.103794,-1.092988,-0.989165,-0.827528,-0.813729,-0.532411,-0.289483,-0.40772,-0.407505
1,0.188165,0.16641,0.321011,0.318078,0.64171,0.951932,1.170069,1.177711,0.987763,0.981345,...,0.743405,0.916254,0.866453,0.953677,0.716259,0.692816,0.446713,0.539733,0.279293,0.180641
2,0.88651,0.760716,0.7518,0.052198,-0.050958,-0.140734,-0.17348,0.178508,0.198187,0.357906,...,0.444142,0.492294,0.573348,0.546323,0.373874,0.699132,0.808303,1.118522,1.284887,1.541929
3,-0.187722,0.030199,-0.072558,-0.0984,-0.110795,-0.127632,-0.241193,-0.374608,-0.651771,-0.513491,...,-0.340927,-0.268253,-0.654777,-1.133722,-1.484557,-1.446644,-1.654337,-1.521009,-1.593825,-1.110684
4,0.248822,0.168815,0.260804,0.505885,0.471486,1.018661,0.971406,1.062348,0.986871,0.947982,...,0.422044,0.688196,0.382416,0.344843,0.177595,0.330549,0.595061,0.88486,1.125103,1.220779


In [10]:
financial_train = train.drop("y", axis=1) # drop labels for training 
y=train["y"].copy()

In [11]:
financial_train


Unnamed: 0,w,0,1,2,3,4,5,6,7,8,...,40,41,42,43,44,45,46,47,48,49
0,1,0.790980,0.702555,0.528220,0.298746,0.025488,-0.173480,-0.245290,-0.405057,-0.371818,...,0.954288,1.143901,1.359252,1.081061,1.364409,1.449354,1.195431,1.195992,1.165327,0.771110
1,1,0.765286,0.604512,0.414197,0.241638,0.181862,-0.031920,-0.070617,-0.185980,-0.188546,...,0.561545,0.678086,0.848950,1.133852,1.041396,1.242806,1.248121,1.331348,1.267123,1.292718
2,1,0.702834,0.637708,0.798416,0.755065,0.705225,0.535391,0.613129,0.549732,0.472387,...,1.328694,1.324254,1.272889,1.074786,0.753950,0.539693,0.402041,0.442759,0.487557,0.699007
3,1,0.018378,-0.097297,-0.020850,-0.083325,-0.268512,-0.486335,-0.731130,-0.924458,-0.995159,...,-0.386676,-0.357946,-0.612069,-0.698063,-0.891789,-1.127624,-1.535678,-1.490786,-1.856840,-1.441472
4,1,0.463120,0.665307,0.446953,0.480780,0.392500,0.309231,0.158462,0.190963,0.273600,...,-0.174944,-0.529541,-0.191921,-0.258093,-0.542403,-0.414866,-0.485580,-0.768820,-0.662573,-0.211837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7321,1,1.002615,1.007519,0.990882,1.009554,1.009953,1.016928,0.980784,0.919103,0.957826,...,1.003996,1.015111,1.014566,1.032765,1.035272,1.036289,1.027571,1.028043,1.032729,1.021831
7322,1,1.067617,1.075324,1.068344,1.086520,1.088120,1.071688,1.086666,1.074451,1.070816,...,1.022539,1.050458,1.064563,1.055257,1.057292,1.048132,1.050167,1.048422,1.051767,1.044205
7323,1,0.976882,0.978272,0.972884,0.978098,0.977229,0.968538,0.967669,0.954632,0.958978,...,1.050409,1.073527,1.069529,1.075787,1.072658,1.066400,1.069355,1.074744,1.074917,1.068486
7324,1,0.851952,0.853579,0.871475,0.878525,0.882863,0.880152,0.886117,0.882321,0.881236,...,1.042842,1.045553,1.039588,1.060737,1.053688,1.047180,1.052061,1.050434,1.056399,1.056941


In [12]:
financial_train.isnull()

Unnamed: 0,w,0,1,2,3,4,5,6,7,8,...,40,41,42,43,44,45,46,47,48,49
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7321,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7322,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7323,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7324,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [13]:
financial_train.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7326 entries, 0 to 7325
Data columns (total 51 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   w       7326 non-null   int64  
 1   0       7326 non-null   float64
 2   1       7326 non-null   float64
 3   2       7326 non-null   float64
 4   3       7326 non-null   float64
 5   4       7326 non-null   float64
 6   5       7326 non-null   float64
 7   6       7326 non-null   float64
 8   7       7326 non-null   float64
 9   8       7326 non-null   float64
 10  9       7326 non-null   float64
 11  10      7326 non-null   float64
 12  11      7326 non-null   float64
 13  12      7326 non-null   float64
 14  13      7326 non-null   float64
 15  14      7326 non-null   float64
 16  15      7326 non-null   float64
 17  16      7326 non-null   float64
 18  17      7326 non-null   float64
 19  18      7326 non-null   float64
 20  19      7326 non-null   float64
 21  20      7326 non-null   float64
 22  

In [14]:
sample_incomplete_rows = financial_train[financial_train.isnull().any(axis=1)].head()
sample_incomplete_rows

Unnamed: 0,w,0,1,2,3,4,5,6,7,8,...,40,41,42,43,44,45,46,47,48,49


In [15]:
financial_train.mean(axis=0)

#financial_train.std(axis=0)

w     1.000000
0     0.016903
1     0.018974
2     0.023204
3     0.027638
4     0.031068
5     0.039904
6     0.045646
7     0.051259
8     0.055237
9     0.059982
10    0.063486
11    0.060599
12    0.059525
13    0.057512
14    0.054321
15    0.049322
16    0.046435
17    0.044036
18    0.040652
19    0.041987
20    0.040917
21    0.040409
22    0.040925
23    0.046406
24    0.048168
25    0.051337
26    0.055149
27    0.055678
28    0.057295
29    0.059418
30    0.054513
31    0.052467
32    0.050844
33    0.047891
34    0.043695
35    0.041315
36    0.039177
37    0.036693
38    0.034440
39    0.030586
40    0.036622
41    0.039024
42    0.043605
43    0.045400
44    0.049477
45    0.055759
46    0.060715
47    0.070436
48    0.078284
49    0.081028
dtype: float64

In [16]:
import sklearn
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
print(scaler.fit(financial_train))
print(scaler.transform(financial_train))

StandardScaler()
[[0.         1.47338019 1.30235825 ... 1.30566091 1.23364047 0.7724038 ]
 [0.         1.42447536 1.11556794 ... 1.46267542 1.34916464 1.35623541]
 [0.         1.30560409 1.17881301 ... 0.43189994 0.46446768 0.69169975]
 ...
 [0.         1.82722656 1.82765497 ... 1.16501074 1.13103868 1.10525443]
 [0.         1.58943544 1.59008974 ... 1.136811   1.11002293 1.09233256]
 [0.         2.16700344 2.16893165 ... 1.13269484 1.09770608 1.05858625]]


In [17]:
fin_train_tr=scaler.fit_transform(financial_train)

In [18]:
fin_train_tr

array([[0.        , 1.47338019, 1.30235825, ..., 1.30566091, 1.23364047,
        0.7724038 ],
       [0.        , 1.42447536, 1.11556794, ..., 1.46267542, 1.34916464,
        1.35623541],
       [0.        , 1.30560409, 1.17881301, ..., 0.43189994, 0.46446768,
        0.69169975],
       ...,
       [0.        , 1.82722656, 1.82765497, ..., 1.16501074, 1.13103868,
        1.10525443],
       [0.        , 1.58943544, 1.59008974, ..., 1.136811  , 1.11002293,
        1.09233256],
       [0.        , 2.16700344, 2.16893165, ..., 1.13269484, 1.09770608,
        1.05858625]])

In [19]:
fin_train_tr.shape

(7326, 51)

In [20]:
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression()
reg=lin_reg.fit(fin_train_tr, y)

In [21]:
reg.coef_


array([-3.22655118e-17, -3.34435281e-02, -2.18638984e-02, -4.79024293e-03,
        1.00354517e-02,  9.06220587e-03,  3.37682379e-03, -2.60761139e-02,
       -4.78667970e-03, -9.22734994e-03, -6.35231575e-03, -1.64575017e-02,
        1.18452641e-02, -4.95130008e-03,  4.94386796e-03,  4.89572843e-03,
       -1.73787263e-02,  3.21569799e-03,  9.23936053e-04,  1.92482862e-02,
        2.33734567e-02,  2.88237312e-02,  3.28647979e-02,  4.46000499e-03,
        3.52186374e-03,  1.41236562e-02,  5.80923820e-03,  1.39946248e-02,
        5.15499646e-02,  3.94230051e-02,  2.66438936e-02, -1.18676667e-02,
        2.86494500e-03, -2.85030348e-03,  3.21493729e-02,  5.47392023e-02,
        6.09082667e-02,  3.87584383e-02, -1.53845735e-02, -4.30981803e-02,
       -5.68740933e-02, -5.75771621e-03,  2.81220450e-02,  5.59567792e-02,
        3.56888898e-02, -2.53793003e-03, -1.30826349e-01, -1.70443946e-01,
       -6.74140281e-02,  2.44145141e-01,  7.11379589e-01])

In [22]:

from sklearn.metrics import mean_squared_error

fin_predictions = lin_reg.predict(fin_train_tr)
lin_mse = mean_squared_error(y, fin_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

0.15053862493315537

In [23]:
from sklearn.metrics import mean_absolute_error

lin_mae = mean_absolute_error(y, fin_predictions)
lin_mae

0.12126773129689675

In [24]:
from sklearn.metrics import r2_score
r2_score(y, fin_predictions)

0.9727796241669521

In [25]:
from sklearn.svm import SVR
supp=SVR(kernel="rbf", C=15, epsilon=0.03)
reg2=supp.fit(financial_train,y)

In [26]:
reg2.get_params()

{'C': 15,
 'cache_size': 200,
 'coef0': 0.0,
 'degree': 3,
 'epsilon': 0.03,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [27]:
reg2.score(financial_train,y)

0.9920424960180685

In [28]:



fin_prediction = reg2.predict(fin_train_tr)
lin_mse = mean_squared_error(y, fin_prediction)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

0.5415128683729208

In [29]:
from sklearn.neural_network import MLPRegressor
regr=MLPRegressor()
why=regr.fit(fin_train_tr,y)
why.score(fin_train_tr,y)

0.978999848632362

In [30]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test= train_test_split(financial_train, np.ravel(y), test_size=0.20, random_state=102)

In [31]:
from sklearn.svm import SVR
from sklearn.metrics import classification_report, confusion_matrix
supp=SVR(kernel="rbf", C=15, epsilon=0.03)
reg2=supp.fit(X_train,y_train)


In [76]:
reg2.score(X_test, y_test)

0.9753129462527894

In [72]:
pr=reg2.predict(X_test)

In [49]:
y_test


array([-0.18481207,  0.19511758,  0.18541586, ...,  0.5392682 ,
       -0.38838834,  0.06417495])

In [58]:
from sklearn.model_selection import GridSearchCV
param_grid={'C':[1,10,100], "epsilon":[1,0.1,0.01], "kernel":["rbf"]}
grid=GridSearchCV(SVR(),param_grid, refit=True, verbose=3)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.784, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.780, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.793, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.783, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.797, total=   0.0s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.975, total=   2.2s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.977, total=   2.3s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.978, total=   2.6s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.978, total=   2.3s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.980, total=   2.3s
[CV] C=1, epsilon=0.01, kernel=rbf ...................................
[CV] ....... C=1, epsilon=0.01, kernel=rbf, score=0.975, total=   4.6s
[CV] C=1, epsilon=0.01, kernel=rbf ...................................
[CV] ....... C=1, epsilon=0.01, kernel=rbf, score=0.977, total=   4.2s
[CV] C=1, epsilon=0.01, kernel=rbf ...................................
[CV] .

[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:  6.7min finished


GridSearchCV(estimator=SVR(),
             param_grid={'C': [1, 10, 100], 'epsilon': [1, 0.1, 0.01],
                         'kernel': ['rbf']},
             verbose=3)

In [61]:
grid.fit(X_train, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.784, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.780, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.793, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.783, total=   0.0s
[CV] C=1, epsilon=1, kernel=rbf ......................................
[CV] .......... C=1, epsilon=1, kernel=rbf, score=0.797, total=   0.0s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.975, total=   2.3s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.977, total=   2.2s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.978, total=   2.2s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.978, total=   2.2s
[CV] C=1, epsilon=0.1, kernel=rbf ....................................
[CV] ........ C=1, epsilon=0.1, kernel=rbf, score=0.980, total=   2.2s
[CV] C=1, epsilon=0.01, kernel=rbf ...................................


KeyboardInterrupt: 

In [62]:
print(grid.best_params_)

{'C': 1, 'epsilon': 0.1, 'kernel': 'rbf'}


In [63]:
print(grid.best_estimator_)

SVR(C=1)


In [64]:
grid_predictions=grid.predict(X_test)

In [68]:
grid.score(X_test,y_test)

0.9776314084500491

In [69]:
grid_predictions

array([-0.28248174,  0.11965822,  0.43475427, ...,  0.42386377,
       -0.58005265, -0.04143172])

In [70]:
y

0       0.483130
1       1.135624
2       0.686081
3      -1.342005
4      -0.241418
          ...   
7321    1.026517
7322    1.039843
7323    1.064836
7324    1.052061
7325    1.028131
Name: y, Length: 7326, dtype: float64

In [71]:
y_test

array([-0.18481207,  0.19511758,  0.18541586, ...,  0.5392682 ,
       -0.38838834,  0.06417495])

In [77]:

lin_mse = mean_squared_error(grid_predictions, y_test)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

0.13314876299544767

In [78]:
lin_reg = LinearRegression()

reg=lin_reg.fit(X_train, y_train)
reg.score(X_test,y_test)

0.9715156474483099

In [81]:
predictions=reg.predict(X_test)
lin_mse = mean_squared_error(predictions, y_test)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

0.15025225288423888

In [43]:
from sklearn.model_selection import cross_val_score
clf = SVR(kernel='rbf', C=1, epsilon=0.1)
scores = cross_val_score(clf, X_train, y_train, cv=5)
scores


array([0.97476061, 0.97716372, 0.97810266, 0.9783374 , 0.98019588])

NameError: name 'test' is not defined

In [32]:
test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,-0.670595,-0.839068,-0.734415,-0.587261,-0.788800,-0.975857,-0.774088,-1.021334,-1.110286,-0.893337,...,-0.997627,-1.103794,-1.092988,-0.989165,-0.827528,-0.813729,-0.532411,-0.289483,-0.407720,-0.407505
1,0.188165,0.166410,0.321011,0.318078,0.641710,0.951932,1.170069,1.177711,0.987763,0.981345,...,0.743405,0.916254,0.866453,0.953677,0.716259,0.692816,0.446713,0.539733,0.279293,0.180641
2,0.886510,0.760716,0.751800,0.052198,-0.050958,-0.140734,-0.173480,0.178508,0.198187,0.357906,...,0.444142,0.492294,0.573348,0.546323,0.373874,0.699132,0.808303,1.118522,1.284887,1.541929
3,-0.187722,0.030199,-0.072558,-0.098400,-0.110795,-0.127632,-0.241193,-0.374608,-0.651771,-0.513491,...,-0.340927,-0.268253,-0.654777,-1.133722,-1.484557,-1.446644,-1.654337,-1.521009,-1.593825,-1.110684
4,0.248822,0.168815,0.260804,0.505885,0.471486,1.018661,0.971406,1.062348,0.986871,0.947982,...,0.422044,0.688196,0.382416,0.344843,0.177595,0.330549,0.595061,0.884860,1.125103,1.220779
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3136,1.033977,1.043510,1.029088,1.038866,1.044243,1.032999,1.034221,1.027377,1.027377,1.005378,...,0.977756,0.944023,0.945001,0.938646,0.955268,0.945735,0.940112,0.943290,0.956245,0.947446
3137,0.998393,0.997933,0.989993,1.001384,0.995516,0.983090,0.989993,0.981479,0.993330,0.995679,...,1.017606,1.025608,1.027689,1.026088,1.026248,1.024808,1.022247,1.024488,1.025288,1.017286
3138,1.288652,1.279170,1.271384,1.280467,1.276575,1.245434,1.208504,1.207506,1.064278,1.063479,...,1.023056,1.022158,1.032638,1.026849,1.016369,1.014872,1.013774,1.023356,1.026450,1.028047
3139,0.804169,0.821925,0.824755,0.836336,0.840196,0.827586,0.834277,0.833762,0.828101,0.832733,...,1.101904,1.101132,1.083119,1.113742,1.130468,1.122491,1.126608,1.134843,1.133042,1.113484


In [35]:
from sklearn.svm import SVR
supp=SVR(kernel="rbf", C=10, epsilon=0.1)
reg2=supp.fit(financial_train,y)

In [36]:
pr=reg2.predict(test)

ValueError: X.shape[1] = 50 should be equal to 51, the number of features at training time

In [37]:
test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,-0.670595,-0.839068,-0.734415,-0.587261,-0.788800,-0.975857,-0.774088,-1.021334,-1.110286,-0.893337,...,-0.997627,-1.103794,-1.092988,-0.989165,-0.827528,-0.813729,-0.532411,-0.289483,-0.407720,-0.407505
1,0.188165,0.166410,0.321011,0.318078,0.641710,0.951932,1.170069,1.177711,0.987763,0.981345,...,0.743405,0.916254,0.866453,0.953677,0.716259,0.692816,0.446713,0.539733,0.279293,0.180641
2,0.886510,0.760716,0.751800,0.052198,-0.050958,-0.140734,-0.173480,0.178508,0.198187,0.357906,...,0.444142,0.492294,0.573348,0.546323,0.373874,0.699132,0.808303,1.118522,1.284887,1.541929
3,-0.187722,0.030199,-0.072558,-0.098400,-0.110795,-0.127632,-0.241193,-0.374608,-0.651771,-0.513491,...,-0.340927,-0.268253,-0.654777,-1.133722,-1.484557,-1.446644,-1.654337,-1.521009,-1.593825,-1.110684
4,0.248822,0.168815,0.260804,0.505885,0.471486,1.018661,0.971406,1.062348,0.986871,0.947982,...,0.422044,0.688196,0.382416,0.344843,0.177595,0.330549,0.595061,0.884860,1.125103,1.220779
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3136,1.033977,1.043510,1.029088,1.038866,1.044243,1.032999,1.034221,1.027377,1.027377,1.005378,...,0.977756,0.944023,0.945001,0.938646,0.955268,0.945735,0.940112,0.943290,0.956245,0.947446
3137,0.998393,0.997933,0.989993,1.001384,0.995516,0.983090,0.989993,0.981479,0.993330,0.995679,...,1.017606,1.025608,1.027689,1.026088,1.026248,1.024808,1.022247,1.024488,1.025288,1.017286
3138,1.288652,1.279170,1.271384,1.280467,1.276575,1.245434,1.208504,1.207506,1.064278,1.063479,...,1.023056,1.022158,1.032638,1.026849,1.016369,1.014872,1.013774,1.023356,1.026450,1.028047
3139,0.804169,0.821925,0.824755,0.836336,0.840196,0.827586,0.834277,0.833762,0.828101,0.832733,...,1.101904,1.101132,1.083119,1.113742,1.130468,1.122491,1.126608,1.134843,1.133042,1.113484


In [38]:
financial_train

Unnamed: 0,w,0,1,2,3,4,5,6,7,8,...,40,41,42,43,44,45,46,47,48,49
0,1,0.790980,0.702555,0.528220,0.298746,0.025488,-0.173480,-0.245290,-0.405057,-0.371818,...,0.954288,1.143901,1.359252,1.081061,1.364409,1.449354,1.195431,1.195992,1.165327,0.771110
1,1,0.765286,0.604512,0.414197,0.241638,0.181862,-0.031920,-0.070617,-0.185980,-0.188546,...,0.561545,0.678086,0.848950,1.133852,1.041396,1.242806,1.248121,1.331348,1.267123,1.292718
2,1,0.702834,0.637708,0.798416,0.755065,0.705225,0.535391,0.613129,0.549732,0.472387,...,1.328694,1.324254,1.272889,1.074786,0.753950,0.539693,0.402041,0.442759,0.487557,0.699007
3,1,0.018378,-0.097297,-0.020850,-0.083325,-0.268512,-0.486335,-0.731130,-0.924458,-0.995159,...,-0.386676,-0.357946,-0.612069,-0.698063,-0.891789,-1.127624,-1.535678,-1.490786,-1.856840,-1.441472
4,1,0.463120,0.665307,0.446953,0.480780,0.392500,0.309231,0.158462,0.190963,0.273600,...,-0.174944,-0.529541,-0.191921,-0.258093,-0.542403,-0.414866,-0.485580,-0.768820,-0.662573,-0.211837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7321,1,1.002615,1.007519,0.990882,1.009554,1.009953,1.016928,0.980784,0.919103,0.957826,...,1.003996,1.015111,1.014566,1.032765,1.035272,1.036289,1.027571,1.028043,1.032729,1.021831
7322,1,1.067617,1.075324,1.068344,1.086520,1.088120,1.071688,1.086666,1.074451,1.070816,...,1.022539,1.050458,1.064563,1.055257,1.057292,1.048132,1.050167,1.048422,1.051767,1.044205
7323,1,0.976882,0.978272,0.972884,0.978098,0.977229,0.968538,0.967669,0.954632,0.958978,...,1.050409,1.073527,1.069529,1.075787,1.072658,1.066400,1.069355,1.074744,1.074917,1.068486
7324,1,0.851952,0.853579,0.871475,0.878525,0.882863,0.880152,0.886117,0.882321,0.881236,...,1.042842,1.045553,1.039588,1.060737,1.053688,1.047180,1.052061,1.050434,1.056399,1.056941


In [41]:
financial_train=train.drop("w", axis=1)
financial_train2=financial_train.drop("y",axis=1)
financial_train2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.790980,0.702555,0.528220,0.298746,0.025488,-0.173480,-0.245290,-0.405057,-0.371818,-0.032334,...,0.954288,1.143901,1.359252,1.081061,1.364409,1.449354,1.195431,1.195992,1.165327,0.771110
1,0.765286,0.604512,0.414197,0.241638,0.181862,-0.031920,-0.070617,-0.185980,-0.188546,0.275131,...,0.561545,0.678086,0.848950,1.133852,1.041396,1.242806,1.248121,1.331348,1.267123,1.292718
2,0.702834,0.637708,0.798416,0.755065,0.705225,0.535391,0.613129,0.549732,0.472387,0.431216,...,1.328694,1.324254,1.272889,1.074786,0.753950,0.539693,0.402041,0.442759,0.487557,0.699007
3,0.018378,-0.097297,-0.020850,-0.083325,-0.268512,-0.486335,-0.731130,-0.924458,-0.995159,-1.352881,...,-0.386676,-0.357946,-0.612069,-0.698063,-0.891789,-1.127624,-1.535678,-1.490786,-1.856840,-1.441472
4,0.463120,0.665307,0.446953,0.480780,0.392500,0.309231,0.158462,0.190963,0.273600,0.198676,...,-0.174944,-0.529541,-0.191921,-0.258093,-0.542403,-0.414866,-0.485580,-0.768820,-0.662573,-0.211837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7321,1.002615,1.007519,0.990882,1.009554,1.009953,1.016928,0.980784,0.919103,0.957826,0.958625,...,1.003996,1.015111,1.014566,1.032765,1.035272,1.036289,1.027571,1.028043,1.032729,1.021831
7322,1.067617,1.075324,1.068344,1.086520,1.088120,1.071688,1.086666,1.074451,1.070816,1.053512,...,1.022539,1.050458,1.064563,1.055257,1.057292,1.048132,1.050167,1.048422,1.051767,1.044205
7323,0.976882,0.978272,0.972884,0.978098,0.977229,0.968538,0.967669,0.954632,0.958978,0.961933,...,1.050409,1.073527,1.069529,1.075787,1.072658,1.066400,1.069355,1.074744,1.074917,1.068486
7324,0.851952,0.853579,0.871475,0.878525,0.882863,0.880152,0.886117,0.882321,0.881236,0.881236,...,1.042842,1.045553,1.039588,1.060737,1.053688,1.047180,1.052061,1.050434,1.056399,1.056941


In [42]:
from sklearn.svm import SVR
supp=SVR(kernel="rbf", C=10, epsilon=0.1)
reg2=supp.fit(financial_train2,y)

In [44]:
pr=reg2.predict(test)

In [46]:
print(pr)

[-0.49265118  0.50436034  1.6018164  ...  1.06213673  1.02655585
  1.09210299]


In [48]:
for i in pr:
    print(i)

-0.4926511783119046
0.5043603376981998
1.6018163958331604
-1.0766609481786045
1.2210692422581508
-0.3754741757656032
-0.6063530028139749
0.09837768222539303
1.1561049351096924
-1.192411820353847
-0.07105164051183618
0.656339388387257
1.1720082823077322
-1.3140415685490912
0.6256848728261748
-0.3973706960613627
-1.5299203980479141
-0.615604565812534
-1.2805103942971614
0.3413662885308624
1.0233301114825228
-1.2098208993907447
1.4883712353372816
1.0272095184927905
1.078003359384374
0.36073359277079603
0.3056455140814039
0.675385263575071
0.1189186517769947
-0.6994930909663599
1.1212286306922992
0.8961535796772675
-1.0544617257138844
-1.6036728277969763
1.2177325032630235
1.0237190656680588
-0.44701633155747295
-1.5568974983147987
-0.2943677482825339
-0.7403477077777373
-0.08328433908594277
-1.3223334121172352
-0.45420762803982034
0.5375270654675401
0.6086393343031813
-0.06856769930712556
0.828340153844787
-0.7766679121307134
0.3094538834053913
0.7034185408109113
-0.5706635465086936
-0.74

-0.21220119820682634
1.3809365306463088
-1.163715794946549
-0.18359156213468372
1.432450177508072
0.29581638082079614
-0.9584164812406779
0.22456270670160608
-0.5427946209251701
-0.7330779474871507
-0.37532756477624535
0.28911974743740193
1.3987653282920136
-1.3171666004620306
-0.8231797745460916
-1.1281578156839691
-0.5672339931603431
-0.26035488265030365
-0.6048979536302634
-0.5339552424164551
-0.18911175532030697
-1.5981051706940517
0.150704443687703
-1.4426487856756434
-1.5626703832714317
1.459550501410391
-1.5364468386887506
1.2536494646825553
1.8698035940107514
1.345023903850811
0.26794623458259664
-0.6380015706981693
-1.3128458771233635
-0.13062576736478124
-0.49295998217231
-0.3255607342867139
-1.5254442530359107
-1.3766497733810248
0.936801958931984
-0.4655664160221278
-0.2223621252241252
-0.7485563253261052
-0.4451107840504152
0.6834421205972903
0.20693398334392746
-0.7958963846940966
-0.6104725175519576
-0.6024050700831829
-1.0065376045712489
0.41239585316174204
0.0386020562

0.6571573840757838
0.5340570201388659
-0.6171488289145792
-1.0704017065754374
0.09232821585952247
0.13282417662034554
-0.6912268034942173
-1.2285628740097028
-1.2179507400376115
1.2028368716667956
-0.2680204183559371
-1.2887440315920193
-0.6816803980863007
-0.5798432587374549
1.2286672909319933
-0.6804474021574154
1.00360897171374
1.5197422815659496
0.1651462311100014
-1.3604251361333417
-0.6896880216130488
1.3490531435919109
1.4718564929191937
0.9441175328806533
-0.5743880261616683
0.0037873016382969087
-0.26864842290017515
1.2050801696123261
1.3972602030502217
1.2515253129253066
0.6424587183504964
-1.2676505140904148
-1.1970517867759511
0.2946096631192486
0.6329077990478743
-0.47629818529884027
0.6467976352759961
-1.4187630195886287
0.5141225124133677
-0.27021433229766384
-0.4868792742092678
1.5894425746015268
-0.3765728330900551
0.6757994736261401
0.5556268396248882
-0.6783146448857879
-0.6237721156128558
-0.3475305856937268
-0.43322045504888473
-0.026466677587337423
-0.547712671799

In [49]:

with open("output.txt", "w") as txt_file:
    for line in pr:
        txt_file.write(" ".join(line) + "\n")

TypeError: can only join an iterable

In [50]:
np.savetxt("submission.txt", pr,delimiter = ",")

In [60]:

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [61]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [62]:
# Initialising the RNN
regressor = Sequential()

# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 30, return_sequences = True, input_shape = (X_train.shape[1], 1)))
regressor.add(Dropout(0.5))

# Adding a second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 10, return_sequences = True))
regressor.add(Dropout(0.3))

# Adding a fourth LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 5))
regressor.add(Dropout(0.2))

# Adding the output layer
regressor.add(Dense(units = 1))

# Compiling the RNN
regressor.compile(optimizer = 'rmsprop', loss = 'mean_squared_error')

# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, epochs = 50, batch_size = 32)

Epoch 1/50


ValueError: in user code:

    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:754 train_step
        y_pred = self(x, training=True)
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\alesi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:219 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 51)


In [70]:
features_set = []

for i in range(50, 7325):
    features_set.append(financial_train2[i-50:i,0])
    

TypeError: '(slice(0, 50, None), 0)' is an invalid key

TypeError: '(slice(1, 50, None), 0)' is an invalid key

TypeError: '(0, slice(51, 7325, None))' is an invalid key

In [73]:
fin_array=financial_train2.to_numpy()

In [75]:
fin_array.shape

(7326, 50)

In [76]:
features_set = np.reshape(fin_array, (fin_array.shape[0], fin_array.shape[1], 1))

In [77]:
features_set

array([[[0.79097974],
        [0.70255482],
        [0.52821982],
        ...,
        [1.19599235],
        [1.16532671],
        [0.77111018]],

       [[0.76528639],
        [0.60451245],
        [0.41419747],
        ...,
        [1.33134806],
        [1.26712275],
        [1.29271781]],

       [[0.70283443],
        [0.63770849],
        [0.79841602],
        ...,
        [0.44275919],
        [0.48755741],
        [0.69900745]],

       ...,

       [[0.97688166],
        [0.97827221],
        [0.97288375],
        ...,
        [1.07474367],
        [1.07491745],
        [1.06848604]],

       [[0.85195223],
        [0.85357913],
        [0.87147501],
        ...,
        [1.05043384],
        [1.05639908],
        [1.05694137]],

       [[1.15539183],
        [1.15740127],
        [1.14400536],
        ...,
        [1.04688547],
        [1.04554588],
        [1.02679169]]])

In [78]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [79]:
model = Sequential()

In [80]:
model.add(LSTM(units=50, return_sequences=True, input_shape=(features_set.shape[1], 1)))

In [81]:
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units = 1))

In [82]:
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [83]:
labels=np.array(y)

In [84]:
labels

array([0.48313004, 1.13562417, 0.68608099, ..., 1.06483572, 1.05206068,
       1.02813128])

In [86]:
model.fit(features_set, labels, epochs = 10, batch_size = 32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2b6db78a100>

In [87]:
test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,-0.670595,-0.839068,-0.734415,-0.587261,-0.788800,-0.975857,-0.774088,-1.021334,-1.110286,-0.893337,...,-0.997627,-1.103794,-1.092988,-0.989165,-0.827528,-0.813729,-0.532411,-0.289483,-0.407720,-0.407505
1,0.188165,0.166410,0.321011,0.318078,0.641710,0.951932,1.170069,1.177711,0.987763,0.981345,...,0.743405,0.916254,0.866453,0.953677,0.716259,0.692816,0.446713,0.539733,0.279293,0.180641
2,0.886510,0.760716,0.751800,0.052198,-0.050958,-0.140734,-0.173480,0.178508,0.198187,0.357906,...,0.444142,0.492294,0.573348,0.546323,0.373874,0.699132,0.808303,1.118522,1.284887,1.541929
3,-0.187722,0.030199,-0.072558,-0.098400,-0.110795,-0.127632,-0.241193,-0.374608,-0.651771,-0.513491,...,-0.340927,-0.268253,-0.654777,-1.133722,-1.484557,-1.446644,-1.654337,-1.521009,-1.593825,-1.110684
4,0.248822,0.168815,0.260804,0.505885,0.471486,1.018661,0.971406,1.062348,0.986871,0.947982,...,0.422044,0.688196,0.382416,0.344843,0.177595,0.330549,0.595061,0.884860,1.125103,1.220779
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3136,1.033977,1.043510,1.029088,1.038866,1.044243,1.032999,1.034221,1.027377,1.027377,1.005378,...,0.977756,0.944023,0.945001,0.938646,0.955268,0.945735,0.940112,0.943290,0.956245,0.947446
3137,0.998393,0.997933,0.989993,1.001384,0.995516,0.983090,0.989993,0.981479,0.993330,0.995679,...,1.017606,1.025608,1.027689,1.026088,1.026248,1.024808,1.022247,1.024488,1.025288,1.017286
3138,1.288652,1.279170,1.271384,1.280467,1.276575,1.245434,1.208504,1.207506,1.064278,1.063479,...,1.023056,1.022158,1.032638,1.026849,1.016369,1.014872,1.013774,1.023356,1.026450,1.028047
3139,0.804169,0.821925,0.824755,0.836336,0.840196,0.827586,0.834277,0.833762,0.828101,0.832733,...,1.101904,1.101132,1.083119,1.113742,1.130468,1.122491,1.126608,1.134843,1.133042,1.113484


In [93]:
test_features = np.array(test)

In [94]:
test_features


array([[-0.67059511, -0.83906782, -0.73441476, ..., -0.28948304,
        -0.40771979, -0.40750533],
       [ 0.18816473,  0.16640998,  0.32101104, ...,  0.53973293,
         0.27929333,  0.18064056],
       [ 0.88651049,  0.76071638,  0.75180036, ...,  1.11852205,
         1.28488708,  1.54192877],
       ...,
       [ 1.28865155,  1.27916959,  1.27138431, ...,  1.02335559,
         1.02644969,  1.02804669],
       [ 0.80416879,  0.82192486,  0.82475549, ...,  1.13484295,
         1.13304163,  1.11348427],
       [ 1.06346063,  1.06346063,  1.06569167, ...,  1.10857712,
         1.09965297,  1.09965297]])

In [95]:
test_features = np.reshape(test_features, (test_features.shape[0], test_features.shape[1], 1))

In [96]:
predictions = model.predict(test_features)

In [97]:
predictions

array([[-0.47704262],
       [ 0.5339699 ],
       [ 1.4745919 ],
       ...,
       [ 1.0128956 ],
       [ 1.012894  ],
       [ 1.0108343 ]], dtype=float32)

In [99]:
np.savetxt("submissions.txt", predictions,delimiter = ",")