In [1]:
import pandas as pd

In [2]:
ten_year=pd.read_csv('../ten_year.csv')
ten_year.head()

Unnamed: 0,name,ticker,open,close,adj_close,low,high,volume,market_signal
0,ALTABA INC.,AABA,23.799999,23.719999,23.719999,23.6,24.15,25671700,sell
1,"AARON&#39;S, INC.",AAN,11.366667,11.213333,10.835422,11.166667,11.473333,1600,sell
2,APPLE INC.,AAPL,28.467142,27.834286,18.704624,27.507143,28.608572,269794700,sell
3,"ACI WORLDWIDE, INC.",ACIW,6.326667,6.096667,6.096667,6.026667,6.46,2103300,sell
4,"AXCELIS TECHNOLOGIES, INC.",ACLS,18.32,18.76,18.76,18.280001,19.24,122100,buy


In [81]:
# Extract Apple stock as an example
ten_year=ten_year.loc[ten_year['ticker'] == 'AAPL']
ten_year.head()
ten_year.shape

(2682, 9)

In [82]:
# Set features. This will be used as X values.
feature=ten_year[["open","close","adj_close","low","high","volume"]]

# Target values will come from "market_signal" column
target=ten_year["market_signal"]

print(feature.shape,target.shape)

(2682, 6) (2682,)


In [83]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(feature,target,random_state=10,stratify=target)

In [84]:
# Scale the data by using MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler().fit(X_train)
X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)

In [85]:
from sklearn.svm import SVC
model=SVC(kernel="linear")
model.fit(X_train,y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [86]:
print(f"Training Data Score: {model.score(X_train, y_train)}")
print(f"Testing Data Score: {model.score(X_test, y_test)}")

Training Data Score: 0.5216310293386375
Testing Data Score: 0.5290611028315947


In [87]:
# Create the GridSearchCV model
from sklearn.model_selection import GridSearchCV
param_grid={"C":[1,5,10],"gamma":[0.0001,0.001,0.01]}
grid=GridSearchCV(model,param_grid,verbose=3)

In [88]:
# Train the model with GridSearch
grid.fit(X_train,y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.524, total=   0.1s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.510, total=   0.1s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.515, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.517, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.535, total=   0.1s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.524, total=   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.510, total=   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.515, total=   0.1s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.517, total=   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.535, total=   0.0s
[CV] C=1, gamma=0.01 .................................................
[CV] .

[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    2.4s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='linear', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [89]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 10, 'gamma': 0.0001}
0.5519622730022344


In [90]:
predictions=grid.predict(X_test)

# Classificaiton report 
from sklearn.metrics import classification_report
print(classification_report(y_test,predictions))

from sklearn import metrics
#evaluation(Accuracy)
print("Accuracy:",metrics.accuracy_score(predictions,y_test))

              precision    recall  f1-score   support

         buy       0.56      0.88      0.68       345
        sell       0.68      0.27      0.39       326

    accuracy                           0.58       671
   macro avg       0.62      0.57      0.53       671
weighted avg       0.62      0.58      0.54       671

Accuracy: 0.5827123695976155


In [91]:
# Save the model 
import joblib
filename = './saved models/SVM_ten_year.sav'
joblib.dump(grid, filename)

['./saved models/SVM_ten_year.sav']