# Support Vector Machines with SMA and RSI trading strategy

In [83]:
import datetime
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterGrid
import matplotlib.pyplot as plt

In [84]:
def fetch_data(symbol, start, end):
    """Download price history for ``symbol`` and return it as a DataFrame.

    ``symbol``, ``start`` and ``end`` are forwarded positionally to
    ``yf.download``; whatever it returns is wrapped in ``pd.DataFrame``.
    """
    downloaded = yf.download(symbol, start, end)
    frame = pd.DataFrame(downloaded)
    return frame

In [92]:
def calculate_rsi(data, period=14):
    """Return the Relative Strength Index of ``data`` as a Series in [0, 100].

    Each bar's move is measured open-to-close (``Close - Open``). Gains and
    losses are averaged with a simple rolling mean over ``period`` bars and
    combined as ``RSI = 100 - 100 / (1 + avg_gain / avg_loss)``.

    Side effect: the intermediate columns ``move``, ``up``, ``down``,
    ``average_gain``, ``average_loss`` and ``relative_strength`` are written
    into ``data`` in place.

    Parameters:
        data: DataFrame with ``Open`` and ``Close`` columns.
        period: number of bars in the rolling averaging window.

    Returns:
        A Series aligned with ``data``. The first ``period - 1`` rows are NaN
        (incomplete window), as are windows with no movement at all (0 / 0).
        Windows containing only gains yield exactly 100.
    """
    data['move'] = data['Close'] - data['Open']
    data['up'] = np.where(data['move'] > 0, data['move'], 0)
    # Losses must be stored as positive magnitudes. Keeping them negative
    # makes relative_strength <= 0, which pushes the RSI outside [0, 100]
    # (and to +/-inf when average gain equals average loss).
    data['down'] = np.where(data['move'] < 0, -data['move'], 0)
    data['average_gain'] = data['up'].rolling(period).mean()
    data['average_loss'] = data['down'].rolling(period).mean()
    # average_loss == 0 gives relative_strength == inf, hence RSI == 100.
    data['relative_strength'] = data['average_gain'] / data['average_loss']
    return 100.0 - (100.0 / (1.0 + data['relative_strength']))

In [93]:
def construct_signals(data, ma_period=60, rsi_period=14):
    """Add the two model features and the label to ``data`` in place.

    Columns written:
        SMA: simple moving average of ``Close`` over ``ma_period`` rows.
        trend: ``(Open - SMA) * 100`` (feature).
        RSI: ``calculate_rsi(data, rsi_period)`` divided by 100 (feature).
        direction: label, 1 when ``Close > Open`` and -1 otherwise.

    ``calculate_rsi`` additionally writes its own intermediate columns into
    ``data``. Nothing is returned.
    """
    opening = data['Open']
    closing = data['Close']

    # Feature 1: distance of the opening price from the moving average.
    data['SMA'] = closing.rolling(window=ma_period).mean()
    data['trend'] = (opening - data['SMA']) * 100

    # Feature 2: RSI divided by 100 to normalize it.
    rsi = calculate_rsi(data, rsi_period)
    data['RSI'] = rsi / 100

    # Target label: did the bar close above its open?
    closed_higher = closing - opening > 0
    data['direction'] = np.where(closed_higher, 1, -1)

In [94]:
if __name__ == '__main__':
    # End-to-end run: download EUR/USD prices, build the features, tune an
    # SVM's C and gamma, then report accuracy on a held-out test set.
    start_date = datetime.datetime(2018, 1, 1)
    end_date = datetime.datetime(2020, 1, 1)

    # EUR-USD currency pair
    currency_data = fetch_data('EURUSD=X', start_date, end_date)
    construct_signals(currency_data)

    # The rolling windows leave NaNs in the leading rows; drop them.
    currency_data = currency_data.dropna()
    print(currency_data)

    X = currency_data[['trend', 'RSI']]
    y = currency_data['direction']

    # split the data into training and test set (80%-20%)
    # NOTE(review): train_test_split shuffles rows by default, so the test
    # set is not chronologically after the training set - confirm this is
    # intended for time-series data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Hold out part of the training data for model selection, so the test
    # set is never used to pick hyper-parameters (that would make the
    # reported accuracy optimistically biased).
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.25)

    # model - we can find the right coefficients
    parameters = {'gamma': [10, 1, 0.1, 0.01, 0.001], 'C': [1, 10, 100, 1000, 10000]}
    grid = ParameterGrid(parameters)

    # Start below any achievable accuracy so the first candidate is always
    # accepted and best_parameters can never remain None.
    best_accuracy = -1.0
    best_parameters = None

    for p in grid:
        svm = SVC(C=p['C'], gamma=p['gamma'])
        svm.fit(X_fit, y_fit)
        accuracy = accuracy_score(y_val, svm.predict(X_val))

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_parameters = p

    # we have found the best parameters: refit on the whole training set
    # and evaluate once on the untouched test set
    model = SVC(C=best_parameters['C'], gamma=best_parameters['gamma'])
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print('Accuracy of the model: %.2f' % accuracy_score(y_test, predictions))
    # confusion_matrix expects (y_true, y_pred): rows are the true labels.
    print(confusion_matrix(y_test, predictions))

[*********************100%***********************]  1 of 1 completed
                Open      High       Low     Close  Adj Close  Volume  \
Date                                                                    
2018-03-23  1.232605  1.236017  1.231997  1.232499   1.232499       0   
2018-03-26  1.235391  1.244896  1.235239  1.235437   1.235437       0   
2018-03-27  1.245501  1.247816  1.237333  1.245749   1.245749       0   
2018-03-28  1.240803  1.242236  1.233715  1.240787   1.240787       0   
2018-03-29  1.231284  1.233806  1.228516  1.231027   1.231027       0   
...              ...       ...       ...       ...        ...     ...   
2019-12-26  1.109509  1.110494  1.108279  1.109545   1.109545       0   
2019-12-27  1.110124  1.118068  1.110000  1.110112   1.110112       0   
2019-12-30  1.118155  1.122070  1.118155  1.118230   1.118230       0   
2019-12-31  1.120448  1.124101  1.120072  1.120230   1.120230       0   
2020-01-01  1.122083  1.122838  1.115947  1.122083   1.