In [1]:
import numpy as np
import pandas as pd
import sklearn.decomposition as SKLDec
import matplotlib.pyplot as plt
import matplotlib
import sklearn.model_selection as SKLSel
import sklearn.neural_network as SKANN
import sklearn.metrics as SKLMet
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import sklearn.feature_selection as fs
import sklearn.linear_model as lm
from sklearn.metrics  import  accuracy_score , confusion_matrix,f1_score,precision_score
import numpy as np
import seaborn as sns
import itertools
import sklearn.svm as SVM
import sklearn.ensemble as Ensemble
from sklearn.neighbors import KNeighborsClassifier

from sklearn.utils.multiclass import unique_labels

In [2]:

def ResultInfo(TestReal,TestPredicted,MethodName):
    """Collect evaluation metrics for one model into a single-row DataFrame.

    Parameters
    ----------
    TestReal : array-like
        True target values.
    TestPredicted : array-like
        Values predicted by the model.
    MethodName : hashable
        Label used as the index of the returned row.

    Returns
    -------
    pandas.DataFrame
        One row labelled ``MethodName`` with the columns ``F1Score``,
        ``AccurancyRate``, ``MSE``, ``MAE`` and ``PrecisionScore``.
        F1 and precision are macro-averaged.

    Notes
    -----
    The ``MSE`` column holds the *square root* of the mean squared error;
    the column name is kept as-is because callers may index on it.
    """
    MetricColumns = ['F1Score', 'AccurancyRate', 'MSE', 'MAE', 'PrecisionScore']

    # Error metrics computed on the raw label values.
    MeanAbsError = SKLMet.mean_absolute_error(TestReal, TestPredicted)
    RootMeanSqError = np.sqrt(SKLMet.mean_squared_error(TestReal, TestPredicted))

    # Classification metrics.
    MacroF1 = f1_score(TestReal, TestPredicted, average='macro')
    MacroPrecision = precision_score(TestReal, TestPredicted, average='macro')
    Accuracy = accuracy_score(TestReal, TestPredicted)

    # Start from an empty frame and add the row by label, so the resulting
    # frame is built exactly the way downstream code has always received it.
    Summary = pd.DataFrame(columns=MetricColumns)
    Summary.loc[MethodName] = [MacroF1, Accuracy, RootMeanSqError, MeanAbsError, MacroPrecision]

    return Summary



In [3]:

def ResultGraph(TestReal,TestPredicted,MethodName):
    """Plot the confusion matrix of a classifier's predictions.

    Each cell shows the share of *all* samples that falls into it (the raw
    counts are divided by the total number of samples).

    Parameters
    ----------
    TestReal : array-like
        True class labels.
    TestPredicted : array-like
        Predicted class labels.
    MethodName : str
        Model name appended to the figure title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Ask scikit-learn for the classes that are actually present and pass the
    # same order to confusion_matrix, so row/column i is guaranteed to belong
    # to Classes[i]. Without this the tick names can be attached to the wrong
    # rows, because confusion_matrix sorts the labels in ascending order.
    Classes = unique_labels(TestReal, TestPredicted)
    cm = confusion_matrix(TestReal, TestPredicted, labels=Classes)

    # Human-readable names for the outcome codes.
    # NOTE(review): assumes the 1 / 0 / -1 result encoding that BetResult
    # uses, with Win = 1 and Defeat = -1 -- confirm. Any other class value is
    # shown as its plain string form.
    OutcomeNames = {1: "Win", 0: "Draw", -1: "Defeat"}
    labels = [OutcomeNames.get(Value, str(Value)) for Value in Classes]

    # Normalise by the total number of samples.
    cm = cm.astype('float') / cm.sum()

    # Configure figure (a fresh one per call, so repeated calls never draw
    # on top of each other).
    sns.set_style("whitegrid", {"axes.grid": False})
    fig, ax = plt.subplots()
    Image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)

    ax.set_title('Confusion Matriks for '+MethodName)
    fig.colorbar(Image, ax=ax)

    tick_marks = np.arange(len(labels))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(labels, rotation=45)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(labels)

    # White text on dark cells, black text on light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, round(cm[i, j], 2),
                horizontalalignment="center",
                color="white" if cm[i, j] > thresh else "black")

    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    fig.tight_layout()

    plt.show()


In [4]:
def PCADimensionFinder(TrainingPCA,NumberOfFeatures):
    """Choose how many principal components to keep.

    The first component is always kept. Every later component is kept when
    its explained-variance ratio is strictly greater than
    ``1 / NumberOfFeatures``.

    Parameters
    ----------
    TrainingPCA : object
        Any object exposing ``explained_variance_ratio_`` as a 1-D sequence
        of floats.
    NumberOfFeatures : int or float
        Used to build the cut-off ``1 / NumberOfFeatures``.

    Returns
    -------
    int
        Number of components to keep (at least 1).

    Raises
    ------
    ZeroDivisionError
        If ``NumberOfFeatures`` is 0.
    """
    CutThreshold = 1/NumberOfFeatures

    ExplainedRatios = np.asarray(TrainingPCA.explained_variance_ratio_, dtype=float)

    # Compare each ratio with the threshold directly. Going through a
    # cumulative sum and differencing it back yields the same quantity but
    # with rounding error added, and relies on pandas aligning a one-column
    # frame onto a differently named column.
    ExtraComponents = int(np.count_nonzero(ExplainedRatios[1:] > CutThreshold))

    # +1 accounts for the first component, which is kept unconditionally.
    ComponentsNumber = ExtraComponents + 1

    return ComponentsNumber


In [5]:

def ANNResulter(MethodName, TrainingFeutres, TrainingTarget, TestFeatures, TestTarget,Resulter=ResultInfo):
    """Tune, train and evaluate a multi-layer perceptron classifier.

    The L2 regularisation strength ``alpha`` is chosen by an exhaustive grid
    search using a 10-split ``TimeSeriesSplit`` on the training data. The
    winning model is then used to predict ``TestFeatures`` and the result is
    handed to ``Resulter``.

    Parameters
    ----------
    MethodName : hashable
        Label passed through to ``Resulter``.
    TrainingFeutres, TrainingTarget : array-like
        Training inputs and targets.
    TestFeatures, TestTarget : array-like
        Hold-out inputs and targets.
    Resulter : callable, optional
        Called as ``Resulter(TestTarget, predictions, MethodName)``; its
        return value is returned unchanged. Defaults to ``ResultInfo``.

    Returns
    -------
    Whatever ``Resulter`` returns.
    """
    # Grid search with time-series cross-validation over alpha.
    TSCV = SKLSel.GridSearchCV(
        SKANN.MLPClassifier(),
        cv=SKLSel.TimeSeriesSplit(n_splits=10),
        param_grid={"alpha": [0.0001, 0.001, 0.010, 0.100]},
    ).fit(TrainingFeutres, TrainingTarget)

    # Optimal parameter selected by the search.
    TCSVPar = TSCV.best_params_["alpha"]

    print("== Multi-Layer Perceptron Method Algorithm Training Optimal Parameter Selection ==")
    print("Artificial Neural Network Regression  Optimal  Regularization: ", TCSVPar)
    print("")

    # GridSearchCV (refit=True by default) has already retrained the best
    # configuration on the whole training set, so reuse that model instead of
    # fitting an identical one a second time.
    ANNTraining = TSCV.best_estimator_

    # Predict the hold-out set.
    ANNTesting = ANNTraining.predict(TestFeatures)

    # Honour the caller-supplied reporting function.
    return Resulter(TestTarget, ANNTesting, MethodName)

In [6]:
def XGBoostesulter(MethodName, TrainingFeutres, TrainingTarget, TestFeatures, TestTarget,Resulter=ResultInfo):
    """Tune, train and evaluate a gradient boosting classifier.

    ``max_depth`` is chosen by an exhaustive grid search using a 10-split
    ``TimeSeriesSplit`` on the training data. The winning model is then used
    to predict ``TestFeatures`` and the result is handed to ``Resulter``.

    Despite the function name, the estimator is scikit-learn's
    ``GradientBoostingClassifier``.

    Parameters
    ----------
    MethodName : hashable
        Label passed through to ``Resulter``.
    TrainingFeutres, TrainingTarget : array-like
        Training inputs and targets.
    TestFeatures, TestTarget : array-like
        Hold-out inputs and targets.
    Resulter : callable, optional
        Called as ``Resulter(TestTarget, predictions, MethodName)``; its
        return value is returned unchanged. Defaults to ``ResultInfo``.

    Returns
    -------
    Whatever ``Resulter`` returns.
    """
    # Grid search with time-series cross-validation over the tree depth.
    TSCV = SKLSel.GridSearchCV(
        Ensemble.GradientBoostingClassifier(),
        cv=SKLSel.TimeSeriesSplit(n_splits=10),
        param_grid={"max_depth": [1, 2, 3, 4, 5]},
    ).fit(TrainingFeutres, TrainingTarget)

    # Optimal parameter selected by the search.
    TCSVPar = TSCV.best_params_["max_depth"]

    print("")
    print("== Ensemble Method Algorithm Training Optimal Parameter Selection ==")
    print("")
    print("Gradient Boosting Machine Regression A Optimal Maximum Depth: ", TCSVPar)

    print("")

    # GridSearchCV (refit=True by default) has already retrained the best
    # configuration on the whole training set, so reuse that model instead of
    # fitting an identical one a second time.
    XGBoostTraining = TSCV.best_estimator_

    # Predict the hold-out set.
    XGBoostTesting = XGBoostTraining.predict(TestFeatures)

    # Honour the caller-supplied reporting function.
    return Resulter(TestTarget, XGBoostTesting, MethodName)


In [7]:
def SVMResulter(MethodName, TrainingFeutres, TrainingTarget, TestFeatures, TestTarget,Resulter=ResultInfo):
    """Tune, train and evaluate an RBF-kernel support vector classifier.

    The error-term penalty ``C`` is chosen by an exhaustive grid search using
    a 10-split ``TimeSeriesSplit`` on the training data. The winning model is
    then used to predict ``TestFeatures`` and the result is handed to
    ``Resulter``.

    Parameters
    ----------
    MethodName : hashable
        Label passed through to ``Resulter``.
    TrainingFeutres, TrainingTarget : array-like
        Training inputs and targets.
    TestFeatures, TestTarget : array-like
        Hold-out inputs and targets.
    Resulter : callable, optional
        Called as ``Resulter(TestTarget, predictions, MethodName)``; its
        return value is returned unchanged. Defaults to ``ResultInfo``.

    Returns
    -------
    Whatever ``Resulter`` returns.
    """
    # Grid search with time-series cross-validation over C.
    TSCV = SKLSel.GridSearchCV(
        SVM.SVC(kernel='rbf',gamma='auto'),
        cv=SKLSel.TimeSeriesSplit(n_splits=10),
        param_grid={"C": [0.25, 0.50, 1.00, 1.25]},
    ).fit(TrainingFeutres, TrainingTarget)

    # Optimal parameter selected by the search.
    TCSVPar = TSCV.best_params_["C"]

    print("")
    print("== Maximum Margin Method Algorithm Training Optimal Parameter Selection ==")
    print("")
    print("RBF Support Vector Machine Regression A Optimal Error Term Penalty: ", TCSVPar)
    print("")

    # GridSearchCV (refit=True by default) has already retrained the best
    # configuration on the whole training set, so reuse that model instead of
    # fitting an identical one a second time.
    SVMTraining = TSCV.best_estimator_

    # Predict the hold-out set.
    SVMTesting = SVMTraining.predict(TestFeatures)

    # Honour the caller-supplied reporting function.
    return Resulter(TestTarget, SVMTesting, MethodName)


In [8]:
def XGBoostModel(TrainingFeutres, TrainingTarget):
    """Return a fitted gradient boosting classifier with a tuned tree depth.

    ``max_depth`` is chosen from 1-5 by an exhaustive grid search using a
    10-split ``TimeSeriesSplit`` on the training data.

    Despite the function name, the estimator is scikit-learn's
    ``GradientBoostingClassifier``.

    Parameters
    ----------
    TrainingFeutres, TrainingTarget : array-like
        Training inputs and targets.

    Returns
    -------
    sklearn.ensemble.GradientBoostingClassifier
        The best configuration, fitted on the full training data.
    """
    # Grid search with time-series cross-validation over the tree depth.
    TSCV = SKLSel.GridSearchCV(
        Ensemble.GradientBoostingClassifier(),
        cv=SKLSel.TimeSeriesSplit(n_splits=10),
        param_grid={"max_depth": [1, 2, 3, 4, 5]},
    ).fit(TrainingFeutres, TrainingTarget)

    # GridSearchCV (refit=True by default) has already retrained the best
    # configuration on the whole training set; fitting a second, identical
    # model would only double the training cost.
    XGBoostTraining = TSCV.best_estimator_

    return XGBoostTraining


In [9]:

def ANNModel( TrainingFeutres, TrainingTarget):
    """Return a fitted multi-layer perceptron classifier with a tuned ``alpha``.

    The L2 regularisation strength ``alpha`` is chosen by an exhaustive grid
    search using a 10-split ``TimeSeriesSplit`` on the training data. The
    selected value is printed.

    Parameters
    ----------
    TrainingFeutres, TrainingTarget : array-like
        Training inputs and targets.

    Returns
    -------
    sklearn.neural_network.MLPClassifier
        The best configuration, fitted on the full training data.
    """
    # Grid search with time-series cross-validation over alpha.
    TSCV = SKLSel.GridSearchCV(
        SKANN.MLPClassifier(),
        cv=SKLSel.TimeSeriesSplit(n_splits=10),
        param_grid={"alpha": [0.0001, 0.001, 0.010, 0.100]},
    ).fit(TrainingFeutres, TrainingTarget)

    # Optimal parameter selected by the search.
    TCSVPar = TSCV.best_params_["alpha"]

    print("== Multi-Layer Perceptron Method Algorithm Training Optimal Parameter Selection ==")
    print("Artificial Neural Network Regression  Optimal  Regularization: ", TCSVPar)
    print("")

    # GridSearchCV (refit=True by default) has already retrained the best
    # configuration on the whole training set; fitting a second, identical
    # model would only double the training cost.
    ANNTraining = TSCV.best_estimator_

    return ANNTraining

In [10]:
def SVMModel( TrainingFeutres, TrainingTarget):
    """Return a fitted RBF-kernel support vector classifier with a tuned ``C``.

    The error-term penalty ``C`` is chosen by an exhaustive grid search using
    a 10-split ``TimeSeriesSplit`` on the training data. The selected value
    is printed.

    Parameters
    ----------
    TrainingFeutres, TrainingTarget : array-like
        Training inputs and targets.

    Returns
    -------
    sklearn.svm.SVC
        The best configuration, fitted on the full training data.
    """
    # Grid search with time-series cross-validation over C.
    TSCV = SKLSel.GridSearchCV(
        SVM.SVC(kernel='rbf',gamma='auto'),
        cv=SKLSel.TimeSeriesSplit(n_splits=10),
        param_grid={"C": [0.25, 0.50, 1.00, 1.25]},
    ).fit(TrainingFeutres, TrainingTarget)

    # Optimal parameter selected by the search.
    TCSVPar = TSCV.best_params_["C"]

    print("")
    print("== Maximum Margin Method Algorithm Training Optimal Parameter Selection ==")
    print("")
    print("RBF Support Vector Machine Regression A Optimal Error Term Penalty: ", TCSVPar)
    print("")

    # GridSearchCV (refit=True by default) has already retrained the best
    # configuration on the whole training set; fitting a second, identical
    # model would only double the training cost.
    SVMTraining = TSCV.best_estimator_

    return SVMTraining


In [11]:
def BetResult(TestData,PredictedData,ModelName,MatchDataPath='DataSets/Match.csv',FigurePath='sdasdsa.png'):
    """Simulate flat 100-unit bets on the predicted outcomes and plot the result.

    For every row of ``TestData`` the best (highest) available odds for the
    predicted outcome are looked up in the match table. A correct prediction
    earns ``(odds - 1) * 100``; a wrong one loses 100. The cumulative earning
    is plotted and the figure is saved to ``FigurePath``.

    Parameters
    ----------
    TestData : pandas.DataFrame
        Must contain a ``Result`` column. Its index values are looked up in
        the ``id`` column of the match table.
    PredictedData : array-like
        Predicted outcome per row of ``TestData``: ``1`` selects the home
        odds columns, ``-1`` the away odds columns and ``0`` the draw odds
        columns.
    ModelName : str
        Used as the figure title.
    MatchDataPath : str, optional
        CSV file with the columns ``id``, ``date`` and the bookmaker odds.
    FigurePath : str, optional
        Where the chart is written.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a prediction is not one of ``1``, ``0`` or ``-1``.
    """
    MatchData=pd.read_csv(MatchDataPath)
    MatchData.index=MatchData['id']
    NewData=TestData.copy()

    NewData.loc[:,'Predicted']=PredictedData

    # Bookmaker odds columns to consult for each predicted outcome code.
    OddsColumns = {
        1: ['B365H', 'BWH', 'IWH', 'LBH', 'PSH', 'WHH', 'SJH', 'VCH', 'GBH', 'BSH'],
        -1: ['B365A', 'BWA', 'IWA', 'LBA', 'PSA', 'WHA', 'SJA', 'VCA', 'GBA', 'BSA'],
        0: ['B365D', 'BWD', 'IWD', 'LBD', 'PSD', 'WHD', 'SJD', 'VCD', 'GBD', 'BSD'],
    }

    # Fail loudly on an unexpected prediction instead of silently pricing the
    # bet with the odds of the previously processed match.
    Unknown = [Value for Value in pd.unique(NewData['Predicted']) if Value not in OddsColumns]
    if Unknown:
        raise ValueError("Predictions must be 1, 0 or -1; got %r" % (Unknown,))

    NewData['BetRate'] = float(0)
    for Outcome, Columns in OddsColumns.items():
        Rows = NewData.index[NewData['Predicted'] == Outcome]
        if len(Rows) == 0:
            continue
        # DataFrame.max skips missing quotes; the builtin max() would return
        # an order-dependent result as soon as one bookmaker's odds are NaN.
        NewData.loc[Rows, 'BetRate'] = MatchData.loc[Rows, Columns].max(axis=1)

    # 1.0 where the prediction matched the real result, 0.0 otherwise.
    NewData['BetResult'] = (NewData['Predicted'] == NewData['Result']).astype(float)

    # Flat stake of 100: win (odds - 1) * 100, otherwise lose the stake.
    NewData['Earning'] = np.where(NewData['BetResult'] == 1,
                                  (NewData['BetRate'] - 1) * 100,
                                  -100.0)

    NewData['CumEarning'] = NewData['Earning'].cumsum()

    # Only parse the dates of the matches that are actually used.
    NewData['Date'] = pd.to_datetime(MatchData.loc[NewData.index, 'date'])

    NewData= NewData.reset_index()

    fig1, ax = plt.subplots()

    ax.plot(NewData['CumEarning'])

    ax.legend(['BetEarning'])
    plt.ylabel('Cumulative Earning')
    plt.suptitle(ModelName)
    plt.xticks(rotation=90)
    plt.savefig(FigurePath)

