In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import os.path
from tqdm import tqdm
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score
from collections import OrderedDict

np.random.seed(500)

### Data Input and Visualization

Reading the input file

In [2]:
deliveries_unprocessed = pd.read_csv(os.path.abspath('') + "\\..\\deliveries.csv")
matches_unprocesed = pd.read_csv(os.path.abspath('') + "\\..\\matches.csv")
print('Deliveries Data Size:', deliveries_unprocessed.shape)
print('Matches Data Size:', matches_unprocesed.shape)

Deliveries Data Size: (150139, 21)
Matches Data Size: (633, 18)


Below code creates a dictionary of matchId and the date it was played and add date information in deliveries table

In [24]:
matchId_date_dict = {}
for index, row in matches_unprocesed.iterrows():
    matchId_date_dict[row['id']] = row['date']
    
date_column = []
for index, row in deliveries_unprocessed.iterrows():
    date_column.append(matches_unprocesed.iloc[row['match_id'] - 1]['date'])
deliveries_unprocessed['date'] = date_column

Removing matches with no results and result with duckworth lewis method

In [25]:
matchId_dl_and_noResult = []
for index, row in matches_unprocesed.iterrows():
    if row['result'] == 'no result':
        matchId_dl_and_noResult.append(row['id'])
    if row['dl_applied'] == 1:
        matchId_dl_and_noResult.append(row['id'])

In [26]:
drop_index_in_matches = []
for index, row in matches_unprocesed.iterrows():
    if row['id'] in matchId_dl_and_noResult:
        drop_index_in_matches.append(index)
        
drop_index_in_deliveries = []
for index, row in deliveries_unprocessed.iterrows():
    if row['match_id'] in matchId_dl_and_noResult:
        drop_index_in_deliveries.append(index)

In [27]:
matches = matches_unprocesed.drop(drop_index_in_matches)
deliveries = deliveries_unprocessed.drop(drop_index_in_deliveries)

Sorting the data with respect to date

In [28]:
deliveries = deliveries.sort_values(by=['date', 'match_id', 'inning', 'over', 'ball'])
deliveries.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder,date
13862,60,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,1,SC Ganguly,BB McCullum,P Kumar,0,...,1,0,0,0,1,1,,,,2008-04-18
13863,60,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,2,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,0,,,,2008-04-18
13864,60,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,3,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,1,1,,,,2008-04-18
13865,60,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,4,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,0,,,,2008-04-18
13866,60,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,5,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,0,,,,2008-04-18


In [29]:
matches = matches.sort_values(by=['date'])
matches.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
59,60,2008,Bangalore,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Kolkata Knight Riders,140,0,BB McCullum,M Chinnaswamy Stadium,Asad Rauf,RE Koertzen,
61,62,2008,Delhi,2008-04-19,Rajasthan Royals,Delhi Daredevils,Rajasthan Royals,bat,normal,0,Delhi Daredevils,0,9,MF Maharoof,Feroz Shah Kotla,Aleem Dar,GA Pratapkumar,
60,61,2008,Chandigarh,2008-04-19,Chennai Super Kings,Kings XI Punjab,Chennai Super Kings,bat,normal,0,Chennai Super Kings,33,0,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",MR Benson,SL Shastri,
63,64,2008,Kolkata,2008-04-20,Deccan Chargers,Kolkata Knight Riders,Deccan Chargers,bat,normal,0,Kolkata Knight Riders,0,5,DJ Hussey,Eden Gardens,BF Bowden,K Hariharan,
62,63,2008,Mumbai,2008-04-20,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,normal,0,Royal Challengers Bangalore,0,5,MV Boucher,Wankhede Stadium,SJ Davis,DJ Harper,


Extracting the matchId-team-batsmen and matchId-team-oppositionBowler data in dictionary

In [30]:
matchId_team_batsmen_dict = OrderedDict()
matchId_team_opposition_bowler_dict = {}
for index, row in deliveries.iterrows():
    batsmen_set = matchId_team_batsmen_dict.get((row['match_id'], row['batting_team']), set())
    batsmen_set.add(row['batsman'])
    batsmen_set.add(row['non_striker'])
    matchId_team_batsmen_dict[(row['match_id'], row['batting_team'])] = batsmen_set
    
    opposition_bowler_set = matchId_team_opposition_bowler_dict.get((row['match_id'], row['batting_team']), set())
    opposition_bowler_set.add(row['bowler'])
    matchId_team_opposition_bowler_dict[(row['match_id'], row['batting_team'])] = opposition_bowler_set

Created a dictionary that assigned a value to each player

In [31]:
batsmen = deliveries['batsman'].unique()
bowlers = deliveries['bowler'].unique()
all_players = list(set(batsmen) | set(bowlers))
player_dict = {}
for i in range(len(all_players)):
    player_dict[all_players[i]] = i

Created a dictionary that stores score for each matchId and team played

In [32]:
matchId_team_score = OrderedDict()
for index, row in deliveries.iterrows():
    score = matchId_team_score.get((row['match_id'], row['batting_team']), 0)
    if int(row['is_super_over']) == 0:
        score += int(row['total_runs'])
    matchId_team_score[(row['match_id'], row['batting_team'])] = score

Following code creates 2 dictionaries - one which stores batting average for each batsmen for each day he played and another dictionary that stores the bowling econnomy of each bowler for each day he bowled in IPL

In [33]:
batting_average = pd.read_pickle('batsmen_average_data.pkl')
bowling_average = pd.read_pickle('bowlers_average_data.pkl')

In [34]:
batting_average.head()

Unnamed: 0,Name,Date,Batting Average,Average Balls Faced
0,DA Warner,2017-04-05,33.73,24.4
1,S Dhawan,2017-04-05,27.517857,23.410714
2,MC Henriques,2017-04-05,17.74359,14.461538
3,Yuvraj Singh,2017-04-05,22.27619,17.733333
4,DJ Hooda,2017-04-05,12.291667,9.333333


In [35]:
bowling_average.head()

Unnamed: 0,Name,Date,Average no of wickets,Average economy
0,YS Chahal,2017-04-05,1.380952,8.150794
1,S Aravind,2017-04-05,1.535714,8.309524
2,SR Watson,2017-04-05,1.104651,7.946705
3,STR Binny,2017-04-05,0.433962,7.738994
4,A Nehra,2017-04-05,1.353659,7.846545


In [36]:
batsmen_date_average_run = {}
bowler_date_average_economy = {}
for index, row in batting_average.iterrows():
    batsmen_date_average_run[(row['Name'], row['Date'])] = row['Batting Average']
for index, row in bowling_average.iterrows():
    bowler_date_average_economy[(row['Name'], row['Date'])] = row['Average economy']

Below code creates an input vector for each match and team played in it

In [37]:
matchId_team_player_vector_dict = OrderedDict()
for (match_id, team) in matchId_team_batsmen_dict:
    player_vector = np.zeros(len(all_players))
    for batsman in list(matchId_team_batsmen_dict[(match_id, team)]):
        player_vector[player_dict[batsman]] = 1
#         player_vector[player_dict[batsman]] = batsmen_date_average_run.get((batsman, matchId_date_dict[match_id]), 10)
    for bowler in list(matchId_team_opposition_bowler_dict[(match_id, team)]):
        player_vector[player_dict[bowler]] = 1
#         player_vector[player_dict[bowler]] = bowler_date_average_economy.get((bowler, matchId_date_dict[match_id]), 8)
    matchId_team_player_vector_dict[(match_id, team)] = player_vector

In [38]:
X_np = []
y_np = []
for (match_id, team) in matchId_team_batsmen_dict:
    X_np.append(matchId_team_player_vector_dict[(match_id, team)])
    y_np.append([match_id, team, matchId_team_score[(match_id, team)]])

In [39]:
X = pd.DataFrame(np.array(X_np))
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,484,485,486,487,488,489,490,491,492,493
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [41]:
y = pd.DataFrame(np.array(y_np))
y.head()

Unnamed: 0,0,1,2
0,60,Kolkata Knight Riders,222
1,60,Royal Challengers Bangalore,82
2,61,Chennai Super Kings,240
3,61,Kings XI Punjab,207
4,62,Rajasthan Royals,129


Train test split with first 0.8 ratio of data to train and rest to test

In [43]:
X_train = X[:int(6*len(X)/10)]
y_train = y[:int(6*len(X)/10)]
X_test = X[int(6*len(X)/10):]
y_test = y[int(6*len(X)/10):]
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [44]:
X_train.shape

(740, 494)

In [45]:
X_test.shape

(494, 494)

In [46]:
svr_linear_grid_cv = GridSearchCV(estimator=SVR(kernel='linear'), param_grid={'C':np.logspace(-5, 5, num=10, base=2)}, cv= 5, iid=False, verbose=10, n_jobs = -1)
svr_linear_grid_cv.fit(X_train, y_train[2])

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done  33 out of  50 | elapsed:    3.9s remaining:    2.0s
[Parallel(n_jobs=-1)]: Done  39 out of  50 | elapsed:    4.4s remaining:    1.2s
[Parallel(n_jobs=-1)]: Done  45 out of  50 | elapsed:    4.6s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    5.1s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated',
                           kernel='linear', max_iter=-1, shrinking=True,
                           tol=0.001, verbose=False),
             iid=False, n_jobs=-1,
             param_grid={'C': array([3.12500000e-02, 6.75037337e-02, 1.45816130e-01, 3.14980262e-01,
       6.80395000e-01, 1.46973449e+00, 3.17480210e+00, 6.85795186e+00,
       1.48139954e+01, 3.20000000e+01])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=10)

In [60]:
svr_linear_pred = svr_linear_grid_cv.predict(X_test)
print('Test RMSE:', np.sqrt(mean_squared_error(y_test[2], svr_linear_pred)))
print('Test R2:', r2_score(y_test[2], svr_linear_pred))
print('Test Explained Variance:', explained_variance_score(y_test[2], svr_linear_pred))
print()
print('Train RMSE:', np.sqrt(mean_squared_error(y_train[2], svr_linear_grid_cv.predict(X_train))))
print('Train R2:', r2_score(y_train[2], svr_linear_grid_cv.predict(X_train)))
print('Train Explained Variance:', explained_variance_score(y_train[2], svr_linear_grid_cv.predict(X_train)))

Test RMSE: 30.01562687793747
Test R2: -0.03387508796062444
Test Explained Variance: -0.031978373042029995

Train RMSE: 24.69995566087199
Train R2: 0.29648456454915795
Train Explained Variance: 0.2965708376417745


In [54]:
param_dict = {'C':np.logspace(-5, 5, num=5, base=2), 'gamma':np.logspace(-6, 6, num=5, base=2), 'degree':[1,2,3]}
svr_poly_grid_cv = GridSearchCV(estimator=SVR(kernel='poly'), param_grid=param_dict, cv= 5, iid=False, verbose=10, n_jobs = -1)
svr_poly_grid_cv.fit(X_train, y_train[2])

Fitting 5 folds for each of 75 candidates, totalling 375 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:    8.6s
[Parallel(n_jobs=-1)]: Done 157 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:  

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated', kernel='poly',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid=False, n_jobs=-1,
             param_grid={'C': array([3.12500000e-02, 1.76776695e-01, 1.00000000e+00, 5.65685425e+00,
       3.20000000e+01]),
                         'degree': [1, 2, 3],
                         'gamma': array([1.5625e-02, 1.2500e-01, 1.0000e+00, 8.0000e+00, 6.4000e+01])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=10)

In [62]:
svr_poly_pred = svr_poly_grid_cv.predict(X_test)
print('Test RMSE:', np.sqrt(mean_squared_error(y_test[2], svr_poly_pred)))
print('Test R2:', r2_score(y_test[2], svr_poly_pred))
print('Test Explained Variance:', explained_variance_score(y_test[2], svr_poly_pred))
print()
print('Train RMSE:', np.sqrt(mean_squared_error(y_train[2], svr_poly_grid_cv.predict(X_train))))
print('Train R2:', r2_score(y_train[2], svr_poly_grid_cv.predict(X_train)))
print('Train Explained Variance:', explained_variance_score(y_train[2], svr_poly_grid_cv.predict(X_train)))

Test RMSE: 30.038522940122885
Test R2: -0.03545297915926482
Test Explained Variance: -0.034082693074340975

Train RMSE: 24.62390133086604
Train R2: 0.30081032293027554
Train Explained Variance: 0.30091325185980333


In [56]:
svr_rbf_grid_cv = GridSearchCV(estimator=SVR(kernel='rbf'), param_grid=param_dict, cv= 5, iid=False, verbose=10, n_jobs = -1)
svr_rbf_grid_cv.fit(X_train, y_train[2])

Fitting 5 folds for each of 75 candidates, totalling 375 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.2s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 157 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:  

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid=False, n_jobs=-1,
             param_grid={'C': array([3.12500000e-02, 1.76776695e-01, 1.00000000e+00, 5.65685425e+00,
       3.20000000e+01]),
                         'degree': [1, 2, 3],
                         'gamma': array([1.5625e-02, 1.2500e-01, 1.0000e+00, 8.0000e+00, 6.4000e+01])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=10)

In [63]:
svr_rbf_pred = svr_rbf_grid_cv.predict(X_test)
print('Test RMSE:', np.sqrt(mean_squared_error(y_test[2], svr_rbf_pred)))
print('Test R2:', r2_score(y_test[2], svr_rbf_pred))
print('Test Explained Variance:', explained_variance_score(y_test[2], svr_rbf_pred))
print()
print('Train RMSE:', np.sqrt(mean_squared_error(y_train[2], svr_rbf_grid_cv.predict(X_train))))
print('Train R2:', r2_score(y_train[2], svr_rbf_grid_cv.predict(X_train)))
print('Train Explained Variance:', explained_variance_score(y_train[2], svr_rbf_grid_cv.predict(X_train)))

Test RMSE: 29.7646150207205
Test R2: -0.016655405452932204
Test Explained Variance: -0.01065722007867631

Train RMSE: 23.456196975090183
Train R2: 0.3655513418728602
Train Explained Variance: 0.3655924994780192


In [58]:
svr_sigmoid_grid_cv = GridSearchCV(estimator=SVR(kernel='sigmoid'), param_grid=param_dict, cv= 5, iid=False, verbose=10, n_jobs = -1)
svr_sigmoid_grid_cv.fit(X_train, y_train[2])

Fitting 5 folds for each of 75 candidates, totalling 375 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  89 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done 157 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 197 tasks      | elapsed:  

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated',
                           kernel='sigmoid', max_iter=-1, shrinking=True,
                           tol=0.001, verbose=False),
             iid=False, n_jobs=-1,
             param_grid={'C': array([3.12500000e-02, 1.76776695e-01, 1.00000000e+00, 5.65685425e+00,
       3.20000000e+01]),
                         'degree': [1, 2, 3],
                         'gamma': array([1.5625e-02, 1.2500e-01, 1.0000e+00, 8.0000e+00, 6.4000e+01])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=10)

In [64]:
svr_sigmoid_pred = svr_sigmoid_grid_cv.predict(X_test)
print('Test RMSE:', np.sqrt(mean_squared_error(y_test[2], svr_sigmoid_pred)))
print('Test R2:', r2_score(y_test[2], svr_sigmoid_pred))
print('Test Explained Variance:', explained_variance_score(y_test[2], svr_sigmoid_pred))
print()
print('Train RMSE:', np.sqrt(mean_squared_error(y_train[2], svr_sigmoid_grid_cv.predict(X_train))))
print('Train R2:', r2_score(y_train[2], svr_sigmoid_grid_cv.predict(X_train)))
print('Train Explained Variance:', explained_variance_score(y_train[2], svr_sigmoid_grid_cv.predict(X_train)))

Test RMSE: 30.077356365471104
Test R2: -0.038131950935592185
Test Explained Variance: -0.03442190985835869

Train RMSE: 27.7451976090233
Train R2: 0.11231899276826063
Train Explained Variance: 0.11247134844169637


In [66]:
total_matches = len(svr_sigmoid_pred)/2
svr_linear_winner_predictions = []
svr_poly_winner_predictions = []
svr_rbf_winner_predictions = []
svr_sigmoid_winner_predictions = []

correct_winner = []

for i in range(0, len(svr_linear_pred), 2):
    if svr_linear_pred[i] > svr_linear_pred[i+1]:
        svr_linear_winner_predictions.append(1)
    else:
        svr_linear_winner_predictions.append(2)

for i in range(0, len(svr_poly_pred), 2):
    if svr_poly_pred[i] > svr_poly_pred[i+1]:
        svr_poly_winner_predictions.append(1)
    else:
        svr_poly_winner_predictions.append(2)

for i in range(0, len(svr_rbf_pred), 2):
    if svr_rbf_pred[i] > svr_rbf_pred[i+1]:
        svr_rbf_winner_predictions.append(1)
    else:
        svr_rbf_winner_predictions.append(2)
        
for i in range(0, len(svr_sigmoid_pred), 2):
    if svr_sigmoid_pred[i] > svr_sigmoid_pred[i+1]:
        svr_sigmoid_winner_predictions.append(1)
    else:
        svr_sigmoid_winner_predictions.append(2)
        
for i in range(0, len(y_test[2]), 2):
    if y_test.iloc[i][2] > y_test.iloc[i+1][2]:
        correct_winner.append(1)
    else:
        correct_winner.append(2)

In [67]:
svr_linear_win_loss_accuracy = 0
svr_poly_win_loss_accuracy = 0
svr_rbf_win_loss_accuracy = 0
svr_sigmoid_win_loss_accuracy = 0

for i in range(0, len(correct_winner)):
    if correct_winner[i] == svr_linear_winner_predictions[i]:
        svr_linear_win_loss_accuracy += 1
    if correct_winner[i] == svr_poly_winner_predictions[i]:
        svr_poly_win_loss_accuracy += 1
    if correct_winner[i] == svr_rbf_winner_predictions[i]:
        svr_rbf_win_loss_accuracy += 1
    if correct_winner[i] == svr_sigmoid_winner_predictions[i]:
        svr_sigmoid_win_loss_accuracy += 1
        
svr_linear_win_loss_accuracy = svr_linear_win_loss_accuracy/total_matches
svr_poly_win_loss_accuracy = svr_poly_win_loss_accuracy/total_matches
svr_rbf_win_loss_accuracy = svr_rbf_win_loss_accuracy/total_matches
svr_sigmoid_win_loss_accuracy = svr_sigmoid_win_loss_accuracy/total_matches

print('SVR Linear Win-Loss Accuracy:', svr_linear_win_loss_accuracy)
print('SVR Poly Win-Loss Accuracy:', svr_poly_win_loss_accuracy)
print('SVR RBF Win-Loss Accuracy:', svr_rbf_win_loss_accuracy)
print('SVR Sigmoid Win-Loss Accuracy:', svr_sigmoid_win_loss_accuracy)

SVR Linear Win-Loss Accuracy: 0.5384615384615384
SVR Poly Win-Loss Accuracy: 0.5465587044534413
SVR RBF Win-Loss Accuracy: 0.6032388663967612
SVR Sigmoid Win-Loss Accuracy: 0.5708502024291497
