In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

In [4]:
df = pd.read_csv('datasets/best_runs.csv')
df

Unnamed: 0,Player,M1,M2,M3,M4,M5,M6,M7,M8,M9,Total
0,V Kohli,85,55,16,103,95,0,88,101,51,594
1,Q de Kock,100,20,4,109,174,24,114,5,41,591
2,R Ravindra,123,51,9,32,75,116,9,108,42,565
3,RG Sharma,0,131,86,48,46,87,4,40,61,503
4,DA Warner,41,11,163,13,104,81,15,18,53,499
5,HE vander Dussen,108,4,60,26,1,21,133,13,76,442
6,DJ Mitchell,0,48,89,1,130,54,24,29,43,418
7,GJ Maxwell,15,31,0,3,106,41,0,201,0,397
8,AK Markram,106,1,42,56,60,91,6,9,25,396


In [5]:
# Preparing data for M10 score prediction
# label encoding the players and saving the mapping in a dictionary
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Player'] = le.fit_transform(df['Player'])
le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print(le_name_mapping)

X = df.iloc[:, 0:9]  # Match 1 to 9 scores as features
y_m10_scores = df.iloc[:, 8]  # Using M9 scores as a proxy for M10

# Splitting data into training and testing sets for model validation
X_train, X_test, y_train, y_test = train_test_split(X, y_m10_scores, test_size=0.2, random_state=42)

# Fitting the linear regression model
lr_m10 = LinearRegression()
lr_m10.fit(X_train, y_train)

# Predicting the scores for M10
y_pred_m10 = lr_m10.predict(X_test)

# Evaluating the model
mse_m10 = mean_squared_error(y_test, y_pred_m10)
rmse_m10 = np.sqrt(mse_m10)

# Predicting M10 score using the entire dataset (as we need predictions for each player)
predicted_m10_scores = lr_m10.predict(X)

# Printing the RMSE for evaluation
rmse_m10, predicted_m10_scores


{'AK Markram': 0, 'DA Warner': 1, 'DJ Mitchell': 2, 'GJ Maxwell': 3, 'HE vander Dussen': 4, 'Q de Kock': 5, 'R Ravindra': 6, 'RG Sharma': 7, 'V Kohli': 8}


(39.88690526625581,
 array([101.        ,  38.34722645, 108.        ,  40.        ,
         18.        ,  13.        ,  29.        , 155.50392422,
          9.        ]))

In [6]:
# Preparing data for M11 score prediction

    
X = df.iloc[:, 0:10]  # Match 1 to 10 scores as features
y_m11_scores = df.iloc[:, 9]  # Using M9 scores as a proxy for M10

# Splitting data into training and testing sets for model validation
X_train, X_test, y_train, y_test = train_test_split(X, y_m11_scores, test_size=0.2, random_state=42)

# Fitting the linear regression model
lr_m11 = LinearRegression()
lr_m11.fit(X_train, y_train)

# Predicting the scores for M11
y_pred_m11 = lr_m11.predict(X_test)

# Evaluating the model
mse_m11 = mean_squared_error(y_test, y_pred_m11)
rmse_m11 = np.sqrt(mse_m11)

# Predicting M11 score using the entire dataset (as we need predictions for each player)
predicted_m11_scores = lr_m11.predict(X)

# Printing the RMSE for evaluation
rmse_m11, predicted_m11_scores

(31.75609750602793,
 array([51.        , 21.55897595, 42.        , 61.        , 53.        ,
        76.        , 43.        , 40.48389855, 25.        ]))

In [7]:
# add new columns to the dataframe for storing the predicted scores
df['M10'] = predicted_m10_scores
df['M11'] = predicted_m11_scores
df

Unnamed: 0,Player,M1,M2,M3,M4,M5,M6,M7,M8,M9,Total,M10,M11
0,8,85,55,16,103,95,0,88,101,51,594,101.0,51.0
1,5,100,20,4,109,174,24,114,5,41,591,38.347226,21.558976
2,6,123,51,9,32,75,116,9,108,42,565,108.0,42.0
3,7,0,131,86,48,46,87,4,40,61,503,40.0,61.0
4,1,41,11,163,13,104,81,15,18,53,499,18.0,53.0
5,4,108,4,60,26,1,21,133,13,76,442,13.0,76.0
6,2,0,48,89,1,130,54,24,29,43,418,29.0,43.0
7,3,15,31,0,3,106,41,0,201,0,397,155.503924,40.483899
8,0,106,1,42,56,60,91,6,9,25,396,9.0,25.0


In [8]:
# update the Total column with the predicted scores
df['Total'] = df['M1'] + df['M2'] + df['M3'] + df['M4'] + df['M5'] + df['M6'] + df['M7'] + df['M8'] + df['M9'] + df['M10'] + df['M11']

In [9]:
df

Unnamed: 0,Player,M1,M2,M3,M4,M5,M6,M7,M8,M9,Total,M10,M11
0,8,85,55,16,103,95,0,88,101,51,746.0,101.0,51.0
1,5,100,20,4,109,174,24,114,5,41,650.906202,38.347226,21.558976
2,6,123,51,9,32,75,116,9,108,42,715.0,108.0,42.0
3,7,0,131,86,48,46,87,4,40,61,604.0,40.0,61.0
4,1,41,11,163,13,104,81,15,18,53,570.0,18.0,53.0
5,4,108,4,60,26,1,21,133,13,76,531.0,13.0,76.0
6,2,0,48,89,1,130,54,24,29,43,490.0,29.0,43.0
7,3,15,31,0,3,106,41,0,201,0,592.987823,155.503924,40.483899
8,0,106,1,42,56,60,91,6,9,25,430.0,9.0,25.0


In [10]:
# replace the Player column with the original names by iterating over the dictionary
for key, value in le_name_mapping.items():
    df['Player'].replace(value, key, inplace=True)
    
df

Unnamed: 0,Player,M1,M2,M3,M4,M5,M6,M7,M8,M9,Total,M10,M11
0,V Kohli,85,55,16,103,95,0,88,101,51,746.0,101.0,51.0
1,Q de Kock,100,20,4,109,174,24,114,5,41,650.906202,38.347226,21.558976
2,R Ravindra,123,51,9,32,75,116,9,108,42,715.0,108.0,42.0
3,RG Sharma,0,131,86,48,46,87,4,40,61,604.0,40.0,61.0
4,DA Warner,41,11,163,13,104,81,15,18,53,570.0,18.0,53.0
5,HE vander Dussen,108,4,60,26,1,21,133,13,76,531.0,13.0,76.0
6,DJ Mitchell,0,48,89,1,130,54,24,29,43,490.0,29.0,43.0
7,GJ Maxwell,15,31,0,3,106,41,0,201,0,592.987823,155.503924,40.483899
8,AK Markram,106,1,42,56,60,91,6,9,25,430.0,9.0,25.0


In [11]:
# sort the dataframe by the Total column
df.sort_values(by=['Total'], inplace=True, ascending=False)
df

Unnamed: 0,Player,M1,M2,M3,M4,M5,M6,M7,M8,M9,Total,M10,M11
0,V Kohli,85,55,16,103,95,0,88,101,51,746.0,101.0,51.0
2,R Ravindra,123,51,9,32,75,116,9,108,42,715.0,108.0,42.0
1,Q de Kock,100,20,4,109,174,24,114,5,41,650.906202,38.347226,21.558976
3,RG Sharma,0,131,86,48,46,87,4,40,61,604.0,40.0,61.0
7,GJ Maxwell,15,31,0,3,106,41,0,201,0,592.987823,155.503924,40.483899
4,DA Warner,41,11,163,13,104,81,15,18,53,570.0,18.0,53.0
5,HE vander Dussen,108,4,60,26,1,21,133,13,76,531.0,13.0,76.0
6,DJ Mitchell,0,48,89,1,130,54,24,29,43,490.0,29.0,43.0
8,AK Markram,106,1,42,56,60,91,6,9,25,430.0,9.0,25.0


In [12]:
# save the dataframe to a csv file
df.to_csv('datasets/predicted_scores.csv', index=False)


In [13]:
for index, row in df.iterrows():
    # iterate from M1 to M11
    for i in range(1, 12):
        # if the predicted score is greater than 100, set it to 100
        if row['M' + str(i)] > 100:
            df.at[index, 'M' + str(i)] = 100
        # if the predicted score is less than 0, set it to 0
        elif row['M' + str(i)] < 0:
            df.at[index, 'M' + str(i)] = 0
            
df

Unnamed: 0,Player,M1,M2,M3,M4,M5,M6,M7,M8,M9,Total,M10,M11
0,V Kohli,85,55,16,100,95,0,88,100,51,746.0,100.0,51.0
2,R Ravindra,100,51,9,32,75,100,9,100,42,715.0,100.0,42.0
1,Q de Kock,100,20,4,100,100,24,100,5,41,650.906202,38.347226,21.558976
3,RG Sharma,0,100,86,48,46,87,4,40,61,604.0,40.0,61.0
7,GJ Maxwell,15,31,0,3,100,41,0,100,0,592.987823,100.0,40.483899
4,DA Warner,41,11,100,13,100,81,15,18,53,570.0,18.0,53.0
5,HE vander Dussen,100,4,60,26,1,21,100,13,76,531.0,13.0,76.0
6,DJ Mitchell,0,48,89,1,100,54,24,29,43,490.0,29.0,43.0
8,AK Markram,100,1,42,56,60,91,6,9,25,430.0,9.0,25.0
