In [1]:
import os
import pandas as pd
import numpy as np
import warnings
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore")

In [2]:
def load_data(file_path, filename):
    """Read the CSV file ``filename`` found in directory ``file_path``.

    The two parts are joined with ``os.path.join`` and the result is parsed
    with ``pandas.read_csv`` using its default options.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    return pd.read_csv(os.path.join(file_path, filename))

In [3]:
# Show the working directory: the data path below is resolved relative to it.
print(os.getcwd())

# Directory holding the batting CSV files.
batting_file_path = "Cleaned/Batting"

# All batting dataframes, loaded in one pass (ODI first, then T20).
odi_data, t20_data = (
    load_data(batting_file_path, csv_name)
    for csv_name in ("odi_data_rating.csv", "t20_data_rating.csv")
)

C:\Users\sheru\Documents\GitHub\t20-blocks


In [4]:
# Inspect the loaded ODI batting frame.
odi_data

Unnamed: 0.1,Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,Final Region,p1_z,p2_z,p3_z,rating
0,0,SR Tendulkar,1989,463,452,41,18426,200,44,21367,86,49,96,20,INDIA,10.000000,10.000000,72.043792,10.000000
1,1,KC Sangakkara,2000,404,380,41,14234,169,41,18048,78,25,93,15,SL,6.865620,4.942602,50.476428,6.690866
2,3,ST Jayasuriya,1989,445,433,18,13430,189,32,14725,91,28,68,34,SL,4.816705,4.047619,55.562842,6.657806
3,2,RT Ponting,1995,375,365,39,13704,164,42,17046,80,30,82,20,AUS,6.164604,5.229592,49.843024,6.540932
4,6,V Kohli,2008,242,233,39,11609,183,59,12445,93,43,55,13,INDIA,4.074044,5.027636,49.084549,6.051177
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2480,2432,Ishtiaq Muhammad,2016,1,1,1,0,0,0,0,0,0,0,0,HKG,0.000000,0.000000,0.000000,0.000000
2481,2431,SM Harwood,2009,1,1,0,0,0,0,4,0,0,0,1,AUS,0.000000,0.000000,0.000000,0.000000
2482,2430,Harunur Rashid,1988,2,2,0,0,0,0,17,0,0,0,2,BDESH,0.000000,0.000000,0.000000,0.000000
2483,2429,RG Hart,2002,2,1,0,0,0,0,9,0,0,0,1,NZ,0.000000,0.000000,0.000000,0.000000


In [5]:
# Columns fed to the ODI regressor as inputs; 'rating' is the value to predict.
columns = [
    'Mat', 'Inns', 'Runs', 'HS', 'Ave',
    'BF', 'SR', '100', '50',
]
odi_X, odi_Y = odi_data[columns], odi_data['rating']
odi_X

Unnamed: 0,Mat,Inns,Runs,HS,Ave,BF,SR,100,50
0,463,452,18426,200,44,21367,86,49,96
1,404,380,14234,169,41,18048,78,25,93
2,445,433,13430,189,32,14725,91,28,68
3,375,365,13704,164,42,17046,80,30,82
4,242,233,11609,183,59,12445,93,43,55
...,...,...,...,...,...,...,...,...,...
2480,1,1,0,0,0,0,0,0,0
2481,1,1,0,0,0,4,0,0,0
2482,2,2,0,0,0,17,0,0,0
2483,2,1,0,0,0,9,0,0,0


In [6]:
# Inspect the ODI regression target (the 'rating' column).
odi_Y

0       10.000000
1        6.690866
2        6.657806
3        6.540932
4        6.051177
          ...    
2480     0.000000
2481     0.000000
2482     0.000000
2483     0.000000
2484     0.000000
Name: rating, Length: 2485, dtype: float64

In [7]:
# Hold out 30% of the ODI rows for evaluation; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    odi_X,
    odi_Y,
    test_size=0.3,
    random_state=42,
)

In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [9]:
# Fit a linear regression on the ODI training split (fit() returns the estimator itself).
model = LinearRegression().fit(xtrain, ytrain)

# Score the held-out split.
ypred = model.predict(xtest)
# NOTE: despite the name, this is the R^2 score from r2_score, not a classification accuracy.
accuracy = r2_score(ytest, ypred)
print("Accuracy:", accuracy * 100, "%")

Accuracy: 99.27258932480683 %


In [10]:
import pickle

In [12]:
def write_pickle_file(file_path, filename, model):
    """Serialize ``model`` with pickle to ``<file_path>/<filename>``.

    The target directory is created (including missing parents) when it does
    not exist yet, and a confirmation line is printed once a non-empty file
    is found on disk.

    Parameters
    ----------
    file_path : str
        Directory that will hold the pickle. An empty string means the
        current working directory.
    filename : str
        Name of the pickle file inside ``file_path``.
    model : object
        The object handed to ``pickle.dump``.

    Raises
    ------
    OSError
        If the directory cannot be created or the file cannot be opened.

    Any exception raised by ``pickle.dump`` itself is propagated unchanged.
    """
    # An empty directory means "write here"; os.makedirs("") would raise,
    # so directory creation is skipped in that case.
    if file_path and not os.path.isdir(file_path):
        # exist_ok=True tolerates the directory appearing between the check
        # above and this call (e.g. created by a concurrent run).
        os.makedirs(file_path, exist_ok=True)
        print("The new directory is created!")

    model_path = os.path.join(file_path, filename)
    with open(model_path, 'wb') as file:
        pickle.dump(model, file)

    # Only report success when something was actually written.
    if os.path.exists(model_path) and os.path.getsize(model_path) > 0:
        print(filename + " was written to successfully!")

In [13]:
# Persist the fitted ODI regressor.
write_pickle_file(
    file_path="Models/Batting",
    filename="odi_model.pkl",
    model=model,
)

The new directory is created!
odi_model.pkl was written to successfully!


In [16]:
# List the T20 frame's columns before choosing the model features.
t20_data.columns

Index(['Unnamed: 0', 'Player', 'Span', 'Mat', 'Inns', 'NO', 'Runs', 'HS',
       'Ave', 'BF', 'SR', '100', '50', '0', '4s', '6s', 'Final Region', 'p1_z',
       'p2_z', 'p3_z', 'p4_z', 'rating'],
      dtype='object')

In [17]:
# Columns fed to the T20 regressor as inputs; 'rating' is the value to predict.
columns = [
    'Mat', 'Inns', 'Runs', 'HS', 'Ave', 'BF',
    'SR', '100', '50', '4s', '6s',
]
t20_X, t20_Y = t20_data[columns], t20_data['rating']
t20_X

Unnamed: 0,Mat,Inns,Runs,HS,Ave,BF,SR,100,50,4s,6s
0,104,96,2633,118,32,1905,138,4,19,234,120
1,75,70,2633,94,52,1907,138,0,24,247,71
2,83,80,2436,105,33,1810,134,2,15,215,113
3,71,70,2140,123,35,1571,136,2,13,199,91
4,111,104,2263,75,30,1824,124,0,7,186,61
...,...,...,...,...,...,...,...,...,...,...,...
1730,4,1,0,0,0,0,0,0,0,0,0
1731,2,1,0,0,0,1,0,0,0,0,0
1732,1,1,0,0,0,1,0,0,0,0,0
1733,5,1,0,0,0,0,0,0,0,0,0


In [19]:
# Hold out 30% of the T20 rows for evaluation; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    t20_X,
    t20_Y,
    test_size=0.3,
    random_state=42,
)

In [20]:
# Fit a linear regression on the T20 training split (fit() returns the estimator itself).
model = LinearRegression().fit(xtrain, ytrain)

# Score the held-out split.
ypred = model.predict(xtest)
# NOTE: despite the name, this is the R^2 score from r2_score, not a classification accuracy.
accuracy = r2_score(ytest, ypred)
print("Accuracy:", accuracy * 100, "%")

Accuracy: 96.32195843326046 %


In [21]:
# Persist the fitted T20 regressor.
write_pickle_file(
    file_path="Models/Batting",
    filename="t20_model.pkl",
    model=model,
)

t20_model.pkl was written to successfully!
