# Cricket Prediction using machine learning

## Data Preprocessing

In [89]:
import pandas as pd

In [90]:
dataset=pd.read_csv('ipl.csv',index_col=0)

In [91]:
dataset = dataset.drop(columns=['gender', 'match_type','date','umpire_1','umpire_2','player of the match','win_by_runs','win_by_wickets'])

In [92]:
# columns with missing values
dataset.columns[dataset.isnull().any()]

Index(['city'], dtype='object')

In [93]:
dataset['city'].fillna(dataset['city'].mode()[0], inplace=True)

In [94]:
dataset.columns[dataset.isnull().any()]

dataset.replace(['Mumbai Indians','Kolkata Knight Riders','Royal Challengers Bangalore','Deccan Chargers','Chennai Super Kings',
                 'Rajasthan Royals','Delhi Daredevils','Gujarat Lions','Kings XI Punjab',
                 'Sunrisers Hyderabad','Rising Pune Supergiants','Kochi Tuskers Kerala','Pune Warriors','Rising Pune Supergiant']
                ,['MI','KKR','RCB','DC','CSK','RR','DD','GL','KXIP','SRH','RPS','KTK','PW','RPS'],inplace=True)

In [95]:
def createDict(series) :
    
    dictionary={}
    
    i=0
    
    for ser in series :
        if(ser in dictionary) :
            continue
        dictionary[ser]=i
        i=i+1
        
    return dictionary

In [96]:
teamDict=createDict(dataset['team 1'])

toss_winnerDict=createDict(dataset['toss_winner'])

cityDict=createDict(dataset['city'])

venueDict=createDict(dataset['venue'])

winnerDict=dict(teamDict)

winnerDict['tie']=14

winnerDict['no result']=15

In [97]:
encode = {
'team 1': teamDict,
'team 2': teamDict,
'toss_winner': teamDict,
'winner': winnerDict,
'city':cityDict,
'venue':venueDict
    
 }
dataset.replace(encode, inplace=True)

In [98]:
dataset.head(5)

Unnamed: 0,city,team 1,team 2,toss_decision,toss_winner,venue,winner
0,0,0,4,field,0,0,4
1,1,1,7,bat,7,1,7
2,2,2,5,bat,5,2,2
3,3,3,0,bat,3,3,0
4,4,4,6,bat,6,4,4


In [99]:
winner = dataset['winner']

In [100]:
features = dataset.drop('winner',axis=1)

In [101]:
features=pd.get_dummies(features)

In [102]:
city=features['city']

cDict={('city_'+key):[] for key,val in cityDict.items()}

for city in city :
    
    for key,val in cityDict.items():
        
        if(city==val):
            
            cDict['city_'+key].append(1)
        else:
            cDict['city_'+key].append(0)

temp=pd.DataFrame(cDict)

temp


Unnamed: 0,city_Abu Dhabi,city_Ahmedabad,city_Bangalore,city_Bloemfontein,city_Cape Town,city_Centurion,city_Chandigarh,city_Chennai,city_Cuttack,city_Delhi,...,city_Kochi,city_Kolkata,city_Mumbai,city_Nagpur,city_Port Elizabeth,city_Pune,city_Raipur,city_Rajkot,city_Ranchi,city_Visakhapatnam
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [103]:

def oneHotEncode(col,df) :
    
    columnDataframe=df[col]
    
    if(col=='team 1' or col=='team 2' or col=='toss_winner'):
        
        currDict=teamDict
    
    if(col=='city') :
        
        currDict=cityDict
        
    if(col=='venue') :
        
        currDict=venueDict
        
    columnDict={(col+'_'+key):[] for key,val in currDict.items()}

    for value in columnDataframe :

        for key,val in currDict.items():

            if(value==val):

                columnDict[col+'_'+key].append(1)
            else:
                columnDict[col+'_'+key].append(0)
                
    temp=pd.DataFrame(columnDict)

    df=df.join(temp)
    
    df=df.drop(columns=col)
    
    return df

In [104]:
features = oneHotEncode('team 1',features)

features = oneHotEncode('city',features)

features = oneHotEncode('team 2',features)

features = oneHotEncode('toss_winner',features)

In [105]:
print(features)

     venue  toss_decision_bat  toss_decision_field  team 1_CSK  team 1_DC  \
0        0                  0                    1           0          0   
1        1                  1                    0           0          0   
2        2                  1                    0           0          0   
3        3                  1                    0           0          0   
4        4                  1                    0           0          0   
5        5                  1                    0           0          0   
6        6                  1                    0           0          1   
7        7                  0                    1           1          0   
8        6                  0                    1           0          1   
9        1                  0                    1           0          0   
10       0                  0                    1           0          0   
11       7                  1                    0           1          0   

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, 
                                                    winner,random_state=0,test_size =.25)

In [None]:
def prediction(Model,X_train,y_train,X_test,y_test) :
    
    clf=Model()
    
    clf.fit(X_train,y_train)
    
    print(clf.score(X_test,y_test))
    
    return clf


In [None]:
from sklearn.neural_network import MLPClassifier

from sklearn.svm import LinearSVC

from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import RandomForestClassifier

clf_A = prediction(MLPClassifier,X_train,y_train,X_test,y_test)

clf_B = prediction(LinearSVC,X_train,y_train,X_test,y_test)

clf_C = prediction(LogisticRegression,X_train,y_train,X_test,y_test)

clf_D = prediction(RandomForestClassifier,X_train,y_train,X_test,y_test)