### Installing libraries and loading data

In [1]:
import numpy as np
import pandas as pd

import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

In [3]:
!pip install scikit-learn==1.5.1



In [5]:

import sklearn
print(sklearn.__version__)

1.5.1


In [6]:
match = pd.read_csv('matches.csv')
delivery = pd.read_csv('deliveries.csv')

In [7]:
match.head()

Unnamed: 0,id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,2007/08,Bangalore,2008-04-18,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335983,2007/08,Chandigarh,2008-04-19,League,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri
2,335984,2007/08,Delhi,2008-04-19,League,MF Maharoof,Feroz Shah Kotla,Delhi Daredevils,Rajasthan Royals,Rajasthan Royals,bat,Delhi Daredevils,wickets,9.0,130.0,20.0,N,,Aleem Dar,GA Pratapkumar
3,335985,2007/08,Mumbai,2008-04-20,League,MV Boucher,Wankhede Stadium,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,Royal Challengers Bangalore,wickets,5.0,166.0,20.0,N,,SJ Davis,DJ Harper
4,335986,2007/08,Kolkata,2008-04-20,League,DJ Hussey,Eden Gardens,Kolkata Knight Riders,Deccan Chargers,Deccan Chargers,bat,Kolkata Knight Riders,wickets,5.0,111.0,20.0,N,,BF Bowden,K Hariharan


In [8]:
match.shape

(1095, 20)

In [9]:
delivery.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,legbyes,0,,,
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,wides,0,,,
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,


### checking valid current teams

In [10]:
match['team1'].unique()

array(['Royal Challengers Bangalore', 'Kings XI Punjab',
       'Delhi Daredevils', 'Mumbai Indians', 'Kolkata Knight Riders',
       'Rajasthan Royals', 'Deccan Chargers', 'Chennai Super Kings',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',
       'Gujarat Lions', 'Rising Pune Supergiants',
       'Rising Pune Supergiant', 'Delhi Capitals', 'Punjab Kings',
       'Lucknow Super Giants', 'Gujarat Titans',
       'Royal Challengers Bengaluru'], dtype=object)

In [11]:
#Total teams name which play the IPL match currently are given below:
teams = [
    'Sunrisers Hyderabad',
    'Mumbai Indians',
    'Royal Challengers Bangalore',
    'Kolkata Knight Riders',
    'Kings XI Punjab',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Delhi Capitals'
]

In [12]:
# There are two teams which is being renamed. So will replace the old team name with the new one.
match['team1'] = match['team1'].str.replace('Delhi Daredevils','Delhi Capitals')
match['team2'] = match['team2'].str.replace('Delhi Daredevils','Delhi Capitals')

match['team1'] = match['team1'].str.replace('Deccan Chargers','Sunrisers Hyderabad')
match['team2'] = match['team2'].str.replace('Deccan Chargers','Sunrisers Hyderabad')



In [13]:
#We can see there are few teams which does not play now in the IPL. So we will not consider those teams in our analysis.
match = match[match['team1'].isin(teams)]
match = match[match['team2'].isin(teams)]

In [14]:
match.shape

(839, 20)

In [15]:
#We will be needing only few rows from the match dataframe.

match_df = match[['id','city','winner','target_runs']]

In [16]:
match_df.head()

Unnamed: 0,id,city,winner,target_runs
0,335982,Bangalore,Kolkata Knight Riders,223.0
1,335983,Chandigarh,Chennai Super Kings,241.0
2,335984,Delhi,Delhi Daredevils,130.0
3,335985,Mumbai,Royal Challengers Bangalore,166.0
4,335986,Kolkata,Kolkata Knight Riders,111.0


In [17]:
#we will merge the new match dataframe with the delivery dataframe
delivery_df = match_df.merge(delivery,left_on='id',right_on='match_id')

In [18]:
delivery_df


Unnamed: 0,id,city,winner,target_runs,match_id,inning,batting_team,bowling_team,over,ball,...,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder
0,335982,Bangalore,Kolkata Knight Riders,223.0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,...,P Kumar,BB McCullum,0,1,1,legbyes,0,,,
1,335982,Bangalore,Kolkata Knight Riders,223.0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,...,P Kumar,SC Ganguly,0,0,0,,0,,,
2,335982,Bangalore,Kolkata Knight Riders,223.0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,...,P Kumar,SC Ganguly,0,1,1,wides,0,,,
3,335982,Bangalore,Kolkata Knight Riders,223.0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,...,P Kumar,SC Ganguly,0,0,0,,0,,,
4,335982,Bangalore,Kolkata Knight Riders,223.0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,...,P Kumar,SC Ganguly,0,0,0,,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199873,1426312,Chennai,Kolkata Knight Riders,114.0,1426312,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,5,...,AK Markram,VR Iyer,1,0,1,,0,,,
199874,1426312,Chennai,Kolkata Knight Riders,114.0,1426312,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,6,...,AK Markram,SS Iyer,1,0,1,,0,,,
199875,1426312,Chennai,Kolkata Knight Riders,114.0,1426312,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,1,...,Shahbaz Ahmed,SS Iyer,1,0,1,,0,,,
199876,1426312,Chennai,Kolkata Knight Riders,114.0,1426312,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,2,...,Shahbaz Ahmed,VR Iyer,1,0,1,,0,,,


### Data pre-processing after one inning has been played

In [19]:
#we need to analyse for the second inning only.
delivery_df = delivery_df[delivery_df['inning'] == 2]

In [20]:
delivery_df.drop('match_id',axis=1,inplace=True)

In [21]:
delivery_df.rename(columns={'id': 'match_id'}, inplace=True)

In [22]:
delivery_df


Unnamed: 0,match_id,city,winner,target_runs,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder
124,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,1,R Dravid,AB Dinda,W Jaffer,1,0,1,,0,,,
125,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,2,W Jaffer,AB Dinda,R Dravid,0,1,1,wides,0,,,
126,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,3,W Jaffer,AB Dinda,R Dravid,0,0,0,,0,,,
127,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,4,W Jaffer,AB Dinda,R Dravid,1,0,1,,0,,,
128,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,5,R Dravid,AB Dinda,W Jaffer,1,0,1,,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199873,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,5,SS Iyer,AK Markram,VR Iyer,1,0,1,,0,,,
199874,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,6,VR Iyer,AK Markram,SS Iyer,1,0,1,,0,,,
199875,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,1,VR Iyer,Shahbaz Ahmed,SS Iyer,1,0,1,,0,,,
199876,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,2,SS Iyer,Shahbaz Ahmed,VR Iyer,1,0,1,,0,,,


In [23]:
# We will calculate the total runs scored after each ball.
delivery_df['current_score'] = delivery_df.groupby('match_id').cumsum()['total_runs']

In [24]:
# Lets calculate total run left after each ball.
delivery_df['runs_left'] = delivery_df['target_runs'] - delivery_df['current_score']

In [25]:
# Now lets calculate the total balls left
delivery_df['balls_left'] = 120 - (delivery_df['over']*6 + delivery_df['ball'])

In [26]:
delivery_df


Unnamed: 0,match_id,city,winner,target_runs,inning,batting_team,bowling_team,over,ball,batter,...,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left
124,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,1,R Dravid,...,0,1,,0,,,,1,222.0,119
125,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,2,W Jaffer,...,1,1,wides,0,,,,2,221.0,118
126,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,3,W Jaffer,...,0,0,,0,,,,2,221.0,117
127,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,4,W Jaffer,...,0,1,,0,,,,3,220.0,116
128,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,5,R Dravid,...,0,1,,0,,,,4,219.0,115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199873,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,5,SS Iyer,...,0,1,,0,,,,110,4.0,61
199874,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,6,VR Iyer,...,0,1,,0,,,,111,3.0,60
199875,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,1,VR Iyer,...,0,1,,0,,,,112,2.0,59
199876,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,2,SS Iyer,...,0,1,,0,,,,113,1.0,58


In [27]:
# We will calculate the wickets left after every wicket.
wickets = delivery_df.groupby('match_id').cumsum()['is_wicket'].values
delivery_df['wickets_left'] = 10 - wickets


In [28]:
delivery_df

Unnamed: 0,match_id,city,winner,target_runs,inning,batting_team,bowling_team,over,ball,batter,...,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,wickets_left
124,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,1,R Dravid,...,1,,0,,,,1,222.0,119,10
125,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,2,W Jaffer,...,1,wides,0,,,,2,221.0,118,10
126,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,3,W Jaffer,...,0,,0,,,,2,221.0,117,10
127,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,4,W Jaffer,...,1,,0,,,,3,220.0,116,10
128,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,5,R Dravid,...,1,,0,,,,4,219.0,115,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199873,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,5,SS Iyer,...,1,,0,,,,110,4.0,61,8
199874,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,6,VR Iyer,...,1,,0,,,,111,3.0,60,8
199875,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,1,VR Iyer,...,1,,0,,,,112,2.0,59,8
199876,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,2,SS Iyer,...,1,,0,,,,113,1.0,58,8


In [29]:
# current run rate(crr) = runs/overs
delivery_df['crr'] = (delivery_df['current_score']*6)/(120 - delivery_df['balls_left'])

In [30]:
#required run rate(rrr)=runs left/over
delivery_df['rrr'] = (delivery_df['runs_left']*6)/delivery_df['balls_left']

In [31]:
# we will create a function result to store the value after checking if the batting team wins the match
def result(row):
    return 1 if row['batting_team'] == row['winner'] else 0

In [32]:
delivery_df['result'] = delivery_df.apply(result,axis=1)

In [33]:
delivery_df


Unnamed: 0,match_id,city,winner,target_runs,inning,batting_team,bowling_team,over,ball,batter,...,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,wickets_left,crr,rrr,result
124,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,1,R Dravid,...,,,,1,222.0,119,10,6.000000,11.193277,0
125,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,2,W Jaffer,...,,,,2,221.0,118,10,6.000000,11.237288,0
126,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,3,W Jaffer,...,,,,2,221.0,117,10,4.000000,11.333333,0
127,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,4,W Jaffer,...,,,,3,220.0,116,10,4.500000,11.379310,0
128,335982,Bangalore,Kolkata Knight Riders,223.0,2,Royal Challengers Bangalore,Kolkata Knight Riders,0,5,R Dravid,...,,,,4,219.0,115,10,4.800000,11.426087,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199873,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,5,SS Iyer,...,,,,110,4.0,61,8,11.186441,0.393443,1
199874,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,9,6,VR Iyer,...,,,,111,3.0,60,8,11.100000,0.300000,1
199875,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,1,VR Iyer,...,,,,112,2.0,59,8,11.016393,0.203390,1
199876,1426312,Chennai,Kolkata Knight Riders,114.0,2,Kolkata Knight Riders,Sunrisers Hyderabad,10,2,SS Iyer,...,,,,113,1.0,58,8,10.935484,0.103448,1


## creating Final dataframe

In [34]:
final_df = delivery_df[['batting_team','bowling_team','city','runs_left','balls_left','wickets_left','target_runs','crr','rrr','result']]


In [35]:
# lets shuffle the rows to build the model accurately

final_df = final_df.sample(final_df.shape[0])

In [36]:
final_df

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,target_runs,crr,rrr,result
62659,Chennai Super Kings,Delhi Daredevils,Chennai,113.0,114,10,115.0,2.000000,5.947368,1
27841,Delhi Daredevils,Kings XI Punjab,Chandigarh,10.0,8,6,143.0,7.125000,7.500000,1
185334,Rajasthan Royals,Chennai Super Kings,Mumbai,79.0,62,8,151.0,7.448276,7.645161,1
50304,Delhi Daredevils,Kings XI Punjab,Dharamsala,96.0,47,7,171.0,6.164384,12.255319,0
122561,Mumbai Indians,Kings XI Punjab,Mumbai,28.0,15,5,231.0,11.600000,11.200000,0
...,...,...,...,...,...,...,...,...,...,...
136692,Chennai Super Kings,Kings XI Punjab,Pune,93.0,56,6,154.0,5.718750,9.964286,1
103100,Delhi Daredevils,Rajasthan Royals,Mumbai,170.0,105,9,190.0,8.000000,9.714286,0
50248,Delhi Daredevils,Kings XI Punjab,Dharamsala,156.0,102,10,171.0,5.000000,9.176471,0
69567,Sunrisers Hyderabad,Kolkata Knight Riders,Kolkata,49.0,-1,3,181.0,6.545455,-294.000000,0


In [37]:
final_df.sample()

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,target_runs,crr,rrr,result
123374,Mumbai Indians,Kolkata Knight Riders,Bangalore,78.0,96,8,108.0,7.5,4.875,1


In [38]:
#lets check the null values in the final data frame
final_df.isnull().sum()

batting_team       0
bowling_team       0
city            6012
runs_left          0
balls_left         0
wickets_left       0
target_runs        0
crr                0
rrr               12
result             0
dtype: int64

In [39]:
# we will drop the rows where the value is null.
final_df.dropna(inplace=True)

In [40]:
final_df.describe()

Unnamed: 0,runs_left,balls_left,wickets_left,target_runs,crr,rrr,result
count,90361.0,90361.0,90361.0,90361.0,90361.0,90361.0,90361.0
mean,93.308352,62.863492,7.531258,167.051958,7.52925,,0.519749
std,50.599661,33.408582,2.151726,30.991279,2.338339,,0.499613
min,-10.0,-2.0,0.0,43.0,0.0,-inf,0.0
25%,53.0,35.0,6.0,148.0,6.333333,7.180328,0.0
50%,92.0,64.0,8.0,166.0,7.546392,8.94,1.0
75%,131.0,92.0,9.0,187.0,8.756757,11.14286,1.0
max,278.0,119.0,10.0,278.0,36.0,inf,1.0


In [41]:
# From the above descrption we can see that there are values as infinite and -infinite in the rrr column. So we will be looking fot the data where the balls_left is not 0.

final_df = final_df[final_df['balls_left'] != 0]


### Splitting the data into train and test set

In [42]:
# X will have all rows and all columns except the result and Y will have all rows and only result column

X = final_df.iloc[:,:-1]
y = final_df.iloc[:,-1]


from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1)

In [43]:
X_train

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,target_runs,crr,rrr
128264,Rajasthan Royals,Mumbai Indians,Jaipur,160.0,114,10,168.0,8.000000,8.421053
189548,Rajasthan Royals,Royal Challengers Bangalore,Bengaluru,35.0,13,6,190.0,8.691589,16.153846
100819,Kings XI Punjab,Chennai Super Kings,Chennai,100.0,7,2,193.0,4.938053,85.714286
108772,Kolkata Knight Riders,Delhi Daredevils,Kolkata,84.0,109,10,99.0,8.181818,4.623853
97148,Rajasthan Royals,Mumbai Indians,Ahmedabad,94.0,58,9,165.0,6.870968,9.724138
...,...,...,...,...,...,...,...,...,...
118803,Kings XI Punjab,Sunrisers Hyderabad,Hyderabad,144.0,104,9,160.0,6.000000,8.307692
54280,Royal Challengers Bangalore,Kolkata Knight Riders,Bangalore,108.0,52,5,166.0,5.117647,12.461538
115909,Kolkata Knight Riders,Sunrisers Hyderabad,Delhi,115.0,82,9,163.0,7.578947,8.414634
107035,Chennai Super Kings,Kings XI Punjab,Chandigarh,44.0,50,8,131.0,7.457143,5.280000


In [45]:
# Apply the column transformer to the categorical variable


from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

trf = ColumnTransformer([
    ('trf',OneHotEncoder(sparse_output=False,handle_unknown='ignore'),['batting_team','bowling_team','city'])
]
,remainder='passthrough')

In [46]:
# we will use the Logistic Regression model as it gives the result based on probability

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [47]:
pipe = Pipeline(steps=[
    ('step1',trf),
    ('step2',LogisticRegression(solver='liblinear'))
])

In [48]:
pipe.fit(X_train,y_train)

In [49]:
# Predicting the result for test dataset

y_pred = pipe.predict(X_test)

In [50]:
#Checking the accuracy of the model

from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.8145886532696791

In [51]:
# Lets check for 10th row.
pipe.predict_proba(X_test)[10]

array([0.61995029, 0.38004971])

## exporting the file

In [52]:
import pickle
pickle.dump(pipe,open('pipe.pkl','wb'))

In [53]:
teams

['Sunrisers Hyderabad',
 'Mumbai Indians',
 'Royal Challengers Bangalore',
 'Kolkata Knight Riders',
 'Kings XI Punjab',
 'Chennai Super Kings',
 'Rajasthan Royals',
 'Delhi Capitals']

In [54]:
delivery_df['city'].unique()

array(['Bangalore', 'Chandigarh', 'Delhi', 'Mumbai', 'Kolkata', 'Jaipur',
       'Hyderabad', 'Chennai', 'Cape Town', 'Port Elizabeth', 'Durban',
       'Centurion', 'East London', 'Johannesburg', 'Kimberley',
       'Bloemfontein', 'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',
       'Visakhapatnam', 'Pune', 'Raipur', 'Ranchi', 'Abu Dhabi', nan,
       'Bengaluru', 'Indore', 'Dubai', 'Sharjah', 'Navi Mumbai',
       'Guwahati'], dtype=object)

In [55]:
import sklearn
print(sklearn.__version__)

1.5.1
