# IPL Data Analysis through Visualisation

My goal for this notebook is to learn interactive data visualisation with plotly, matplotlib and cufflinks. I will try out various plots with each library and finally try to put them all together in a dashboard.

## Importing the libraries

In [None]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.offline as pyo
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import random
import cufflinks as cf
# Switch cufflinks to offline mode for this session.
cf.go_offline()
# NOTE(review): this stores offline=False and world_readable=True in the cufflinks config
# right after go_offline() was called; the two look contradictory. Confirm that
# offline=True was not the intended setting.
cf.set_config_file(offline=False, world_readable=True)
def random_colors(number_of_colors):
    """Return a list of `number_of_colors` random colours as '#RRGGBB' strings.

    Every colour is built from six hexadecimal digits drawn one at a time with
    ``random.choice``, so the result changes between calls unless the caller
    seeds the ``random`` module first.
    """
    hex_digits = '0123456789ABCDEF'
    palette = []
    for _ in range(number_of_colors):
        code = ''
        for _ in range(6):
            code += random.choice(hex_digits)
        palette.append("#" + code)
    return palette


## Loading the datasets

In [None]:
# Both CSV files are read via relative paths, i.e. from the current working directory.
# `matches` and `balls` are the two source frames used by every later cell.
matches = pd.read_csv('IPL Matches 2008-2020.csv')
balls = pd.read_csv('IPL Ball-by-Ball 2008-2020.csv')

### First look at the ball by ball data

In [None]:
balls.head()

### First look at the matches data

In [None]:
matches.head()

In [None]:
matches.columns

In [None]:
balls.columns

In [None]:
print(matches['winner'].unique())
print(matches['city'].unique())

In [None]:
# Normalise team and venue spellings so each team / ground is counted under one label.
#
# The results are assigned back to the frame. The previous form,
# `matches.team1.replace(..., inplace=True)`, mutates an intermediate Series;
# pandas warns about that pattern and it leaves `matches` untouched under Copy-on-Write.
# `toss_winner` is normalised too, so toss statistics line up with team1/team2/winner.
team_columns = ['team1', 'team2', 'toss_winner', 'winner']
matches[team_columns] = matches[team_columns].replace(
    {'Rising Pune Supergiants': 'Rising Pune Supergiant'}, regex=True)

# Keys are treated as regular expressions (regex=True), so they also match as substrings.
venue_aliases = {
    'Feroz Shah Kotla Ground': 'Feroz Shah Kotla',
    'M Chinnaswamy Stadium': 'M. Chinnaswamy Stadium',
    'MA Chidambaram Stadium, Chepauk': 'M.A. Chidambaram Stadium',
    'M. A. Chidambaram Stadium': 'M.A. Chidambaram Stadium',
    'Punjab Cricket Association IS Bindra Stadium, Mohali': 'Punjab Cricket Association Stadium',
    'Punjab Cricket Association Stadium, Mohali': 'Punjab Cricket Association Stadium',
    'IS Bindra Stadium': 'Punjab Cricket Association Stadium',
    'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi International Stadium',
    'Rajiv Gandhi Intl. Cricket Stadium': 'Rajiv Gandhi International Stadium',
}
matches['venue'] = matches['venue'].replace(venue_aliases, regex=True)

## Total number of matches each season 

In [None]:
# Season label = the text before the first "-" in each match date (kept as a string).
matches["season"] = matches["date"].map(lambda match_date: match_date.partition("-")[0])

In [None]:
# Matches per season, ordered by season label so the bars read chronologically
# (value_counts alone orders by count). The unused groupby result was dropped.
seasons_data = matches["season"].value_counts().sort_index()

data = [go.Bar(
    x=seasons_data.index,
    y=seasons_data.values,
    # one random colour per bar, with a thin black outline
    marker=dict(color=random_colors(len(seasons_data)), line=dict(color='#000000', width=1.5))
)]

layout = go.Layout(title="Total number of matches till 2020 (2008-2020)",
                   xaxis=dict(title="Season"),
                   yaxis=dict(title="Number of matches"))

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Number of Player of the Match awards

In [None]:
matches['player_of_match'].value_counts()[0:10]

In [None]:
list(matches['player_of_match'].value_counts()[0:10].keys())

In [None]:
# Ten most frequent Player of the Match winners, drawn as a static matplotlib bar chart.
top_awards = matches['player_of_match'].value_counts()[0:10]
plt.figure(figsize=(15,10))
plt.bar(list(top_awards.keys()), list(top_awards))
plt.show()

In [None]:
# Top 20 Player of the Match winners. The counts are computed once and reused.
top_awards = matches['player_of_match'].value_counts()[:20]

data = [go.Bar(
    x=top_awards.index,
    y=top_awards.values,
    # size the palette from the data so it still matches when fewer than 20 players exist
    marker=dict(color=random_colors(len(top_awards)), line=dict(color='#000000', width=1.5))
)]

layout = go.Layout(title="Total number of Player of the match. ",
                   xaxis=dict(title="Name of the top 20 Player of the match."),
                   yaxis=dict(title="Number of Player of the match"))

# Build the figure and render it with iplot (no HTML file is written).
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Total number of tosses won by each team

In [None]:
matches['toss_winner'].value_counts().keys()

In [None]:
# Toss wins for the ten teams with the most toss wins (static matplotlib chart).
top_toss_winners = matches['toss_winner'].value_counts()[0:10]
plt.figure(figsize=(22,12))
plt.bar(list(top_toss_winners.keys()), list(top_toss_winners))
plt.show()

## Total number of matches won by each team

In [None]:
matches['winner'].value_counts()

In [None]:
# Match wins for the ten most successful teams (static matplotlib chart).
top_match_winners = matches['winner'].value_counts()[0:10]
plt.figure(figsize=(32,18))
plt.bar(list(top_match_winners.keys()), list(top_match_winners))
plt.show()

In [None]:
# Wins per team as an interactive bar chart, one random colour per bar.
win_counts = matches['winner'].value_counts()
bar_style = dict(color=random_colors(len(win_counts.index)),
                 line=dict(color='#000000', width=1.5))

data = [go.Bar(x=win_counts.index, y=win_counts.values, marker=bar_style)]
layout = go.Layout(title="Total number of wins by each team till 2020")

fig = go.Figure(data=data, layout=layout)
iplot(fig)

### Pie plot: total number of matches won by each team

In [None]:
# Share of all wins per team; each slice is labelled with a one-decimal percentage.
win_counts = matches['winner'].value_counts()
plt.figure(figsize=(13,13))
plt.pie(list(win_counts), labels=list(win_counts.keys()), autopct='%0.1f%%')
plt.show()

### Top Cities that have hosted IPL Matches

In [None]:
# Matches hosted per city. groupby(...).size() gives the same counts as the former
# groupby(...).apply(lambda x: x['city'].count()) without reading the grouping column
# inside apply, which newer pandas versions deprecate.
city_counts = matches.groupby('city').size().reset_index(name='Match Counts')
top_cities_order = city_counts.sort_values(by='Match Counts', ascending=False)
top_cities = top_cities_order[:10]

trace = go.Pie(labels=top_cities.city.values, values=top_cities["Match Counts"].values,
               # palette sized from the data in case fewer than ten cities are present
               marker=dict(colors=random_colors(len(top_cities)),
                           line=dict(color='#000000', width=5)))
data = [trace]
layout = go.Layout(title="Top Cities that have hosted IPL Matches")

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Total number of toss and match wins for every team till 2020

In [None]:
# Grouped bars: match wins next to toss wins for every team.
win_counts = matches["winner"].value_counts()
toss_counts = matches["toss_winner"].value_counts()

trace1 = go.Bar(x=win_counts.index, y=win_counts.values, name="match win")
trace2 = go.Bar(x=toss_counts.index, y=toss_counts.values, name="toss win")
data = [trace1, trace2]

# barmode="group" places the two traces side by side; the legend sits right of the plot area.
layout = go.Layout(
    title="Total number of wins for every team till 2020",
    xaxis=dict(title="Teams"),
    yaxis=dict(title="Number of Matches"),
    legend=dict(x=1.0, y=0.5),
    barmode="group",
)

# Assemble the figure and render it inline.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

### Toss Win success ratio for every team

In [None]:
# Matches played per team = appearances as team1 + appearances as team2.
# .add(fill_value=0) keeps teams that occur in only one of the two columns
# (a plain `+` would turn them into NaN).
Total_matches_played = matches['team1'].value_counts().add(
    matches['team2'].value_counts(), fill_value=0)

# Toss wins per team, aligned to the teams that played; a team with no toss win counts as 0.
toss_won = matches['toss_winner'].value_counts().reindex(Total_matches_played.index, fill_value=0)
toss_win_success_rate = (toss_won / Total_matches_played) * 100
toss_win_success_rate_sort = toss_win_success_rate.sort_values(ascending=False)

data = [go.Bar(
    x=toss_win_success_rate_sort.index,
    y=toss_win_success_rate_sort.values,
    marker=dict(color=random_colors(len(toss_win_success_rate_sort)),
                line=dict(color='#000000', width=1.5))
)]

# The y axis is a percentage of matches played, not a match count.
layout = go.Layout(title="Toss Win success ratio.",
                   xaxis=dict(title="Teams"),
                   yaxis=dict(title="Tosses won (% of matches played)"))

# Build the figure and render it inline.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Matches played per team; fill_value=0 keeps teams that appear in only one of the columns.
total_matches = matches['team1'].value_counts().add(
    matches['team2'].value_counts(), fill_value=0)

# Wins per team, aligned to the teams that played (no recorded win -> 0 instead of NaN).
matches_won = matches['winner'].value_counts().reindex(total_matches.index, fill_value=0)

# Win percentage; round after scaling so values stay at two decimals
# (rounding before multiplying by 100 leaves float artefacts such as 58.160000000000004).
success_ratio = (matches_won / total_matches * 100).round(2)
success_ratio_sort = success_ratio.sort_values(ascending=False)

data = [go.Bar(
    x=success_ratio_sort.index,
    y=success_ratio_sort.values,
    marker=dict(color=random_colors(len(success_ratio_sort.index)),
                line=dict(color='#000000', width=1.5))
)]

layout = go.Layout(title="Success rate of Teams",
                   xaxis=dict(title="Teams"),
                   yaxis=dict(title="Success rate of wining"))

# Build the figure and render it inline.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

### Number of seasons won by each team

In [None]:
# Winner of the last match of every season.
# Rows are ordered by date first so "last" does not depend on the CSV row order, and
# drop_duplicates(keep='last') replaces groupby(...)['season','winner'], whose tuple-style
# column selection is rejected by current pandas.
# NOTE(review): assumes the chronologically last match of a season is its final.
each_season_winner = (
    matches.sort_values('date', kind='stable')
           .drop_duplicates('season', keep='last')[['season', 'winner']]
)
each_season_winner_sort = each_season_winner.sort_values('season', ascending=True)

title_counts = each_season_winner_sort["winner"].value_counts()

data = [go.Bar(
    x=title_counts.index,
    y=title_counts.values,
    marker=dict(color=random_colors(len(title_counts.index)),
                line=dict(color='#000000', width=1.5))
)]

layout = go.Layout(title="Most Titles Wins",
                   xaxis=dict(title="Teams"),
                   yaxis=dict(title="Number of seasons won by any team."))

# Build the figure and render it inline.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Top 10 batsmen in IPL seasons till 2020

In [None]:
# Total runs per batsman. Summing the selected column directly gives the same totals as the
# former groupby(...).apply(lambda x: np.sum(...)), without calling a Python function per group.
batting_tot = balls.groupby('batsman')['batsman_runs'].sum().reset_index(name='Runs')
batting_sorted = batting_tot.sort_values(by='Runs', ascending=False)
top_batsmen = batting_sorted[:10]

data = [go.Bar(
    x=top_batsmen.batsman,
    y=top_batsmen.Runs,
    # palette sized from the data rather than a hard-coded 10
    marker=dict(color=random_colors(len(top_batsmen)), line=dict(color='#000000', width=1.5))
)]

layout = go.Layout(title="Top 10 Batsmen in IPL- Seasons till 2020",
                   xaxis=dict(title="Top 10 Batsmen"),
                   yaxis=dict(title="Runs Scored"))

# Build the figure and render it inline.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Top 10 bowlers in IPL seasons till 2020

In [None]:
# Wickets per bowler = sum of the is_wicket flag over the balls each bowler delivered.
# The column is summed directly instead of through groupby(...).apply(np.sum).
# NOTE(review): every dismissal on a bowler's delivery is counted; if the data also flags
# dismissals not credited to the bowler (e.g. run outs), they are included. Confirm.
bowling_tot = balls.groupby('bowler')['is_wicket'].sum().reset_index(name='wicket')
bowling_sorted = bowling_tot.sort_values(by='wicket', ascending=False)
top_bowler = bowling_sorted[:10]

data = [go.Bar(
    x=top_bowler.bowler,
    y=top_bowler.wicket,
    # palette sized from the data rather than a hard-coded 10
    marker=dict(color=random_colors(len(top_bowler)), line=dict(color='#000000', width=1.5))
)]

layout = go.Layout(title="Top 10 Bowler in IPL- Seasons till 2020",
                   xaxis=dict(title="Top 10 Bowler"),
                   yaxis=dict(title="Wicket"))

# Build the figure and render it inline.
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Merging the two datasets

In [None]:
# Attach the match-level columns to every delivery: a right join on `id` keeps all rows of `balls`.
data = matches.merge(balls, how='right', on='id')
data.head()

In [None]:
print(matches.shape)
print(balls.shape)
print(data.shape)

In [None]:
data.info()

In [None]:
data.describe()

In [None]:
data['season'].unique()

# Extracting year from the date

In [None]:
# Parse the date strings, then take the calendar year through the .dt accessor.
data['date'] = pd.to_datetime(data['date'])
data['year'] = data['date'].dt.year

In [None]:
data.head()

In [None]:
# Total runs per year. Only `total_runs` is aggregated: summing every column of the merged
# frame (as before) also tries to sum the datetime and text columns, which recent pandas
# versions reject or handle by concatenating strings.
runs_by_years = data.groupby('year', as_index=False)['total_runs'].sum()
runs_by_years

## Wickets taken over the years

In [None]:
# Wickets per year. Only `is_wicket` is aggregated; summing every column of the merged frame
# would also attempt to sum its datetime and text columns.
wicket_by_years = data.groupby('year', as_index=False)['is_wicket'].sum()

In [None]:
wicket_by_years

In [None]:
# Line chart of wickets per year.
total_wicket = go.Scatter(
    x=wicket_by_years['year'],
    y=wicket_by_years['is_wicket'],
    mode='lines',
    name='wickets')

layout = go.Layout(title='wicket taken by year',
                   xaxis=dict(title='Year'),
                   yaxis=dict(title='Wicket'))

# The trace list is passed directly instead of being stored in `data`, so the merged
# `data` DataFrame stays available to the cells that read it.
fig = go.Figure(data=[total_wicket], layout=layout)

pyo.iplot(fig)

## Runs scored over the years

In [None]:
runs_by_years

### Using plotly

In [None]:
# Line chart of total runs per year.
total_runs = go.Scatter(
    x=runs_by_years['year'],
    y=runs_by_years['total_runs'],
    mode='lines',
    name='runs')

layout = go.Layout(title='Runs scored by year',
                   xaxis=dict(title='Year'),
                   yaxis=dict(title='Runs'))

# The trace list is passed directly instead of being stored in `data`, so the merged
# `data` DataFrame is not overwritten by this cell.
fig = go.Figure(data=[total_runs], layout=layout)

pyo.iplot(fig)

### Using cufflinks

In [None]:
runs_by_years.iplot(kind='scatter', x='year', y='total_runs', title='Runs scored by year', xTitle='Year', yTitle='Runs')

# Preferred toss decision

In [None]:
# Number of matches per toss decision, as a two-column frame (toss_decision, id).
toss_decisions = matches.groupby('toss_decision')['id'].count().reset_index()
toss_decisions

### Using plotly

In [None]:
# Bar chart of how often each toss decision was taken.
toss_decision = go.Bar(
    x=toss_decisions['toss_decision'],
    y=toss_decisions['id'])

layout = go.Layout(title='Toss decision',
                   xaxis=dict(title='Decision'),
                   yaxis=dict(title='count'))

# The trace list is passed directly so the notebook-level `data` name is not overwritten.
fig = go.Figure(data=[toss_decision], layout=layout)

pyo.iplot(fig)

### Using cufflinks

In [None]:
toss_decisions.iplot(kind='bar', x='toss_decision', y='id', title='Toss Decision', xTitle='Decision', yTitle='count')

# Total runs and wickets by over

In [None]:
# Runs and wickets per over. The two numeric columns are selected before aggregating, so the
# text columns of `balls` are not summed (recent pandas versions no longer skip them).
runs_and_wickets_by_over = balls.groupby('over', as_index=False)[['total_runs', 'is_wicket']].sum()
runs_and_wickets_by_over

### Using plotly

In [None]:
# Bubble chart: x = over, y = runs in that over; marker size follows the wicket count
# and marker colour follows the run total (both divided by 10).
runs_and_wickets_by_overs = go.Scatter(
    x=runs_and_wickets_by_over['over'],
    y=runs_and_wickets_by_over['total_runs'],
    text=runs_and_wickets_by_over['is_wicket'],  # hover text shows the wicket count
    mode='markers',
    marker=dict(size=runs_and_wickets_by_over['is_wicket'] / 10,
                color=runs_and_wickets_by_over['total_runs'] / 10,
                showscale=True))

layout = go.Layout(title='Runs and wicket by over',
                   xaxis=dict(title='Over'),
                   yaxis=dict(title='Runs'))

# The trace list is passed directly so the notebook-level `data` name is not overwritten.
fig = go.Figure(data=[runs_and_wickets_by_overs], layout=layout)

pyo.iplot(fig)

### Using cufflinks

I could not get a colorscale to work in cufflinks, so the markers below are sized by wickets but not coloured by runs.

In [None]:
runs_and_wickets_by_over.iplot(kind='scatter', x='over', y='total_runs', mode='markers',
                               title='Runs and wickets by over',
                               xTitle='Over', yTitle='Runs', size=runs_and_wickets_by_over['is_wicket']/10)

# Runs distribution over wise

In [None]:
# Two-column view (runs, over) of the ball-by-ball frame loaded at the top of the notebook.
# The CSV is not read a second time: `balls` is never modified in between, so reloading it
# only repeated the file I/O.
runs_overs = balls[['total_runs', 'over']]
runs_overs

### Using plotly

In [None]:
# One box per over showing the distribution of runs scored per ball.
runs_over = go.Box(
    x=runs_overs['over'],
    y=runs_overs['total_runs'])

layout = go.Layout(title='Runs distribution over wise',
                   xaxis=dict(title='Over'),
                   yaxis=dict(title='Runs'))

# The trace list is passed directly so the notebook-level `data` name is not overwritten.
fig = go.Figure(data=[runs_over], layout=layout)

pyo.iplot(fig)

### Using cufflinks

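A simple one-liner doesn't seem to work here. The correct cufflinks version is the pivot-based box plot at the end of the plotting sections ("Runs distribution over wise (using cufflinks)").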

In [None]:
# NOTE(review): y='over' selects the over number as the plotted value, while the axis titles
# say Over / Runs. This call does not appear to produce the per-over runs distribution;
# the pivot-based box plot later in the notebook does.
runs_overs.iplot(kind='box', y='over',title='Runs distribution over wise', xTitle='Over', yTitle='Runs')

# Runs distribution match wise

In [None]:
# Total runs per match id. Only `total_runs` is aggregated, so the text columns of `balls`
# are not summed along with it.
runs_by_match = balls.groupby('id', as_index=False)['total_runs'].sum()
runs_by_match

In [None]:
# Histogram of total runs per match.
# The trace gets its own name: rebinding `runs_by_match` to the trace made this cell fail when
# run a second time, because the DataFrame it reads from had been replaced.
runs_by_match_hist = go.Histogram(x=runs_by_match['total_runs'])

layout = go.Layout(title='Runs distribution match wise',
                   xaxis=dict(title='Runs'))

# The trace list is passed directly so the notebook-level `data` name is not overwritten.
fig = go.Figure(data=[runs_by_match_hist], layout=layout)

pyo.iplot(fig)

# Ball-wise runs distribution

In [None]:
balls['total_runs'].iplot(kind='hist', title='Runs balls wise distributions', xTitle='Runs', yTitle='Count')

### Testing the different themes

In [None]:
themes = cf.getThemes()
themes

In [None]:
# Draw the same per-ball runs histogram once for every theme name in `themes`,
# putting the theme name at the start of each title so the plots can be told apart.
for theme in themes:
    balls['total_runs'].iplot(kind='hist', theme=theme, title=theme+' :Runs balls wise distributions ', xTitle='Runs', yTitle='Count')

# Runs distribution over wise (using cufflinks)

I had to put this here as it was causing a time-out and preventing other plots from loading.

In [None]:
# Two-column view (runs, over) of the ball-by-ball frame.
runs_overs = balls[['total_runs', 'over']]
# pivot() without an `index` argument keeps the existing row index, so the result has one
# column per over and NaN wherever a row belongs to a different over; the box plot is then
# drawn from those columns (presumably one box per over column).
runs_overs.pivot(columns='over', values='total_runs').iplot(kind='box')

### Preprocessing and normalization of the data

In [None]:
# Rebuild the merged frame (all deliveries, with match-level columns attached) for the modelling steps.
data = matches.merge(balls, how='right', on='id')
data.head()

In [None]:
data.info()

In [None]:
matches.head()

In [None]:
matches[pd.isnull(matches['winner'])]

In [None]:
# Matches without a recorded winner get the placeholder label 'Draw'.
# The result is assigned back: fillna(inplace=True) on `matches['winner']` works on an
# intermediate Series (chained assignment) and can leave `matches` itself unchanged.
matches['winner'] = matches['winner'].fillna('Draw')

In [None]:
matches.loc[241,'winner']

In [None]:
matches['winner'].value_counts()

In [None]:
matches[pd.isnull(matches['city'])]

In [None]:
# Matches without a recorded city are labelled 'UAE'.
# The result is assigned back: fillna(inplace=True) on `matches['city']` works on an
# intermediate Series (chained assignment) and can leave `matches` itself unchanged.
matches['city'] = matches['city'].fillna('UAE')

In [None]:
matches.loc[414,'city']

In [None]:
matches['toss_decision'].value_counts()

In [None]:
# Integer codes for cities and teams. The team table is written once and shared by the four
# team-valued columns; toss_winner and winner each add one extra label.
city_codes = {
    'Mumbai': 1, 'Kolkata': 2, 'Delhi': 3, 'Bangalore': 4, 'Hyderabad': 5, 'Chennai': 6,
    'Chandigarh': 7, 'Jaipur': 8, 'Pune': 9, 'Abu Dhabi': 10, 'Dubai': 11, 'Bengaluru': 12,
    'Durban': 13, 'Visakhapatnam': 14, 'Ahmedabad': 15, 'Centurion': 16, 'Sharjah': 17,
    'Rajkot': 18, 'Dharamsala': 19, 'Indore': 20, 'Johannesburg': 21, 'Port Elizabeth': 22,
    'Ranchi': 23, 'Cape Town': 24, 'Cuttack': 25, 'Raipur': 26, 'Kochi': 27, 'Kanpur': 28,
    'Nagpur': 29, 'Kimberley': 30, 'East London': 31, 'Bloemfontein': 32, 'UAE': 33,
}
team_codes = {
    'Mumbai Indians': 1, 'Kolkata Knight Riders': 2, 'Royal Challengers Bangalore': 3,
    'Delhi Capitals': 4, 'Chennai Super Kings': 5, 'Rajasthan Royals': 6,
    'Delhi Daredevils': 7, 'Gujarat Lions': 8, 'Kings XI Punjab': 9,
    'Sunrisers Hyderabad': 10, 'Rising Pune Supergiant': 11, 'Kochi Tuskers Kerala': 12,
    'Pune Warriors': 13, 'Deccan Chargers': 14,
}
encode = {
    'city': city_codes,
    'team1': dict(team_codes),
    'team2': dict(team_codes),
    # the plural spelling maps to the same code as the singular one
    'toss_winner': {**team_codes, 'Rising Pune Supergiants': 11},
    # 'Draw' is the placeholder used for matches without a winner
    'winner': {**team_codes, 'Draw': 15},
}
matches.replace(encode, inplace=True)
matches.head()

In [None]:
matches['result'].value_counts()

In [None]:
matches['eliminator'].value_counts()

In [None]:
matches['method'].value_counts()

In [None]:
# Integer codes for the remaining categorical match columns, applied per column.
# NOTE(review): replace() leaves values that are not listed here unchanged. Confirm that the
# 'result' labels in the CSV are spelled exactly 'wicket' / 'runs' / 'tie'.
encode = {'toss_decision': {'field':1,'bat':2},
          'result': {'wicket':1,'runs':2,'tie':3},
          'eliminator': {'N':1,'Y':2}}
matches.replace(encode, inplace=True)
matches.head()

In [None]:
# Integer codes for venue names.
encode = {'M. Chinnaswamy Stadium':1, 'Punjab Cricket Association Stadium':2,
       'Feroz Shah Kotla':3, 'Wankhede Stadium':4, 'Eden Gardens':5,
       'Sawai Mansingh Stadium':6, 'Rajiv Gandhi International Stadium':7,
       'M.A. Chidambaram Stadium':8, 'Dr DY Patil Sports Academy':9, 'Newlands':10,
       "St George's Park":11, 'Kingsmead':12, 'SuperSport Park':13, 'Buffalo Park':14,
       'New Wanderers Stadium':15, 'De Beers Diamond Oval':16, 'OUTsurance Oval':17,
       'Brabourne Stadium':18, 'Sardar Patel Stadium, Motera':19, 'Barabati Stadium':20,
       'Vidarbha Cricket Association Stadium, Jamtha':21,
       'Himachal Pradesh Cricket Association Stadium':22, 'Nehru Stadium':23,
       'Holkar Cricket Stadium':24,
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium':25,
       'Subrata Roy Sahara Stadium':26,
       'Shaheed Veer Narayan Singh International Stadium':27,
       'JSCA International Stadium Complex':28, 'Sheikh Zayed Stadium':29,
       'Sharjah Cricket Stadium':30, 'Dubai International Cricket Stadium':31,
       'Maharashtra Cricket Association Stadium':32,
       'Saurashtra Cricket Association Stadium':33, 'Green Park':34,
       'M.Chinnaswamy Stadium':35}
# Apply the table to the venue column only. A flat dict passed to DataFrame.replace is
# matched against every column of the frame, which is wider than intended here.
matches['venue'] = matches['venue'].replace(encode)
matches.head()

In [None]:
matches.info()

### Normalization of data

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Rescale the listed columns of the merged `data` frame with a default MinMaxScaler;
# fit_transform returns the scaled values as an array, stored in X.
# NOTE(review): `id` is an identifier rather than a measurement. Confirm it should be scaled.
scaling = MinMaxScaler()
X = scaling.fit_transform(data[['id','neutral_venue','inning','over','ball','batsman_runs','extra_runs','total_runs','non_boundary','is_wicket']])

In [None]:
# Centre every column on its mean, then let NumPy compute the covariance between columns
# (rowvar=False: each column is a variable, each row an observation).
column_means = X.mean(axis=0)
X_meaned = X - column_means
cov_mat = np.cov(X_meaned, rowvar=False)

In [None]:
# Covariance by hand: centred data transposed times centred data, divided by (n - 1).
mean_vec = X.mean(axis=0)
centered = X - mean_vec
cov_mat = centered.T @ centered / (X.shape[0] - 1)
print('Covariance matrix \n%s' %cov_mat)

In [None]:
# Keep only the columns used for modelling; `winner` is deliberately the last one.
model_columns = ['team1', 'team2', 'city', 'toss_decision', 'toss_winner', 'venue', 'winner']
matches = matches.loc[:, model_columns]
matches.head()

In [None]:
matches.info()

In [None]:
df = pd.DataFrame(matches)
df.describe()

In [None]:
# Global pandas display option: floats are rendered with thousands separators and no
# decimals. This affects how every frame is displayed from here on, not just `df`.
pd.options.display.float_format = '{:,.0f}'.format
df

In [None]:
df['city'].value_counts()

In [None]:
# Number of missing values in every column.
df.isnull().sum()

In [None]:
df.head()

# Implement different algorithms and report their accuracy

In [None]:
#Import models from scikit learn module:
from sklearn.linear_model import LogisticRegression
#from sklearn.cross_validation import KFold   #For K-fold cross validation
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn import metrics

#Generic function for making a classification model and assessing performance:
def classification_model(model, data, predictors, outcome, test_size=0.25, random_state=0):
    """Fit `model` and print its predictions and accuracy on held-out rows.

    The original version fitted and scored on the same rows, so the printed
    figure was a training accuracy. By default a reproducible random subset of
    rows is now held out for scoring.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    data : pandas.DataFrame containing the predictor and outcome columns.
    predictors : list of column names used as features.
    outcome : column name, or one-element list of column names, of the target.
    test_size : fraction of rows held out for scoring (default 0.25). Pass
        ``0`` or ``None`` to fit and score on all rows.
    random_state : seed of the row shuffle, so the split is reproducible.

    Returns
    -------
    None. The predictions and the accuracy line are printed.
    """
    features = data[predictors]
    target = data[outcome]
    # A one-element list selects an (n, 1) frame; estimators expect a 1-D target.
    if getattr(target, 'ndim', 1) == 2 and target.shape[1] == 1:
        target = target.iloc[:, 0]

    n_rows = len(data)
    n_test = int(round(n_rows * test_size)) if test_size else 0
    if 0 < n_test < n_rows:
        shuffled = np.random.RandomState(random_state).permutation(n_rows)
        test_rows, train_rows = shuffled[:n_test], shuffled[n_test:]
    else:
        # No usable hold-out set: fit and score on every row.
        train_rows = test_rows = np.arange(n_rows)

    model.fit(features.iloc[train_rows], target.iloc[train_rows])
    predictions = model.predict(features.iloc[test_rows])
    print(predictions)

    # Accuracy = share of scored rows whose prediction equals the true label.
    hits = np.asarray(predictions) == np.asarray(target.iloc[test_rows])
    if hits.ndim > 1:
        hits = hits.all(axis=1)
    accuracy = float(np.mean(hits))
    print('Accuracy : %s' % '{0:.3%}'.format(accuracy))

### logistic Regression

In [None]:
# Target and feature column names; both lists are reused by the model cells that follow.
# NOTE(review): the features are the arbitrary integer codes assigned earlier, which a linear
# model treats as ordered quantities. Confirm whether one-hot encoding was intended.
outcome_var=['winner']
predictor_var = ['team1', 'team2', 'venue', 'toss_winner','city','toss_decision']
model =LogisticRegression()
classification_model(model, df,predictor_var,outcome_var)

### Naive Bayes algorithm

In [None]:
from sklearn.naive_bayes import GaussianNB
# Same target and feature columns as the logistic-regression cell, scored with Gaussian Naive Bayes.
outcome_var=['winner']
predictor_var = ['team1', 'team2', 'venue', 'toss_winner','city','toss_decision']
model = GaussianNB() 
classification_model(model, df,predictor_var,outcome_var)

## KNN algorithm

In [None]:
# Apply a k-nearest-neighbours classifier with k = 3.
# Relies on predictor_var / outcome_var defined in the earlier model cells.
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=3)
classification_model(model, df,predictor_var,outcome_var)

## Linear regression

In [None]:
# Features: every column except the last. Target: the last column ('winner').
# The target previously used column index 1 ('team2'), which is also one of the features,
# so the regression was predicting one of its own inputs.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [None]:
X.shape

In [None]:
y.shape

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Hold out 25% of the rows for evaluation; the seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=45)

# Fit ordinary least squares on the training rows and predict the held-out rows.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

print('Coefficients: \n', regressor.coef_)
# Mean squared error on the held-out rows.
squared_errors = (y_pred - y_test) ** 2
print("Mean squared error: %.2f" % np.mean(squared_errors))
# score() reports R^2 on the held-out rows; 1 is a perfect prediction.
r_squared = regressor.score(X_test, y_test)
print('Variance score: %.2f' % r_squared)