In [None]:


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor


# Show every column and every row when a DataFrame is displayed (no truncation).
pd.set_option("display.max_columns",None)
pd.set_option("display.max_rows",None)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



In [None]:
# Read the continuous ODI match dataset and report its dimensions.
match_data = pd.read_csv("/kaggle/input/odi-cricket-matches-19712017/ContinousDataset.csv")
x = match_data.shape[0]  # row count
y = match_data.shape[1]  # column count
print("Rows = ",x)
print("Columns = ",y)
match_data.head(5)

In [None]:
# Distinct values of the "Winner" column; later cells iterate over these as the list of teams.
# NOTE(review): a team that never appears as a winner would not be included here — confirm that is intended.
unique_teams = match_data["Winner"].unique()
print(unique_teams)

In [None]:
# Number of distinct values in unique_teams.
len(unique_teams)

In [None]:
# Wins per team (count of "Scorecard" entries grouped by "Winner") plus each
# team's share of all rows in the dataset, ordered from most to fewest wins.
winner_count = match_data.groupby("Winner")[["Scorecard"]].count()
winner_count["Percentage"] = (winner_count["Scorecard"] * 100 / x).round(0)
winner_count = winner_count.sort_values(by="Scorecard", ascending=False)
winner_count

In [None]:
# Bar chart of the win count per team (the "Scorecard" count column of winner_count).
winner_count[["Scorecard"]].plot(kind="bar", figsize=(20, 3))
plt.title("Team wise Matches wins")
plt.xlabel("Teams")
plt.ylabel("Wins")
plt.xticks(rotation=45)
plt.grid()
plt.show()

In [None]:
# Bar chart of each team's share of all matches (the "Percentage" column of winner_count).
winner_count[["Percentage"]].plot(kind="bar", figsize=(20, 3))
plt.title("Team wise wins%")
plt.xlabel("Teams")
plt.ylabel("Wins %")
plt.xticks(rotation=45)
plt.grid()
plt.show()

 # **Defined the Data Frame**

In [None]:
# Empty frame that team_statistics() fills with one row per team.
team_wise_matches_info = pd.DataFrame(
    columns=[
        # overall record
        "TeamName",
        "MatchesPlayed",
        "MatchesWon",
        "WonPercentage",
        "LossPercentage",
        # home record
        "Total_Home_Played",
        "Total_Home_Wins",
        "Home_Wins%",
        "Home_Innings_1st_1st",
        "Home_Innings_1st_2nd",
        "Home_Innings_2nd_1st",
        "Home_Innings_2nd_2nd",
        # away record
        "Total_Away_Played",
        "Total_Away_Wins",
        "Away_Wins%",
        "Away_Innings_1st_1st",
        "Away_Innings_1st_2nd",
        "Away_Innings_2nd_1st",
        "Away_Innings_2nd_2nd",
    ]
)

# **Defined the team_statistics function to calculate the team wise statistics and map those statistics to Data Frame**

In [None]:
def _win_percentage(wins, played):
    """Return wins as a percentage of played, rounded to 2 decimals (0 when nothing was played)."""
    if played > 0:
        return round(wins*100/played,2)
    return 0


def team_statistics(team,i):
    """Compute overall, home and away statistics for one team and store them as a row.

    Reads the module-level ``match_data`` frame and writes the results into
    row ``i`` of the module-level ``team_wise_matches_info`` frame.

    Parameters
    ----------
    team : value compared against the "Team 1", "Team 2", "Winner" and
        "Host_Country" columns of ``match_data``.
    i : row label of ``team_wise_matches_info`` that receives the statistics.

    Returns
    -------
    None
        The result is stored in ``team_wise_matches_info`` as a side effect.
    """
    # Boolean row masks, each computed once and combined below.
    is_team1 = match_data["Team 1"] == team
    is_team2 = match_data["Team 2"] == team
    played = is_team1 | is_team2
    won = match_data["Winner"] == team
    # A match counts as "home" when the host country equals the team name,
    # and as "away" for every other host country.
    home = match_data["Host_Country"] == team
    away = match_data["Host_Country"] != team
    team1_first = match_data["Innings_Team1"] == "First"
    team1_second = match_data["Innings_Team1"] == "Second"
    team2_first = match_data["Innings_Team2"] == "First"
    team2_second = match_data["Innings_Team2"] == "Second"

    def count(mask):
        # Number of rows selected by a boolean mask, as a plain Python int.
        return int(mask.sum())

    matches_played = count(played)
    matches_won = count(played & won)
    home_played = count(played & home)
    home_won = count(played & won & home)
    away_played = count(played & away)
    away_won = count(played & won & away)

    # Guarded like the home/away percentages so a team without any match
    # yields 0 instead of raising ZeroDivisionError.
    won_percentage = _win_percentage(matches_won, matches_played)
    loss_percentage = round(100 - won_percentage, 2) if matches_played > 0 else 0

    # Column naming for the innings breakdown: "<Home|Away>_Innings_<slot>_<innings>",
    # where slot 1st/2nd means the team is listed as "Team 1"/"Team 2" and
    # innings 1st/2nd is the value "First"/"Second" of that slot's innings column.
    stats = {
        "TeamName": team,
        "MatchesPlayed": matches_played,
        "MatchesWon": matches_won,
        "WonPercentage": won_percentage,
        "LossPercentage": loss_percentage,
        "Total_Home_Played": home_played,
        "Total_Home_Wins": home_won,
        "Home_Wins%": _win_percentage(home_won, home_played),
        "Home_Innings_1st_1st": count(is_team1 & won & home & team1_first),
        "Home_Innings_1st_2nd": count(is_team1 & won & home & team1_second),
        "Home_Innings_2nd_1st": count(is_team2 & won & home & team2_first),
        "Home_Innings_2nd_2nd": count(is_team2 & won & home & team2_second),
        "Total_Away_Played": away_played,
        "Total_Away_Wins": away_won,
        "Away_Wins%": _win_percentage(away_won, away_played),
        "Away_Innings_1st_1st": count(is_team1 & won & away & team1_first),
        "Away_Innings_1st_2nd": count(is_team1 & won & away & team1_second),
        "Away_Innings_2nd_1st": count(is_team2 & won & away & team2_first),
        "Away_Innings_2nd_2nd": count(is_team2 & won & away & team2_second),
    }
    for column, value in stats.items():
        team_wise_matches_info.at[i,column] = value

In [None]:
# Fill team_wise_matches_info with one row per team; the enumerate index is the row label.
# team_statistics() returns nothing, so its result is not bound to a name.
for i, team in enumerate(unique_teams):
    team_statistics(team,i)

In [None]:
# Order teams by matches played, then matches won (both descending), and preview the top rows.
team_wise_matches_info = team_wise_matches_info.sort_values(
    by=["MatchesPlayed", "MatchesWon"],
    ascending=False,
)
team_wise_matches_info.head()

In [None]:
# Grouped bars of matches played and matches won per team.
# Columns are selected by name rather than position; both bars are match
# counts, so the axis label and title describe counts, not percentages.
team_wise_matches_info[["TeamName","MatchesPlayed","MatchesWon"]].plot(kind="bar",x="TeamName",figsize=(25,3))
plt.xticks(rotation=45)
plt.xlabel("Teams")
plt.ylabel("Matches")
plt.title("Matches Played Vs Matches Won")
plt.grid()
plt.show()

# **India has played most matches among all the teams**

In [None]:
def func(pct, allvalues):
    """Format a pie-wedge label from its percentage share.

    Parameters
    ----------
    pct : the wedge's share of the whole, in percent.
    allvalues : the values the pie was built from; their sum is the whole.

    Returns
    -------
    str
        Two lines: ``pct`` with one decimal, then in brackets the wedge's
        underlying value (``pct`` percent of ``sum(allvalues)``) as an integer.
    """
    # Round to the nearest integer instead of truncating: a share that arrives
    # as e.g. 29.999999 must map back to 30, not 29.
    absolute = int(round(float(pct / 100.*np.sum(allvalues))))
    return "{:.1f}%\n({:d}%)".format(pct, absolute)

# Pie chart of every team's win percentage, labelled through func().
# Outline style applied to each wedge.
wp = { 'linewidth' : 1, 'edgecolor' : "green" }

fig, ax = plt.subplots(figsize =(10, 7))
wedges, texts, autotexts = ax.pie(
    team_wise_matches_info["WonPercentage"],
    autopct = lambda pct: func(pct, team_wise_matches_info["WonPercentage"]),
    labels = team_wise_matches_info["TeamName"],
    shadow = True,
    startangle = 90,
    wedgeprops = wp,
    textprops = dict(color ="black"),
)

# The wedges are teams, so the legend is titled accordingly.
ax.legend(wedges, team_wise_matches_info["TeamName"],title ="Teams",loc ="center left", bbox_to_anchor =(1.5, 0, 0.5, 1))

plt.setp(autotexts, size = 7, weight ="bold")
ax.set_title("Matches Won Percentage")
plt.show()

# **Team Wise Winning Percentages**

In [None]:
# Matches won per team, laid out against the number of matches played.
# aggfunc is given as the string "sum": pandas deprecates passing the builtin
# `sum` here and recommends the string to keep the same aggregation.
team_wise_matches_info_won_details  = pd.pivot_table(team_wise_matches_info,index=["MatchesPlayed"],columns="TeamName",values="MatchesWon",fill_value=0,aggfunc="sum")
plt.figure(figsize=(25,8))
sns.heatmap(team_wise_matches_info_won_details,annot=True,fmt=".0f")
plt.show()

# **Australia has won the most matches among all the teams**

In [None]:
# Win percentage per team, laid out against the number of matches played.
# aggfunc is given as the string "sum": pandas deprecates passing the builtin
# `sum` here and recommends the string to keep the same aggregation.
team_wise_matches_info_won_details  = pd.pivot_table(team_wise_matches_info,index=["MatchesPlayed"],columns="TeamName",values="WonPercentage",fill_value=0,aggfunc="sum")
plt.figure(figsize=(25,8))
# ".2f" is the plain spelling of the previous "1.02f" (two decimals).
sns.heatmap(team_wise_matches_info_won_details,annot=True,fmt=".2f")
plt.xticks(rotation=45)
plt.show()

# **South Africa has the highest winning percentage among all the teams**

In [None]:
# Matches hosted in India: count of "Scorecard" entries per ground (rows) and winning team (columns).
ground_wise_team_won_data = pd.pivot_table(
    match_data[match_data["Host_Country"] == "India"],
    index="Ground",
    columns="Winner",
    values="Scorecard",
    aggfunc="count",
    fill_value=0,
)
ground_wise_team_won_data

In [None]:
# Heatmap of the ground-by-winner counts for matches hosted in India.
plt.figure(figsize=(25,10))
# `linewidths` is seaborn's documented parameter for the cell-border width.
sns.heatmap(ground_wise_team_won_data,linecolor='black',linewidths=1,annot=True, fmt=".0f")
plt.show()

# **India won most matches in Bangalore**

In [None]:
# Preview the first rows of the per-team statistics table.
team_wise_matches_info.head()

In [None]:
# Top row after sorting by matches played, descending.
team_wise_matches_info.sort_values("MatchesPlayed",ascending=False).head(1)

# **India has played most matches among all the teams**

In [None]:
# Top row after sorting by matches won, descending.
team_wise_matches_info.sort_values("MatchesWon",ascending=False).head(1)

# **Australia has won most matches among all the teams**

In [None]:
# Top row after sorting by overall win percentage, descending.
team_wise_matches_info.sort_values("WonPercentage",ascending=False).head(1)

# **South Africa has most winning percentage among all the teams**

In [None]:
# Top row after sorting by home matches played, descending.
team_wise_matches_info.sort_values("Total_Home_Played",ascending=False).head(1)

# **Australia has played most matches in home town among all the teams**

In [None]:
# Top row after sorting by home wins, descending.
team_wise_matches_info.sort_values("Total_Home_Wins",ascending=False).head(1)

# **Australia has won most matches in home town among all the teams**

In [None]:
# Top row after sorting by home win percentage, descending.
# NOTE(review): this ranks by percentage alone, regardless of how many home matches were played.
team_wise_matches_info.sort_values("Home_Wins%",ascending=False).head(1)

# **Hong Kong has the highest home winning percentage among all the teams**

In [None]:
# Top row after sorting by away matches played, descending.
team_wise_matches_info.sort_values("Total_Away_Played",ascending=False).head(1)

# **Pakistan has played most matches outside home town among all the teams**

In [None]:
# Top row after sorting by away wins, descending.
team_wise_matches_info.sort_values("Total_Away_Wins",ascending=False).head(1)

# **Pakistan has won most matches outside home town among all the teams**

In [None]:
# Top row after sorting by away win percentage, descending.
team_wise_matches_info.sort_values("Away_Wins%",ascending=False).head(1)

# **Australia has the highest away winning percentage among all the teams**

In [None]:
# Work on an independent copy: the modelling cells replace TeamName with
# integer codes, which must not overwrite the names in team_wise_matches_info.
match_data_df = team_wise_matches_info.copy()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import metrics, tree

# Replace each team name with the integer code of its category.
match_data_df["TeamName"] = pd.Categorical(match_data_df["TeamName"]).codes

match_data_df

In [None]:
# Feature columns for the models: every column except "TeamName", which is
# the prediction target and must not also be fed in as an input.
pred_columns = match_data_df.drop(columns=["TeamName"])
prediction_var = pred_columns.columns
print(list(prediction_var))

In [None]:
# 80/20 split of the per-team rows; random_state fixes the shuffle so a
# re-run gives the same split (same seed as the labelled-dataset split).
train,test = train_test_split(match_data_df,test_size=0.2,random_state=25)
print(train.shape)
print(test.shape)

In [None]:
# Feature matrix and target for fitting.
train_X = train[prediction_var]
train_Y = train["TeamName"]
print(list(train.columns))

# The evaluation data comes from the held-out `test` rows, not from `train`.
# NOTE(review): the table holds one row per team, so the teams in `test` never
# appear among the training labels — expect the scores to reflect that.
test_X = test[prediction_var]
test_Y = test["TeamName"]

In [None]:
# Fit a Gaussian naive Bayes classifier, then predict the evaluation rows.
gnb = GaussianNB()
gnb.fit(train_X, train_Y)
y_pred_gnb = gnb.predict(test_X)

In [None]:
# Confusion matrix of the naive Bayes predictions against the expected labels.
cnf_matrix_gnb = metrics.confusion_matrix(test_Y,y_pred_gnb)
print(cnf_matrix_gnb)

In [None]:
# Accuracy of the naive Bayes predictions.
print(metrics.accuracy_score(test_Y,y_pred_gnb))

In [None]:
# Small random forest on the same features; random_state makes the fitted
# trees, and therefore the printed accuracy, repeatable across runs.
model = RandomForestClassifier(n_estimators=5,random_state=25)
model.fit(train_X,train_Y)
prediction = model.predict(test_X)
print(round(metrics.accuracy_score(test_Y,prediction),2))

In [None]:
# Load the labelled variant of the dataset.
label_df = pd.read_csv("/kaggle/input/odi-cricket-matches-19712017/LabelledDataset.csv")

In [None]:
# Preview the first rows of the labelled dataset.
label_df.head()

In [None]:
# Missing-value count per column.
label_df.isna().sum()

In [None]:
# Split off the target: pop() removes "Winner" from label_df and returns it.
# NOTE(review): because the column is removed in place, running this cell a
# second time raises KeyError until label_df is reloaded.
labels = label_df.pop("Winner")

In [None]:
# Preview the remaining feature columns (the "Winner" column has been popped off).
label_df.head()

In [None]:
# Hold out 25% of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(label_df, labels, test_size=0.25,random_state = 25)

In [None]:
# Entropy-based random forest on the labelled dataset; seeded with the same
# value as the split so the fitted model is repeatable across runs.
rfc = RandomForestClassifier(n_estimators= 10, criterion="entropy", random_state=25)
rfc.fit(x_train, y_train)

In [None]:
# Predict the winner for the held-out rows.
y_pred = rfc.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the random-forest predictions on the held-out rows.
cm = confusion_matrix(y_test, y_pred)
cm

In [None]:
# Load the original dataset and preview its first 50 rows.
originalDataset_df = pd.read_csv("/kaggle/input/odi-cricket-matches-19712017/originalDataset.csv")
originalDataset_df.head(50)

In [None]:
# Missing-value count per column.
originalDataset_df.isna().sum()

In [None]:
# Keep only the rows that have no missing value in any column.
originalDataset_df = originalDataset_df.dropna(how="any")

In [None]:
# Preview the first 50 rows after dropping incomplete rows.
originalDataset_df.head(50)

In [None]:
# Every match in which India is listed as either team. The explicit .copy()
# makes india_df an independent frame, so the in-place column swap performed
# on it later does not act on a slice of originalDataset_df.
india_df = originalDataset_df[((originalDataset_df["Team 1"] == "India") | (originalDataset_df["Team 2"] == "India"))].copy()
# Rows where India is listed second.
idx = (india_df['Team 2'] == "India")
india_df

In [None]:
# Put India in "Team 1" for every match so that "Team 2" is always the opponent.
# The mask is recomputed from the current state of india_df, so running this
# cell again finds nothing left to swap instead of swapping the rows back.
india_is_team2 = india_df["Team 2"] == "India"
if india_is_team2.any():
    india_df.loc[india_is_team2,['Team 1','Team 2']] = india_df.loc[india_is_team2,['Team 2','Team 1']].values
india_df

In [None]:
# Count of "Scorecard" entries in india_df per ground (columns), grouped by "Team 1" (rows).
# NOTE(review): the rows are not filtered on "Winner", so despite the variable
# name this counts matches in india_df per ground, not wins — confirm the intent.
india_ground_level_won_df = india_df.pivot_table(index="Team 1",columns="Ground",values="Scorecard",fill_value=0,aggfunc="count")
plt.figure(figsize=(25,1))
# `linewidths` is seaborn's documented parameter for the cell-border width.
sns.heatmap(india_ground_level_won_df,linecolor='black',linewidths=1,annot=True, fmt=".0f")
plt.show()

In [None]:
# Ask for a ground and show how many matches each team has won there.
# Surrounding whitespace in the typed name is ignored.
ground_name = input("Enter Ground Name = ").strip()
ground_matches = originalDataset_df[(originalDataset_df["Ground"]==ground_name)]
if ground_matches.empty:
    # An unknown ground leaves nothing to pivot or draw; say so instead of
    # handing an empty table to the heatmap.
    print("No matches found for ground:", ground_name)
else:
    ground_level_won_df = ground_matches.pivot_table(index="Winner",columns="Ground",values="Scorecard",fill_value=0,aggfunc="count")
    plt.figure(figsize=(25,5))
    # `linewidths` is seaborn's documented parameter for the cell-border width.
    sns.heatmap(ground_level_won_df,linecolor='black',linewidths=1,annot=True, fmt=".0f")
    plt.show()

In [None]:
# Number of "Winner" entries per "Team 2" value (rows) for each "Team 1" value (columns) in india_df,
# listed from the largest "India" count downwards.
india_head_to_head_matches_count = pd.pivot_table(
    india_df,
    index="Team 2",
    columns="Team 1",
    values="Winner",
    aggfunc="count",
    fill_value=0,
)
india_head_to_head_matches_count.sort_values(by="India", ascending=False)

In [None]:
# Count of "Winner" entries for every ("Team 1", "Team 2") pairing in the dataset.
# NOTE(review): a pairing is counted separately for each listing order (A as Team 1
# vs. A as Team 2), so the two cells would need adding for a full head-to-head total.
head_to_head_matches_count = originalDataset_df.pivot_table(index="Team 1",columns="Team 2",values="Winner",fill_value=0,aggfunc="count")
head_to_head_matches_count

In [None]:
# Load the categorical variant of the dataset and preview its first rows.
CategoricalDataset = pd.read_csv("/kaggle/input/odi-cricket-matches-19712017/CategoricalDataset.csv")
CategoricalDataset.head()