# IPL CHAT BOX - Batting Team - PRANJAL

### PROBLEM STATEMENT : 

##### To develop a Q&A chat bot which responds to user's queries based on NLP statistics.

#### .



#### .

In [342]:
import numpy as np
import pandas as pd
import random

# --- NLTK PACKAGE ---
import nltk

# Tokenizers
from nltk.tokenize import word_tokenize, sent_tokenize, PunktSentenceTokenizer, RegexpTokenizer

# Stemming and Lemmatizing
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Stopwords
from nltk.corpus import stopwords, state_union

# Lesk Module
from nltk.wsd import lesk

# Tagger
from nltk.tag import UnigramTagger, BigramTagger, BrillTagger

In [2]:
# Load the per-match table and the ball-by-ball table from CSV files
# located in the notebook's working directory.
matches = pd.read_csv('matches.csv')
deliveries = pd.read_csv('deliveries.csv')

In [3]:
matches.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2
0,1,2008,Bangalore,2008-04-18,Kolkata Knight Riders,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Kolkata Knight Riders,140,0,BB McCullum,M Chinnaswamy Stadium,Asad Rauf,RE Koertzen
1,2,2008,Chandigarh,2008-04-19,Chennai Super Kings,Kings XI Punjab,Chennai Super Kings,bat,normal,0,Chennai Super Kings,33,0,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",MR Benson,SL Shastri
2,3,2008,Delhi,2008-04-19,Rajasthan Royals,Delhi Daredevils,Rajasthan Royals,bat,normal,0,Delhi Daredevils,0,9,MF Maharoof,Feroz Shah Kotla,Aleem Dar,GA Pratapkumar
3,4,2008,Mumbai,2008-04-20,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,normal,0,Royal Challengers Bangalore,0,5,MV Boucher,Wankhede Stadium,SJ Davis,DJ Harper
4,5,2008,Kolkata,2008-04-20,Deccan Chargers,Kolkata Knight Riders,Deccan Chargers,bat,normal,0,Kolkata Knight Riders,0,5,DJ Hussey,Eden Gardens,BF Bowden,K Hariharan


In [4]:
deliveries.head(20)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,1,SC Ganguly,BB McCullum,P Kumar,0,...,0,1,0,0,0,1,1,,,
1,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,2,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,0,0,,,
2,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,3,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,1,1,,,
3,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,4,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,0,0,,,
4,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,5,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,0,0,,,
5,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,6,BB McCullum,SC Ganguly,P Kumar,0,...,0,0,0,0,0,0,0,,,
6,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,1,7,BB McCullum,SC Ganguly,P Kumar,0,...,0,1,0,0,0,1,1,,,
7,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,2,1,BB McCullum,SC Ganguly,Z Khan,0,...,0,0,0,0,0,0,0,,,
8,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,2,2,BB McCullum,SC Ganguly,Z Khan,0,...,0,0,0,0,4,0,4,,,
9,1,1,Kolkata Knight Riders,Royal Challengers Bangalore,2,3,BB McCullum,SC Ganguly,Z Khan,0,...,0,0,0,0,4,0,4,,,


In [416]:
# Lookup collections used to recognise entities mentioned in a question.
team_list = matches.team1.unique()
batsman_list = list(deliveries.batsman.unique())
bowler_list = list(deliveries.bowler.unique())
# `fielder` is blank on the previewed deliveries and therefore loads as NaN;
# drop those so the list contains only real names (strings).
fielder_list = list(deliveries.fielder.dropna().unique())

# FUNCTIONS

### BATSMAN STATS

#### 1. Total Runs scored in Match X by team Y

In [470]:
def total_runs_team_match(team, match_id):
    """Return the total runs (extras included) a team scored in one match.

    Parameters
    ----------
    team : value of the ``batting_team`` column to look up.
    match_id : value of the ``match_id`` column to look up.

    Returns
    -------
    dict with keys ``'team'``, ``'runs'`` and ``'match'``.
    """
    runs_by_match_and_team = (
        deliveries
        .groupby(['match_id', 'batting_team'])['total_runs']
        .sum()
    )
    return dict(
        team=team,
        runs=runs_by_match_and_team[match_id][team],
        match=match_id,
    )

In [471]:
total_runs_team_match('Royal Challengers Bangalore', 1)

{'match': 1, 'runs': 82, 'team': 'Royal Challengers Bangalore'}

#### 2. Runs scored by Batsman B1 in Match X

In [232]:
def runs_batsman_match(batsman_name, match_id):
    """Return ``(batsman_name, runs, match_id)`` for one batsman in one match.

    ``runs`` is the sum of ``batsman_runs`` over that batsman's deliveries
    in the given match.
    """
    runs_by_match_and_batsman = (
        deliveries
        .groupby(['match_id', 'batsman'])['batsman_runs']
        .sum()
    )
    scored = runs_by_match_and_batsman[match_id][batsman_name]
    return batsman_name, scored, match_id

In [478]:
runs_batsman_match('SC Ganguly', 1)

('SC Ganguly', 10, 1)

#### 3. Max scorer in match X in Team Y

In [455]:
def max_score_batsman_match_inTeam(match_id, team):
    """Return ``(name, team, runs, match_id)`` for the team's top scorer in a match.

    Scores are the per-batsman sums of ``batsman_runs`` within the given
    match and batting team.
    """
    scores = (
        deliveries
        .groupby(['match_id', 'batting_team', 'batsman'])['batsman_runs']
        .sum()
    )
    team_scores = scores[match_id][team]
    return team_scores.idxmax(), team, team_scores.max(), match_id

In [456]:
max_score_batsman_match_inTeam(1, 'Royal Challengers Bangalore')

('P Kumar', 'Royal Challengers Bangalore', 18, 1)

#### 4. Max scorer in match X

In [461]:
def max_score_batsman_match(match_id):
    """Return ``(name, runs, match_id)`` for the highest-scoring batsman of a match."""
    scores = (
        deliveries
        .groupby(['match_id', 'batsman'])['batsman_runs']
        .sum()
    )
    match_scores = scores[match_id]
    return match_scores.idxmax(), match_scores.max(), match_id

In [462]:
max_score_batsman_match(4)

('RV Uthappa', 48, 4)

#### Lowest Scorer in Match X

In [13]:
def min_score_batsman_match(match_id):
    """Return ``[name, runs]`` for the lowest-scoring batsman of a match."""
    scores = (
        deliveries
        .groupby(['match_id', 'batsman'])['batsman_runs']
        .sum()
    )
    match_scores = scores[match_id]
    return [match_scores.idxmin(), match_scores.min()]


In [14]:
min_score_batsman_match(1)

['B Akhil', 0]

#### Min scorer in match X in Team Y

In [15]:
def min_score_batsman_match_inTeam(match_id, team):
    """Return ``[name, runs]`` for the lowest-scoring batsman of `team` in a match."""
    scores = (
        deliveries
        .groupby(['match_id', 'batting_team', 'batsman'])['batsman_runs']
        .sum()
    )
    team_scores = scores[match_id][team]
    return [team_scores.idxmin(), team_scores.min()]

In [16]:
min_score_batsman_match_inTeam(1, 'Kolkata Knight Riders')

['Mohammad Hafeez', 5]

#### Number of Balls played by Batsman B1 in match Y

In [17]:
def balls_faced_batsman_match(batsman, match_id):
    """Count the deliveries a batsman faced in a match, excluding wides."""
    is_batsman = deliveries['batsman'] == batsman
    is_match = deliveries['match_id'] == match_id
    not_wide = deliveries['wide_runs'] == 0
    faced = deliveries[is_batsman & is_match & not_wide]
    return len(faced)

In [18]:
balls_faced_batsman_match('SC Ganguly', 1)

12

#### Strike Rate of Batsman B1 in Match X

In [19]:
def strikeRate_batsman_match(batsman, match_id):
    """Return a batsman's strike rate (runs per 100 balls faced) in one match.

    Parameters
    ----------
    batsman : batsman name as stored in the ``batsman`` column.
    match_id : value of the ``match_id`` column.

    Returns
    -------
    ``runs / balls * 100`` where ``balls`` excludes wides.
    """
    # runs_batsman_match returns (batsman_name, runs, match_id); only the
    # middle element is the run tally. Dividing the whole tuple by `balls`
    # raised TypeError.
    _, runs, _ = runs_batsman_match(batsman, match_id)
    balls = balls_faced_batsman_match(batsman, match_id)

    strike_rate = runs / balls * 100
    return strike_rate

In [20]:
strikeRate_batsman_match('BB McCullum', 1)

216.43835616438358

#### Total Number of Dot Balls played by Batsman B1 in Match X

In [21]:
def dot_balls_batsman_match(batsman, match_id):
    """Count the deliveries to `batsman` in `match_id` on which no run at all was added."""
    is_batsman = deliveries['batsman'] == batsman
    is_match = deliveries['match_id'] == match_id
    no_runs = deliveries['total_runs'] == 0
    return len(deliveries[is_batsman & is_match & no_runs])

In [22]:
dot_balls_batsman_match('SC Ganguly', 1)

6

#### Total Number of 4's played by Batsman B1 in Match X

In [23]:
def b_4_batsman_match(batsman, match_id):
    """Count the fours hit by a batsman in one match.

    Parameters
    ----------
    batsman : batsman name as stored in the ``batsman`` column.
    match_id : value of the ``match_id`` column.

    Returns
    -------
    int : number of deliveries on which the batsman scored exactly 4 off the bat.
    """
    # Filter on `batsman_runs`, not `total_runs`: total_runs also includes
    # extras, so four byes would be counted and a four off a no-ball missed.
    # This matches team_fours() and overall_fours_count().
    x = deliveries[(deliveries['batsman'] == batsman)
                   & (deliveries['match_id'] == match_id)
                   & (deliveries['batsman_runs'] == 4)]
    b_4 = x.shape[0]
    return b_4

In [24]:
b_4_batsman_match('SC Ganguly', 1)

2

#### Total Number of 6's played by Batsman B1 in Match X

In [25]:
def b_6_batsman_match(batsman, match_id):
    """Count the sixes hit by a batsman in one match.

    Parameters
    ----------
    batsman : batsman name as stored in the ``batsman`` column.
    match_id : value of the ``match_id`` column.

    Returns
    -------
    int : number of deliveries on which the batsman scored exactly 6 off the bat.
    """
    # Filter on `batsman_runs`, not `total_runs`: total_runs also includes
    # extras, so e.g. 5 runs plus a no-ball would be miscounted as a six.
    # This matches team_sixes() and overall_sixes_count().
    x = deliveries[(deliveries['batsman'] == batsman)
                   & (deliveries['match_id'] == match_id)
                   & (deliveries['batsman_runs'] == 6)]
    b_6 = x.shape[0]
    return b_6

In [26]:
b_6_batsman_match('SC Ganguly', 1)

0

#### i^th Highest Scorer

In [27]:
def highest_scorer(i):
    """Return a one-row Series (batsman -> total runs) at zero-based rank `i`.

    Batsmen are ranked by their summed ``batsman_runs`` in descending order,
    so ``i == 0`` is the overall top scorer.
    """
    ranked_totals = (
        deliveries
        .groupby('batsman')['batsman_runs']
        .sum()
        .sort_values(ascending=False)
    )
    return ranked_totals.iloc[i:i + 1]

In [28]:
highest_scorer(2)

batsman
ST Jayasuriya    514
Name: batsman_runs, dtype: int64

#### Total Number of fours in Match X by team Y

In [29]:
def team_fours(match_id, batting_team):
    """Count the fours (``batsman_runs == 4``) hit by `batting_team` in `match_id`."""
    four_balls = deliveries[deliveries.batsman_runs == 4]
    fours_per_team = four_balls.groupby(['match_id', 'batting_team'])['inning'].count()
    return fours_per_team[match_id][batting_team]

In [30]:
team_fours(1,'Kolkata Knight Riders')

15

#### Total Number of sixes in Match X by team Y

In [31]:
def team_sixes(match_id, batting_team):
    """Count the sixes (``batsman_runs == 6``) hit by `batting_team` in `match_id`."""
    six_balls = deliveries[deliveries.batsman_runs == 6]
    sixes_per_team = six_balls.groupby(['match_id', 'batting_team'])['inning'].count()
    return sixes_per_team[match_id][batting_team]

In [32]:
team_sixes(1,'Kolkata Knight Riders')

14

### RUNS - ALL MATCH STATS

#### Total Runs scored in Entire IPL by Batsman B1

In [33]:
def total_runs_batsman_IPL(batsman):
    """Return the sum of ``batsman_runs`` for `batsman` over every loaded delivery."""
    totals = deliveries.groupby('batsman')['batsman_runs'].sum()
    return totals[batsman]

In [34]:
total_runs_batsman_IPL('BB McCullum')

188

#### Total Runs scored in Entire IPL season 01 by team X

In [35]:
def total_runs_team_IPL(team):
    """Return the sum of ``total_runs`` for `team` over every loaded delivery."""
    totals = deliveries.groupby('batting_team')['total_runs'].sum()
    return totals[team]

In [36]:
total_runs_team_IPL('Chennai Super Kings')

2520

#### Orange CAP

In [37]:
def orange_cap():
    """Return ``[name, runs]`` for the batsman with the most runs overall."""
    totals = deliveries.groupby('batsman')['batsman_runs'].sum()
    leader = totals.idxmax()
    leader_runs = totals.max()
    return [leader, leader_runs]

In [38]:
orange_cap_name = orange_cap()[0]
orange_cap_total_runs = orange_cap()[1]

print(orange_cap_name, "-->", orange_cap_total_runs)

SE Marsh --> 616


#### Highest Runs scored in an innings by a Batsman

In [39]:
def highest_runs_batsman_innings():
    """Return ``[(match_id, batsman), runs]`` for the best single-match score.

    Scores are ``batsman_runs`` summed per (match, batsman) pair.
    """
    per_match_scores = deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
    ranked = per_match_scores.sort_values(ascending=False)
    best_key = ranked.idxmax()
    best_runs = ranked.max()
    return [best_key, best_runs]

In [40]:
highest_runs_batsman_innings_NAME = highest_runs_batsman_innings()[0][1]
highest_runs_batsman_innings_MATCH = highest_runs_batsman_innings()[0][0]
highest_runs_batsman_innings_RUNS = highest_runs_batsman_innings()[1]

print(highest_runs_batsman_innings_NAME, " IN MATCH = ", highest_runs_batsman_innings_MATCH, " --->", highest_runs_batsman_innings_RUNS)

BB McCullum  IN MATCH =  1  ---> 158


#### Highest Strike Rate of Batsman Overall

In [41]:
def total_runs_scored_IPL():
    """Return a Series of summed ``batsman_runs`` indexed by batsman."""
    return deliveries.groupby('batsman')['batsman_runs'].sum()

def total_ball_faced_IPL():
    """Return a Series of balls faced (wides excluded) indexed by batsman."""
    legal_for_batsman = deliveries[deliveries.wide_runs == 0]
    return legal_for_batsman.groupby('batsman')['inning'].count()

def total_strike_rate_IPL(i):
    """Return a one-row Series with the strike rate at zero-based rank `i`.

    Strike rate is total runs divided by balls faced, times 100, ranked in
    descending order.
    """
    runs = total_runs_scored_IPL()
    balls = total_ball_faced_IPL()
    ranked_rates = ((runs / balls) * 100).sort_values(ascending=False)
    return ranked_rates.iloc[i:i + 1]

In [42]:
total_strike_rate_IPL(0)

batsman
Umar Gul    205.263158
dtype: float64

#### Most balls faced by a player in the IPL

In [43]:
total_ball_faced_IPL().sort_values(ascending = False).iloc[0:1]

batsman
SE Marsh    441
Name: inning, dtype: int64

#### Total 4's by player X in IPL

In [44]:
def overall_fours_count(batsman):
    """Return how many fours (``batsman_runs == 4``) `batsman` hit overall."""
    four_balls = deliveries[deliveries.batsman_runs == 4]
    fours_per_batsman = four_balls.groupby('batsman')['inning'].count()
    return fours_per_batsman[batsman]

In [45]:
overall_fours_count('BB McCullum')

13

#### Total 6's by player X in IPL

In [46]:
def overall_sixes_count(batsman):
    """Return how many sixes (``batsman_runs == 6``) `batsman` hit overall."""
    six_balls = deliveries[deliveries.batsman_runs == 6]
    sixes_per_batsman = six_balls.groupby('batsman')['inning'].count()
    return sixes_per_batsman[batsman]

In [47]:
overall_sixes_count('BB McCullum')

15

#### Maximum 4's by a batsman in IPL

In [48]:
def most_fours_count(i):
    """Return a one-row Series (batsman -> fours) at zero-based rank `i` by fours hit."""
    four_balls = deliveries[deliveries.batsman_runs == 4]
    ranked = (
        four_balls
        .groupby('batsman')['inning']
        .count()
        .sort_values(ascending=False)
    )
    return ranked.iloc[i:i + 1]

In [49]:
# Series.iteritems() was removed in pandas 2.0; Series.items() yields the
# same (index, value) pairs and exists in older versions as well.
for index_val, series_val in most_fours_count(0).items():
        print(index_val, series_val)

G Gambhir 68


#### Maximum 6's by a batsman in IPL

In [50]:
def most_sixes_count():
    """Return a one-row Series (batsman -> sixes) for the batsman with the most sixes."""
    six_balls = deliveries[deliveries.batsman_runs == 6]
    ranked = (
        six_balls
        .groupby('batsman')['inning']
        .count()
        .sort_values(ascending=False)
    )
    return ranked.iloc[0:1]

In [51]:
# Series.iteritems() was removed in pandas 2.0; Series.items() yields the
# same (index, value) pairs and exists in older versions as well.
for index_val, series_val in most_sixes_count().items():
        print(index_val, series_val)

ST Jayasuriya 31


## CLASSIFICATION

##### Train sentences

In [393]:
# Labelled example questions for the intent classifier: (sentence, label) pairs.
# Repeated sentences are kept as-is because repetition weights the Naive Bayes counts.
train_sentences = [ 
                    
                    # B = Batsman;  R = Runs;  M = Match;  T = Team;
    
    
                    
                    # F1 - Question: (Runs R1 scored by Team T1 in Match M1, 'Classifier = runs': T,R,M)
    
                    ('How much did RCB Score in Match 1?','runs'),
                    ('RCB Score in match 1?','runs'),
                    ('RCB runs in match 1','runs'),
                    ('total score of DD in 3rd match','runs'),
                    ('totals runs of Deccan in 8th match','runs'),
                    ('final score of DD in 3rd match','runs'),
                    
    
                    # F2 - Question: (Runs R1 scored by Batsman B1 in Match M1, 'Classifier = runs': B,R,M)
                    
                    ('Total runs scored by SC Ganguly in match 5?' ,'runs'),
                    ('SC Ganguly score in match 1?','runs'),
                    ('how many runs did Ganguly score in match 2?','runs'),
                    ("Sachin's score in 4th match?", 'runs'),
                    ('McCullum scored how much in match 3?','runs'),
                    ("Dravid's runs in match 2?",'runs'),
                    ("Dravid's score in match 5?",'runs'),
    
                       
                    # F3 - Question: (Maximum Runs R1 scored by Batsman B1 in Match M1, 'Classifier = max_runs': R,M)
                    
                    ('Rahul maximum score in match 3','max_runs'),
                    ("who was the leading run scorer in match 9", 'max_runs'),
                    ('highest score by a batsman in match 3','max_runs'),
                    ('maximum scorer of match 2','max_runs'),
                    ("who was the top scorer in match 6", 'max_runs'),                   
                    ("who was the leading run scorer in match 9", 'max_runs'),
                    ('highest score in match 3','max_runs'),
                    ("who scored maximum runs in match 6", 'max_runs'),
                    
    
    
                    
                    # Question: (Maximum Runs R1 scored by Batsman B1/Match M1, 'Classifier = max_runs')
    
                    ("who scored the maximum run match 3 by DD",'max_runs'),
                    ("top scorer for DD in match 3",'max_runs'),
                    ("top score in DD match 4?",'max_runs'),
                    ("highest scorer for DD in match 4",'max_runs'),

    
                    ("who scored the maximum run match 3 by DD",'max_runs'),
                    ("top scorer for DD in match 3",'max_runs'),
                   
                    # Question: (Minimum Runs scored by Batsman B1/Match M1, 'Classifier = min_runs')
    
                    ('minimum score in match 3', 'min_runs'),
                    ('lowest score in match 3','min_runs'),
                    ('minimum scorer of match 2','min_runs'),
                    ("who scored the minimum run match 3 by DD",'min_runs'),
                    ("least scorer for DD in match 3",'min_runs'),
                    ("lowest score in DD match 4?",'min_runs'),
                    ("low scorer for DD in match 4",'min_runs'),
                    ('minimum score in match 3', 'min_runs'),
                    ('lowest score in match 3','min_runs'),
                    # Fixed typo: was 'minimumscorer', a single token that matches no feature keyword.
                    ('minimum scorer of match 2','min_runs'),
                    ('who scored less runs in match 3?','min_runs'),
                    ('who was the least run scorer in match 3?','min_runs'),
                    ("who scored the minimum run match 3 by DD",'min_runs'),
                    ("least scorer for DD in match 3",'min_runs'),
                    ("lowest score in DD match 4?",'min_runs'),
                    ("low scorer for DD in match 4",'min_runs'),
    
                    # Fixed label: a "minimum run" question was tagged 'max_runs',
                    # contradicting the identical sentences labelled 'min_runs' above.
                    ("who scored the minimum run match 3 by DD",'min_runs'),
                    ("who scored maximum runs in match 6", 'max_runs'),
                   
                     # Question: (Total Runs scored by Team T1/Batsman B1/Match M1, 'Classifier = total_runs')
    
                    ("total runs scored by sachin?",'total_runs'),
                    ("aggregate runs by Abhay?",'total_runs'),
                    ("how many runs has sachin made in ipl1?",'total_runs'),
                    ("Total runs made by sachin?",'total_runs'),
                    ("What is the total runs made by RCB?",'total_runs'),
                    ("what is the runs made by RCB in ipl 9?",'total_runs'),
                    ("What is the sum total runs made by RCB",'total_runs'),
                    ("total runs scored by sachin?",'total_runs'),
                    ("aggregate runs by Abhay?",'total_runs'),
                    ("how many runs has sachin made in ipl1?",'total_runs'),
                    ("Total runs made by sachin?",'total_runs'),
                    ("What is the total runs made by RCB?",'total_runs'),
                    ("what is the runs made by RCB in ipl 9?",'total_runs'),
                    ("What is the sum total runs made by RCB",'total_runs'),
                   
    
                    # Question: (i-th Highest Run scorer in Match M1, 'Classifier = ith_highest_scorer')
    
                    ("who was 3rd highest scorer in match 3?",'ith_highest_scorer'),
                    ("who was 3rd top scorer in match 4?",'ith_highest_scorer'),
                    ("who was 2nd maximum run scorer in match 6?",'ith_highest_scorer'), 
                   
    
                    # Question: (Dot balls faced by Team T1/Batsman B1/Match M1, 'Classifier = dot_balls')
    
                    ("rahul faced how many dot balls in match 1",'dot_balls'),
                    ("rahul faced how many dots in match 1",'dot_balls'),
                   
    
                    # Question: (4's scored by Team T1/Batsman B1/Match M1, 'Classifier = fours')
    
                    ("how many fours did Kohli score in match 2?",'fours'),
                    ("4s hit by Sachin in match 1?",'fours'),
                    ("Fours hit by Abhay in match 5?",'fours'),
                    ("how many boundaries are scored by rcb in match 6",'fours'),
                    ("how many 4s are scored by rcb in match 3",'fours'),
                    ("how many fours are scored by rcb in match 6",'fours'),
                    ("how many boundaries are hit by rcb in match 6",'fours'),
                    ("how many 4s are hit by rcb in match 6",'fours'),
                    ("how many fours are hit by rcb in match 6",'fours'),
                    ("what are total boundaries of rcb in match 6",'fours'),
                    ("what are total fours of rcb in match 6",'fours'),
                    ("what are total 4s of rcb in match 5",'fours'),
                    ("how many fours are hit by kohli in this season?",'fours'),
                    ("how many 4s are hit by kohli in this season?",'fours'),
                    ("how many boundaries are hit by kohli in this season?",'fours'),
                    ("how many fours are scored by kohli in this season?",'fours'),
                    ("how many 4s are scored by kohli in this season?",'fours'),
                    ("how many boundaries are hit by kohli in this season?",'fours'),
                    ("how many boundaries are hit by kohli in this entire ipl?",'fours'),
                    ("how many 4s are hit by kohli in this entire ipl?",'fours'),
                    ("how many fours are hit by kohli in this entire ipl?",'fours'),
                    ("how many boundaries are hit by kohli in this whole ipl?",'fours'),
                    ("how many 4s are hit by kohli in this whole ipl?",'fours'),
                    ("how many fours are hit by kohli in this whole ipl?",'fours'),
                    ("who scored most fours in entire ipl?",'fours'),
                    ("who hit most fours in entire ipl?",'fours'),
                    ("who scored most 4s in entire ipl?",'fours'),
                    ("who hit most 4s in entire ipl?",'fours'), 
                    ("who scored most boundaries in entire ipl?",'fours'),
                    ("who hit most boundaries in entire ipl?",'fours'), 
                    ("who scored most fours in whole ipl?",'fours'),
                    ("who hit most fours in whole ipl?",'fours'),
                    ("who scored most 4s in whole ipl?",'fours'),
                    ("who hit most 4s in whole ipl?",'fours'), 
                    ("who scored most boundaries in whole ipl?",'fours'),
                    ("who hit most boundaries in whole ipl?",'fours'), 
                    ("who scored most fours in this season?",'fours'),
                    ("who hit most fours in this season?",'fours'),
                    ("who scored most 4s in this season?",'fours'),
                    ("who hit most 4s in this season?",'fours'), 
                    ("who scored most boundaries in this season?",'fours'),
                    ("who hit most boundaries in this season?",'fours'),
                   
    
                    # Question: (6's scored by Team T1/Batsman B1/Match M1, 'Classifier = sixes')
    
                    ("how many 6s are scored by rcb in match 6",'sixes'),
                    ("how many sixes are scored by rcb in match 6",'sixes'),
                    ("how many 6s are hit by rcb in match 6",'sixes'),
                    ("how many sixes are hit by rcb in match 6",'sixes'),
                    ("what are total sixes of rcb in match 6",'sixes'),
                    ("what are total 6s of rcb in match 5",'sixes'), 
                    ("how many sixes are hit by kohli in this season?",'sixes'),
                    ("how many 6s are hit by kohli in this season?",'sixes'),
                    ("how many sixes are scored by kohli in this season?",'sixes'),
                    ("how many 6s are scored by kohli in this season?",'sixes'),
                    ("how many 6s are hit by kohli in this entire ipl?",'sixes'),
                    ("how many sixes are hit by kohli in this entire ipl?",'sixes'),
                    ("how many 6s are hit by kohli in this whole ipl?",'sixes'),
                    ("how many sixes are hit by kohli in this whole ipl?",'sixes'),  
                    ("who scored most sixes in entire ipl?",'sixes'),
                    ("who hit most sixes in entire ipl?",'sixes'),
                    ("who scored most 6s in entire ipl?",'sixes'),
                    ("who hit most 6s in entire ipl?",'sixes'), 
                    ("who scored most sixes in whole ipl?",'sixes'),
                    ("who hit most sixes in whole ipl?",'sixes'),
                    ("who scored most 6s in whole ipl?",'sixes'),
                    ("who hit most 6s in whole ipl?",'sixes'), 
                    ("who scored most sixes in this season?",'sixes'),
                    ("who hit most sixes in this season?",'sixes'),
                    ("who scored most 6s in this season?",'sixes'),
                    ("who hit most 6s in this season?",'sixes'),
                    ("how many sixes did Kohli score in match 2?",'sixes'),
                    ("6s hit by Sachin in match 1?",'sixes'),
                    ("Sixes hit by Abhay in match 5?",'sixes'),
                    ("how many sixes did Kohli hit in match 2?",'sixes')
                  ]

##### Feature Extraction

In [394]:
def feature_extractor(words):
    """Turn a list of tokens into a keyword-count feature dict for the classifier.

    Parameters
    ----------
    words : iterable of str
        Tokens of one question.

    Returns
    -------
    dict
        One entry per tracked keyword (always the same keys, in the same
        order); the value is how many times that keyword occurs in `words`,
        compared case-insensitively.
    """
    # Local import: use the standard-library Counter directly instead of the
    # incidental `nltk.Counter` re-export.
    from collections import Counter

    keywords = (
        'highest', 'scored', 'runs', 'scorer', 'score', 'match',
        'wickets', 'boundary', 'fours', '4s', 'six', 'sixes', '6s',
        'hit', 'four', 'aggregate', 'total', 'team', 'lead', 'leading',
        'maximum', 'max', 'minimum', 'min', 'least', 'less',
        '1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th',
        'dot', 'dots', 'faced', 'entire', 'whole', 'season',
    )

    # Lower-case before counting so that e.g. "Fours" or "Total" at the start
    # of a sentence is recognised as the keyword.
    word_counts = Counter(word.lower() for word in words)

    # Counter returns 0 for absent keys, so every keyword gets a value.
    return {keyword: word_counts[keyword] for keyword in keywords}

#### TRAINING - Naive Bayes Classifier

In [395]:
# Train on (feature dict, label) pairs; each sentence is tokenised with
# nltk.word_tokenize before feature extraction.
naive_bayes_classifier = nltk.NaiveBayesClassifier.train(
    [
        (feature_extractor(nltk.word_tokenize(sentence)), label)
        for sentence, label in train_sentences
    ]
)

----

# TEST QUESTION

In [424]:
question = 'Who scored the maximum runs from Delhi Daredevils in match 3 ?'

____

##### Lemmatized Words

In [425]:
# Split the question into runs of word characters (punctuation is dropped).
pattern = RegexpTokenizer(r'\w+')
words = pattern.tokenize(question)

stop_words = set(stopwords.words('english'))
lm = WordNetLemmatizer()

# Keep tokens that are not in the stop-word set (tested with their original
# casing) and lemmatise each one.
lem_words = [lm.lemmatize(w) for w in words if w not in stop_words]

##### Classifier

In [426]:
classifier = naive_bayes_classifier.classify(feature_extractor(lem_words))

##### Tagging and Chunking

In [427]:
# Part-of-speech tag the filtered tokens.
tag = nltk.pos_tag(lem_words)

# Chunk grammar: group consecutive tokens whose tag matches NNP.? (proper
# nouns) into subtrees labelled "Chunk"; these are the candidate team or
# player names.
chunkGram = r"""Chunk:{<NNP.?>*<NNP.?>*}"""
chunkParser = nltk.RegexpParser(chunkGram)
chunked = chunkParser.parse(tag)

##### Joining Leaves

In [428]:
# Start from an empty name so this cell never fails with NameError, or
# silently reuses the entity of a previous question, when the current
# question contains no proper-noun chunk.
chunked_words = ''

for node in chunked:
    # Only chunk subtrees have a label; plain (word, tag) leaves are skipped.
    if hasattr(node, "label"):
        # A later chunk overwrites an earlier one, so the last chunk wins.
        chunked_words = ' '.join(word for word, _ in node.leaves())

chunked_words

'Delhi Daredevils'

---------------------------------

====

## Answer Formulation

In [473]:
# Reply templates keyed by statistic function name. The placeholders
# ({team}, {runs}, {match}) match the keys of the dict returned by
# total_runs_team_match, so a template can be filled with .format(**result).
ans = { 'total_runs_team_match' : [ '{team} score {runs} in match {match}'
                                  
                                   ]}

In [422]:
# Reply templates keyed by statistic function name; one is picked at random
# per answer. Placeholders are named str.format fields:
#   {batsman}, {team}, {runs}, {match}
answers = { 
            'runs_batsman_match' : ['{batsman} scored {runs} runs in match {match}',
                                    'In match {match} {batsman} scored {runs} runs',
                                    '{runs} runs were scored by {batsman} in match {match}'
                                    ],
    
            # total_runs_team_match reports a team, not a batsman, so these
            # templates use {team}.
            'total_runs_team_match' : ['{team} scored {runs} runs in match {match}',
                                       'In match {match} {team} scored {runs} runs',
                                       '{runs} runs were scored by {team} in match {match}',
                                       'Match {match} saw {team} scoring {runs} runs',
                                       'A total of {runs} was scored by {team} in match {match}'
                                    ],
            'max_score_batsman_match_inTeam' : ['{batsman} of {team} scored {runs} runs in match {match}',
                                                # was 'In match {team} ...', which printed the team name as the match number
                                                'In match {match} {batsman} of {team} scored {runs} runs',
                                                '{runs} runs were scored by {batsman} of {team} in match {match}',
                                                ],
            'max_score_batsman_match' : ['{batsman} of {team} scored {runs} runs in match {match}',
                                         # was '{batsmam}', an unknown field name
                                         'In match {match} {batsman} of {team} scored {runs} runs',
                                         # was a literal 'c runs ...' left over from the old a/b/c placeholders
                                         '{runs} runs were scored by {batsman} of {team} in match {match}',
                                        ]
}

In [423]:
answers['total_runs_team_match' ]

['a scored b runs in match c',
 'In match c a scored b runs',
 'b runs were scored by a in match c',
 'Match c saw a scoring b runs',
 'A total of b was scored by a in match c']

In [442]:
reply = random.choice(answers['max_score_batsman_match_inTeam'])
reply            
#result = setting_values(max_score_batsman_match_inTeam(team_name, int(match_id), reply, 4))
#print(' '.join(str(w) for w in result))

'c runs were scored by a of b in match d'

-----------

#### Results

In [452]:
def setting_values(values, reply, arg_count):
    """Substitute single-letter placeholders in a reply template.

    The template is split on whitespace; a token that is exactly ``'a'``,
    ``'b'``, ``'c'`` or ``'d'`` is replaced by ``values[0]`` .. ``values[3]``
    respectively. Only the first `arg_count` letters are treated as
    placeholders.

    Parameters
    ----------
    values : sequence
        Replacement values; must hold at least `arg_count` items.
    reply : str
        Template sentence, e.g. ``'a scored b runs in match c'``.
    arg_count : int
        Number of placeholders in use (1 to 4).

    Returns
    -------
    list
        The template's tokens with placeholders replaced (values keep their
        original types), or ``None`` when `arg_count` is not 1, 2, 3 or 4.

    Raises
    ------
    IndexError
        If `values` has fewer than `arg_count` items.
    """
    if arg_count not in (1, 2, 3, 4):
        return None

    # One mapping replaces the four copy-pasted branches; it also makes the
    # 1- and 2-argument cases return a result instead of None.
    letters = 'abcd'[:int(arg_count)]
    substitutions = {letter: values[position] for position, letter in enumerate(letters)}

    return [substitutions.get(token, token) for token in reply.split()]

-------------------

In [447]:
chunked_words

'Delhi Daredevils'

In [472]:
total_runs_team_match(team_name,int(match_id))

{'match': 3, 'runs': 132, 'team': 'Delhi Daredevils'}

In [477]:
def _match_id_from_chunks(tree):
    """Return the last cardinal-number (CD) token of the tagged tree as an int.

    Raises ValueError when the question contains no number, so a stale
    `match_id` from an earlier question is never reused.
    """
    number_token = None
    for node in tree:
        # Chunk subtrees have a label and hold names, not numbers; only the
        # plain (word, tag) leaves are inspected.
        if hasattr(node, "label"):
            continue
        token, pos = node
        if pos == 'CD':
            number_token = token
    if number_token is None:
        raise ValueError("no match number found in the question")
    return int(number_token)


def _team_of_batsman(batsman, match):
    """Look up the batting team of `batsman` in `match` from the deliveries table."""
    rows = deliveries[(deliveries['match_id'] == match) & (deliveries['batsman'] == batsman)]
    return rows['batting_team'].iloc[0]


# A question is about a team when the extracted proper-noun chunk is a team name.
is_team_question = chunked_words in team_list

if classifier == 'runs':
    try:
        match_id = _match_id_from_chunks(chunked)
        if is_team_question:
            # Function 1.0 - Team - Runs - Match
            team_name = chunked_words
            result = total_runs_team_match(team_name, match_id)
            reply = random.choice(ans['total_runs_team_match'])
            # The formatted sentence used to be computed and then discarded.
            print(reply.format(**result))
        else:
            # Function 2.0 - Batsman - Runs - Match
            person_name = chunked_words
            result = dict(zip(('batsman', 'runs', 'match'),
                              runs_batsman_match(person_name, match_id)))
            reply = random.choice(answers['runs_batsman_match'])
            print(reply.format(**result))
    except Exception as exc:
        print("ERROR IN F1/2:", repr(exc))

elif classifier == 'max_runs':
    try:
        match_id = _match_id_from_chunks(chunked)
        if is_team_question:
            # Function 3.0 - Team Batsman - Runs - Match
            team_name = chunked_words
            result = dict(zip(('batsman', 'team', 'runs', 'match'),
                              max_score_batsman_match_inTeam(match_id, team_name)))
            reply = random.choice(answers['max_score_batsman_match_inTeam'])
            print(reply.format(**result))
        else:
            # Function 4.0 - Batsman - Runs - Match
            # max_score_batsman_match returns three values (name, runs, match);
            # the templates also mention the team, so it is looked up here.
            result = dict(zip(('batsman', 'runs', 'match'),
                              max_score_batsman_match(match_id)))
            result['team'] = _team_of_batsman(result['batsman'], result['match'])
            reply = random.choice(answers['max_score_batsman_match'])
            print(reply.format(**result))
    except Exception as exc:
        print("ERROR IN F3/4:", repr(exc))

elif classifier == 'min_runs':
    try:
        match_id = _match_id_from_chunks(chunked)
        if is_team_question:
            team_name = chunked_words
            print(min_score_batsman_match_inTeam(match_id, team_name))
        else:
            print(min_score_batsman_match(match_id))
    except Exception as exc:
        print("min_runs", repr(exc))

elif classifier == 'total_runs':
    try:
        if is_team_question:
            team_name = chunked_words
            print(total_runs_team_IPL(team_name))
        else:
            person_name = chunked_words
            print(total_runs_batsman_IPL(person_name))
    except Exception as exc:
        print("total_runs", repr(exc))

elif classifier == 'fours':
    try:
        match_id = _match_id_from_chunks(chunked)
        if is_team_question:
            team_name = chunked_words
            print(team_fours(match_id, team_name))
        else:
            # `person_name` was never assigned in this branch, so it used
            # whatever name an earlier question had left behind.
            person_name = chunked_words
            print(b_4_batsman_match(person_name, match_id))
    except Exception as exc:
        print("fours", repr(exc))

elif classifier == 'sixes':
    try:
        match_id = _match_id_from_chunks(chunked)
        if is_team_question:
            team_name = chunked_words
            print(team_sixes(match_id, team_name))
        else:
            person_name = chunked_words
            print(b_6_batsman_match(person_name, match_id))
    except Exception as exc:
        print("sixes", repr(exc))

else:
    # Labels such as 'ith_highest_scorer' and 'dot_balls' have no handler;
    # say so instead of printing nothing.
    print("No answer handler for question type:", classifier)

58 runs were scored by G Gambhir of Delhi Daredevils in match 3


#### Returns full Player name

In [224]:
def function_return_fullName(data):
    """Resolve a (partial) player name to the full name used in the data.

    Searches batsmen first, then bowlers, then fielders, and returns the
    first name that equals `data` or whose last word equals `data`
    (both compared case-insensitively).

    Parameters
    ----------
    data : str
        Full name or surname, e.g. ``'McCullum'``.

    Returns
    -------
    str or None
        The matching full name, or ``None`` when nothing matches.
    """
    query = data.lower()
    # `batsman_list or bowler_list or fielder_list` evaluates to the first
    # non-empty list only, so bowlers and fielders were never searched.
    for roster in (batsman_list, bowler_list, fielder_list):
        for full_name in roster:
            # Skip non-string entries such as NaN from blank cells.
            if not isinstance(full_name, str):
                continue
            name_parts = full_name.split()
            if full_name.lower() == query:
                return full_name
            if name_parts and name_parts[-1].lower() == query:
                return full_name
    return None

In [225]:
function_return_fullName('McCullum')

'BB McCullum'

#### Returns full Team Name

In [221]:
# One tuple per team: the canonical name first, followed by the aliases it is
# recognised by. Aliases must be lower-case because lookups lower-case the
# query before comparing.
teams_abbr = [ ('Kolkata Knight Riders', 'kolkata knight riders', 'kolkata', 'kolkata riders', 'kolkata rider', 'kolkata knights', 'kolkata knight', 'knight riders', 'knight rider', 'riders', 'k k riders', 'k knight riders', 'kkr'), 
               ('Chennai Super Kings', 'chennai super kings', 'chennai', 'chennai kings', 'chennai super', 'super kings', 'chennai kings', 'csk'),  
               ('Rajasthan Royals', 'rajasthan royals', 'rajasthan', 'rajasthan royal', 'rr'),
               ('Mumbai Indians', 'mumbai indians', 'mumbai', 'mumbai indian', 'indians', 'indian', 'mi'), 
               ('Deccan Chargers', 'deccan chargers', 'deccan', 'deccan charger', 'chargers', 'charger', 'dc'), 
               # 'kings xi' was spelled 'kings XI' and could never match a lower-cased query.
               ('Kings XI Punjab', 'kings xi punjab', 'kings', 'punjab', 'kings xi', 'kings punjab',  'kxip','kp', 'kxp'), 
               ('Royal Challengers Bangalore', 'royal challengers bangalore', 'bangalore', 'royal challengers', 'royal challenger', 'royal bangalore', 'challengers bangalore', 'challenger bangalore', 'rcb', 'rb'),
               ('Delhi Daredevils', 'delhi daredevils', 'delhi', 'daredevils', 'delhi daredevil', 'dd') ]

In [222]:
def funtion_return_fullTeamName(data):
    """Map a team name or alias to its canonical team name.

    `data` is lower-cased and looked up in each row of ``teams_abbr``; the
    first row containing it supplies the canonical name (its first element).
    Returns ``None`` when no row contains the query.
    """
    query = data.lower()
    return next((names[0] for names in teams_abbr if query in names), None)

In [223]:
funtion_return_fullTeamName('Royal Challengers Bangalore')

'Royal Challengers Bangalore'

In [466]:
person = { "name" : "Pranjal", "age": 24}

In [467]:
"{name} is {age}".format(**person)

'Pranjal is 24'

In [None]:

            reply = 