# IPL CHAT BOX

### PROBLEM STATEMENT : 

##### To develop a Q&A chat bot which responds to user's queries based on NLP statistics.

#### .



#### .

In [2]:
import math
import numpy as np
import pandas as pd

# --- NLTK PACKAGE ---
import nltk

# Tokenizers
from nltk.tokenize import word_tokenize, sent_tokenize, PunktSentenceTokenizer, RegexpTokenizer

# Stemming and Lemmatizing
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Stopwords
from nltk.corpus import stopwords, state_union

# Lesk Module
from nltk.wsd import lesk

# Tagger
from nltk.tag import UnigramTagger, BigramTagger, BrillTagger


In [5]:
matches = pd.read_csv('/home/user/Downloads/IPLChatbot/matches.csv')
deliveries = pd.read_csv('/home/user/Downloads/IPLChatbot/deliveries.csv')

# FUNCTIONS

#### BATSMAN STATS

#### Runs scored by Batsman B1 in Match X

In [8]:
def runs_batsman_match(batsman_name, match_id):
    x = deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
    runs = x[match_id][batsman_name]
    return runs  

In [9]:
runs_batsman_match('SC Ganguly', 1)

10

#### Total Runs scored in Match X by team Y

In [10]:
def total_runs_team_match(team, match_id):
    x = deliveries.groupby(['match_id','batting_team'])['total_runs'].sum()
    total_runs_match = x[match_id][team]
    return total_runs_match  

In [11]:
total_runs_team_match('Royal Challengers Bangalore', 1)

82

#### Max scorer in match X

In [12]:
def max_score_batsman_match(match_id):
    x = deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
    name = x[match_id].idxmax()
    runs = x[match_id].max()
    return [name, runs]  

In [13]:
max_score_batsman_match(4)

['RV Uthappa', 48]

#### Max scorer in match X in Team Y

In [14]:
def max_score_batsman_match_inTeam(match_id, team):
    x = deliveries.groupby(['match_id','batting_team', 'batsman'])['batsman_runs'].sum()
    name = x[match_id][team].idxmax()
    runs = x[match_id][team].max()
    return [name, runs]  

In [15]:
max_score_batsman_match_inTeam(1, 'Royal Challengers Bangalore')

['P Kumar', 18]

#### Lowest Scorer in Match X

In [16]:
def min_score_batsman_match(match_id):
    x = deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
    name = x[match_id].idxmin()
    runs = x[match_id].min()
    return [name, runs]  


In [17]:
min_score_batsman_match(1)

['B Akhil', 0]

#### Min scorer in match X in Team Y

In [18]:
def min_score_batsman_match_inTeam(match_id, team):
    x = deliveries.groupby(['match_id','batting_team', 'batsman'])['batsman_runs'].sum()
    name = x[match_id][team].idxmin()
    runs = x[match_id][team].min()
    return [name, runs]  

In [19]:
min_score_batsman_match_inTeam(1, 'Kolkata Knight Riders')

['Mohammad Hafeez', 5]

#### Number of Balls played by Batsman B1 in match Y

In [20]:
def balls_faced_batsman_match(batsman, match_id):
    x = deliveries[(deliveries['batsman'] == batsman) & (deliveries['match_id'] == match_id) & (deliveries['wide_runs'] == 0)]
    return x.shape[0]

In [21]:
balls_faced_batsman_match('SC Ganguly', 1)

12

#### Strike Rate of Batsman B1 in Match X

In [22]:
def strikeRate_batsman_match(batsman, match_id):
    runs = runs_batsman_match(batsman, match_id)
    balls = balls_faced_batsman_match(batsman, match_id)
    
    strike_rate = runs/balls * 100
    return strike_rate

In [23]:
strikeRate_batsman_match('BB McCullum', 1)

216.43835616438358

#### Total Number of Dot Balls played by Batsman B1 in Match X

In [24]:
def dot_balls_batsman_match(batsman, match_id):
    x = deliveries[(deliveries['batsman'] == batsman) & (deliveries['match_id'] == match_id) & (deliveries['total_runs'] == 0)]
    dot_balls = x.shape[0]
    return dot_balls

In [25]:
dot_balls_batsman_match('SC Ganguly', 1)

6

#### Total Number of 4's played by Batsman B1 in Match X

In [26]:
def b_4_batsman_match(batsman, match_id):
    x = deliveries[(deliveries['batsman'] == batsman) & (deliveries['match_id'] == match_id) & (deliveries['total_runs'] == 4)]
    b_4 = x.shape[0]
    return b_4

In [27]:
b_4_batsman_match('SC Ganguly', 1)

2

#### Total Number of 6's played by Batsman B1 in Match X

In [28]:
def b_6_batsman_match(batsman, match_id):
    x = deliveries[(deliveries['batsman'] == batsman) & (deliveries['match_id'] == match_id) & (deliveries['total_runs'] == 6)]
    b_6 = x.shape[0]
    return b_6

In [29]:
b_6_batsman_match('SC Ganguly', 1)

0

#### i^th Highest Scorer

In [30]:
def highest_scorer(i):
    #player_runs = deliveries.groupby('batsman')['batsman_runs'].sum().sort_values(ascending =False)
    player_name = deliveries.groupby('batsman')['batsman_runs'].sum().sort_values(ascending =False).iloc[i:i+1]
    return  player_name

In [31]:
highest_scorer(2)

batsman
ST Jayasuriya    514
Name: batsman_runs, dtype: int64

#### Total Number of fours in Match X by team Y

In [32]:
def team_fours(match_id, batting_team):
    team_fours = deliveries[deliveries.batsman_runs == 4]
    team_fours_count = team_fours.groupby(['match_id','batting_team']).count()['inning']
    return team_fours_count[match_id][batting_team]

In [33]:
team_fours(1,'Kolkata Knight Riders')

15

#### Total Number of sixes in Match X by team Y

In [34]:
def team_sixes(match_id, batting_team):
    team_sixes = deliveries[deliveries.batsman_runs == 6]
    team_sixes_count = team_sixes.groupby(['match_id','batting_team']).count()['inning']
    return team_sixes_count[match_id][batting_team]

In [35]:
team_sixes(1,'Kolkata Knight Riders')

14

## RUNS - ALL MATCH STATS

#### Total Runs scored in Entire IPL by Batsman B1

In [36]:
def total_runs_batsman_IPL(batsman):
    x = deliveries.groupby(['batsman'])['batsman_runs'].sum()
    runs_batsman = x[batsman]
    return runs_batsman

In [37]:
total_runs_batsman_IPL('BB McCullum')

188

#### Total Runs scored in Entire IPL season 01 by team X

In [38]:
def total_runs_team_IPL(team):
    x = deliveries.groupby(['batting_team'])['total_runs'].sum()
    total_runs_IPL = x[team]
    return total_runs_IPL

In [39]:
total_runs_team_IPL('Chennai Super Kings')

2520

#### Orange CAP

In [40]:
def orange_cap():
    x = deliveries.groupby(['batsman'])['batsman_runs'].sum()
    max_scorer = [x.idxmax(), x.max()]
    return max_scorer

In [41]:
orange_cap_name = orange_cap()[0]
orange_cap_total_runs = orange_cap()[1]

print(orange_cap_name, "-->", orange_cap_total_runs)

SE Marsh --> 616


#### Highest Runs scored in an innings by a Batsman

In [42]:
def highest_runs_batsman_innings():
    x = deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum().sort_values(ascending=False)
    batsman_and_match = [ x.idxmax(), x.max()]
    
    return batsman_and_match

In [43]:
highest_runs_batsman_innings_NAME = highest_runs_batsman_innings()[0][1]
highest_runs_batsman_innings_MATCH = highest_runs_batsman_innings()[0][0]
highest_runs_batsman_innings_RUNS = highest_runs_batsman_innings()[1]

print(highest_runs_batsman_innings_NAME, " IN MATCH = ", highest_runs_batsman_innings_MATCH, " --->", highest_runs_batsman_innings_RUNS)

BB McCullum  IN MATCH =  1  ---> 158


#### Highest Strike Rate of Batsman Overall

In [44]:
def total_runs_scored_IPL():
    runs_count = deliveries.groupby('batsman')['batsman_runs'].sum()
    return runs_count

def total_ball_faced_IPL():
    balls = deliveries[(deliveries.wide_runs == 0)].groupby('batsman')['inning']
    balls_count = balls.count()
    return balls_count

def total_strike_rate_IPL(i):
    strike_rate = (total_runs_scored_IPL()/total_ball_faced_IPL())*100
    return strike_rate.sort_values(ascending = False).iloc[i:i+1]

In [45]:
total_strike_rate_IPL(0)

batsman
Umar Gul    205.263158
dtype: float64

#### Most ball played by a player in IPL

In [46]:
total_ball_faced_IPL().sort_values(ascending = False).iloc[0:1]

batsman
SE Marsh    441
Name: inning, dtype: int64

#### Total 4's by player X in IPL

In [47]:
def overall_fours_count(batsman):
    fours = deliveries[deliveries.batsman_runs == 4]
    fours_count = fours.groupby('batsman').count()['inning']
    return fours_count[batsman]

In [48]:
overall_fours_count('BB McCullum')

13

#### Total 6's by player X in IPL

In [49]:
def overall_sixes_count(batsman):
    sixes = deliveries[deliveries.batsman_runs == 6]
    sixes_count = sixes.groupby('batsman').count()['inning']
    return sixes_count[batsman]

In [50]:
overall_sixes_count('BB McCullum')

15

#### Maximum 4's by a batsman in IPL

In [51]:
def most_fours_count(i):
    fours = deliveries[deliveries.batsman_runs == 4]
    fours_count = fours.groupby('batsman').count()['inning']
    return fours_count.sort_values(ascending = False).iloc[i:i+1]

In [52]:
for index_val, series_val in most_fours_count(0).iteritems():
        print(index_val, series_val)

G Gambhir 68


#### Maximum 6's by a batsman in IPL

In [53]:
def most_sixes_count():
    sixes = deliveries[deliveries.batsman_runs == 6]
    sixes_count = sixes.groupby('batsman').count()['inning'].sort_values(ascending = False).iloc[0:1]
    return sixes_count

In [54]:
for index_val, series_val in most_sixes_count().iteritems():
        print(index_val, series_val)

ST Jayasuriya 31


### Bowler Stats

#### Wickets taken by a bowler X in match Y

In [55]:
def wickets_by_bowler(bowler, match_id):
    total_wickets = deliveries[(deliveries['match_id'] == match_id) & (deliveries['bowler'] == bowler)].dismissal_kind.count()
    run_outs = deliveries[(deliveries['match_id'] == match_id) & (deliveries['bowler'] == bowler) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
    return total_wickets-run_outs

In [56]:
wickets_by_bowler('SC Ganguly', 1)

2

#### Runs conceded by a bowler X in match Y

In [57]:
def runs_conceded_by_bowler(bowler, match_id):
    return (deliveries.groupby(['match_id','bowler']).total_runs.sum()-deliveries.groupby(['match_id','bowler']).bye_runs.sum()-deliveries.groupby(['match_id','bowler']).legbye_runs.sum())[match_id][bowler]

In [58]:
runs_conceded_by_bowler('SC Ganguly', 1)

21

#### Number of balls bowled by a bowler X including extras in Match Y

In [59]:
def balls_by_bowler(bowler, match_id):
    return (deliveries.groupby(['match_id','bowler']).ball.agg('count'))[match_id][bowler]

In [60]:
balls_by_bowler('SC Ganguly',1)

25

#### Number of overs bowled by bowler X in match Y

In [61]:
def overs_by_bowler(bowler, match_id):
    balls = (deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)]).ball.count()
    overs = float(int(balls/6) + float(balls%6)/10)
    return (overs)

In [62]:
overs_by_bowler('LR Shukla', 1)

1.1

#### Number of extra runs conceded by bowler X in match Y (wides, noballs)

In [63]:
def extras_by_bowler(bowler, match_id):
        wide = (deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler) & (deliveries['is_super_over'] == 0)]).wide_runs.sum()
        noball = (deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler) & (deliveries['is_super_over'] == 0)]).noball_runs.sum()
        byes = (deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler) & (deliveries['is_super_over'] == 0)]).bye_runs.sum()
        leg_byes = (deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler) & (deliveries['is_super_over'] == 0)]).legbye_runs.sum()
    
        return wide, noball

In [64]:
wide, noball = extras_by_bowler('SC Ganguly', 1)
print("Wides = ",wide," ","Noballs = ", noball)

Wides =  1   Noballs =  0


#### Economy rate of bowler X in match Y

In [65]:
def economy_rate(bowler, match_id):
    import math
    runs = runs_conceded_by_bowler(bowler, match_id)
    overs = overs_by_bowler(bowler, match_id)
    frac, whole = math.modf(overs)
    total = whole + frac*10/6
    return runs/total

In [66]:
economy_rate('SC Ganguly', 1)

5.25

#### Bowler taking highest number of wickets in a match X according to rank Y

In [67]:
def highest_wickets_by_bowler(match_id, rank = 1):
    bowlers = deliveries[deliveries['match_id'] == match_id].bowler.unique()
    wick_by_bowler = {}
    for bowler in bowlers:
        wick_by_bowler[bowler] = (wickets_by_bowler(bowler, match_id))
    data = sorted(wick_by_bowler.items(), key=lambda x:x[1], reverse=True)
    return data[rank-1]
    

In [68]:
highest_wickets_by_bowler(58,1)

('YK Pathan', 3)

#### Bowler having highest economy rate in a match X according to rank Y

In [69]:
def highest_economy_rate(match_id, rank = 1):
    bowlers = deliveries[deliveries['match_id'] == match_id].bowler.unique()
    eco_by_bowler = {}
    for bowler in bowlers:
        eco_by_bowler[bowler] = round((economy_rate(bowler, match_id)),2)
    data = sorted(eco_by_bowler.items(), key=lambda x:x[1])
    return data[rank-1]
    

In [70]:
highest_economy_rate(22)

('IK Pathan', 4.5)

## Economy rate of any bowler any match any rank

In [71]:
def overall_economy_rate_by_bowler(match_id=0, team=None,bowler=None):
    bowler_eco =[]
    if(match_id==0):
        if(team is not None):
            runs_conceded = deliveries[deliveries['bowling_team'] == team].total_runs.sum()
            balls = (deliveries[(deliveries['bowling_team'] == team) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)]).ball.count()
            dot_balls = (deliveries[(deliveries['bowling_team'] == team) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)& (deliveries['total_runs']==0)]).ball.count()
            overs = float(int(balls/6) + float(balls%6)/10)
            frac, whole = math.modf(overs)
            total = whole + frac*10/6
            economy_rate = runs_conceded/total
            bowler_eco.append((team, economy_rate, balls, overs, dot_balls))
        elif (bowler == None): 
            bowlers = deliveries.bowler.unique()
            for bowler in bowlers:
                runs_conceded = deliveries[deliveries['bowler'] == bowler].total_runs.sum()-deliveries[deliveries['bowler'] == bowler].bye_runs.sum()-deliveries[deliveries['bowler'] == bowler].legbye_runs.sum() 
                balls = (deliveries[(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)]).ball.count()
                dot_balls = (deliveries[(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)& (deliveries['total_runs']==0)]).ball.count()
                overs = float(int(balls/6) + float(balls%6)/10)
                frac, whole = math.modf(overs)
                total = whole + frac*10/6
                economy_rate = runs_conceded/total
                bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))
        else:
            runs_conceded = deliveries[deliveries['bowler'] == bowler].total_runs.sum()-deliveries[deliveries['bowler'] == bowler].bye_runs.sum()-deliveries[deliveries['bowler'] == bowler].legbye_runs.sum()
            balls = (deliveries[(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)]).ball.count()
            dot_balls = (deliveries[(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)& (deliveries['total_runs']==0)]).ball.count()
            overs = float(int(balls/6) + float(balls%6)/10)
            frac, whole = math.modf(overs)
            total = whole + frac*10/6
            economy_rate = runs_conceded/total
            bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))
    else:
        if (team is not None):
            runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team)].total_runs.sum()
            balls = (deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)]).ball.count()
            dot_balls = (deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)& (deliveries['total_runs']==0)]).ball.count()
            overs = float(int(balls/6) + float(balls%6)/10)
            frac, whole = math.modf(overs)
            total = whole + frac*10/6
            economy_rate = runs_conceded/total
            bowler_eco.append((team, economy_rate, balls, overs, dot_balls))
        elif (bowler == None): 
            bowlers = deliveries[deliveries['match_id'] == match_id].bowler.unique()
            for bowler in bowlers:
                runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].total_runs.sum()-deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].bye_runs.sum()-deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].legbye_runs.sum()
                balls = (deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)]).ball.count()
                dot_balls = (deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)& (deliveries['total_runs']==0)]).ball.count()
                overs = float(int(balls/6) + float(balls%6)/10)
                frac, whole = math.modf(overs)
                total = whole + frac*10/6
                economy_rate = runs_conceded/total
                bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))
        else:
            runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].total_runs.sum()-deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].bye_runs.sum()-deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].legbye_runs.sum()
            balls = (deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)]).ball.count()
            dot_balls = (deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler']== bowler) & (deliveries['wide_runs'] == 0) & (deliveries['is_super_over'] == 0) & (deliveries['noball_runs']==0)& (deliveries['total_runs']==0)]).ball.count()
            overs = float(int(balls/6) + float(balls%6)/10)
            frac, whole = math.modf(overs)
            total = whole + frac*10/6
            economy_rate = runs_conceded/total
            bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))
    
    return  bowler_eco


# Bowler who took best economy rate in match 2?


In [72]:
def bowler_balls(match_id=0, team = None, bowler=None, economy_rate=None, balls=None, overs=None,dot_balls=None, rank=1):
    eco_balls_over = overall_economy_rate_by_bowler(match_id=match_id,team=team, bowler=bowler)
    if(economy_rate is not None):
        eco_balls_over = sorted(eco_balls_over, key=lambda x: x[1])
        return eco_balls_over[rank-1][0],eco_balls_over[rank-1][1] 
    elif(balls is not None):
        eco_balls_over = sorted(eco_balls_over, key=lambda x: x[2], reverse=True)
        return eco_balls_over[rank-1][0],eco_balls_over[rank-1][2]
    elif(overs is not None):
        eco_balls_over = sorted(eco_balls_over, key=lambda x: x[3], reverse=True)
        return eco_balls_over[rank-1][0],eco_balls_over[rank-1][3]
    elif(dot_balls is not None):
        eco_balls_over = sorted(eco_balls_over, key=lambda x: x[4], reverse=True)
        return eco_balls_over[rank-1][0],eco_balls_over[rank-1][4]
    else:
        eco_balls_over = sorted(eco_balls_over, key=lambda x: x[1])
        return eco_balls_over[rank-1]

In [73]:
bowler_balls(team = 'Chennai Super Kings')

('Chennai Super Kings', 8.424275560415529, 1829, 304.5, 653)

In [74]:
deliveries.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')

## Runs conceded by any bowler any match any  rank

In [75]:
def overall_runs_conceded(match_id=0,team=None, bowler=None):
    over_all_runs = []
    if(match_id==0):
        if(team is not None):
            total_runs_conceded = deliveries[deliveries['bowling_team'] == team].total_runs.sum()
            bye_runs_conceded = deliveries[deliveries['bowling_team'] == team].bye_runs.sum()
            legbye_runs_conceded = deliveries[deliveries['bowling_team'] == team].legbye_runs.sum()
            wide = deliveries[deliveries['bowling_team'] == team].wide_runs.sum()
            noball = deliveries[deliveries['bowling_team'] == team].noball_runs.sum()
            four_boundary_conceded = deliveries[(deliveries['bowling_team'] == team)&(deliveries['batsman_runs']==4)].batsman_runs.count()
            six_boundary_conceded = deliveries[(deliveries['bowling_team'] == team)&(deliveries['batsman_runs']==6)].batsman_runs.count()
            runs = total_runs_conceded
            total_boundaries = four_boundary_conceded + six_boundary_conceded
            extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
            over_all_runs.append((team, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))          
        elif (bowler == None):
            bowlers = deliveries.bowler.unique()
            for bowler in bowlers:
                total_runs_conceded = deliveries[deliveries['bowler'] == bowler].total_runs.sum()
                bye_runs_conceded = deliveries[deliveries['bowler'] == bowler].bye_runs.sum()
                legbye_runs_conceded = deliveries[deliveries['bowler'] == bowler].legbye_runs.sum()
                wide = deliveries[deliveries['bowler']== bowler].wide_runs.sum()
                noball = deliveries[deliveries['bowler']== bowler].noball_runs.sum()
                four_boundary_conceded = deliveries[(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==4)].batsman_runs.count()
                six_boundary_conceded = deliveries[(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==6)].batsman_runs.count()
                runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
                total_boundaries = four_boundary_conceded + six_boundary_conceded
                extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))
                
                
        else:
            total_runs_conceded = deliveries[deliveries['bowler'] == bowler].total_runs.sum()
            wide = deliveries[deliveries['bowler']== bowler].wide_runs.sum()
            noball = deliveries[deliveries['bowler']== bowler].noball_runs.sum()    
            bye_runs_conceded = deliveries[deliveries['bowler'] == bowler].bye_runs.sum()
            legbye_runs_conceded = deliveries[deliveries['bowler'] == bowler].legbye_runs.sum()
            four_boundary_conceded = deliveries[(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==4)].batsman_runs.count()
            six_boundary_conceded = deliveries[(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==6)].batsman_runs.count()
            runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
            total_boundaries = four_boundary_conceded + six_boundary_conceded
            extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
            over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))
            
    else:
        if(team is not None):
            total_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team)].total_runs.sum()
            wide = deliveries[(deliveries['match_id']==match_id)& (deliveries['bowling_team'] == team)].wide_runs.sum()
            noball = deliveries[(deliveries['match_id']==match_id)& (deliveries['bowling_team'] == team)].noball_runs.sum()    
            bye_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team)].bye_runs.sum()
            legbye_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team)].legbye_runs.sum()
            four_boundary_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team)&(deliveries['batsman_runs']==4)].batsman_runs.count()
            six_boundary_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowling_team'] == team)&(deliveries['batsman_runs']==6)].batsman_runs.count()
            runs = total_runs_conceded
            total_boundaries = four_boundary_conceded + six_boundary_conceded
            extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
            over_all_runs.append((team, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))
    
        elif(bowler == None):
            bowlers = deliveries[deliveries['match_id'] == match_id].bowler.unique()
            for bowler in bowlers:
                total_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].total_runs.sum()
                bye_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].bye_runs.sum()
                legbye_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].legbye_runs.sum()
                wide = deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler)].wide_runs.sum()
                noball = deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler)].noball_runs.sum()
                four_boundary_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==4)].batsman_runs.count()
                six_boundary_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==6)].batsman_runs.count()
                runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
                total_boundaries = four_boundary_conceded + six_boundary_conceded
                extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))
    
                
        else:
            total_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].total_runs.sum()
            wide = deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler)].wide_runs.sum()
            noball = deliveries[(deliveries['match_id']==match_id)& (deliveries['bowler']== bowler)].noball_runs.sum()    
            bye_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].bye_runs.sum()
            legbye_runs_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].legbye_runs.sum()
            four_boundary_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==4)].batsman_runs.count()
            six_boundary_conceded = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)&(deliveries['batsman_runs']==6)].batsman_runs.count()
            runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
            total_boundaries = four_boundary_conceded + six_boundary_conceded
            extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
            over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))
    return over_all_runs

In [76]:
def bowler_runs(match_id=0, team=None, bowler=None, runs=None, wide=None, noball=None,extras=None, fours=None, sixes=None, boundary=None, rank=1):
    bowler_stats_data = overall_runs_conceded(match_id=match_id,team=team, bowler=bowler)
    if (runs is not None):
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[1], reverse=True)
        return bowler_stats_data[rank-1][0:2]
    elif (wide is not None):
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[2], reverse=True)
        return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][2]
    elif (noball is not None):
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[3], reverse=True)
        return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][3]
    elif (extras is not None):
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[4], reverse=True)
        return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][4]
    elif (fours is not None):
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[5], reverse=True)
        return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][5]
    elif (sixes is not None):
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[6], reverse=True)
        return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][6]
    elif (boundary is not None):
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[7], reverse=True)
        return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][7]
    else:
        bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[1], reverse=True)
        return bowler_stats_data[rank-1][0:2]

In [77]:
bowler_runs(team = 'Chennai Super Kings')

('Chennai Super Kings', 2568)

## Wickets by any bowler any match any rank

In [78]:
def wickets_function(match_id=0, team=None, bowler=None):
    if(match_id == 0):
        match_cond = True
    else:
        match_cond = (deliveries['match_id'] == match_id)
        
    if (team is None):
        team_cond = True
    else:
        team_cond = (deliveries['bowling_team'] == team)
        
    if (bowler == None):
        bowler_cond = True
    else: 
        bowler_cond = (deliveries['bowler'] == bowler)
        
    total_wickets = deliveries[(match_cond)&(bowler_cond)].dismissal_kind.count()
    run_outs = deliveries[(match_cond)&(bowler_cond) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
    wickets = total_wickets - run_outs
    bowler_wicket.append((bowler, wickets))

In [79]:
def bowler_wickets(match_id=0, team=None, bowler=None,rank =1):
    bowler_wicket =[]
    if(match_id==0):
        if (team is not None):
            total_wickets = deliveries[deliveries['bowling_team'] == team].dismissal_kind.count()
            wickets_total = total_wickets
            bowlers = deliveries[deliveries['bowling_team'] == team].bowler.unique()
            for bowler in bowlers:
                total_wickets = deliveries[deliveries['bowler'] == bowler].dismissal_kind.count()
                run_outs = deliveries[(deliveries['bowler'] == bowler) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                wickets = total_wickets - run_outs
                bowler_wicket.append((bowler, wickets, team, wickets_total))
        
        elif (bowler == None): 
            bowlers = deliveries.bowler.unique()
            for bowler in bowlers:
                total_wickets = deliveries[deliveries['bowler'] == bowler].dismissal_kind.count()
                run_outs = deliveries[(deliveries['bowler'] == bowler) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                wickets = total_wickets - run_outs
                bowler_wicket.append((bowler, wickets))
        else:
            total_wickets = deliveries[deliveries['bowler'] == bowler].dismissal_kind.count()
            run_outs = deliveries[(deliveries['bowler'] == bowler) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
            wickets = total_wickets - run_outs
            bowler_wicket.append((bowler, wickets))
    else:
        if (team is not None):
            total_wickets = deliveries[(deliveries['match_id'] == match_id) & (deliveries['bowling_team'] == team)].dismissal_kind.count()
            wickets_total = total_wickets 
            bowlers = deliveries[(deliveries['match_id'] == match_id)& (deliveries['bowling_team'] == team)].bowler.unique()
            for bowler in bowlers:
                total_wickets = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].dismissal_kind.count()
                run_outs = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                wickets = total_wickets - run_outs
                bowler_wicket.append((bowler, wickets, team, wickets_total))
        elif (bowler == None):
            bowlers = deliveries[(deliveries['match_id'] == match_id)].bowler.unique()
            for bowler in bowlers:
                total_wickets = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].dismissal_kind.count()
                run_outs = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                wickets = total_wickets - run_outs
                bowler_wicket.append((bowler, wickets))
        else:
            total_wickets = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler)].dismissal_kind.count()
            run_outs = deliveries[(deliveries['match_id'] == match_id)&(deliveries['bowler'] == bowler) & (deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
            wickets = total_wickets - run_outs
            bowler_wicket.append((bowler, wickets))
    bowler_wicket = sorted(bowler_wicket, key=lambda x: x[1], reverse=True)
    return  bowler_wicket[rank-1]




# Bowler who took highest number of wickets in match 2?
# The player who has taken most numbe of wickets in match 4?
# Highest wicket taker in overall match 3?
# Which bowler took more number of wickets in match 10?
# More number of wickets in match 12 are taken by?
# Leading wicket taker of match 18?
# Leading wicket taker of first match?
# Highest wicket taker of second match?
# Who secured more number of wickets in match 33?
# Which player took more number of wickets in match 54?
# How many wickets did SC Ganguly take in match match 1?

In [80]:
bowler_wickets(match_id=1,team='Kolkata Knight Riders', rank=1)

('AB Agarkar', 3, 'Kolkata Knight Riders', 10)

#### Purple Cap

In [81]:
def purple_cap():
    print("Purple Cap: ", bowler_wickets(rank = 1))

In [82]:
purple_cap()

Purple Cap:  ('Sohail Tanvir', 22)


# Training Part

In [83]:
def feature_extractor(sentence):
    features = {'highest': 0,
                'runs': 0,
                'wickets': 0,
                'boundary': 0,
                'wicket': 0,
                'match': 0,
                'leading': 0,
                'overall': 0,
                'bowler': 0,
                'deliveries': 0,
                'dismissals': 0,
                'overs': 0,
                'bowled': 0,
                'balls': 0,
                'fours': 0,
                'sixes': 0,
                'conceded': 0,
                'bowl': 0,
                'lead': 0,
                'economy': 0,
                'economical': 0,
                'economic' : 0,
                'economy rate': 0,
                'rate': 0,
                'maximum': 0,
                'dot': 0,
                'dots': 0,
                'give': 0,
                'giver': 0,
                'given': 0,
                'expensive': 0
               }
    tokenized_sentence = nltk.word_tokenize(sentence)
    word_counts = nltk.Counter(tokenized_sentence)
    for word in word_counts:
        if word in features:
            features[word] = word_counts[word]
    return features

def feature_extractor_rank(sentence):
    features = {'highest': 0,
                'leading': 0,
                'lead': 0,
                'high': 0,
                'more':0,
                'most': 0,
                'maximum': 0,
                'max':0,
                'top':0,
                'bottom':0,
                'upper':0,
                'min':0,
                'least': 0,
                'less':0,
                'minimum': 0,
                'least': 0,
                'low': 0,
                'lowest': 0,
                'best': 0,
                'good': 0,
                'better': 0,
                'first':0,
                'last':0,
                }
    tokenized_sentence = nltk.word_tokenize(sentence)
    word_counts = nltk.Counter(tokenized_sentence)
    for word in word_counts:
        if word in features:
            features[word] = word_counts[word]
    return features

def feature_extractor_balls(sentence):
    features = {'deliveries': 0,
                'balls': 0,
                'overs': 0,
                'over': 0,
                'economy': 0,
                'economical': 0,
                'economic' : 0,
                'economy rate': 0,
                'expensive': 0,
                'rate': 0,
                'dot': 0,
                'dots': 0,
                'dotballs': 0,
                'dot-balls': 0,
                    
                   }
    tokenized_sentence = nltk.word_tokenize(sentence)
    word_counts = nltk.Counter(tokenized_sentence)
    for word in word_counts:
        if word in features:
            features[word] = word_counts[word]
    return features

def feature_extractor_runs(sentence):
    features = {'sixes': 0,
                'six':0,
                '6s': 0,
                '4s': 0,
                'fours':0,
                'four':0,
                'concede':0,
                'conceded':0,
                'runs':0,
                'boundaries':0,
                'boundary':0,
                'score':0,
                'giver': 0,
                'given': 0,
                'give': 0,
                'gave':0,
                'wide':0,
                'wides': 0,
                'wide delivery':0,
                'wide deliveries':0,
                'wide ball':0,
                'wide balls': 0,
                'wide-ball':0,
                'wide-balls':0,
                'noball': 0,
                'noballs': 0,
                'extra': 0,
                'extras': 0,
                'dot': 0,
                'dots': 0,
                'dotballs': 0,
                'dot-balls': 0
                   }
    tokenized_sentence = nltk.word_tokenize(sentence)
    word_counts = nltk.Counter(tokenized_sentence)
    for word in word_counts:
        if word in features:
            features[word] = word_counts[word]
    return features

In [84]:
train_sentences = [# bowler and wickets related
                    ("how many wickets did sc ganguly take in match 1?", "bowler_wickets"),
                    ("ganguly's wickets in match 1?","bowler_wickets"),
                    ("dravid's wickets in match 15?","bowler_wickets"),
                    ("fernando's wickets in 1st match?","bowler_wickets"),
                    ("Dravid's wickets in 2nd match of IPL?","bowler_wickets"),
                    ("bowler who took highest number of wickets in match 2?", "bowler_wickets"),
                    ("the player who has taken most numbe of wickets in match 4?", "bowler_wickets"),
                    ("highest wicket taker in overall match 3?", "bowler_wickets"),
                    ("which bowler took more number of wickets in match 10?", "bowler_wickets"),
                    ("more number of wickets in match 12 are taken by?", "bowler_wickets"),
                    ("leading wicket taker of match 18?", "bowler_wickets"),
                    ("who secured more number of wickets in match 33?", "bowler_wickets"),
                    ("which player took more number of wickets in match 54?", "bowler_wickets"),
                    ("how many wickets did SC Ganguly take in match match 1?", "bowler_wickets"),
                    ("who secured more number of wickets in overall IPL?", "bowler_wickets"),
                    ("which bowler has most number of dismissals in match 19?","bowler_wickets"),
                    ("highest wicket taker in IPL?","bowler_wickets"),
                    ("how many wickets did sm pollock take in ipl?","bowler_wickets"),
                    ("CRD Fernando's wickets in match 31?","bowler_wickets"),
                    ("crd fernando's wickets in ipl?","bowler_wickets"),
                    ("how many wickets has ganguly taken in ipl ?","bowler_wickets"),
                    ("how many wickets had ganguly taken in ipl?","bowler_wickets"),
                    # bowler and balls related
                    ("how many balls did SR Tendulkar bowl in match 1?","bowler_balls" ),
                    ("which bowler bowled the least number of balls in match 10?","bowler_balls"),
                    ("who bowled the least number of deliveries in IPL?","bowler_balls"),
                    ("who bowled the most number of balls in IPL?", "bowler_balls"),
                    ("who bowled the least number of balls in IPL?", "bowler_balls"),
                    ("second highest count of number of balls bowled in a match","bowler_balls"),
                    ("how many overs did SC Ganguly bowl in match1?","bowler_balls"),
                    ("who bowled the most number of overs ib IPL?", "bowler_balls"),
                    ("who bowled the least number of overs ib IPL?", "bowler_balls"),
                    ("what were the number of overs bolwed by Dravid in final?","bowler_balls"),
                    ("third lowest count of number of overs bolwed in IPL","bowler_balls"),
                    ("bowler with best economy rate?", "bowler_balls"),
                    ("who is the player with lowest economy rate?","bowler_balls"),
                    ("the most economical bowler in match 10?","bowler_balls"),
                    ("who is the bowler having lowest economy rate in match 18?","bowler_balls"),
                    ("who is the bowler with high number of dot balls?","bowler_balls"),
                    ("most of dots are bowled by which player in match 15?","bowler_balls"),
                    ("who bowled most number of dot deliveries?","bowler_balls"),
                    ("who is the most expensive bowler?", "bowler_balls"),
                    # bowler and runs related
                    ("how many runs did sr tendulkar concede in ipl?","bolwer_runs"),
                    ("how many sixes did SC Ganguly give in IPL?","bowler_runs"),
                    ("who conceded most number of runs in IPL ?","bowler_runs"),
                    ("who conceded most nuber of fours in match 10?","bowler_runs"),
                    ("bowler who conceded most numer of sixes in match 12?","bowler_runs"),
                    ("which bowler conceded lowest boundaries in match 38?","bowler_runs"),
                    ("who has given less number of sixes in match 2?","bowler_runs"),
                    ("how many runs given by SC Ganguly in match 1?","bowler_runs"),
                    ("how many boundaries conceded by BC Lee?","bowler_runs"),
                    ("which bowler conceded maximum number of runs?","bowler_runs"),
                    ("who got hit with most number of sixes?","bowler_runs"),
                    ("what is the number of runs given by RA Jadeja?","bowler_runs"),
                    ("who has given most number of runs in final?","bowler_runs"),
                    ("bowler who gave less number of runs in semi-final?","bowler_runs"),
                    ("highest run giver in match 8?", "bowler_runs"),
                    ("most runs are given by which bowler in match 3?","bowler_runs")
             ]

train_sentences_rank = [
                    ("who is the bowler with lowest economy rate?","low"),#
                    ("least expensive bowler?","low"),#
                    ("which bowler gave lowest number of fours?","low"),
                    ("who bowled low number of noballs","low"),
                    ("who bowled lowest number of extras","low"),    
                    ("who is the lowest economic bowler","low"),#
                    ("Who is the bowler with lowest economy?","low"),#
                    ("which bowler conceded lowest boundaries in match 38?","low"),
                    ("bowler who gave less number of runs in semi-final?","low"),
                    ("who bowled the least number of overs ib IPL?", "low"),
                    ("third lowest count of number of overs bolwed in IPL","low"),
                    ("who is the player with lowest economy rate?","low"),#
                    ("who is the bowler having lowest economy rate in match 18?","low"),
                    ("who has given less number of sixes in match 2?","low"),
                    ("who is the bowler who gave less four in his innings?","low"),
                    ("which bowler bowled the least number of balls in match 10?","low"),
                    ("who bowled the least number of deliveries in IPL?","low"),
                    ("who bowled the least number of balls in IPL?", "low"),
                    ("bowler with best economy rate?", "high"),#
                    ("second highest count of number of balls bowled in a match","high"),
                    ("how many overs did SC Ganguly bowl in match1?","high"),#
                    ("who bowled the most number of overs ib IPL?", "high"),
                    ("who is the most economic bowler","high"),#
                    ("who is the bowler with high number of dot balls?","high"),
                    ("who is the bowler with high number of dot-balls?","high"),
                    ("most of dots are bowled by which player in match 15?","high"),
                    ("who bowled most number of dot deliveries?","high"),
                    ("who is the most expensive bowler?", "high"), 
                    ("how many wide balls did SC Ganguly bowl","high"),
                    ("highest wides are bowled by?","high"),
                    ("who bowled most number of wide-deliveries","high"),
                    ("who bowled most number of wide-delivery","high"),
                    ("the bowler with more number of extra deliveries", "high"),
                    ("who conceded most number of runs in IPL ?","high"),
                    ("who conceded most nuber of fours in match 10?","high"),
                    ("bowler who conceded most numer of sixes in match 12?","high"),
                    ("which bowler conceded maximum number of runs?","high"),
                    ("who got hit with most number of sixes?","high"),
                    ("who has given most number of runs in final?","high"),
                    ("highest run giver in match 8?", "high"),
                    ("who bowled most number of no-balls","high"),
                    ("who bowled most number of extras","high"),
                    ("most runs are given by which bowler in match 3?","high"),
                    ("highest boundary giver?","high"),
                    ("who gave highest number of 4s?","high"),
                    ("highet number of six are given by which bowler?","high"),
                    ("most number of boundaries are conceded by which bowler?","high"),
                    ("highest number of 4s are given by ?","high"),
                    ("the most economical bowler in match 10?","high"),
                    ("highest number of 6s are given by ?","high"),
                    ("highest number of boundaries are given by ?","high"),
                    ("who bowled the most number of balls in IPL?", "high"),
    
]

train_sentences_balls =[
                    ("how many balls did SR Tendulkar bowl in match 1?","balls" ),
                    ("which bowler bowled the least number of balls in match 10?","balls"),
                    ("who bowled the least number of deliveries in IPL?","balls"),
                    ("who bowled the most number of balls in IPL?", "balls"),
                    ("who bowled the least number of balls in IPL?", "balls"),
                    ("second highest count of number of balls bowled in a match","balls"),
                    ("how many overs did SC Ganguly bowl in match1?","overs"),
                    ("who bowled the most number of overs ib IPL?", "overs"),
                    ("who bowled the least number of overs ib IPL?", "overs"),
                    ("what were the number of overs bolwed by Dravid in final?","overs"),
                    ("third lowest count of number of overs bolwed in IPL","overs"),
                    ("bowler with best economy rate?", "economy_rate"),
                    ("who is the player with lowest economy rate?","economy_rate"),
                    ("the most economical bowler in match 10?","economy_rate"),
                    ("who is the bowler having lowest economy rate in match 18?","economy_rate"),
                    ("who is the most economic bowler","economic_rate"),
                    ("who is the lowest economic bowler","economy_rate"),
                    ("Who is the bowler with lowest economy?","economy_rate"),
                    ("who is the bowler with high number of dot balls?","dot_balls"),
                    ("who is the bowler with high number of dot-balls?","dot_balls"),
                    ("most of dots are bowled by which player in match 15?","dot_balls"),
                    ("who bowled most number of dot deliveries?","dot_balls"),
                    ("who is the most expensive bowler?", "economy_rate"),
                    
                ]

train_sentences_runs =[
                    ("how many wide balls did SC Ganguly bowl","wide_balls"),
                    ("highest wides are bowled by?","wide_balls"),
                    ("who bowled most number of wide-deliveries","wide_balls"),
                    ("who bowled most number of wide-delivery","wide_balls"),
                    ("who bowled low number of noballs","no_balls"),
                    ("who bowled most number of no-balls","no_balls"),
                    ("who bowled most number of extras","extras"),
                    ("who bowled lowest number of extras","extras"),
                    ("the bowler with more number of extra deliveries", "extras"),
                    ("how many runs did sr tendulkar concede in ipl?","runs"),
                    ("how many sixes did SC Ganguly give in IPL?","sixes"),
                    ("who conceded most number of runs in IPL ?","runs"),
                    ("who conceded most nuber of fours in match 10?","fours"),
                    ("bowler who conceded most numer of sixes in match 12?","sixes"),
                    ("which bowler conceded lowest boundaries in match 38?","boundaries"),
                    ("who has given less number of sixes in match 2?","sixes"),
                    ("how many runs given by SC Ganguly in match 1?","runs"),
                    ("how many boundaries conceded by BC Lee?","boundaries"),
                    ("which bowler conceded maximum number of runs?","runs"),
                    ("who got hit with most number of sixes?","sixes"),
                    ("what is the number of runs given by RA Jadeja?","runs"),
                    ("who has given most number of runs in final?","runs"),
                    ("bowler who gave less number of runs in semi-final?","runs"),
                    ("highest run giver in match 8?", "runs"),
                    ("most runs are given by which bowler in match 3?","runs"),
                    ("highest boundary giver?","boundaries"),
                    ("who gave highest number of 4s?","fours"),
                    ("which bowler gave lowest number of fours?","fours"),
                    ("who is the bowler who gave less four in his innings?","fours"),
                    ("who gave more number of 4s in match 6?", "fours"),
                    ("who gave more number of 6s in match 4?", "sixes"),
                    ("highet number of six are given by which bowler?","sixes"),
                    ("most number of boundaries are conceded by which bowler?","boundaries"),
                    ("highest number of 4s are given by ?","fours"),
                    ("highest number of 6s are given by ?","sixes"),
                    ("highest number of boundaries are given by ?","boundaries"),
                ]

In [85]:
candidate_questions = list(list(zip(*train_sentences))[0])

In [86]:
for question in candidate_questions:
    tagged_question = []
    for tagged_word in nltk.pos_tag(nltk.word_tokenize(question)):
        tagged_question.append(nltk.tuple2str(tagged_word))
#     print(' '.join(tagged_question))

In [87]:
corrected_train = """how/WRB many/JJ wickets/NNS did/VBD sc/NNP ganguly/NNP take/VB in/IN match/NN 1/CD ?/.
bowler/NN who/WP took/VBD highest/JJS number/NN of/IN wickets/NNS in/IN match/NN 2/CD ?/.
the/DT player/NN who/WP has/VBZ taken/VBN most/JJS numbe/JJ of/IN wickets/NNS in/IN match/NN 4/CD ?/.
highest/JJS wicket/NN taker/NN in/IN overall/JJ match/NN 3/CD ?/.
tanveer/NNP 's/POS wickets/NNS in/IN ipl/NN ?/.
ganguly/NNP 's/POS wickets/NNS in/IN IPL/NN ?/.
wickets/NNS of/IN ganguly/NNP in/IN IPL/NN ?/.
wickets/NNS of/IN sc/NNP ganguly/NNP in/IN IPL/NN ?/.
wickets/NNS of/IN Ganguly/NNP in/IN IPL/NN ?/.
wickets/NNS of/IN Sc/NNP ganguly/NNP in/IN IPL/NN ?/.
wickets/NNS of/IN kolkata/NNP knight/NNP riders/NNP in/IN IPL/NN ?/.
wickets/NNS of/IN KKR/NNP in/IN IPL/NN ?/.
wickets/NNS of/IN royal/NNP challengers/NNP in/IN IPL/NN ?/.
dravid/NNP 's/POS wickets/NNS in/IN match/NN 15/CD ?/.
ds/NNP kulkarni/NNP 's/POS wickets/NNS in/IN ipl/NN ?/.
DR/NNP Smith/NNP 's/POS wickets/NNS in/IN ipl/NN ?/.
dw/NNP steyn/NNP 's/POS wickets/NNS in/IN ipl/NN ?/.
Kumar/NNP 's/POS wickets/NNS in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ ganguly/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ Royal/NNP Challengers/NNP Bangalore/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ royal/NNP challengers/NNP bangalore/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS had/VBD Royal/NNP Challengers/NNP Bangalore/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS had/VBD royal/NNP challengers/NNP bangalore/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ Kolkata/NNP Knight/NNP Riders/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ kolkata/NNP knight/NNP riders/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS had/VBD Kolkata/NNP Knight/NNP Riders/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS had/VBD kolkata/NNP knight/NNP riders/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS did/VBD Royal/NNP Challengers/NNP Bangalore/NNP take/VB in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS did/VBD royal/NNP challengers/NNP bangalore/NNP take/VB in/IN ipl/NN ?/.

how/WRB many/JJ wickets/NNS did/VBD Kolkata/NNP Knight/NNP Riders/NNP take/VB in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS did/VBD kolkata/NNP knight/NNP riders/NNP take/VB in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ SC/NNP Ganguly/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ Ganguly/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS has/VBZ sc/NNP ganguly/NNP taken/VBN in/IN ipl/NN ?/.
how/WRB many/JJ wickets/NNS had/VBD dravid/NNP taken/VBN in/IN ipl/NN ?/.
which/WDT bowler/NN took/VBD more/JJR number/NN of/IN wickets/NNS in/IN match/NN 10/CD ?/.
more/RBR number/NN of/IN wickets/NNS in/IN match/NN 12/CD are/VBP taken/VBN by/IN ?/.
leading/VBG wicket/NN taker/NN of/IN match/NN 18/CD ?/.
who/WP secured/VBD more/JJR number/NN of/IN wickets/NNS in/IN match/NN 33/CD ?/.
which/WDT player/NN took/VBD more/JJR number/NN of/IN wickets/NNS in/IN match/NN 54/CD ?/.
how/WRB many/JJ wickets/NNS did/VBD SC/NNP Ganguly/NNP take/VB in/IN match/NN match/NN 1/CD ?/.
who/WP secured/VBD more/JJR number/NN of/IN wickets/NNS in/IN overall/JJ IPL/NNP ?/.
which/WDT bowler/NN has/VBZ most/RBS number/NN of/IN dismissals/NNS in/IN match/NN 19/CD ?/.
highest/JJS wicket/NN taker/NN in/IN IPL/NNP ?/.
how/WRB many/JJ wickets/NNS did/VBD sm/NNP pollock/NNP take/NN in/IN ipl/NNP ?/.
CRD/NNP Fernando/NNP 's/POS wickets/NNS in/IN match/NN 31/CD ?/.
crd/NNP fernando/NNP 's/POS wickets/NNS in/IN ipl/NN ?/.
fernando/NNP 's/POS wickets/NNS in/IN ipl/NN ?/.
ganguly/NNP 's/POS wickets/NNS in/IN match/NN 1/CD ?/.
how/WRB many/JJ balls/NNS did/VBD SR/NNP Tendulkar/NNP bowl/NN in/IN match/NN 1/CD ?/.
which/WDT bowler/NN bowled/VBD the/DT least/JJS number/NN of/IN balls/NNS in/IN match/NN 10/CD ?/.
who/WP bowled/VBD the/DT least/JJS number/NN of/IN deliveries/NNS in/IN IPL/NNP ?/.
who/WP bowled/VBD the/DT most/RBS number/NN of/IN balls/NNS in/IN IPL/NNP ?/.
who/WP bowled/VBD the/DT least/JJS number/NN of/IN balls/NNS in/IN IPL/NNP ?/.
second/JJ highest/JJS count/NN of/IN number/NN of/IN balls/NNS bowled/VBN in/IN a/DT match/NN
how/WRB many/JJ overs/NNS did/VBD SC/NNP Ganguly/NNP bowl/NN in/IN match1/NN ?/.
who/WP bowled/VBD the/DT most/RBS number/NN of/IN overs/NNS ib/VBP IPL/NNP ?/.
who/WP bowled/VBD the/DT least/JJS number/NN of/IN overs/NNS ib/VBP IPL/NNP ?/.
what/WDT were/VBD the/DT number/NN of/IN overs/NNS bolwed/VBN by/IN Dravid/NNP in/IN final/JJ ?/.
third/JJ lowest/JJS count/NN of/IN number/NN of/IN overs/NNS bolwed/VBN in/IN IPL/NNP
bowler/NN with/IN best/JJ economy/NN rate/NN ?/.
who/WP is/VBZ the/DT player/NN with/IN lowest/JJ economy/NN rate/NN ?/.
the/DT most/RBS economical/JJ bowler/NN in/IN match/NN 10/CD ?/.
who/WP is/VBZ the/DT bowler/NN having/VBG lowest/JJS economy/NN rate/NN in/IN match/NN 18/CD ?/.
who/WP is/VBZ the/DT bowler/NN with/IN high/JJ number/NN of/IN dot/NN balls/NNS ?/.
most/JJS of/IN dots/NNS are/VBP bowled/VBN by/IN which/WDT player/NN in/IN match/NN 15/CD ?/.
who/WP bowled/VBD most/JJS number/NN of/IN dot/NN deliveries/NNS ?/.
who/WP is/VBZ the/DT most/RBS expensive/JJ bowler/NN ?/.
how/WRB many/JJ runs/NNS did/VBD sr/NNP tendulkar/NN concede/NN in/IN ipl/NN ?/.
how/WRB many/JJ sixes/NNS did/VBD ganguly/NNP give/VB in/IN IPL/NNP ?/.
who/WP conceded/VBD most/JJS number/NN of/IN runs/NNS in/IN IPL/NNP ?/.
who/WP conceded/VBD most/JJS nuber/NN of/IN fours/NNS in/IN match/NN 10/CD ?/.
bowler/NN who/WP conceded/VBD most/RBS numer/NNS of/IN sixes/NNS in/IN match/NN 12/CD ?/.
which/WDT bowler/NN conceded/VBD lowest/JJS boundaries/NNS in/IN match/NN 38/CD ?/.
who/WP has/VBZ given/VBN less/JJR number/NN of/IN sixes/NNS in/IN match/NN 2/CD ?/.
how/WRB many/JJ runs/NNS given/VBN by/IN SC/NNP Ganguly/NNP in/IN match/NN 1/CD ?/.
how/WRB many/JJ boundaries/NNS conceded/VBN by/IN BC/NNP Lee/NNP ?/.
which/WDT bowler/NN conceded/VBD maximum/JJ number/NN of/IN runs/NNS ?/.
who/WP got/VBD hit/VBN with/IN most/JJS number/NN of/IN sixes/NNS ?/.
what/WP is/VBZ the/DT number/NN of/IN runs/NNS given/VBN by/IN Ra/NNP Jadeja/NNP ?/.
who/WP has/VBZ given/VBN most/JJS number/NN of/IN runs/NNS in/IN final/JJ ?/.
bowler/NN who/WP gave/VBD less/JJR number/NN of/IN runs/NNS in/IN semi-final/JJ ?/.
highest/JJS run/NN giver/NN in/IN match/NN 8/CD ?/.
most/JJS runs/NNS are/VBP given/VBN by/IN which/WDT bowler/NN in/IN match/NN 3/CD ?/."""

In [88]:
tagged_question = []
for word in word_tokenize(corrected_train):
    tagged_question.append(nltk.str2tuple(word))
#print(tagged_question)
train_data = []
train_data.append(tagged_question)

In [89]:
from nltk.data import load
pos_tag = load('taggers/maxent_treebank_pos_tagger/english.pickle')

In [90]:
#Importing required modules for brill tagger
from nltk import brill_trainer, brill
from nltk import BrillTaggerTrainer

In [91]:
templates = [brill.Template(brill.Pos([1,1])),
    brill.Template(brill.Pos([2,2])),
    brill.Template(brill.Pos([1,2])),
    brill.Template(brill.Pos([1,3])),
    brill.Template(brill.Pos([1,1])),
    brill.Template(brill.Pos([2,2])),
    brill.Template(brill.Pos([1,2])),
    brill.Template(brill.Pos([1,3])),
    brill.Template(brill.Word([-1, -1])),
    brill.Template(brill.Word([-1, -1]))]

In [92]:
# from nltk.corpus import brown
# training_data = nltk.corpus.brown.tagged_sents()
unigram_tagger = nltk.UnigramTagger(train_data)

In [93]:
from nltk.tag import SequentialBackoffTagger

In [94]:
class POSTagger(SequentialBackoffTagger):
    def __init__(self, *args, **kwargs):
        SequentialBackoffTagger.__init__(self, *args, **kwargs)
    
    def choose_tag(self, tokens, index, history):
        word = tokens[index]
        return nltk.pos_tag([word])[0][1] if word != "" else None
custom_pos_tagger = POSTagger()

In [95]:
trainer_initial_pos = BrillTaggerTrainer(initial_tagger= custom_pos_tagger, templates=templates, trace=3, deterministic=True)

In [96]:
brill_tagger = trainer_initial_pos.train(train_data)
# training the corrected data

TBL train (fast) (seqs: 1; tokens: 949; tpls: 10; min score: 2; min acc: None)
Finding initial useful rules...
    Found 656 useful rules.

           B      |
   S   F   r   O  |        Score = Fixed - Broken
   c   i   o   t  |  R     Fixed = num tags changed incorrect -> correct
   o   x   k   h  |  u     Broken = num tags changed correct -> incorrect
   r   e   e   e  |  l     Other = num tags changed incorrect -> incorrect
   e   d   n   r  |  e
------------------+-------------------------------------------------------
  82  82   0   0  | .->None if Pos:None@[1]
  82  82   0   0  | None-> if Pos:.@[1]
  82  82   0   0  | .->None if Word:@[-1]
  18  23   5   1  | NN->NNP if Pos:IN@[2]
  14  14   0   0  | NN->NNP if Pos:POS@[1,2]
  11  12   1   0  | NN->NNP if Pos:NNP@[1,2]
  11  11   0   0  | VBN->VBD if Word:who@[-1]
   7   7   0   0  | NNS->NNP if Pos:NNP@[1]
   5   6   1   0  | RBR->JJR if Pos:NN@[1]
   4   4   0   0  | NNS->NNP if Word:knight@[-1]
   3   3   0   0  | NN->NNP if

In [97]:
test_sentences = """
ganguly's wickets in match 1? how many wickets did ganguly take in 1st match in IPL? Tanveer's total wickets in ipl?
"""

In [98]:
 brill_tagger.tag(word_tokenize(test_sentences))

[('ganguly', 'NNP'),
 ("'s", 'POS'),
 ('wickets', 'NNS'),
 ('in', 'IN'),
 ('match', 'NN'),
 ('1', 'CD'),
 ('?', '.'),
 ('how', 'WRB'),
 ('many', 'JJ'),
 ('wickets', 'NNS'),
 ('did', 'VBD'),
 ('ganguly', 'NNP'),
 ('take', 'VB'),
 ('in', 'IN'),
 ('1st', 'CD'),
 ('match', 'NN'),
 ('in', 'IN'),
 ('IPL', 'NNP'),
 ('?', '.'),
 ('Tanveer', 'NNP'),
 ("'s", 'POS'),
 ('total', 'JJ'),
 ('wickets', 'NNS'),
 ('in', 'IN'),
 ('ipl', 'NN'),
 ('?', '.')]

tagged_sentences = []
for sent in train_sentences:
    tagged_sentences.append(nltk.pos_tag(word_tokenize(sent[0])))

tagged_sentences[0][4][1]

sentences = (sent[0] for sent in train_sentences)

sentences_statement = ""
for sent in sentences:
    sentences_statement = sentences_statement + sent

sentences_statement

tokenizer = RegexpTokenizer(r'\w+')

for sent in sent_tokenize(sentences_statement):
    
    

In [99]:
naive_bayes_classifier = nltk.NaiveBayesClassifier.train([(feature_extractor(sentence), label) for sentence, label in train_sentences])

In [100]:
match_id = 0
team=None
bowler=None
runs=None
wide=None
noball=None
extras=None
fours=None
sixes=None
boundary=None
economy_rate=None
balls=None
overs=None
dot_balls=None
rank=1

In [101]:
# lemmatizer = WordNetLemmatizer()
# words_lem = []
# for i in word:
#     words_lem.append(lemmatizer.lemmatize(i))
# pos_tags = nltk.pos_tag(words_lem)

# pos_tags

names = [token for token, pos in pos_tags if pos.startswith('NNP')]
nouns = [token for token, pos in pos_tags if pos.startswith('NN')]
verbs = [token for token, pos in pos_tags if pos.startswith('V')]
adjectives = [token for token, pos in pos_tags if pos.startswith('J')]
numbers = [token for token, pos in pos_tags if pos.startswith('CD')]

print(names)
print(nouns)
print(verbs)
print(adjectives)
print(numbers)

Nouns = []
Verb = []
import re
reg = [r'NN|NN.',r'VB|VB.',r'RB|RB.',r'W..',r'CD', r'JJ|JJ.']
noun_reg = r'NN|NN.'
verb_reg = r'VB|VB.'
adverb_reg = r'RB|RB.'
question_reg = r'W..'
number_reg = r'CD'
nouns = []
verbs = []
adverbs = []
question = []
number = []
adjectives = []

for i in pos_tags:
    
    print(re.search(noun_reg, i[0]+i[1]))

        

In [102]:
# chunkGram = r"""Chunk:{<NNP.?>*<NNP.?>*}"""
# chunkParser = nltk.RegexpParser(chunkGram)
# chunked_noun = chunkParser.parse(pos_tags)

# Question

In [104]:
query  = input("Enter a number in words:")
tokens = nltk.word_tokenize(query)
query_tokens = []

Enter a number in words:ganguly wicket in match 1


In [106]:
import nltk
import re
import inflect

ModuleNotFoundError: No module named 'inflect'

In [None]:
p = inflect.engine()
word_to_number_mapping = {}
for i in range(1, 100):
    word_form = p.number_to_words(i)
    word_to_number_mapping[word_form] = i

In [None]:
query_tokens = []
for word in tokens:
    if(re.match(r'\d+', word)):
        m=re.findall(r'\d+', word)
        query_tokens.append(m[0])
    else:
        query_tokens.append(word)

In [None]:
def proper_string(query):
    new1 = []
    for word in query:
        if p.singular_noun(word)==False:
            new1.append(word)
        else:
            new1.append(p.singular_noun(word))
    return new1

In [None]:
new_query = proper_string(query_tokens)

In [None]:
def word2num(words):
    num = 0
    a = []
    b = []
    temp = []
    final = []
    keys_num = []
    num_list = {'first':1,'second':2,'third':3,'fourth':4,'fifth':5,'sixth':6,'seventh':7,'eighth':8,'nineth':9,
            'tenth':10,'eleventh':11,'twelfth':12,'thirteenth':13,'fourteenth':14,'fifteenth':15,'sixteenth':16,
            'seventeenth':17,'eighteenth':18,'nineteenth':19,'twentieth':20,'thirtieth':30,'fortieth':40,
            'fiftieth':50,'sixtieth':60,'seventieth':70,'eightieth':80,'ninetieth':90,'hundredth':100}
    for word in words:
        try:
            a.append(word_to_number_mapping[word])
            keys_num.append(words.index(word))
            
        except Exception as exception:
            if word not in num_list:
                final.append(word)
            if word in num_list:
                a.append(num_list[word])
                keys_num.append(words.index(word))
    for i in range(len(words)):
        for j in range(len(keys_num)):
            if(i == keys_num[j]):
                words[i] = a[j]

    for j in range(len(words)):
        if((type(words[j-1])==int)&(type(words[j])==int)):
            words[j-1] = words[j-1]+words[j]
            del words[j]
        break 
    
   
    return words,a ,keys_num        
    

In [None]:
words,a,keys_num = word2num(new_query)
for i in range(len(words)):
    for j in range(len(keys_num)):
        if(i == keys_num[j]):
            words[i] = a[j]

In [None]:
s = []
try:
    for j in range(len(words)):
        if((type(words[j])==str)&(type(words[j+1])==int)):
            s.append(words[j])
        if((type(words[j+1])!=int)):
            s.append(words[j])
        if((type(words[j])==int)&(type(words[j+1])==int)):
            words[j] = words[j+1]+words[j]
            s.append(words[j])
            del words[j+1]
            #j = j+1            
    
    
except:
    0

s.append(words[len(words)-1]) 


In [None]:
user_query =""
for i in s:
    user_query = user_query+" "+str(i) + " "

In [None]:
user_query

In [None]:
classifier = naive_bayes_classifier.classify(feature_extractor(query))

In [None]:
classifier

In [None]:
pos_tags_brill = brill_tagger.tag(word_tokenize(user_query))

In [None]:
#stop_words = set(stopwords.words("english"))
#word = [w for w in words
#        if w.lower() not in stop_words]

In [None]:
pos_tags_brill

In [None]:
chunkGram = r"""Chunk:{<NNP.?>*<NNP.?>*}"""
chunkParser = nltk.RegexpParser(chunkGram)

In [None]:
chunked_noun_brill = chunkParser.parse(pos_tags_brill)

In [None]:
# chunked_noun_brill.draw()

In [None]:
chunked_words = None
for node in chunked_noun_brill:
    if hasattr(node, "label"):
        words = [word for word, tag in node.leaves()]
        chunked_words = ' '.join(words)


In [None]:
print(chunked_words)

In [None]:
# for node in chunked_noun:
#     if hasattr(node, "label"):
#         words = [word for word, tag in node.leaves()]
#         chunked_words = ' '.join(words)
#     else:
#         chunked_words=None

In [None]:
names = [token for token, pos in pos_tags_brill if pos.startswith('NNP')] 
nouns = [token for token, pos in pos_tags_brill if pos.startswith('NN') and token not in names] 
verbs = [token for token, pos in pos_tags_brill if pos.startswith('V')]  
adjectives = [token for token, pos in pos_tags_brill if pos.startswith('J')] 
numbers = [token for token, pos in pos_tags_brill if pos.startswith('CD')]

In [None]:
# print(nouns)
# print(verbs)
# print(adjectives)

### Bowler full name

In [None]:
batsman_list = list(deliveries.batsman.unique())
bowler_list = list(deliveries.bowler.unique())
fielder_list = list(deliveries.fielder.unique())
team_list = list(deliveries.bowling_team.unique())

In [None]:
def function_return_fullName(data):
    for w in (batsman_list or bowler_list or fielder_list):
        if w.lower() == data.lower():
            return w
        elif str(w.split()[len(w.split())-1]).lower() == data.lower():
            return w

In [None]:
teams_abbr = [ ('Kolkata Knight Riders', 'kolkata knight riders', 'kolkata', 'kolkata riders', 'kolkata rider', 'kolkata knights', 'kolkata knight', 'knight riders', 'knight rider', 'riders', 'k k riders', 'k knight riders', 'kkr'), 
               ('Chennai Super Kings', 'chennai super kings', 'chennai', 'chennai kings', 'chennai super', 'super kings', 'chennai kings', 'csk'),  
               ('Rajasthan Royals', 'rajasthan royals', 'rajasthan', 'rajasthan royal', 'rr'),
               ('Mumbai Indians', 'mumbai indians', 'mumbai', 'mumbai indian', 'indians', 'indian', 'mi'), 
               ('Deccan Chargers', 'deccan chargers', 'deccan', 'deccan charger', 'chargers', 'charger', 'dc'), 
               ('Kings XI Punjab', 'kings xi punjab', 'kings', 'punjab', 'kings XI', 'kings punjab',  'kxip','kp', 'kxp'), 
               ('Royal Challengers Bangalore', 'royal challengers bangalore', 'bangalore', 'royal challengers', 'royal challenger', 'royal bangalore', 'challengers bangalore', 'challenger bangalore', 'rcb', 'rb'),
               ('Delhi Daredevils', 'delhi daredevils', 'delhi', 'daredevils', 'delhi daredevil', 'dd') ]

In [None]:
def funtion_return_fullTeamName(data):
    for w in teams_abbr:
        if data.lower() in w:
            return(w[0])

In [None]:
# funtion_return_fullTeamName('kkr')

In [None]:
chunked_words

In [None]:
bowler_name = function_return_fullName(chunked_words)
team_name = funtion_return_fullTeamName(chunked_words)

In [None]:
bowler_name

In [None]:
team_name

In [None]:
team_name in team_list

In [None]:
if (bowler_name in batsman_list or bowler_list or fielder_list):
    bowler = bowler_name

In [None]:
if( team_name in team_list):
    team = team_name

In [None]:
bowler

In [None]:
team

### Classifier extraction

In [None]:
naive_bayes_classifier_rank = nltk.NaiveBayesClassifier.train([(feature_extractor_rank(sentence), label) for sentence, label in train_sentences_rank])
naive_bayes_classifier_balls = nltk.NaiveBayesClassifier.train([(feature_extractor_balls(sentence), label) for sentence, label in train_sentences_balls])
naive_bayes_classifier_runs = nltk.NaiveBayesClassifier.train([(feature_extractor_balls(sentence), label) for sentence, label in train_sentences_runs])

In [None]:
classifier_rank = naive_bayes_classifier_rank.classify(feature_extractor_rank(query))
if(classifier_rank=='low'):
    rank = 0
elif(classifier_rank=='high'):
    rank = 1

In [None]:
print(classifier_rank)

In [None]:
print (rank)

In [None]:
boundaries_list = ['4s','6s','sixes','fours']
for i in pos_tags_brill:
    if (i[1]=='CD' and i[0] not in boundaries_list):
        match_id=int(i[0])

In [None]:
if(classifier=='bowler_wickets'):

    print(bowler_wickets(match_id= match_id, team= team, bowler= bowler, rank=rank))


    
    
if(classifier=='bowler_balls'):
    classifier_balls = naive_bayes_classifier_balls.classify(feature_extractor_balls(query))
    
    if(classifier_balls == 'economy_rate'):
        economy_rate = classifier_balls
    elif(classifier_balls == 'balls'):
        balls = classifier_balls
    elif(classifier_balls == 'overs'):
        overs = classifier_balls
    elif(classifier_balls == 'dot_balls'):
        dot_balls = classifier_balls

    print(bowler_balls(match_id=match_id,team=team, bowler=bowler,economy_rate=economy_rate, balls=balls, overs=overs,dot_balls=dot_balls, rank=rank))


    
    
if(classifier=='bowler_runs'):
    
    classifier_runs = naive_bayes_classifier_balls.classify(feature_extractor_runs(query))
    
    if(classifier_runs == 'runs'):
        runs = classifier_runs
    elif(classifier_runs == 'wide_balls'):
        wide = classifier_runs
    elif(classifier_runs == 'no_balls'):
        noball = classifier_runs
    elif(classifier_runs == 'extras'):
        extras = classifier_runs
    elif(classifier_runs == 'fours'):
        fours = classifier_runs
    elif(classifier_runs == 'sixes'):
        sixes = classifier_runs
        
    print(bowler_runs(match_id=match_id,team=team,bowler=bowler, runs = runs, wide=wide, noball=noball,extras=extras, fours=fours, sixes=sixes, rank=rank))

In [None]:
if (classifier=='bowler_wickets'):
    print(bowler_wickets(match_id= match_id, bowler= bowler, rank=rank))
if (classifier=='bowler_runs'):
    print(bowler_runs(match_id=match_id,bowler=bowler, runs = runs, wide=wide, noball=noball,extras=extras, fours=fours, sixes=sixes, rank=rank))
if(classifier=='bowler_balls'):
    print(bowler_balls(match_id=match_id, bowler=bowler,economy_rate=economy_rate, balls=balls, overs=overs,dot_balls=dot_balls, rank=rank))

Number
	
Tag
	
Description
1. 	CC 	Coordinating conjunction
2. 	CD 	Cardinal number
3. 	DT 	Determiner
4. 	EX 	Existential there
5. 	FW 	Foreign word
6. 	IN 	Preposition or subordinating conjunction
7. 	JJ 	Adjective
8. 	JJR 	Adjective, comparative
9. 	JJS 	Adjective, superlative
10. 	LS 	List item marker
11. 	MD 	Modal
12. 	NN 	Noun, singular or mass
13. 	NNS 	Noun, plural
14. 	NNP 	Proper noun, singular
15. 	NNPS 	Proper noun, plural
16. 	PDT 	Predeterminer
17. 	POS 	Possessive ending
18. 	PRP 	Personal pronoun
19. 	PRP$ 	Possessive pronoun
20. 	RB 	Adverb
21. 	RBR 	Adverb, comparative
22. 	RBS 	Adverb, superlative
23. 	RP 	Particle
24. 	SYM 	Symbol
25. 	TO 	to
26. 	UH 	Interjection
27. 	VB 	Verb, base form
28. 	VBD 	Verb, past tense
29. 	VBG 	Verb, gerund or present participle
30. 	VBN 	Verb, past participle
31. 	VBP 	Verb, non-3rd person singular present
32. 	VBZ 	Verb, 3rd person singular present
33. 	WDT 	Wh-determiner
34. 	WP 	Wh-pronoun
35. 	WP$ 	Possessive wh-pronoun
36. 	WRB 	Wh-adverb 

In [None]:
#match number