# IPL ChatBox - (MERGED)

### PROBLEM STATEMENT : 

##### To develop a Q&A chat bot which responds to user's queries based on NLP statistics.

#### Importing Packages

In [1]:
import math
import nltk
import re
import inflect
import json
import numpy as np
import pandas as pd
import random

from nltk.tag import SequentialBackoffTagger
from nltk.tokenize import word_tokenize, sent_tokenize, PunktSentenceTokenizer, RegexpTokenizer
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import stopwords, state_union
from nltk.wsd import lesk
from nltk.tag import UnigramTagger, BigramTagger, BrillTagger, brill, BrillTaggerTrainer
from nltk.chunk import ne_chunk

#### CSV Files

In [2]:
matches = pd.read_csv('matches.csv')
deliveries = pd.read_csv('deliveries.csv')

#### Ouput Main Function

In [3]:
def User_Input(question):
    output = chunker(question)
    return output

#### Functions 

In [4]:
#    -- KEY --
##   B = Batsman;  M = Match;  R = Runs;  Team = T;  Wickets = W;   

class Executors:
    
    def __init__(self):
        self.matches = pd.read_csv("matches.csv")
        self.deliveries = pd.read_csv('deliveries.csv')
    
    
    #  BATSMAN STATS --
    
    ## Total Runs scored by B in M
    def runs_batsman_match(self, batsman_name, match_id):
        x = self.deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
        runs = x[match_id][batsman_name]
        return {'batsman':batsman_name, 'runs':runs, 'match':match_id}

    ## Total Runs scored in M by T
    def total_runs_team_match(self, team, match_id):
        x = self.deliveries.groupby(['match_id','batting_team'])['total_runs'].sum()
        total_runs_match = x[match_id][team] 
        return {'team':team, 'runs':total_runs_match, 'match':match_id}

    ## Max scorer M
    def max_score_batsman_match(self, match_id):
        x = self.deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
        name = x[match_id].idxmax()
        runs = x[match_id].max()
        return {'batsman':name, 'runs':runs, 'match':match_id}

    ## Min scorer in M
    def min_score_batsman_match(self, match_id):
        x = self.deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
        name = x[match_id].idxmin()
        runs = x[match_id].min()
        return {'batsman':name, 'runs':runs, 'match':match_id}

    ## Max scorer in M by T
    def max_score_batsman_match_inTeam(self, match_id, team):
        x = self.deliveries.groupby(['match_id','batting_team', 'batsman'])['batsman_runs'].sum()
        name = x[match_id][team].idxmax()
        runs = x[match_id][team].max()        
        return {'batsman':name, 'team':team, 'runs':runs, 'match':match_id}

    ## Min scorer in M
    def min_score_batsman_match(self, match_id):
        x = self.deliveries.groupby(['match_id', 'batsman'])['batsman_runs'].sum()
        name = x[match_id].idxmin()
        runs = x[match_id].min()
        return {'batsman':name, 'runs':runs, 'match':match_id}

    ## Min scorer in M by T
    def min_score_batsman_match_inTeam(self, match_id, team):
        x = self.deliveries.groupby(['match_id','batting_team', 'batsman'])['batsman_runs'].sum()
        name = x[match_id][team].idxmin()
        runs = x[match_id][team].min()
        return {'batsman':name, 'team':team, 'runs':runs, 'match':match_id}

    ## ORANGE CAP: Highest scorer 
    def highest_scorer(self):
        player_name = self.deliveries.groupby('batsman')['batsman_runs'].sum().sort_values(ascending =False).iloc[0:1]
        a = pd.DataFrame(player_name).reset_index()
        batsman = list(a['batsman'])
        runs = list(a['batsman_runs'])
        return {'batsman':batsman[0], 'runs':runs[0] }

    ## Total batsman runs by B
    def total_runs_batsman_IPL(self, batsman):
        x = self.deliveries.groupby(['batsman'])['batsman_runs'].sum()
        runs_batsman = x[batsman]
        return {'batsman':batsman, 'runs':runs_batsman }

    ## Total runs by T
    def total_runs_team_IPL(self, team):
        x = self.deliveries.groupby(['batting_team'])['total_runs'].sum()
        total_runs_IPL = x[team]
        return {'team':team, 'runs':total_runs_IPL}

    ## Dot balls faced by B in M
    def dot_balls_batsman_match(self, batsman, match_id):
        x = self.deliveries[(self.deliveries['batsman'] == batsman) & (self.deliveries['match_id'] == match_id) & (self.deliveries['total_runs'] == 0)]
        dot_balls = x.shape[0]
        return {'batsman':batsman, 'dot_balls':dot_balls, 'match':match_id}

    ## 4's by B in M
    def b_4_batsman_match(self, batsman, match_id):
        x = self.deliveries[(self.deliveries['batsman'] == batsman) & (self.deliveries['match_id'] == match_id) & (self.deliveries['total_runs'] == 4)]
        b_4 = x.shape[0]
        return {'batsman':batsman, 'fours':b_4, 'match':match_id}

    ## 6's by B in M
    def b_6_batsman_match(self, batsman, match_id):
        x = self.deliveries[(self.deliveries['batsman'] == batsman) & (self.deliveries['match_id'] == match_id) & (self.deliveries['total_runs'] == 6)]
        b_6 = x.shape[0]
        return {'batsman':batsman, 'sixes':b_6, 'match':match_id}
    
    ## 4's by T in M
    def team_fours(self, match_id, batting_team):
        team_fours = self.deliveries[self.deliveries.batsman_runs == 4]
        team_fours_count = team_fours.groupby(['match_id','batting_team']).count()['inning']
        return {'fours':team_fours_count, 'team':batting_team, 'match':match_id}

    ## 6's by T in M
    def team_sixes(self, match_id, batting_team):
        team_sixes = self.deliveries[self.deliveries.batsman_runs == 6]
        team_sixes_count = team_sixes.groupby(['match_id','batting_team']).count()['inning']
        return {'sixes':team_sixes_count, 'team':batting_team, 'match':match_id}

    ## Total 4's by B 
    def overall_fours_count(self, batsman):
        fours = self.deliveries[self.deliveries.batsman_runs == 4]
        fours_count = fours.groupby('batsman').count()['inning']
        return {'batsman':batsman, 'fours':fours_count }
    
    ## Total 6's by B 
    def overall_sixes_count(self, batsman):
        sixes = self.deliveries[self.deliveries.batsman_runs == 6]
        sixes_count = sixes.groupby('batsman').count()['inning']
        return {'batsman':batsman, 'sixes':sixes_count }
   
    ## Max 4's by B
    def most_fours_count(self):
        fours = self.deliveries[self.deliveries.batsman_runs == 4]
        fours_count = fours.groupby('batsman').count()['inning'].sort_values(ascending = False).iloc[0:1]
        a = pd.DataFrame(fours_count).reset_index()
        batsman = list(a['batsman'])
        fours = list(a['inning'])
        return {'batsman':batsman[0], 'fours':fours[0] }

    ## Max 6's by B
    def most_sixes_count(self):
        sixes = self.deliveries[self.deliveries.batsman_runs == 6]
        sixes_count = sixes.groupby('batsman').count()['inning'].sort_values(ascending = False).iloc[0:1]
        a = pd.DataFrame(sixes_count).reset_index()
        batsman = list(a['batsman'])
        sixes = list(a['inning'])
        return {'batsman':batsman[0], 'sixes':sixes[0] }

                
    ## Strike Rate of a B
    def balls_faced_batsman_match(self, batsman, match_id):
        x = self.deliveries[(self.deliveries['batsman'] == batsman) & (self.deliveries['match_id'] == match_id) & (self.deliveries['wide_runs'] == 0)]
        return x.shape[0]
    def strikeRate_batsman_match(self, batsman, match_id):
        runs = runs_batsman_match(batsman, match_id)
        balls = balls_faced_batsman_match(batsman, match_id)
        strike_rate = runs/balls * 100
        return strike_rate

    ## Max Strike Rate
    def total_runs_scored_IPL(self):
        runs_count = self.deliveries.groupby('batsman')['batsman_runs'].sum()
        return runs_count
    def total_ball_faced_IPL(self):
        balls = self.deliveries[(self.deliveries.wide_runs == 0)].groupby('batsman')['inning']
        balls_count = balls.count()
        return balls_count
    def total_strike_rate_IPL(self,i):
        strike_rate = (total_runs_scored_IPL()/total_ball_faced_IPL())*100
        return strike_rate.sort_values(ascending = False).iloc[i:i+1]
    
    
    
    #  BOWLING STATS --
    
    def overall_economy_rate_by_bowler(self, match_id=0, team=None, bowler=None):
        bowler_eco = []
        if(match_id==0):
            if(team is not None):
                runs_conceded = self.deliveries[self.deliveries['bowling_team'] == team].total_runs.sum()
                balls = (self.deliveries[(self.deliveries['bowling_team'] == team) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)]).ball.count()
                dot_balls = (self.deliveries[(self.deliveries['bowling_team'] == team) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)& (self.deliveries['total_runs']==0)]).ball.count()
                overs = float(int(balls/6) + float(balls%6)/10)
                frac, whole = math.modf(overs)
                total = whole + frac*10/6
                economy_rate = runs_conceded/total
                bowler_eco.append((team, economy_rate, balls, overs, dot_balls))
            elif (bowler == None): 
                bowlers = self.deliveries.bowler.unique()
                for bowler in bowlers:
                    runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].total_runs.sum()-deliveries[self.deliveries['bowler'] == bowler].bye_runs.sum()-deliveries[self.deliveries['bowler'] == bowler].legbye_runs.sum() 
                    balls = (self.deliveries[(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)]).ball.count()
                    dot_balls = (self.deliveries[(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)& (self.deliveries['total_runs']==0)]).ball.count()
                    overs = float(int(balls/6) + float(balls%6)/10)
                    frac, whole = math.modf(overs)
                    total = whole + frac*10/6
                    economy_rate = runs_conceded/total
                    bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))
            else:
                runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].total_runs.sum()-deliveries[self.deliveries['bowler'] == bowler].bye_runs.sum()-deliveries[self.deliveries['bowler'] == bowler].legbye_runs.sum()
                balls = (self.deliveries[(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)]).ball.count()
                dot_balls = (self.deliveries[(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)& (self.deliveries['total_runs']==0)]).ball.count()
                overs = float(int(balls/6) + float(balls%6)/10)
                frac, whole = math.modf(overs)
                total = whole + frac*10/6
                economy_rate = runs_conceded/total
                bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))
        else:
            if (team is not None):
                runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team)].total_runs.sum()
                balls = (self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)]).ball.count()
                dot_balls = (self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)& (self.deliveries['total_runs']==0)]).ball.count()
                overs = float(int(balls/6) + float(balls%6)/10)
                frac, whole = math.modf(overs)
                total = whole + frac*10/6
                economy_rate = runs_conceded/total
                bowler_eco.append((team, economy_rate, balls, overs, dot_balls))
            elif (bowler == None): 
                bowlers = self.deliveries[self.deliveries['match_id'] == match_id].bowler.unique()
                for bowler in bowlers:
                    runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].total_runs.sum()-deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].bye_runs.sum()-deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].legbye_runs.sum()
                    balls = (self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)]).ball.count()
                    dot_balls = (self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)& (self.deliveries['total_runs']==0)]).ball.count()
                    overs = float(int(balls/6) + float(balls%6)/10)
                    frac, whole = math.modf(overs)
                    total = whole + frac*10/6
                    economy_rate = runs_conceded/total
                    bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))
            else:
                runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].total_runs.sum()-deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].bye_runs.sum()-deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].legbye_runs.sum()
                balls = (self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)]).ball.count()
                dot_balls = (self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler']== bowler) & (self.deliveries['wide_runs'] == 0) & (self.deliveries['is_super_over'] == 0) & (self.deliveries['noball_runs']==0)& (self.deliveries['total_runs']==0)]).ball.count()
                overs = float(int(balls/6) + float(balls%6)/10)
                frac, whole = math.modf(overs)
                total = whole + frac*10/6
                economy_rate = runs_conceded/total
                bowler_eco.append((bowler, economy_rate, balls, overs, dot_balls))

        return  bowler_eco
    
    def bowler_balls(self, match_id=0, team = None, bowler=None, economy_rate=None, balls=None, overs=None,dot_balls=None, rank=1):
        eco_balls_over = overall_economy_rate_by_bowler(match_id=match_id,team=team, bowler=bowler)
        if(economy_rate is not None):
            eco_balls_over = sorted(eco_balls_over, key=lambda x: x[1])
            return eco_balls_over[rank-1][0],eco_balls_over[rank-1][1] 
        elif(balls is not None):
            eco_balls_over = sorted(eco_balls_over, key=lambda x: x[2], reverse=True)
            return eco_balls_over[rank-1][0],eco_balls_over[rank-1][2]
        elif(overs is not None):
            eco_balls_over = sorted(eco_balls_over, key=lambda x: x[3], reverse=True)
            return eco_balls_over[rank-1][0],eco_balls_over[rank-1][3]
        elif(dot_balls is not None):
            eco_balls_over = sorted(eco_balls_over, key=lambda x: x[4], reverse=True)
            return eco_balls_over[rank-1][0],eco_balls_over[rank-1][4]
        else:
            eco_balls_over = sorted(eco_balls_over, key=lambda x: x[1])
            return eco_balls_over[rank-1]
        
    def overall_runs_conceded(self, match_id=0,team=None, bowler=None):
        over_all_runs = []
        if(match_id==0):
            if(team is not None):
                total_runs_conceded = self.deliveries[self.deliveries['bowling_team'] == team].total_runs.sum()
                bye_runs_conceded = self.deliveries[self.deliveries['bowling_team'] == team].bye_runs.sum()
                legbye_runs_conceded = self.deliveries[self.deliveries['bowling_team'] == team].legbye_runs.sum()
                wide = self.deliveries[self.deliveries['bowling_team'] == team].wide_runs.sum()
                noball = self.deliveries[self.deliveries['bowling_team'] == team].noball_runs.sum()
                four_boundary_conceded = self.deliveries[(self.deliveries['bowling_team'] == team)&(self.deliveries['batsman_runs']==4)].batsman_runs.count()
                six_boundary_conceded = self.deliveries[(self.deliveries['bowling_team'] == team)&(self.deliveries['batsman_runs']==6)].batsman_runs.count()
                runs = total_runs_conceded
                total_boundaries = four_boundary_conceded + six_boundary_conceded
                extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                over_all_runs.append((team, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))          
            elif (bowler == None):
                bowlers = self.deliveries.bowler.unique()
                for bowler in bowlers:
                    total_runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].total_runs.sum()
                    bye_runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].bye_runs.sum()
                    legbye_runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].legbye_runs.sum()
                    wide = self.deliveries[self.deliveries['bowler']== bowler].wide_runs.sum()
                    noball = self.deliveries[self.deliveries['bowler']== bowler].noball_runs.sum()
                    four_boundary_conceded = self.deliveries[(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==4)].batsman_runs.count()
                    six_boundary_conceded = self.deliveries[(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==6)].batsman_runs.count()
                    runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
                    total_boundaries = four_boundary_conceded + six_boundary_conceded
                    extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                    over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))


            else:
                total_runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].total_runs.sum()
                wide = self.deliveries[self.deliveries['bowler']== bowler].wide_runs.sum()
                noball = self.deliveries[self.deliveries['bowler']== bowler].noball_runs.sum()    
                bye_runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].bye_runs.sum()
                legbye_runs_conceded = self.deliveries[self.deliveries['bowler'] == bowler].legbye_runs.sum()
                four_boundary_conceded = self.deliveries[(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==4)].batsman_runs.count()
                six_boundary_conceded = self.deliveries[(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==6)].batsman_runs.count()
                runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
                total_boundaries = four_boundary_conceded + six_boundary_conceded
                extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))

        else:
            if(team is not None):
                total_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team)].total_runs.sum()
                wide = self.deliveries[(self.deliveries['match_id']==match_id)& (self.deliveries['bowling_team'] == team)].wide_runs.sum()
                noball = self.deliveries[(self.deliveries['match_id']==match_id)& (self.deliveries['bowling_team'] == team)].noball_runs.sum()    
                bye_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team)].bye_runs.sum()
                legbye_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team)].legbye_runs.sum()
                four_boundary_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team)&(self.deliveries['batsman_runs']==4)].batsman_runs.count()
                six_boundary_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowling_team'] == team)&(self.deliveries['batsman_runs']==6)].batsman_runs.count()
                runs = total_runs_conceded
                total_boundaries = four_boundary_conceded + six_boundary_conceded
                extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                over_all_runs.append((team, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))

            elif(bowler == None):
                bowlers = self.deliveries[self.deliveries['match_id'] == match_id].bowler.unique()
                for bowler in bowlers:
                    total_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].total_runs.sum()
                    bye_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].bye_runs.sum()
                    legbye_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].legbye_runs.sum()
                    wide = self.deliveries[(self.deliveries['match_id']==match_id)& (self.deliveries['bowler']== bowler)].wide_runs.sum()
                    noball = self.deliveries[(self.deliveries['match_id']==match_id)& (self.deliveries['bowler']== bowler)].noball_runs.sum()
                    four_boundary_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==4)].batsman_runs.count()
                    six_boundary_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==6)].batsman_runs.count()
                    runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
                    total_boundaries = four_boundary_conceded + six_boundary_conceded
                    extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                    over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))
            else:
                total_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].total_runs.sum()
                wide = self.deliveries[(self.deliveries['match_id']==match_id)& (self.deliveries['bowler']== bowler)].wide_runs.sum()
                noball = self.deliveries[(self.deliveries['match_id']==match_id)& (self.deliveries['bowler']== bowler)].noball_runs.sum()    
                bye_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].bye_runs.sum()
                legbye_runs_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].legbye_runs.sum()
                four_boundary_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==4)].batsman_runs.count()
                six_boundary_conceded = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)&(self.deliveries['batsman_runs']==6)].batsman_runs.count()
                runs = total_runs_conceded - bye_runs_conceded - legbye_runs_conceded
                total_boundaries = four_boundary_conceded + six_boundary_conceded
                extras = wide + noball + bye_runs_conceded + legbye_runs_conceded
                over_all_runs.append((bowler, runs ,wide, noball,extras, four_boundary_conceded, six_boundary_conceded, total_boundaries))
        return over_all_runs
    
    def bowler_runs(self, match_id=0, team=None, bowler=None, runs=None, wide=None, noball=None,extras=None, fours=None, sixes=None, boundary=None, rank=1):
        bowler_stats_data = overall_runs_conceded(match_id=match_id,team=team, bowler=bowler)
        if (runs is not None):
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[1], reverse=True)
            return bowler_stats_data[rank-1][0:2]
        elif (wide is not None):
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[2], reverse=True)
            return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][2]
        elif (noball is not None):
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[3], reverse=True)
            return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][3]
        elif (extras is not None):
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[4], reverse=True)
            return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][4]
        elif (fours is not None):
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[5], reverse=True)
            return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][5]
        elif (sixes is not None):
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[6], reverse=True)
            return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][6]
        elif (boundary is not None):
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[7], reverse=True)
            return bowler_stats_data[rank-1][0],bowler_stats_data[rank-1][7]
        else:
            bowler_stats_data = sorted(bowler_stats_data, key=lambda x: x[1], reverse=True)
            return bowler_stats_data[rank-1][0:2]
        
        def bowler_wickets(sematch_id=0, team=None, bowler=None,rank =1):
            bowler_wicket =[]
            if(match_id==0):
                if (team is not None):
                    total_wickets = self.deliveries[self.deliveries['bowling_team'] == team].dismissal_kind.count()
                    wickets_total = total_wickets
                    bowlers = self.deliveries[self.deliveries['bowling_team'] == team].bowler.unique()
                    for bowler in bowlers:
                        total_wickets = self.deliveries[self.deliveries['bowler'] == bowler].dismissal_kind.count()
                        run_outs = self.deliveries[(self.deliveries['bowler'] == bowler) & (self.deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                        wickets = total_wickets - run_outs
                        bowler_wicket.append((bowler, wickets, team, wickets_total))

                elif (bowler == None): 
                    bowlers = self.deliveries.bowler.unique()
                    for bowler in bowlers:
                        total_wickets = self.deliveries[self.deliveries['bowler'] == bowler].dismissal_kind.count()
                        run_outs = self.deliveries[(self.deliveries['bowler'] == bowler) & (self.deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                        wickets = total_wickets - run_outs
                        bowler_wicket.append((bowler, wickets))
                else:
                    total_wickets = self.deliveries[self.deliveries['bowler'] == bowler].dismissal_kind.count()
                    run_outs = self.deliveries[(self.deliveries['bowler'] == bowler) & (self.deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                    wickets = total_wickets - run_outs
                    bowler_wicket.append((bowler, wickets))
            else:
                if (team is not None):
                    total_wickets = self.deliveries[(self.deliveries['match_id'] == match_id) & (self.deliveries['bowling_team'] == team)].dismissal_kind.count()
                    wickets_total = total_wickets 
                    bowlers = self.deliveries[(self.deliveries['match_id'] == match_id)& (self.deliveries['bowling_team'] == team)].bowler.unique()
                    for bowler in bowlers:
                        total_wickets = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].dismissal_kind.count()
                        run_outs = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler) & (self.deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                        wickets = total_wickets - run_outs
                        bowler_wicket.append((bowler, wickets, team, wickets_total))
                elif (bowler == None):
                    bowlers = self.deliveries[(self.deliveries['match_id'] == match_id)].bowler.unique()
                    for bowler in bowlers:
                        total_wickets = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].dismissal_kind.count()
                        run_outs = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler) & (self.deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                        wickets = total_wickets - run_outs
                        bowler_wicket.append((bowler, wickets))
                else:
                    total_wickets = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler)].dismissal_kind.count()
                    run_outs = self.deliveries[(self.deliveries['match_id'] == match_id)&(self.deliveries['bowler'] == bowler) & (self.deliveries['dismissal_kind']== 'run out')].dismissal_kind.count()
                    wickets = total_wickets - run_outs
                    bowler_wicket.append((bowler, wickets))
            bowler_wicket = sorted(bowler_wicket, key=lambda x: x[1], reverse=True)
            return  bowler_wicket[rank-1]

In [5]:
exe = Executors()

#### Brill Tagging

In [6]:
# Loading Training Sentences
with open('tagged_training_sentences.txt') as tagged_sentence:
    corrected_train = tagged_sentence.read()

In [7]:
class POSTagger(SequentialBackoffTagger):
    def __init__(self, *args, **kwargs):
        SequentialBackoffTagger.__init__(self, *args, **kwargs)
    
    def choose_tag(self, tokens, index, history):
        word = tokens[index]
        return nltk.pos_tag([word])[0][1] if word != "" else None

In [8]:
custom_pos_tagger = POSTagger()

In [9]:
def transform_str2tuple(tagged_sentence):
    tagged_sentence_tuple = []
    sentences = tagged_sentence.split("\n")
    for sentence in sentences:
        tagged_question = []
        for word in word_tokenize(sentence):
            tagged_question.append(nltk.str2tuple(word))
        tagged_sentence_tuple.append(tagged_question)
    return tagged_sentence_tuple

In [10]:
def get_brill_tagger(tagged_sentences):
    templates = [brill.Template(brill.Pos([1,1])),
    brill.Template(brill.Pos([2,2])),
    brill.Template(brill.Pos([1,2])),
    brill.Template(brill.Pos([1,3])),
    brill.Template(brill.Pos([1,1])),
    brill.Template(brill.Pos([2,2])),
    brill.Template(brill.Pos([1,2])),
    brill.Template(brill.Pos([1,3])),
    brill.Template(brill.Word([-1, -1])),
    brill.Template(brill.Word([-1, -1]))]

    trainer_initial_pos = BrillTaggerTrainer(initial_tagger= custom_pos_tagger, templates=templates, trace=3, deterministic=True)
    brill_tagger = trainer_initial_pos.train(tagged_sentences, max_rules=10)
    
    return brill_tagger

In [11]:
brill_tagger = get_brill_tagger(transform_str2tuple(corrected_train))

TBL train (fast) (seqs: 138; tokens: 1451; tpls: 10; min score: 2; min acc: None)
Finding initial useful rules...
    Found 710 useful rules.

           B      |
   S   F   r   O  |        Score = Fixed - Broken
   c   i   o   t  |  R     Fixed = num tags changed incorrect -> correct
   o   x   k   h  |  u     Broken = num tags changed correct -> incorrect
   r   e   e   e  |  l     Other = num tags changed incorrect -> incorrect
   e   d   n   r  |  e
------------------+-------------------------------------------------------
 116 116   0   0  | .->None if Pos:None@[1]
 116 116   0   0  | None-> if Pos:.@[1]
 116 116   0   0  | .->None if Word:@[-1]
  34  42   8   9  | NN->NNP if Pos:NN@[1]
  15  18   3   8  | NN->NNP if Pos:IN@[2]
  14  14   0   0  | NN->NNP if Pos:POS@[1,2]
  14  14   0   0  | VBN->VBD if Word:who@[-1]
   8  11   3   0  | NN->NNP if Word:by@[-1]
   6   6   0   2  | NNP->VBZ if Pos:CD@[1]
   5   7   2   0  | NNS->NNP if Pos:NNP@[1]


#### Training Sentences

In [12]:
# ABHAY - 
#train_sentences = pd.read_csv("training_sentences_classifier.csv")

In [13]:
# PRANJAL
train_sentences = pd.read_csv("training_sentences_batsman.csv")

#### Features Extraction

In [14]:
def feature_extractor(words):
    features = {'highest': 0
                ,'scored': 0
                ,'runs': 0
                ,'scorer': 0
                ,'score':0
                ,'scored':0
                ,'match':0
                ,'wickets': 0
                ,'boundary': 0
                ,'fours':0
                ,'4s':0
                ,'six':0
                ,'sixes':0
                ,'6s':0
                ,'6':0
                ,'hit':0
                ,'four':0
                ,'aggregate':0
                ,'total':0
                ,'team':0
                ,'lead':0
                ,'leading':0
                ,'maximum':0
                ,'max':0
                ,'minimum':0
                ,'min':0
                ,'least':0
                ,'less':0
                ,'1st':0,'2nd':0,'3rd':0,'4th':0,'5th':0,'6th':0,'7th':0,'8th':0,'9th':0,'10th':0    
                ,'dot':0
                ,'dots':0
                ,'faced':0 
                ,'entire':0
                ,'whole':0
                ,'season':0
                ,'strike':0
                ,'rate':0
                ,'strikerate':0
               }
    
    word_counts = nltk.Counter(words)
    
    for word in word_counts:
        if word in features:
            features[word] = word_counts[word]
    return features

In [15]:
def feature_extractor(words):
    with open('features.json') as features:
        features = json.load(features)
    #tokenized_sentence = nltk.word_tokenize(sentence)
    word_counts = nltk.Counter(words)
    for word in word_counts:
        if word in features:
            features[word] = word_counts[word]
    return features

#### Training Naive Bayes Classifier

In [16]:
# ABHAY - index is there!
#naive_bayes_classifier = nltk.classify.naivebayes.NaiveBayesClassifier.train([(feature_extractor(nltk.word_tokenize(sentence)), label) for index, sentence, label in train_sentences.values])

In [17]:
# PRANJAL
naive_bayes_classifier = nltk.NaiveBayesClassifier.train([(feature_extractor(nltk.word_tokenize(sentence)), label) for sentence, label in train_sentences.values])

## TEST QUESTION

In [18]:
#question = 'highest scorer in match 2?'

#### Chunking Important words

In [19]:
def chunker(question):

    boundary_list =['4s','6s','sixes','fours']

    filtered_words = []

    words = word_tokenize(question)

    lm = WordNetLemmatizer()

    stop_words = stopwords.words('english')

    for word in words:
         if word.lower() not in stop_words:
                if word in boundary_list:
                    filtered_words.append(word)
                else:
                    filtered_words.append(lm.lemmatize(word))

    classifier = naive_bayes_classifier.classify(feature_extractor(filtered_words))

    tagged = brill_tagger.tag(filtered_words)
    chunkGram = r"""Chunk:{<NN.?>*<NNP.?>*}"""
    chunkParser = nltk.RegexpParser(chunkGram)
    chunked = chunkParser.parse(tagged)

    chunked_words = []

    for node in chunked:
        if hasattr(node, "label"):
            words = [word for word, tag in node.leaves()]
            chunked_words += words

   #print("Classifier = ", classifier)        

    chunked_dict = function_return_fullName(chunked_words)

    output = classify(classifier, chunked, chunked_dict)

    return output

#### Returns Player's Full Name

In [20]:
#Returns full Player name -

def function_return_fullName(chunked_words):
    batsman_list = list(deliveries.batsman.unique())
    bowler_list = list(deliveries.bowler.unique())
    fielder_list = list(deliveries.fielder.unique())
    
    temp = {'player':[],'team':[]}
    
    teams_abbr = [ ('Kolkata Knight Riders', 'kolkata knight riders', 'kolkata', 'kolkata riders', 'kolkata rider', 'kolkata knights', 'kolkata knight', 'knight riders', 'knight rider', 'riders', 'k k riders', 'k knight riders', 'kkr'), 
               ('Chennai Super Kings', 'chennai super kings', 'chennai', 'chennai kings', 'chennai super', 'super kings', 'chennai kings', 'csk'),  
               ('Rajasthan Royals', 'rajasthan royals', 'rajasthan', 'rajasthan royal', 'rr'),
               ('Mumbai Indians', 'mumbai indians', 'mumbai', 'mumbai indian', 'indians', 'indian', 'mi'), 
               ('Deccan Chargers', 'deccan chargers', 'deccan', 'deccan charger', 'chargers', 'charger', 'dc'), 
               ('Kings XI Punjab', 'kings xi punjab', 'kings', 'punjab', 'kings XI', 'kings punjab',  'kxip','kp', 'kxp'), 
               ('Royal Challengers Bangalore', 'royal challengers bangalore', 'bangalore', 'royal challengers', 'royal challenger', 'royal bangalore', 'challengers bangalore', 'challenger bangalore', 'rcb', 'rb'),
               ('Delhi Daredevils', 'delhi daredevils', 'delhi', 'daredevils', 'delhi daredevil', 'dd') ]
               
    for data in chunked_words:
        for w in (batsman_list or bowler_list or fielder_list):
            if w.lower() == data.lower():
                temp['player'].append(w)
            elif str(w.split()[len(w.split())-1]).lower() == data.lower():
                temp['player'].append(w)
        for w in teams_abbr:
            if data.lower() in w:
                temp['team'].append(w[0])         

    return temp

#### Classification

In [21]:
def classify(classifier, chunked, chunked_dict):
    print(classifier)
    if classifier == 'runs':
        try:
            if chunked_dict['team'] != []:
                team_name = chunked_dict['team'][0]
                for i in range(len(chunked)):
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            match_id = chunked[i][0]           
                result = exe.total_runs_team_match(team_name, int(match_id))
                reply = random.choice(answers['total_runs_team_match'])
                return(reply.format(**result))
            else :
                for i in range(len(chunked)):
                    person_name = chunked_dict['player'][0]
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            match_id = chunked[i][0]
                result = exe.runs_batsman_match(person_name, int(match_id))
                reply = random.choice(answers['runs_batsman_match'])
                return(reply.format(**result))

        except:
            return("Error in 1")



    elif classifier =='max_runs':
        try:
            if chunked_dict['team'] != []:
                team_name = chunked_dict['team'][0]
                for i in range(len(chunked)): 
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            match_id = chunked[i][0]
                result = exe.max_score_batsman_match_inTeam(int(match_id), team_name)
                reply = random.choice(answers['max_score_batsman_match_inTeam'])
                return(reply.format(**result))
            else:
                for i in range(len(chunked)):
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            match_id = chunked[i][0]
                result = exe.max_score_batsman_match(int(match_id))
                reply = random.choice(answers['max_score_batsman_match'])
                return(reply.format(**result))

        except:
            return("Error in 2")



    elif classifier =='min_runs':
        try:
            if chunked_dict['team'] != []:
                team_name = chunked_dict['team'][0]
                for i in range(len(chunked)):
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            match_id = chunked[i][0]
                result = exe.min_score_batsman_match_inTeam(int(match_id), team_name)
                reply = random.choice(answers['min_score_batsman_match_inTeam'])
                return(reply.format(**result))
            else:
                for i in range(len(chunked)): 
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            match_id = chunked[i][0]
                result = exe.min_score_batsman_match(int(match_id))
                reply = random.choice(answers['min_score_batsman_match'])
                return(reply.format(**result))

        except:
            return("Error in 3")


    elif classifier == 'total_runs':
        try:
            if chunked_dict['team'] != []:
                team_name = chunked_dict['team'][0]
                result = exe.total_runs_team_IPL(team_name)
                reply = random.choice(answers['total_runs_team_IPL'])
                return(reply.format(**result))
            else :
                for i in range(len(chunked)): 
                    person_name = chunked_dict['player'][0]
                result = exe.total_runs_batsman_IPL(person_name)
                reply = random.choice(answers['total_runs_batsman_IPL'])
                return(reply.format(**result))

        except:
            return("Error in 4")


    if classifier == 'fours':
        print("hi")
        try:
            if chunked_dict['team'] != []:
                team_name = chunked_dict['team'][0]
                for i in range(len(chunked)): 
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            flag = 0
                            for p in boundary_list:
                                if chunked[i][1] == p:
                                    flag = 1
                            if flag == 0:
                                match_id = chunked[i][0]
                result = exe.team_fours(int(match_id), team_name)
                reply = random.choice(answers['team_fours'])
                return(reply.format(**result))
            else:
                person_name = chunked_dict['player'][0]
                print(person_name)
                for i in range(len(chunked)): 
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            flag = 0
                            for p in boundary_list:
                                if chunked[i][0] == p:
                                    flag = 1
                            if flag == 0:
                                match_id = chunked[i][0]  
                print(match_id)                
                result = exe.b_4_batsman_match(person_name ,int(match_id))
                reply = random.choice(answers['b_4_batsman_match'])
                return(reply.format(**result))

        except:
            return("Error in 5")


    if classifier == 'sixes':
        try:
            if chunked_dict['team'] != []:
                team_name = chunked_dict['team'][0]
                for i in range(len(chunked)): 
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            flag = 0
                            for p in boundary_list:
                                if chunked[i][1] == p:
                                    flag = 1
                            if flag == 0:
                                match_id = chunked[i][0]
                result = exe.team_sixes(int(match_id), team_name)
                reply = random.choice(answers['team_sixes'])
                return(reply.format(**result))
            else:
                person_name = chunked_dict['player'][0]
                for i in range(len(chunked)): 
                    if type(chunked[i]) is tuple:
                        if chunked[i][1] == 'CD':
                            flag = 0
                            for p in boundary_list:
                                if chunked[i][1] == p:
                                    flag = 1
                            if flag == 0:
                                match_id = chunked[i][0] 
                result = exe.b_6_batsman_match(person_name ,int(match_id))
                reply = random.choice(answers['b_6_batsman_match'])
                return(reply.format(**result))

        except:
            return("Error in 6")

------

## Answer Formulation

In [22]:
answers = { 
            'total_runs_team_match' : ['{team} scored {runs} runs in match {match}',
                                       'In match {match} {team} scored {runs} runs',
                                       '{runs} runs were scored by {team} in match {match}',
                                       'Match {match} saw {team} scoring {runs} runs',
                                       'A total of {runs} was scored by {team} in match {match}'
                                    ],
    
            'runs_batsman_match' : ['{batsman} scored {runs} runs in match {match}',
                                    'In match {match} {batsman} scored {runs} runs',
                                    '{runs} runs were scored by {batsman} in match {match}'
                                    ],
    
            'max_score_batsman_match_inTeam' : ['{batsman} of {team} scored {runs} runs in match {match}',
                                                'In match {team} {batsman} of {team} scored {runs} runs',
                                                '{runs} runs were scored by {batsman} of {team} in match {match}',
                                                ],
            'max_score_batsman_match' : ['{batsman} of scored {runs} runs in match {match}',
                                         'In match {match} {batsman} scored {runs} runs',
                                          '{batsman} in match {match} scored {runs} runs',
                                          '{runs} runs were scored by {batsman} in {match}',
                                          'In match {match} {batsman} scored {runs} runs',
                                        ],
              'min_score_batsman_match' : ['{batsman} of scored {runs} runs in match {match}',
                                         'In match {match} {batsman} scored {runs} runs',
                                          '{batsman} in match {match} scored {runs} runs',
                                          '{runs} runs were scored by {batsman} in {match}',
                                          'In match {match} {batsman} scored {runs} runs',
                                        ],
    
    # PHANI -    
            'max_score_batsman_match_inTeam':['{batsman} of {team} scored {runs} runs in match {match}',
                                               'In match {match} {batsman} of {team} scored {runs} runs',
                                               '{runs} runs were scored by {batsman} of {team} in match {match}'
                                             ],

            'total_runs_batsman_IPL':['{batsman} scored {runs} runs in ipl',
                                      'In this season {batsman} scored {runs} runs',
                                      '{runs} runs were scored by {batsman} in the whole ipl'
                                     ],

            'total_runs_team_IPL':['{team} scored {runs} runs in ipl',
                                  'In this season {team} scored {runs} runs',
                                  '{runs} runs were scored by {team} in the whole ipl'
                                  ],

            'dot_balls_batsman_match':['{batsman} faced {dot_balls} dot balls in match {match}',
                                       'In match {match} {batsman} faced {dot_balls} dot balls',
                                       '{dot_balls} dot balls were faced by {batsman} in match {match}'
                                      ],

            'b_4_batsman_match':['{batsman} hit {fours} fours in match {match}',
                                 'In match {match} {batsman} hit {fours} fours',
                                 '{fours} fours were hit by {batsman} in match {match}'
                                ],

            'b_6_batsman_match':['{batsman} hit {sixes} sixes in match {match}',
                                 'In match {match} {batsman} scored {sixes} runs',
                                 '{sixes} were hit by {batsman} in match {match}'
                                ],

            'team_fours':['{team} hit {fours} fours in match {match}',
                          'In match {match} {team} hit {fours} fours',
                          '{fours} fours were hit by {team} in match {match}'
                         ],

            'team_sixes':['{team} hit {sixes} sixes in match {match}',
                          'In match {match} {team} scored {sixes} runs',
                          '{sixes} were hit by {batsman} in match {match}'
                         ],

            'overall_fours_count':['{batsman} hit {fours} fours in ipl',
                                   'In this season {batsman} hit {fours} fours',
                                   '{fours} fours were hit by {batsman} in ipl'
                                   ],

            'overall_sixes_count':['{batsman} hit {sixes} sixes in ipl',
                                   'In this season {batsman} hit {sixes} sixes',
                                   '{sixes} sixes were hit by {batsman} in ipl'
                                  ],
            'highest_scorer':['{batsman} scored maximum runs,he scored {runs} runs in ipl',
                              'In this season {batsman} scored maximum runs,he scored {runs} runs',
                              'maximum runs {runs} runs were scored by {batsman} in the whole ipl'
                             ],
            'most_fours_count':['{batsman} hit most fours,he hit {fours} fours in ipl',
                                'In this season {batsman} hit most fours,he {fours} fours',
                                'most fours {fours} fours were hit by {batsman} in ipl'
                               ],

            'most_sixes':['{batsman} hit most sixes,he hit {sixes} sixes in ipl',
                          'In this season {batsman} hit most sixes,he hit {sixes} sixes',
                          'most sixes {sixes} sixes were hit by {batsman} in ipl'
                          ]   
}

-----------------------

_________________________

# ASK YOUR QUESTION HERE!!!

In [23]:

question = "Total fours scored by SC Ganguly in match 5?"

print(User_Input(question))

fours
hi
SC Ganguly
Error in 5
