### IPL Dropped catches data

- Match ID
- Match name
- Match Date
- Ball
- Bowler
- Fielder
- Batsman
- Fielding position
- Fielding team
- Batting team

In [1]:
import requests
from bs4 import BeautifulSoup
# from selenium import webdriver
# from webdriver_manager.chrome import ChromeDriverManager
# from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
from collections import defaultdict
import re
import os
import time
from spacy.lang.en import English
from spacy.pipeline import EntityRuler
import spacy
# Settings passed to the "entity_ruler" pipeline component created below
config = {
   "phrase_matcher_attr": None,
   "validate": True,
   "overwrite_ents": True,
   "ent_id_sep": "||",
}
# Load the small English spaCy model; the traceback recorded under this cell
# shows it has to be installed first (see the commented download command below)
nlp = spacy.load("en_core_web_sm")
# Attach the rule-based matcher; fielding-position patterns are added to
# `ruler` in a later cell
ruler=nlp.add_pipe("entity_ruler", config=config)

# from selenium.webdriver.chrome.options import Options
# chrome_options = Options()  
# chrome_options.add_argument("--headless")
# chromedriver="./data/chromedriver_updated"

OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory.

In [None]:
# !python -m spacy download en_core_web_sm

In [None]:
# Register every entry of the 'scoring_zones' column as an F-POS pattern
fielding_df=pd.read_excel('../Assignment/data/cricketEntities.xlsx')
fielding_patterns=list(map(lambda zone:{"label":"F-POS","pattern":zone},fielding_df['scoring_zones'].tolist()))
ruler.add_patterns(fielding_patterns)

In [None]:
from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging
# Load the ELMo-based NER archive from the AllenNLP public model bucket
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/ner-elmo.2021-02-12.tar.gz")
# The cells that follow pass the model around as `allen_nlp_predictor`;
# bind that name here as well so they work when the notebook runs top-down
allen_nlp_predictor = predictor

In [None]:
def check_overs_format(overs):
    '''Convert a whole-number Cricbuzz over label to the Cricinfo convention.

    A value accepted by int() (for example "12") is returned as the final
    ball of the preceding over (11.6). Any value that cannot be converted
    is returned exactly as it was received.
    '''
    try:
        return (int(overs)-1)+0.6
    except Exception:
        return overs

In [None]:
def parse_cricbuzz_urls(cricbuzz_match_url):
    '''Open a Cricbuzz page in Chrome and return its rendered HTML as soup.

    Parameters
    ----------
    cricbuzz_match_url : str
        Address passed to ``driver.get``.

    Returns
    -------
    BeautifulSoup
        ``driver.page_source`` parsed with the ``html.parser`` backend.

    Notes
    -----
    Uses the notebook-level names ``webdriver``, ``chromedriver`` and
    ``chrome_options``. Their import/definition lines are commented out in
    the first cell and must be restored before this function is called.
    '''
    driver = webdriver.Chrome(executable_path=chromedriver,options=chrome_options)
    try:
        driver.get(cricbuzz_match_url)
        cricbuzz_match_soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Close the browser even when navigation or parsing raises
        driver.quit()
    return cricbuzz_match_soup

In [None]:
def comm_text_drop_catches(comm_text,allen_nlp_predictor):
    '''Pull bowler, batsman, fielder and fielding position out of one commentary line.

    Parameters
    ----------
    comm_text : str
        Commentary text; the part before the first comma is read as
        "<bowler> to <batsman>".
    allen_nlp_predictor
        Object whose ``predict(sentence=...)`` result exposes parallel
        ``'words'`` and ``'tags'`` sequences. Tokens whose tag contains
        ``'PER'`` are treated as pieces of person names.

    Returns
    -------
    dict
        Keys ``'bowler'``, ``'batsman'``, ``'fielder'`` and
        ``'fielding_position'``, each holding a one-element list. Names are
        lower-cased, several fielders or positions are comma-joined, and a
        value that could not be determined is the empty string.

    Notes
    -----
    Fielding positions are the ``F-POS`` entities found by the
    notebook-level spaCy pipeline ``nlp``.
    '''
    ##Bowler and batsman come from the "<bowler> to <batsman>" header
    try:
        header=comm_text.split(',')[0]
    except (AttributeError,TypeError):
        header=''
    bowler,separator,batsman=header.partition(' to ')
    if separator:
        bowler=bowler.strip().lower()
        batsman=batsman.replace(' to ','').strip().lower()
    else:
        ##No " to " in the header: leave both blank rather than slicing on find()'s -1
        bowler,batsman='',''

    ##Use Allen NLP named entities to recognize fielders in play
    ner_results=allen_nlp_predictor.predict(sentence=comm_text)
    ent_dict=[(tag,word.lower()) for tag,word in zip(ner_results['tags'],ner_results['words']) if 'PER' in tag]
    final=len(ent_dict)-1

    players=[]
    ##Three-token names: an I-PER token joined with the person tokens on either side
    for n,(tag,word) in enumerate(ent_dict):
        if 'I-PER' in tag and 0<n<final:
            name=' '.join((ent_dict[n-1][1],word,ent_dict[n+1][1]))
            if name not in players:
                players.append(name)

    ##Two-token names: a B-PER token joined with the person token after it
    for n,(tag,word) in enumerate(ent_dict):
        if 'B-PER' in tag and n<final:
            name=(word+' '+ent_dict[n+1][1]).strip()
            if name not in players:
                players.append(name)

    ##Single-token names (U-PER), unless already contained in a collected name
    for tag,word in ent_dict:
        if 'U-PER' in tag and not any(word in name for name in players):
            players.append(word)

    ##Ensure no repeat of substrings (e.g. "van der" inside "van der dussen")
    players=[p for p in players if sum(p in other for other in players)<=1]

    ##Eliminate the batsman since he cannot be the fielder
    players=[p for p in players if p!=batsman]

    if players==[bowler]:
        ##If no other fielder other than the bowler is present then it means fielder is the bowler
        fielder=bowler
    else:
        fielder=','.join(p for p in players if p!=bowler)

    ##Use spacy NLP to get custom fielding positions (first-seen order, no repeats)
    doc = nlp(comm_text)
    fielding_positions=','.join(dict.fromkeys(ent.text for ent in doc.ents if ent.label_=='F-POS'))

    return {'bowler':[bowler],
            'batsman':[batsman],
            'fielder':[fielder],
            'fielding_position':[fielding_positions]}


In [None]:
def get_dropped_soup(cricbuzz_url,link_text="Dropped Catches"):
    '''Scrape one highlights tab of a Cricbuzz match page for every innings.

    Parameters
    ----------
    cricbuzz_url : str
        Address passed to ``driver.get``.
    link_text : str, optional
        Visible text of the highlights link to click after an innings link
        has been selected. Defaults to "Dropped Catches".

    Returns
    -------
    tuple
        ``(match_name, venue, link_texts, key_events_soup)``. ``match_name``
        and ``venue`` are both '' when either cannot be read from the page.
        ``link_texts`` holds the navigation labels containing 'Inns', and
        ``key_events_soup`` holds one BeautifulSoup snapshot per label, in
        the same order.

    Notes
    -----
    Uses the notebook-level names ``webdriver``, ``chromedriver`` and
    ``chrome_options``, whose definitions are commented out in the first cell.
    NOTE(review): ``executable_path`` and ``find_element_by_link_text`` are
    presumably unavailable on recent Selenium releases - confirm the
    installed version.
    '''
    driver = webdriver.Chrome(executable_path=chromedriver,options=chrome_options)
    key_events_soup=[]
    try:
        driver.get(cricbuzz_url)
        cricbuzz_soup2= BeautifulSoup(driver.page_source, 'html.parser')
        try:
            match_name=[h.text for h in cricbuzz_soup2.find_all('h1',{'class':'cb-nav-hdr cb-font-18 line-ht24'})][0].strip().split('-')[0].strip()
            venue=[re.sub(r'\W+',' ', c.text ) for c in cricbuzz_soup2.find_all('a',{'itemprop':'location'})][0].strip()
        except Exception:
            match_name,venue='',''

        ##Get all links that need to be clicked in web page
        link_texts=[]
        for cs in cricbuzz_soup2.find_all('a',{'class':'cb-nav-pill-1'}):
            ##Most navigation bars have innings in the list
            if ('Inns' in cs.text):
                link_texts.append(cs.text.strip())

        for innings_label in link_texts:
            try:
                driver.find_element_by_link_text(innings_label).click()
                time.sleep(3)
                driver.find_element_by_link_text(link_text).click()
            except Exception as ex:
                ##Best effort: report the failure but still take a snapshot so
                ##key_events_soup stays aligned with link_texts
                print ("Could not open '{}' -> '{}': {}".format(innings_label,link_text,ex))

            ##Give it sufficient time to scrape the full highlights content
            time.sleep(8)
            key_events_soup.append(BeautifulSoup(driver.page_source, 'html.parser'))

        print ("Full scraping of key events complete...")
    finally:
        ##Close the browser even when something above raises
        driver.quit()

    return match_name,venue,link_texts,key_events_soup

In [None]:
def process_dropped_soup(innings_list,innings_soup):
    '''Combine the dropped-catch commentary of every innings into one table.

    For each position in innings_list, the soup at the same position in
    innings_soup is searched for over-number <div> elements and commentary
    <p> elements. The result has the columns 'innings', 'overs' and
    'commentary_text'; each innings is stacked above those processed
    before it.
    '''
    over_selector={'class':'cb-mat-mnu-wrp cb-ovr-num ng-binding ng-scope'}
    text_selector={'class':'cb-com-ln ng-binding cb-col cb-col-90'}
    stacked=pd.DataFrame()
    for position,label in enumerate(innings_list):
        page=innings_soup[position]
        try:
            overs=[]
            for node in page.find_all('div',over_selector):
                overs.append(check_overs_format(node.text))
            lines=[]
            for node in page.find_all('p',text_selector):
                lines.append(node.text.strip())
        except Exception:
            # Any failure while reading this innings leaves it empty
            overs,lines=[],[]

        frame=pd.DataFrame({'innings':label,'overs':overs,'commentary_text':lines})
        stacked=pd.concat([frame,stacked])

    return stacked

#### Get list of cricbuzz URLs

In [None]:
cricbuzz_highlights_url='https://www.cricbuzz.com/cricket-match-highlights/35657/pbks-vs-srh-14th-match-indian-premier-league-2021'

In [None]:
# Fetch and parse the highlights page for the match URL set above
cricbuzz_highlights_soup=parse_cricbuzz_urls(cricbuzz_highlights_url)

In [None]:
# Derive the match-facts URL by swapping the path segment of the highlights URL
cricbuzz_match_facts=cricbuzz_highlights_url.replace('cricket-match-highlights','cricket-match-facts')

In [None]:
# Scrape the dropped-catch tab of the match configured in this section.
# `cricbuzz_url` is only assigned much further down (for a different match),
# so the highlights URL defined above is the one to use here.
match_name,venue,innings_list,innings_soup=get_dropped_soup(cricbuzz_highlights_url)

In [None]:
# Build the commentary table and stamp it with the match-level details
match_dropped_df=process_dropped_soup(innings_list,innings_soup)
match_dropped_df['venue']=venue
match_dropped_df['match_name']=match_name
# Parse each commentary line once, then unwrap the one-element lists into columns
match_dropped_df['comm_parse']=match_dropped_df['commentary_text'].map(lambda text:comm_text_drop_catches(text,allen_nlp_predictor))
match_dropped_df['bowler']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['bowler'] if len(parsed['bowler'])!=1 else parsed['bowler'][0])
match_dropped_df['batsman']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['batsman'] if len(parsed['batsman'])!=1 else parsed['batsman'][0])
match_dropped_df['fielder']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['fielder'] if len(parsed['fielder'])!=1 else parsed['fielder'][0])
match_dropped_df['fielding_position']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['fielding_position'] if len(parsed['fielding_position'])!=1 else parsed['fielding_position'][0])

In [None]:
match_dropped_df

In [None]:
def parse_cricbuzz_urls(cricbuzz_match_url):
    '''Open a Cricbuzz page in Chrome and return its rendered HTML as soup.

    Parameters
    ----------
    cricbuzz_match_url : str
        Address passed to ``driver.get``.

    Returns
    -------
    BeautifulSoup
        ``driver.page_source`` parsed with the ``html.parser`` backend.

    Notes
    -----
    Uses the notebook-level names ``webdriver``, ``chromedriver`` and
    ``chrome_options``. Their import/definition lines are commented out in
    the first cell and must be restored before this function is called.
    '''
    driver = webdriver.Chrome(executable_path=chromedriver,options=chrome_options)
    try:
        driver.get(cricbuzz_match_url)
        cricbuzz_match_soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Close the browser even when navigation or parsing raises
        driver.quit()
    return cricbuzz_match_soup

cricbuzz_highlights_soup=parse_cricbuzz_urls(cricbuzz_highlights_url)

In [None]:
##Fetch the match-facts page; the cells below read each side's captain and keeper from it
cricbuzz_facts_soup=parse_cricbuzz_urls(cricbuzz_match_facts)

In [None]:
# Keep only the label cells whose text is exactly 'Playing:'
playing_x1_soup=list(filter(lambda label:label.text=='Playing:',cricbuzz_facts_soup.find_all('div',{'class':'cb-col cb-col-27 cb-mat-fct-itm text-bold'})))

In [None]:
playing_x1_soup

In [None]:
# Team names are the 'vs'-separated parts of the page heading before its first comma
teams=list(map(str.strip,cricbuzz_highlights_soup.find('h1',{'class':'cb-nav-hdr cb-font-18 line-ht24'}).text.split(',')[0].split('vs')))

teams

In [None]:
playing_x1_dict=defaultdict(list)
##Reversed since we are interested in identifying fielding captains and fielding keepers
for pn,psoup in enumerate(reversed(playing_x1_soup)):
    # The third element after the 'Playing:' label holds the player links
    squad_links=psoup.next_element.next_element.next_element.find_all('a')
    captain=[plr.text.split('(')[0].strip() for plr in squad_links if '(c)' in plr.text or '(c & wk)' in plr.text][0]
    keeper=[plr.text.split('(')[0].strip() for plr in squad_links if '(wk)' in plr.text or '(c & wk)' in plr.text][0]
    playing_x1_dict[teams[pn]]=[captain,keeper]

In [None]:
# Reshape {team: [captain, keeper]} into a long Team / Player / Role table
innings_ck_df=pd.DataFrame()
for k,v in dict(playing_x1_dict).items():
    df=pd.DataFrame({k:v})
    innings_ck_df=pd.concat([df,innings_ck_df])
    
innings_ck_df=innings_ck_df.melt().dropna().reset_index(drop=True)
# Every team contributes [captain, keeper] in that order, so repeat the role
# pair once per team instead of assuming exactly two teams were parsed
innings_ck_df['Role']=['Captain','WK']*len(playing_x1_dict)
innings_ck_df.columns=['Team','Player','Role']

In [None]:
# One row per team with 'Captain' and 'WK' columns. 'first' picks the name as-is;
# 'sum' would concatenate name strings if a (Team, Role) pair ever repeated
innings_ck_df=innings_ck_df.pivot_table(index='Team',columns='Role',values='Player',aggfunc='first').reset_index()

In [None]:
innings_ck_df

In [None]:
def check_overs_format(overs):
    '''Convert a whole-number Cricbuzz over label to the Cricinfo convention.

    A value accepted by int() (for example "12") is returned as the final
    ball of the preceding over (11.6). Any value that cannot be converted
    is returned exactly as it was received.
    '''
    try:
        return (int(overs)-1)+0.6
    except Exception:
        return overs

In [None]:
from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging

# NER model passed as the second argument to comm_text_drop_catches in the cells below
allen_nlp_predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/ner-elmo.2021-02-12.tar.gz")

In [None]:
# Register every entry of the 'scoring_zones' column as an F-POS pattern
fielding_df=pd.read_excel('../Assignment/data/cricketEntities.xlsx')
fielding_patterns=list(map(lambda zone:{"label":"F-POS","pattern":zone},fielding_df['scoring_zones'].tolist()))
ruler.add_patterns(fielding_patterns)

In [None]:
def comm_text_drop_catches(comm_text,allen_nlp_predictor):
    '''Pull bowler, batsman, fielder and fielding position out of one commentary line.

    Parameters
    ----------
    comm_text : str
        Commentary text; the part before the first comma is read as
        "<bowler> to <batsman>".
    allen_nlp_predictor
        Object whose ``predict(sentence=...)`` result exposes parallel
        ``'words'`` and ``'tags'`` sequences. Tokens whose tag contains
        ``'PER'`` are treated as pieces of person names.

    Returns
    -------
    dict
        Keys ``'bowler'``, ``'batsman'``, ``'fielder'`` and
        ``'fielding_position'``, each holding a one-element list. Names are
        lower-cased, several fielders or positions are comma-joined, and a
        value that could not be determined is the empty string.

    Notes
    -----
    Fielding positions are the ``F-POS`` entities found by the
    notebook-level spaCy pipeline ``nlp``.
    '''
    ##Bowler and batsman come from the "<bowler> to <batsman>" header
    try:
        header=comm_text.split(',')[0]
    except (AttributeError,TypeError):
        header=''
    bowler,separator,batsman=header.partition(' to ')
    if separator:
        bowler=bowler.strip().lower()
        batsman=batsman.replace(' to ','').strip().lower()
    else:
        ##No " to " in the header: leave both blank rather than slicing on find()'s -1
        bowler,batsman='',''

    ##Use Allen NLP named entities to recognize fielders in play
    ner_results=allen_nlp_predictor.predict(sentence=comm_text)
    ent_dict=[(tag,word.lower()) for tag,word in zip(ner_results['tags'],ner_results['words']) if 'PER' in tag]
    final=len(ent_dict)-1

    players=[]
    ##Three-token names: an I-PER token joined with the person tokens on either side
    for n,(tag,word) in enumerate(ent_dict):
        if 'I-PER' in tag and 0<n<final:
            name=' '.join((ent_dict[n-1][1],word,ent_dict[n+1][1]))
            if name not in players:
                players.append(name)

    ##Two-token names: a B-PER token joined with the person token after it
    for n,(tag,word) in enumerate(ent_dict):
        if 'B-PER' in tag and n<final:
            name=(word+' '+ent_dict[n+1][1]).strip()
            if name not in players:
                players.append(name)

    ##Single-token names (U-PER), unless already contained in a collected name
    for tag,word in ent_dict:
        if 'U-PER' in tag and not any(word in name for name in players):
            players.append(word)

    ##Ensure no repeat of substrings (e.g. "van der" inside "van der dussen")
    players=[p for p in players if sum(p in other for other in players)<=1]

    ##Eliminate the batsman since he cannot be the fielder
    players=[p for p in players if p!=batsman]

    if players==[bowler]:
        ##If no other fielder other than the bowler is present then it means fielder is the bowler
        fielder=bowler
    else:
        fielder=','.join(p for p in players if p!=bowler)

    ##Use spacy NLP to get custom fielding positions (first-seen order, no repeats)
    doc = nlp(comm_text)
    fielding_positions=','.join(dict.fromkeys(ent.text for ent in doc.ents if ent.label_=='F-POS'))

    return {'bowler':[bowler],
            'batsman':[batsman],
            'fielder':[fielder],
            'fielding_position':[fielding_positions]}


In [None]:
comm_text="Hasan Mahmud to Kjorn Ottley, 2 runs, that was in the air for a while, but it just evades Tamim Iqbal at mid-on. Kjorn Ottley got hurried due to the pace on this short ball. He went for a pull and got a top-edge as Tamim tried his best to sprint across and gets his hands to it, but couldn't"

In [None]:
comm_text_drop_catches(comm_text,allen_nlp_predictor)

In [None]:
##Extract match commentary text to get highlights and dropped catches

In [None]:
cricbuzz_url='https://www.cricbuzz.com/cricket-match-highlights/32257/ind-vs-eng-2nd-test-england-tour-of-india-2021'

In [None]:
def get_dropped_soup(cricbuzz_url,link_text="Dropped Catches"):
    '''Scrape one highlights tab of a Cricbuzz match page for every innings.

    Parameters
    ----------
    cricbuzz_url : str
        Address passed to ``driver.get``.
    link_text : str, optional
        Visible text of the highlights link to click after an innings link
        has been selected. Defaults to "Dropped Catches".

    Returns
    -------
    tuple
        ``(match_name, venue, link_texts, key_events_soup)``. ``match_name``
        and ``venue`` are both '' when either cannot be read from the page.
        ``link_texts`` holds the navigation labels containing 'Inns', and
        ``key_events_soup`` holds one BeautifulSoup snapshot per label, in
        the same order.

    Notes
    -----
    Uses the notebook-level names ``webdriver``, ``chromedriver`` and
    ``chrome_options``, whose definitions are commented out in the first cell.
    NOTE(review): ``executable_path`` and ``find_element_by_link_text`` are
    presumably unavailable on recent Selenium releases - confirm the
    installed version.
    '''
    driver = webdriver.Chrome(executable_path=chromedriver,options=chrome_options)
    key_events_soup=[]
    try:
        driver.get(cricbuzz_url)
        cricbuzz_soup2= BeautifulSoup(driver.page_source, 'html.parser')
        try:
            match_name=[h.text for h in cricbuzz_soup2.find_all('h1',{'class':'cb-nav-hdr cb-font-18 line-ht24'})][0].strip().split('-')[0].strip()
            venue=[re.sub(r'\W+',' ', c.text ) for c in cricbuzz_soup2.find_all('a',{'itemprop':'location'})][0].strip()
        except Exception:
            match_name,venue='',''

        ##Get all links that need to be clicked in web page
        link_texts=[]
        for cs in cricbuzz_soup2.find_all('a',{'class':'cb-nav-pill-1'}):
            ##Most navigation bars have innings in the list
            if ('Inns' in cs.text):
                link_texts.append(cs.text.strip())

        for innings_label in link_texts:
            try:
                driver.find_element_by_link_text(innings_label).click()
                time.sleep(3)
                driver.find_element_by_link_text(link_text).click()
            except Exception as ex:
                ##Best effort: report the failure but still take a snapshot so
                ##key_events_soup stays aligned with link_texts
                print ("Could not open '{}' -> '{}': {}".format(innings_label,link_text,ex))

            ##Give it sufficient time to scrape the full highlights content
            time.sleep(8)
            key_events_soup.append(BeautifulSoup(driver.page_source, 'html.parser'))

        print ("Full scraping of key events complete...")
    finally:
        ##Close the browser even when something above raises
        driver.quit()

    return match_name,venue,link_texts,key_events_soup

In [None]:
def process_dropped_soup(innings_list,innings_soup):
    '''Combine the dropped-catch commentary of every innings into one table.

    For each position in innings_list, the soup at the same position in
    innings_soup is searched for over-number <div> elements and commentary
    <p> elements. The result has the columns 'innings', 'overs' and
    'commentary_text'; each innings is stacked above those processed
    before it.
    '''
    over_selector={'class':'cb-mat-mnu-wrp cb-ovr-num ng-binding ng-scope'}
    text_selector={'class':'cb-com-ln ng-binding cb-col cb-col-90'}
    stacked=pd.DataFrame()
    for position,label in enumerate(innings_list):
        page=innings_soup[position]
        try:
            overs=[]
            for node in page.find_all('div',over_selector):
                overs.append(check_overs_format(node.text))
            lines=[]
            for node in page.find_all('p',text_selector):
                lines.append(node.text.strip())
        except Exception:
            # Any failure while reading this innings leaves it empty
            overs,lines=[],[]

        frame=pd.DataFrame({'innings':label,'overs':overs,'commentary_text':lines})
        stacked=pd.concat([frame,stacked])

    return stacked

In [None]:
match_name,venue,innings_list,innings_soup=get_dropped_soup(cricbuzz_url)

In [None]:
match_dropped_df=process_dropped_soup(innings_list,innings_soup)

In [None]:
match_dropped_df['venue']=venue
match_dropped_df['match_name']=match_name

In [None]:
match_dropped_df

In [None]:
# Run the commentary parser over every row; each cell becomes the dict it returns
match_dropped_df['comm_parse']=match_dropped_df['commentary_text'].map(lambda text:comm_text_drop_catches(text,allen_nlp_predictor))

In [None]:
# Unwrap the one-element lists of the parsed dicts into plain columns
match_dropped_df['bowler']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['bowler'] if len(parsed['bowler'])!=1 else parsed['bowler'][0])
match_dropped_df['batsman']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['batsman'] if len(parsed['batsman'])!=1 else parsed['batsman'][0])
match_dropped_df['fielder']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['fielder'] if len(parsed['fielder'])!=1 else parsed['fielder'][0])
match_dropped_df['fielding_position']=match_dropped_df['comm_parse'].map(lambda parsed:parsed['fielding_position'] if len(parsed['fielding_position'])!=1 else parsed['fielding_position'][0])

In [None]:
match_dropped_df

### RAW

In [None]:
dropped_button_txt="Dropped Catches"


In [None]:
cricbuzz_url='https://www.cricbuzz.com/cricket-match-highlights/35627/kkr-vs-mi-5th-match-indian-premier-league-2021'

In [None]:
def parse_cricbuzz_scorecard(cricbuzz_match_url):
    '''Open a Cricbuzz scorecard page in Chrome and return its HTML as soup.

    Parameters
    ----------
    cricbuzz_match_url : str
        Address passed to ``driver.get``.

    Returns
    -------
    BeautifulSoup
        ``driver.page_source`` parsed with the ``html.parser`` backend.

    Notes
    -----
    Unlike the other scraping helpers in this notebook, no ``options`` are
    passed to Chrome here. Uses the notebook-level names ``webdriver`` and
    ``chromedriver``, whose definitions are commented out in the first cell.
    '''
    driver = webdriver.Chrome(executable_path=chromedriver)
    try:
        driver.get(cricbuzz_match_url)
        cricbuzz_match_soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Close the browser even when navigation or parsing raises
        driver.quit()
    return cricbuzz_match_soup

# NOTE(review): `cricbuzz_match_url` is not assigned in any visible cell of this
# notebook - confirm which scorecard URL was intended before running this line
cricbuzz_match_soup=parse_cricbuzz_scorecard(cricbuzz_match_url)

In [None]:
comm_text.split(',')[0].split('to')[0].strip()

In [None]:
text=''.join(comm_text.split(',')[1:]).strip()

In [None]:
comm_text.split(',')[0].split('to')[1].strip()

In [None]:
# Inline run of the dropped-catch scrape, limited to the '1st Inns' links
driver = webdriver.Chrome(executable_path=chromedriver,options=chrome_options)
#driver = webdriver.Chrome(executable_path=chromedriver)
key_events_soup=[]
try:
    driver.get(cricbuzz_url)
    cricbuzz_soup2= BeautifulSoup(driver.page_source, 'html.parser')

    ##Get all links that need to be clicked in web page
    link_texts=[]
    for cs in cricbuzz_soup2.find_all('a',{'class':'cb-nav-pill-1'}):
        if ('1st Inns' in cs.text):
            link_texts.append(cs.text.strip())

    for l in link_texts:
        try:
            loadMoreButton=driver.find_element_by_link_text(l)
            loadMoreButton.click()
            time.sleep(3)
            playerButton=driver.find_element_by_link_text("Dropped Catches")
            playerButton.click()
        except Exception as ex:
            ##Best effort: a missing link still gets a snapshot of the current page
            pass

        ##Give it sufficient time to scrape the full highlights content
        time.sleep(8)
        soup=BeautifulSoup(driver.page_source, 'html.parser')
        key_events_soup.append(soup)

    print ("Full scraping of key events complete...")
finally:
    ##Close the browser even when something above raises
    driver.quit()

In [None]:
match_name=[h.text for h in cricbuzz_soup2.find_all('h1',{'class':'cb-nav-hdr cb-font-18 line-ht24'})][0].strip().split('-')[0].strip()
            

In [None]:
# Venue is the first location link, with runs of non-word characters collapsed to a space
# (raw string so that \W reaches the regex engine without an invalid-escape warning)
venue=[re.sub(r'\W+',' ', c.text ) for c in cricbuzz_soup2.find_all('a',{'itemprop':'location'})][0].strip()

In [None]:
match_name,venue

In [None]:
over_number=[check_overs_format(k1.text) for k1 in key_events_soup[0].find_all('div',{'class':'cb-mat-mnu-wrp cb-ovr-num ng-binding ng-scope'})]

In [None]:
process_dropped_soup(link_texts,key_events_soup)

In [None]:
# Scan the children of the second snapshot for over numbers and commentary text
for k in key_events_soup[1]:
    try:
        over_number=[check_overs_format(k1.text) for k1 in k.find_all('div',{'class':'cb-mat-mnu-wrp cb-ovr-num ng-binding ng-scope'})]
        over_comm_text=[k2.text.strip() for k2 in k.find_all('p',{'class':'cb-com-ln ng-binding cb-col cb-col-90'})]
    except AttributeError:
        # presumably raised by children that are bare text rather than tags and
        # so have no find_all - skip them and keep the last values found (TODO confirm)
        continue

In [None]:
pd.DataFrame({'innings':'PBKS 1st inns','overs':over_number,'commentary_text':over_comm_text})

In [None]:
comm_text="Unadkat to Ruturaj Gaikwad, FOUR, almost a wicket first ball! Gaikwad escapes a golden duck. Goes chasing at this away-angler, away from the body and the poke results in a thick edge. Tewatia at gully dives across, gets his fingertips but it doesn't stick. Races away past short third man who chases slugglishly and fails to prevent the boundary. What a start for Unadkat and RR!"

In [None]:
doc = nlp(comm_text)
print([(ent.text, ent.label_) for ent in doc.ents])

In [None]:
ner_results=allen_nlp_predictor.predict(
    sentence=comm_text)

In [None]:
ent_dict=([(ner_results['tags'][n],word) for n,word in enumerate(ner_results['words']) if 'PER' in ner_results['tags'][n]])

In [None]:
ent_dict

In [None]:
##Handle B-PER I-PER and U-PER logic
players=[(ent_dict[n-1][1].lower()+' '+e[1].lower()+' '+ent_dict[n+1][1].lower()) for n,e in enumerate(ent_dict) if 'I-PER' in e[0]]

##Handle B-PER U-PER logic
p1=[(e[1]+' '+ent_dict[n+1][1]).strip() for n,e in enumerate(ent_dict) if 'B-PER' in e[0]]
b_dummy=[players.append(p) for p in p1 if p not in players]

##Handle U-PER logic
p2=[e[1] for n,e in enumerate(ent_dict) if ('U-PER' in e[0])&(len(list(filter(lambda x: e[1] in x, players)))==0)]
a_dummy=[players.append(p) for p in p2 if p not in players]

In [None]:
players

In [None]:
[p for p in players if 'U-PER' in e[0]]

In [None]:
batsman='van der dussen'
bowler='Haris Rauf'

In [None]:
##If no other fielder other than the bowler is present then it means fielder is the bowler
players=list(set(players))
##Eliminate the batsman since he cannot be the fielder
players=[p for p in players if p not in [batsman]]
if (len(players)==1)&(bowler in players):
    fielder=bowler
else:
    fielder=','.join(set([p for p in players if p not in [bowler]]))


fielder=','.join(set([p for p in players if p not in [bowler,batsman]]))

In [None]:
fielder

In [None]:
l=[(e[1]+' '+ent_dict[n+1][1]).strip() for n,e in enumerate(ent_dict) if 'B-PER' in e[0]]

In [None]:
l

In [None]:
res = list(filter(lambda x: 'Morris' in x, l))

res

In [None]:
[l1 for l1 in l if 'Sakariya' in l1]

In [None]:
###Check for I-pers

In [None]:
players=[(ent_dict[n-1][1].lower()+' '+e[1].lower()+' '+ent_dict[n+1][1].lower()) for n,e in enumerate(ent_dict) if 'I-PER' in e[0]]

In [None]:
b=[players.append(p) for p in p1 if p not in players]


In [None]:
p1=[(e[1]+' '+ent_dict[n+1][1]).strip() for n,e in enumerate(ent_dict) if 'B-PER' in e[0]]

In [None]:
players[5]='van der'

In [None]:
players

In [None]:
p2=[e[1] for n,e in enumerate(ent_dict) if ('U-PER' in e[0])&(len(list(filter(lambda x: e[1] in x, players)))==0)]

In [None]:
[p for n,p in enumerate(players) if len(list(filter(lambda x:p in x,players)))<=1]

In [None]:
a=[players.append(p) for p in p2 if p not in players]


In [None]:
players

In [None]:

for n,e in enumerate(ent_dict):
    if 'B-PER' in e[0]:
        
        players.append(e[1]+' '+ent_dict[n+1][1])
        
    if 'U-PER' in e[0]:
        players.append(e[1])
        set([e[1] for n,e in enumerate(ent_dict) if ('U-PER' in e[0])&(len(list(filter(lambda x: e[1] in x, l)))==0)])

In [None]:
players=list(set(players))

In [None]:
players

In [None]:
[p for p in players if 'Sakariya' in p]

In [None]:
fielder=','.join(set([p for p in players if p not in [bowler,batsman]]))

In [None]:
s

In [None]:
bowler='Russell'

In [None]:
players=[p for p in players if p not in [batsman]]

In [None]:
##Fields to extract from each commentary line:
## - Action performer (bowler)
## - Action receiver (batsman)
## - Action performer (fielder)
## - Fielding position (fielding entity)