In [2]:
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json
import re

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [4]:
#Get scoring data from cricket api
def get_url_metadata(match_id):
    url = f'https://cricketapi.platform.iplt20.com//fixtures/{match_id}/scoring'
    return url

#Get the data in the parsed html format
def get_soup_from_url(url):
    try:
        html = urlopen(url).read()
    except HTTPError:
        print("Link Cannot be Reached", url)
        return -1
        
    #soup = BeautifulSoup(html,"lxml")
    soup = BeautifulSoup(html,"html.parser")
    return str(soup)

In [5]:
def get_metadata_df_from_matchid(match_id):
  m = json.loads(get_soup_from_url(get_url_metadata(match_id)))
  this_match = pd.DataFrame([{k: v for k,v in m['matchInfo'].items() if k in [
    'matchDate', 'matchEndDate','isLimitedOvers', 'description', 'matchType', 'tournamentLabel']}])
  this_match['match_id'] = match_id
  try:
      this_match['toss_elected'] = m['matchInfo']['additionalInfo']['toss.elected']
  except:
      this_match['toss_elected'] = ''
  this_match['venue_id'] = m['matchInfo']['venue']['id']
  try:
      this_match['team1_wk'] = m['matchInfo']['teams'][0]['wicketKeeper']['id']
      this_match['team2_wk'] = m['matchInfo']['teams'][1]['wicketKeeper']['id']
  except:
      this_match['team1_wk'] = ''
      this_match['team2_wk'] = ''
  this_match['team1'] = m['matchInfo']['teams'][0]['team']['fullName']
  this_match['team2'] = m['matchInfo']['teams'][1]['team']['fullName']
  match_df = this_match
  venue_df = pd.DataFrame([m['matchInfo']['venue']])
  player_df = pd.concat([pd.DataFrame(m['matchInfo']['teams'][0]['players']),
                         pd.DataFrame(m['matchInfo']['teams'][1]['players'])]).drop_duplicates()
  
  #venue_df.drop('coordinates',axis=1, inplace=True)
  player_df['batter_hand'] = player_df.rightHandedBat.apply(lambda x: 'R' if x else 'L')
  player_df['bowler_hand'] = player_df.rightArmedBowl.apply(lambda x: 'R' if x else 'L')
  match_df.matchType = match_df.apply(lambda x: 'W_' + x.matchType if 
               re.search('women', x.tournamentLabel.lower()) else x.matchType,
              axis=1)
  match_df['toss_winner'] = match_df.toss_elected.apply(lambda x: str(x).strip().lower().split(',')[0])
  match_df['toss_decision'] = match_df.toss_elected.apply(lambda x: str(x).lower().strip('.').split(' ')[-1])
  match_df['toss_decision'] = match_df.toss_decision.apply(lambda x: 'field' if str(x)=='bowl' else str(x))
  match_df['toss_decision'] = match_df.toss_decision.apply(lambda x: x if str(x) in ['field','bat'] else '')
  match_df.drop('toss_elected', axis=1, inplace=True)
  return {'match_metadata': match_df,
          'player_metadata': player_df,
          'venue_metadata': venue_df}

In [6]:
S_2012=pd.read_csv('S_2012.csv',header=None)
S_2013=pd.read_csv('S_2013.csv',header=None)
S_2014=pd.read_csv('S_2014.csv',header=None)
S_2015=pd.read_csv('S_2015.csv',header=None)
S_2016=pd.read_csv('S_2016.csv',header=None)
S_2017=pd.read_csv('S_2017.csv',header=None)
S_2018=pd.read_csv('S_2018.csv',header=None)
S_2019=pd.read_csv('S_2019.csv',header=None)
S_2020=pd.read_csv('S_2020.csv',header=None)
S_2021=pd.read_csv('S_2021.csv',header=None)

In [7]:
def get_player_metadata(season):
    df=[]
    for i in range(len(season)):
        ID=int(season.iloc[i])
        try:
            a=get_metadata_df_from_matchid(ID)
            a_player=a['player_metadata']
            df.append(a_player)
        except Exception:
            pass
    data_f=pd.concat(df)   
    final_player=data_f.drop_duplicates()
    final_player.drop(['rightArmedBowl', 'rightHandedBat'], axis=1, inplace=True)
    return(final_player)

In [8]:
players_2012=get_player_metadata(S_2012)
players_2013=get_player_metadata(S_2013)
players_2014=get_player_metadata(S_2014)
players_2015=get_player_metadata(S_2015)
players_2016=get_player_metadata(S_2016)
players_2017=get_player_metadata(S_2017)
players_2018=get_player_metadata(S_2018)
players_2019=get_player_metadata(S_2019)
players_2020=get_player_metadata(S_2020)
players_2021=get_player_metadata(S_2021)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [10]:
frames=[players_2012,players_2013,players_2014,players_2015,players_2016,players_2017,players_2018,players_2019,players_2020,players_2021]

In [11]:
players=pd.concat(frames)

In [12]:
players_final=players.drop_duplicates()

In [20]:
players_final.to_csv('players.csv',index=False)

In [21]:
players_2012

Unnamed: 0,id,fullName,shortName,nationality,dateOfBirth,bowlingStyle,batter_hand,bowler_hand
0,7,Murali Vijay,M Vijay,Indian,1984-04-01,,L,L
1,24,Faf du Plessis,F du Plessis,South African,1984-07-13,LEG_SPIN,R,R
2,12,Subramaniam Badrinath,S Badrinath,Indian,1980-08-30,,L,L
3,14,Suresh Raina,S Raina,Indian,1986-11-27,OFF_SPIN,L,R
4,1,MS Dhoni,MS Dhoni,Indian,1981-07-07,MEDIUM_SEAM,R,R
5,25,Dwayne Bravo,DJ Bravo,West Indian,1983-10-07,MEDIUM_SEAM,R,R
6,9,Ravindra Jadeja,R Jadeja,Indian,1988-12-06,ORTHODOX,L,L
7,26,Albie Morkel,A Morkel,South African,1981-06-10,,L,L
8,8,Ravichandran Ashwin,R Ashwin,Indian,1986-09-17,OFF_SPIN,R,R
9,10,Shadab Jakati,S Jakati,Indian,1980-11-27,,L,L


Unnamed: 0,id,fullName,shortName,nationality,dateOfBirth,rightArmedBowl,rightHandedBat,bowlingStyle,batter_hand,bowler_hand
0,7,Murali Vijay,M Vijay,Indian,1984-04-01,False,False,,L,L
1,24,Faf du Plessis,F du Plessis,South African,1984-07-13,True,True,LEG_SPIN,R,R
2,12,Subramaniam Badrinath,S Badrinath,Indian,1980-08-30,False,False,,L,L
3,14,Suresh Raina,S Raina,Indian,1986-11-27,True,False,OFF_SPIN,L,R
4,1,MS Dhoni,MS Dhoni,Indian,1981-07-07,True,True,MEDIUM_SEAM,R,R
5,25,Dwayne Bravo,DJ Bravo,West Indian,1983-10-07,True,True,MEDIUM_SEAM,R,R
6,9,Ravindra Jadeja,R Jadeja,Indian,1988-12-06,False,False,ORTHODOX,L,L
7,26,Albie Morkel,A Morkel,South African,1981-06-10,False,False,,L,L
8,8,Ravichandran Ashwin,R Ashwin,Indian,1986-09-17,True,True,OFF_SPIN,R,R
9,10,Shadab Jakati,S Jakati,Indian,1980-11-27,False,False,,L,L
