In [2]:
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json
import re

In [3]:
# Disable pandas' display truncation so every column and every row of a
# DataFrame is rendered when it is shown in a cell output.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [4]:
# Build the URL of the Hawkeye (ball-tracking) stats endpoint of the cricket API
def get_url_hawkeye(match_id, use_ipl_host=False):
    """Return the ball-tracking ("uds/stats") endpoint URL for a fixture.

    Building the string can never fail, so no try/except is needed here;
    the alternative host is selected explicitly instead.

    Parameters
    ----------
    match_id : int or str
        Fixture id, interpolated into the URL path as-is.
    use_ipl_host : bool, default False
        When True, build the URL on the IPL host instead of the default
        ICC host.

    Returns
    -------
    str
        The endpoint URL (no request is made).
    """
    host = 'cricketapi.platform.iplt20.com' if use_ipl_host else 'cricketapi-icc.pulselive.com'
    return f'https://{host}//fixtures/{match_id}/uds/stats'

In [5]:
# Build the URL of the scoring (match metadata) endpoint of the cricket API
def get_url_metadata(match_id, use_ipl_host=False):
    """Return the "scoring" endpoint URL for a fixture.

    Building the string can never fail, so no try/except is needed here;
    the alternative host is selected explicitly instead.

    Parameters
    ----------
    match_id : int or str
        Fixture id, interpolated into the URL path as-is.
    use_ipl_host : bool, default False
        When True, build the URL on the IPL host instead of the default
        ICC host.

    Returns
    -------
    str
        The endpoint URL (no request is made).
    """
    host = 'cricketapi.platform.iplt20.com' if use_ipl_host else 'cricketapi-icc.pulselive.com'
    return f'https://{host}//fixtures/{match_id}/scoring'

In [6]:
# Download a URL and return its body as text after a pass through BeautifulSoup
def get_soup_from_url(url):
    """Fetch ``url`` and return the response body as a string.

    The body is parsed with BeautifulSoup's ``html.parser`` and converted
    back to ``str``; callers in this notebook feed the result to
    ``json.loads``.

    Parameters
    ----------
    url : str
        Address to download.

    Returns
    -------
    str or int
        The parsed document as text, or the sentinel ``-1`` when the server
        answers with an HTTP error (a message is printed in that case).
        Other failures, such as an unreachable host, are not caught here.
    """
    try:
        # The context manager closes the HTTP response even if read() fails,
        # so the connection is not leaked.
        with urlopen(url) as response:
            html = response.read()
    except HTTPError:
        print("Link Cannot be Reached", url)
        return -1

    soup = BeautifulSoup(html, "html.parser")
    return str(soup)

In [7]:
def get_tracking_df_from_matchid(match_id):
    """Download and tidy the ball-by-ball tracking data for one match.

    Each entry of the endpoint's ``data`` list maps a ball key
    (``"<innings>.<over>.<ball>"``) to a comma-separated record; the record
    is split into the named columns below and the numeric tracking fields
    are converted to numbers.

    Parameters
    ----------
    match_id : int or str
        Fixture id passed to ``get_url_hawkeye``.

    Returns
    -------
    pandas.DataFrame or None
        One row per ball, with ``match_id``, ``match_inn``, ``over_ball``,
        ``over_num`` and ``deviation`` appended. ``None`` is returned when
        the download or parsing fails (a message is printed), when there
        are no rows, or when every tracking column holds a single distinct
        value (i.e. no real tracking data was recorded).
    """
    columns = ['over', 'ball_num', 'batter', 'non-striker',
               'bowler', 'speed', 'catcher', 'dismissal_desc',
               'total_extras', 'runs', 'bowler_extras', 'extra_type',
               'otw', 'length', 'line', 'line_at_stumps',
               'height_at_stumps', 'shot_dist0', 'shot_dist1', 'blank2',
               'blank3', 'blank4']
    tracking_columns = ['speed', 'length', 'line', 'line_at_stumps', 'height_at_stumps']
    url = get_url_hawkeye(match_id)
    failure_message = f"couldn't retrieve data for match {match_id}. Please check {url} to debug"

    try:
        raw = get_soup_from_url(url)
        if raw == -1:
            # get_soup_from_url signals an HTTP error with the sentinel -1.
            print(failure_message)
            return None

        rows = [[ball_key] + values.split(',')
                for entry in json.loads(raw)['data']
                for ball_key, values in entry.items()]
        df = pd.DataFrame(rows, columns=columns)
        df['match_id'] = str(match_id)

        # Constant tracking columns mean the feed carries placeholder values only.
        if len(df) == 0 or all(df[col].nunique() == 1 for col in tracking_columns):
            return None

        # Ball key layout is "<innings>.<over>.<ball>"; a malformed key
        # yields NaN for that row instead of discarding the whole match.
        key_parts = df['over'].astype(str).str.split('.')
        df['match_inn'] = key_parts.str[0]
        df['over_ball'] = pd.to_numeric(key_parts.str[2], errors='coerce')
        df['over_num'] = pd.to_numeric(key_parts.str[1], errors='coerce')
        df = df.drop(columns='over')

        # Scale by 3.6 (presumably m/s -> km/h; confirm against the API);
        # negative speeds are treated as missing.
        speed = pd.to_numeric(df['speed'], errors='coerce') * 3.6
        df['speed'] = speed.mask(speed < 0)

        for col in ['length', 'line', 'line_at_stumps', 'height_at_stumps']:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df['deviation'] = df['line_at_stumps'] - df['line']
        return df
    except Exception:
        # Deliberately best-effort for data problems, but KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        print(failure_message)
        return None

In [8]:
# Fetch the ball-tracking data for match 23469; the function returns None
# (after printing a message) when the data cannot be retrieved or is empty.
wtc=get_tracking_df_from_matchid(23469)

In [10]:
def get_metadata_df_from_matchid(match_id):
    """Download the scoring payload of a match and split it into three tables.

    Parameters
    ----------
    match_id : int or str
        Fixture id passed to ``get_url_metadata``.

    Returns
    -------
    dict
        ``'match_metadata'``: single-row DataFrame with selected
        ``matchInfo`` fields, ids of venue and wicket keepers, team names
        and the parsed toss result;
        ``'player_metadata'``: players of both teams (duplicates removed)
        with ``batter_hand`` / ``bowler_hand`` columns ('R' or 'L');
        ``'venue_metadata'``: single-row DataFrame of the venue record.

    Raises
    ------
    TypeError
        From ``json.loads`` when the download failed and
        ``get_soup_from_url`` returned its ``-1`` sentinel.
    KeyError
        When a required part of the payload (``matchInfo``, ``venue``,
        ``teams``, team names, player handedness flags) is missing.
    """
    match_fields = ['matchDate', 'matchEndDate', 'isLimitedOvers',
                    'description', 'matchType', 'tournamentLabel']

    m = json.loads(get_soup_from_url(get_url_metadata(match_id)))
    info = m['matchInfo']

    match_df = pd.DataFrame([{k: v for k, v in info.items() if k in match_fields}])
    match_df['match_id'] = match_id

    # Toss information is optional; fall back to an empty string.
    try:
        toss_elected = info['additionalInfo']['toss.elected']
    except (KeyError, TypeError):
        toss_elected = ''

    match_df['venue_id'] = info['venue']['id']

    # Wicket keepers are optional; if either one is missing both are blanked.
    try:
        team1_wk = info['teams'][0]['wicketKeeper']['id']
        team2_wk = info['teams'][1]['wicketKeeper']['id']
    except (KeyError, IndexError, TypeError):
        team1_wk = ''
        team2_wk = ''
    match_df['team1_wk'] = team1_wk
    match_df['team2_wk'] = team2_wk
    match_df['team1'] = info['teams'][0]['team']['fullName']
    match_df['team2'] = info['teams'][1]['team']['fullName']

    venue_df = pd.DataFrame([info['venue']])
    player_df = pd.concat([pd.DataFrame(info['teams'][0]['players']),
                           pd.DataFrame(info['teams'][1]['players'])]).drop_duplicates()

    player_df['batter_hand'] = player_df['rightHandedBat'].map(lambda flag: 'R' if flag else 'L')
    player_df['bowler_hand'] = player_df['rightArmedBowl'].map(lambda flag: 'R' if flag else 'L')

    # Prefix the match type with 'W_' for tournaments whose label mentions women.
    match_df['matchType'] = match_df.apply(
        lambda row: 'W_' + row['matchType']
        if re.search('women', row['tournamentLabel'].lower()) else row['matchType'],
        axis=1)

    # The toss text has the winner before the first comma and the decision
    # as its last word; 'bowl' is normalised to 'field' and anything other
    # than 'field'/'bat' is blanked.
    toss_text = str(toss_elected)
    toss_decision = toss_text.lower().strip('.').split(' ')[-1]
    if toss_decision == 'bowl':
        toss_decision = 'field'
    if toss_decision not in ['field', 'bat']:
        toss_decision = ''
    match_df['toss_winner'] = toss_text.strip().lower().split(',')[0]
    match_df['toss_decision'] = toss_decision

    return {'match_metadata': match_df,
            'player_metadata': player_df,
            'venue_metadata': venue_df}


In [11]:
# Fetch the metadata for the same match; the result is a dict holding the
# 'match_metadata', 'player_metadata' and 'venue_metadata' DataFrames.
wtc_meta=get_metadata_df_from_matchid(23469)

In [12]:
# Preview the first 40 rows of the tracking DataFrame.
wtc.head(40)

Unnamed: 0,ball_num,batter,non-striker,bowler,speed,catcher,dismissal_desc,total_extras,runs,bowler_extras,extra_type,otw,length,line,line_at_stumps,height_at_stumps,shot_dist0,shot_dist1,blank2,blank3,blank4,match_id,match_inn,over_ball,over_num,deviation
0,1,107,179,3840,131.8032,-1,,2,2,2,,y,6.214,-0.061,0.182,0.574,72,37,,,,23469,1,1,1,0.243
1,2,107,179,3840,139.0428,-1,,0,0,0,,y,7.023,-0.259,-0.021,0.759,0,0,,,,23469,1,2,1,0.238
2,3,107,179,3840,142.2108,-1,,0,0,0,,y,9.435,-0.435,-0.051,0.924,0,0,,,,23469,1,3,1,0.384
3,4,107,179,3840,138.366,-1,,2,2,2,,y,7.585,-0.204,0.173,0.781,69,19,,,,23469,1,4,1,0.377
4,5,107,179,3840,140.9256,-1,,0,0,0,,y,8.482,-0.462,-0.148,0.796,0,0,,,,23469,1,5,1,0.314
5,6,107,179,3840,143.82,-1,,1,1,1,,y,7.998,-0.359,0.065,0.795,67,19,,,,23469,1,6,1,0.424
6,1,107,179,1616,128.304,-1,,1,1,1,,y,3.099,-0.022,0.232,0.349,56,51,,,,23469,1,1,2,0.254
7,2,179,107,1616,131.9508,-1,,0,0,0,,y,7.603,-0.138,0.328,0.806,0,0,,,,23469,1,2,2,0.466
8,3,179,107,1616,137.9448,-1,,0,0,0,,y,5.359,-0.169,0.037,0.752,0,0,,,,23469,1,3,2,0.206
9,4,179,107,1616,129.114,-1,,0,0,0,,y,6.465,-0.424,-0.128,0.612,0,0,,,,23469,1,4,2,0.296


In [15]:
# Display the match-level metadata table returned for the match.
wtc_meta['match_metadata']

Unnamed: 0,matchDate,matchEndDate,isLimitedOvers,description,matchType,tournamentLabel,match_id,venue_id,team1_wk,team2_wk,team1,team2,toss_winner,toss_decision
0,2021-04-09T19:30:00+0530,2021-04-09T23:00:00+0530,True,Match 1,IPLT20,IPL 2021,23469,1,2975,233,Mumbai Indians,Royal Challengers Bangalore,royal challengers bangalore,field
