In [1]:
import json
import os
import matplotlib.pyplot as plt
# Directory that holds the per-match JSON files read by the processing cell below.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
json_files_path = r'E:\SJ\cricket\analysis\IPL\json'

In [2]:
def process_season(season):
    """Convert a season label from the match JSON into an integer year.

    Labels without a '/' (e.g. ``'2015'`` or ``2015``) are converted with
    ``int``. Split-year labels are looked up in a fixed table; a split-year
    label that is not in the table raises ``KeyError``.
    """
    split_year_seasons = {'2020/21': 2020, '2007/08': 2008, '2009/10': 2010}
    # Plain single-year labels need no lookup.
    if '/' not in str(season):
        return int(season)
    return split_year_seasons[season]


In [None]:
# Structure of IPL json file
'''
FILE:
meta, info, innings
    meta:
    ['data_version', 'created', 'revision']

    info:
    ['balls_per_over', 'city', 'dates', 'event',
     'gender', 'match_type', 'officials', 'outcome',
     'overs', 'player_of_match',
     'players', 'registry', 'season', 'team_type', 'teams', 'toss', 'venue']
    
    innings: A list containing both the innings
        [0]: 
        ['team', 'overs', 'powerplays']
            overs: A list containing all the overs bowled in the innings
                [j]:
                ['over', 'deliveries']
                    over: no. of the over being bowled, j
                    deliveries: A list containing each and every delivery
                        [k]:
                        ['batter', 'bowler', 'non_striker', 'runs']
                        runs: breakdown of the delivery
                        ['batter', 'extras', 'total']
                            batter: Runs given to the batsman
                            extras: Runs scored as extras on that delivery
                            (0 if none); the per-type breakdown is in the
                            delivery's optional 'extras' dictionary
                            total: Total runs on that delivery
            powerplays: list containing all the powerplays, in the case of T20
            this only has one element
                [0]:
                ['from', 'to', 'type']
        
        [1]:
        ['team', 'overs', 'powerplays', 'target']
            target: Only the second innings has the target attr
            ['overs', 'runs']
'''


In [None]:
# Structure of Overs_dict

'''
overs_dict: 
    season_no.:
        team_name: This data is stored according to the team's bowling 
        and is a list of all the innings bowled by the team in that season
        [batting, innings, total, wickets, extras]
            batting: Name of the batting team
            innings: A list containing all the overs bowled by the team
            [i]: proxy over no., which contains all the over data as in the 
                JSON file, but has a few extra keys,
                like-
                runs: total no. of runs given in that over
                extras: no. of runs given because of wides & noballs
                wickets: no. of wickets lost in that over
                batters: dictionary of all the batsmen that faced a ball
                    in the ith over, like-
                    batter_name: runs scored by him
                wicket_list: list of dictionaries of wickets lost in the ith over
                    this dict has the following keys: [batsman,bowler, type of dismissal,
                    name of fielder if its a catch or run out]   
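            total: total runs conceded by the bowling team in the innings
            wickets: total wickets taken by the bowling team in the innings
            extras: total extras (per-over 'extras' summed) given in the innings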
'''


In [17]:
# Process data from all the JSON files.
# Builds overs_dict as: season -> bowling team -> list of innings bowled by
# that team, each innings being {'batting': <team name>, 'innings': [<over>, ...]}.
overs_dict = {}
nums = 0
for season in range(2008, 2023):
    overs_dict[season] = {}
# Sorted so the order of the per-team innings lists does not depend on the
# order in which the OS happens to list the directory.
for file in sorted(os.listdir(json_files_path)):
    # Ignore anything in the folder that is not a JSON match file.
    if not file.lower().endswith('.json'):
        continue
    nums += 1
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(os.path.join(json_files_path, file), encoding='utf-8') as f:
        match = json.load(f)

    season = process_season(match['info']['season'])
    # A season outside the pre-seeded 2008-2022 range gets its own bucket
    # instead of raising KeyError.
    season_data = overs_dict.setdefault(season, {})
    teams = set(match['info']['teams'])
    for inn in match['innings']:
        # The bowling team is whichever of the two teams is not batting.
        bowling_team = (teams - {inn['team']}).pop()
        innings = []
        match_data = {'batting': inn['team'],
                      'innings': innings,
                      }

        for over in inn['overs']:
            runs = 0
            extras = 0
            wickets = 0
            # The over dict loaded from the file is annotated in place with
            # the summary keys: batters, runs, extras, wickets, wicket_list.
            over['batters'] = {}
            for delivery in over['deliveries']:
                batter = delivery['batter']
                over['batters'][batter] = (over['batters'].get(batter, 0)
                                           + delivery['runs']['batter'])
                runs += delivery['runs']['total']
                # Count only the wide / no-ball component of the extras.
                # delivery['runs']['extras'] is the total for the ball, so it
                # would also include any byes/leg-byes run on a no-ball.
                delivery_extras = delivery.get('extras', {})
                extras += (delivery_extras.get('wides', 0)
                           + delivery_extras.get('noballs', 0))
                if 'wickets' in delivery:
                    wickets += 1
                    # The delivery's 'wickets' value is stored as-is, so
                    # 'wicket_list' only exists on overs in which a wicket fell.
                    over.setdefault('wicket_list', []).append(delivery['wickets'])
            over['runs'] = runs
            over['extras'] = extras
            over['wickets'] = wickets
            innings.append(over)
        season_data.setdefault(bowling_team, []).append(match_data)
print(f'{nums} files processed')


950 files processed


In [12]:
# Persist overs_dict to overs_data.json in the current working directory.
# JSON object keys are strings, so any integer season keys are written as strings.
with open('./overs_data.json', 'w') as file:
    json.dump(overs_dict, file)

In [4]:
# Load overs_dict back from overs_data.json.
# Season keys come back as strings, because JSON object keys are always strings.
with open('./overs_data.json', 'r') as file:
    overs_dict = json.load(file)

In [7]:
# Annotate every innings in overs_dict with whole-innings aggregates for the
# bowling team: runs given ('total'), wickets taken ('wickets') and
# extras given ('extras'), each summed over the per-over values.
for season, season_data in overs_dict.items():
    for team, matches in season_data.items():
        for match in matches:
            overs = match['innings']
            runs_total = sum(over['runs'] for over in overs)
            wickets_total = sum(over['wickets'] for over in overs)
            extras_total = sum(over['extras'] for over in overs)

            # setdefault: an aggregate that is already present is left untouched.
            match.setdefault('total', runs_total)
            match.setdefault('wickets', wickets_total)
            match.setdefault('extras', extras_total)
            