In [14]:
import os
import json
import pandas as pd

def calculate_fantasy_points_from_json(json_file):
    with open(json_file, 'r') as f:
        match_data = json.load(f)

    # Extract match date
    match_date = match_data.get("info", {}).get("dates", ["Unknown"])[0]

    # Initialize fantasy points
    players = {player: 0 for team in match_data['info']['players'].values() for player in team}
    batter_scores = {player: 0 for player in players}
    bowler_wickets = {player: 0 for player in players}
    fielder_catches = {player: 0 for player in players}
    batter_balls = {player: 0 for player in players}
    bowler_runs = {player: 0 for player in players}
    bowler_balls = {player: 0 for player in players}

    # Process deliveries
    for inning in match_data.get('innings', []):
        for over in inning['overs']:
            for delivery in over['deliveries']:
                batter = delivery['batter']
                bowler = delivery['bowler']
                runs = delivery['runs']['batter']
                wickets = delivery.get('wickets', [])

                # Skip missing players
                if batter not in players:
                    continue
                
                # Batting points
                players[batter] += BAT_POINT_RUN * runs
                batter_scores[batter] += runs
                batter_balls[batter] += 1
                if runs >= 6:
                    players[batter] += BAT_POINT_SIX
                elif runs >= 4:
                    players[batter] += BAT_POINT_BOUNDARY

                if any(wicket['player_out'] == batter for wicket in wickets) and batter_scores[batter] == 0:
                    players[batter] += BAT_POINT_DUCK

                # Bowling and fielding points
                if bowler in players:
                    for wicket in wickets:
                        if wicket['kind'] != 'run out':
                            players[bowler] += BOWL_POINT_WICKET
                            bowler_wickets[bowler] += 1
                            if wicket['kind'] in ['lbw', 'bowled']:
                                players[bowler] += BOWL_POINT_BONUS

                for wicket in wickets:
                    for fielder in wicket.get('fielders', []):
                        field_name = fielder['name']
                        if field_name in players:
                            if wicket['kind'] == 'caught':
                                players[field_name] += FIELD_POINT_CATCH
                                fielder_catches[field_name] += 1
                            elif wicket['kind'] == 'stumped':
                                players[field_name] += FIELD_POINT_STUMPING
                            elif wicket['kind'] == 'run out':
                                if 'direct_hit' in fielder:
                                    players[field_name] += FIELD_POINT_RUN_OUT_DIRECT
                                else:
                                    players[field_name] += FIELD_POINT_RUN_OUT

                # Update bowler stats
                if bowler in players:
                    bowler_runs[bowler] += runs
                    bowler_balls[bowler] += 1

    # Additional scoring
    for player in batter_scores:
        if batter_scores[player] >= 100:
            players[player] += BAT_POINT_CENTURY
        elif batter_scores[player] >= 50:
            players[player] += BAT_POINT_HALF_CENTURY

    for player in fielder_catches:
        if fielder_catches[player] >= 3:
            players[player] += FIELD_POINT_3_CATCH_BONUS

    for player in bowler_wickets:
        if bowler_wickets[player] >= 5:
            players[player] += BOWL_POINT_5_WICKET_BONUS
        elif bowler_wickets[player] >= 4:
            players[player] += BOWL_POINT_4_WICKET_BONUS

    for player in bowler_runs:
        if bowler_balls[player] >= 30:
            economy_rate = bowler_runs[player] / (bowler_balls[player] / 6)
            if economy_rate < 2.5:
                players[player] += ECONOMY_POINT_2_5
            elif economy_rate < 3.5:
                players[player] += ECONOMY_POINT_3_49
            elif economy_rate < 4.5:
                players[player] += ECONOMY_POINT_4_5
            elif economy_rate >= 8 and economy_rate < 9:
                players[player] += ECONOMY_POINT_8_9
            elif economy_rate >= 9:
                players[player] += ECONOMY_POINT_ABOVE_9

    for player in batter_scores:
        if batter_balls[player] >= 20:
            strike_rate = (batter_scores[player] / batter_balls[player]) * 100
            if strike_rate > 140:
                players[player] += STRIKE_RATE_POINT_140
            elif strike_rate > 120:
                players[player] += STRIKE_RATE_POINT_120_140
            elif strike_rate > 100:
                players[player] += STRIKE_RATE_POINT_100_120
            elif strike_rate >= 40:
                players[player] += STRIKE_RATE_POINT_40_50
            elif strike_rate >= 30:
                players[player] += STRIKE_RATE_POINT_30_40
            elif strike_rate < 30:
                players[player] += STRIKE_RATE_POINT_BELOW_30

    # Convert to DataFrame
    df = pd.DataFrame({
        "Player Name": list(players.keys()),
        "Fantasy Points": list(players.values()),
        "Match Date": match_date
    })

    return df

# Processing all match files
def process_all_matches(folders):
    dataframes = []

    for folder in folders:
        for root, _, files in os.walk(folder):
            for file in files:
                if file.endswith(".json"):  # Only process JSON files
                    file_path = os.path.join(root, file)
                    try:
                        df = calculate_fantasy_points_from_json(file_path)
                        dataframes.append(df)
                    except Exception as e:
                        print(f"Error processing {file_path}: {e}")

    # Combine all DataFrames into one
    final_df = pd.concat(dataframes, ignore_index=True)
    return final_df

# Example usage
folders = ["data1_json","data2_json","data3_json"]  # Replace with actual folder path
df = process_all_matches(folders)
df.to_csv("fantasy_points_data.csv", index=False)

In [12]:
df

Unnamed: 0,Player Name,Fantasy Points,Match Date
0,SP Fleming,7,2002-12-29
1,NJ Astle,85,2002-12-29
2,MS Sinclair,86,2002-12-29
3,CD McMillan,13,2002-12-29
4,L Vincent,38,2002-12-29
...,...,...,...
52683,AT Nidamanuru,37,2024-11-11
52684,T van der Gugten,18,2024-11-11
52685,RE van der Merwe,98,2024-11-11
52686,K Klein,77,2024-11-11
