In [2]:
import os
import pandas as pd

def extract_unique_stadiums(dataset_base_dir):
    unique_stadiums = set()

    for year in range(2015, 2025):
        dataset_year_dir = os.path.join(dataset_base_dir, str(year))

        if not os.path.exists(dataset_year_dir):
            print(f"Skipping {year} - Dataset folder not found")
            continue

        for csv_file in os.listdir(dataset_year_dir):
            if not csv_file.endswith('.csv'):
                continue

            file_path = os.path.join(dataset_year_dir, csv_file)
            df = pd.read_csv(file_path)

            if 'stadium' in df.columns:
                unique_stadiums.update(df['stadium'].dropna().unique())

    return unique_stadiums

if __name__ == '__main__':
    dataset_base_dir = 'dataset'
    unique_stadiums = extract_unique_stadiums(dataset_base_dir)
    print("Unique Stadiums:")
    for stadium in sorted(unique_stadiums):
        print(stadium)


Unique Stadiums:
Andhra Cricket Association-Visakhapatnam District Cricket Association Stadium
Arun Jaitley Stadium
Barsapara Cricket Stadium
Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium
Brabourne Stadium
Dr DY Patil Sports Academy
Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium
Dubai International Cricket Stadium
Eden Gardens
Feroz Shah Kotla
Green Park
Himachal Pradesh Cricket Association Stadium
Holkar Cricket Stadium
JSCA International Stadium Complex
M Chinnaswamy Stadium
MA Chidambaram Stadium
Maharaja Yadavindra Singh International Cricket Stadium
Maharashtra Cricket Association Stadium
Narendra Modi Stadium
Punjab Cricket Association IS Bindra Stadium
Punjab Cricket Association Stadium
Rajiv Gandhi International Stadium
Sardar Patel Stadium
Saurashtra Cricket Association Stadium
Sawai Mansingh Stadium
Shaheed Veer Narayan Singh International Stadium
Sharjah Cricket Stadium
Sheikh Zayed Stadium
Wankhede Stadium
Zayed Cricket Stadium


In [6]:
import os
import pandas as pd

def update_stadium_name(dataset_base_dir):
    for year in range(2015, 2025):
        dataset_year_dir = os.path.join(dataset_base_dir, str(year))

        if not os.path.exists(dataset_year_dir):
            print(f"Skipping {year} - Dataset folder not found")
            continue

        for csv_file in os.listdir(dataset_year_dir):
            if not csv_file.endswith('.csv'):
                continue

            file_path = os.path.join(dataset_year_dir, csv_file)
            df = pd.read_csv(file_path)

            if 'stadium' in df.columns:
                df['stadium'] = df['stadium'].replace('Punjab Cricket Association IS Bindra Stadium', 'Punjab Cricket Association Stadium')
                df.to_csv(file_path, index=False)
                print(f"Updated {csv_file}")

if __name__ == '__main__':
    dataset_base_dir = 'dataset'
    update_stadium_name(dataset_base_dir)


Updated 33b.csv
Updated 6a.csv
Updated 4b.csv
Updated 38b.csv
Updated 47a.csv
Updated 20a.csv
Updated 37a.csv
Updated 23a.csv
Updated 31a.csv
Updated 59a.csv
Updated 1b.csv
Updated 40a.csv
Updated 43a.csv
Updated 30b.csv
Updated 54a.csv
Updated 21b.csv
Updated 45b.csv
Updated 34a.csv
Updated 29b.csv
Updated 48a.csv
Updated 18a.csv
Updated 50a.csv
Updated 29a.csv
Updated 36b.csv
Updated 37b.csv
Updated 32b.csv
Updated 13a.csv
Updated 28b.csv
Updated 35a.csv
Updated 18b.csv
Updated 54b.csv
Updated 55b.csv
Updated 30a.csv
Updated 33a.csv
Updated 17b.csv
Updated 9a.csv
Updated 41a.csv
Updated 12a.csv
Updated 7b.csv
Updated 60a.csv
Updated 57a.csv
Updated 36a.csv
Updated 50b.csv
Updated 58a.csv
Updated 4a.csv
Updated 58b.csv
Updated 39b.csv
Updated 21a.csv
Updated 26a.csv
Updated 3b.csv
Updated 44b.csv
Updated 57b.csv
Updated 56b.csv
Updated 55a.csv
Updated 15b.csv
Updated 32a.csv
Updated 3a.csv
Updated 19b.csv
Updated 27b.csv
Updated 45a.csv
Updated 53b.csv
Updated 24a.csv
Updated 42a.csv


In [12]:
import os
import pandas as pd

def categorize_grounds(dataset_base_dir, ground_dimensions_path):
    # Load ground dimensions
    ground_dimensions_df = pd.read_csv(ground_dimensions_path)

    # Define categories based on straight delivery dimension
    def get_ground_type(straight_dimension):
        if straight_dimension < 70:
            return 'Small'
        elif 70 <= straight_dimension < 80:
            return 'Medium'
        else:
            return 'Large'

    # Create a mapping from ground name to ground type
    ground_type_mapping = {
        row['Ground']: get_ground_type(row['Straight'])
        for _, row in ground_dimensions_df.iterrows()
    }

    # Process each year's folder
    for year in range(2015, 2025):
        dataset_year_dir = os.path.join(dataset_base_dir, str(year))

        if not os.path.exists(dataset_year_dir):
            print(f"Skipping {year} - Dataset folder not found")
            continue

        for csv_file in os.listdir(dataset_year_dir):
            if not csv_file.endswith('.csv'):
                continue

            file_path = os.path.join(dataset_year_dir, csv_file)
            df = pd.read_csv(file_path)

            if 'ground' in df.columns:
                df['Ground_Type'] = df['ground'].map(ground_type_mapping)
                df.to_csv(file_path, index=False)
                print(f"Updated {csv_file}")

if __name__ == '__main__':
    dataset_base_dir = 'dataset'
    ground_dimensions_path = 'Ground_Dimension.csv'
    categorize_grounds(dataset_base_dir, ground_dimensions_path)


In [14]:
import pandas as pd

# Load the CSV file into a DataFrame
df = pd.read_csv('Ground_Dimension.csv')

# Add the 'Ground_Type' column based on the 'Straight' values
df['Ground_Type'] = pd.cut(df['Straight'], bins=[-float('inf'), 70, 80, float('inf')], labels=['Small', 'Medium', 'Large'])

# Save the updated DataFrame back to a CSV file
df.to_csv('Ground_Dimension_Updated.csv', index=False)

# Print the updated DataFrame
print(df)


                                               Ground  Straight  Leg  Off  \
0   Andhra Cricket Association-Visakhapatnam Distr...        99   71   79   
1                                Arun Jaitley Stadium        67   61   69   
2                           Barsapara Cricket Stadium        74   60   66   
3   Bharat Ratna Shri Atal Bihari Vajpayee Ekana C...        81   67   71   
4                                   Brabourne Stadium        68   50   54   
5                          Dr DY Patil Sports Academy        91   64   71   
6   Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket St...        99   71   79   
7                 Dubai International Cricket Stadium        83   61   69   
8                                        Eden Gardens        84   63   71   
9                                          Green Park        75   68   72   
10       Himachal Pradesh Cricket Association Stadium        79   65   68   
11                             Holkar Cricket Stadium        69   60   67   

In [22]:
import os
import pandas as pd

# Load the Ground_Dimension_Updated CSV to create a mapping of ground names to ground types
ground_mapping = pd.read_csv('Ground_Dimension_Updated.csv')
ground_type_mapping = dict(zip(ground_mapping['Ground'], ground_mapping['Ground_Type']))

# Define the path to the dataset folder
dataset_path = 'dataset'

# Iterate through each year folder
for year in range(2015, 2025):
    year_folder = os.path.join(dataset_path, str(year))

    # Check if the year folder exists
    if os.path.exists(year_folder):
        # Iterate through each CSV file in the year folder
        for filename in os.listdir(year_folder):
            if filename.endswith('.csv'):
                file_path = os.path.join(year_folder, filename)

                # Load the CSV file
                df = pd.read_csv(file_path)

                # Add the Ground_Type column based on the mapping
                df['Stadium_Type'] = df['stadium'].map(ground_type_mapping)

                # Reorder columns to place Ground_Type next to Stadium
                cols = list(df.columns)
                stadium_index = cols.index('stadium')
                cols.insert(stadium_index + 1, cols.pop())  # Move Ground_Type to the desired position
                df = df[cols]

                # Save the updated DataFrame back to the CSV file
                df.to_csv(file_path, index=False)

print("All files have been updated with the Ground_Type column next to Stadium.")


All files have been updated with the Ground_Type column next to Stadium.
