# Team name correction 

In [1]:
import pandas as pd

def replace_team_names(csv_path, replacement_dict, output_path=None,
                       columns=('team1', 'team2', 'toss_winner')):
    """
    Replace team names in a CSV file based on a user-provided dictionary.

    Every value in the selected columns that is a key of `replacement_dict`
    is swapped for the corresponding dictionary value; values that are not
    keys are left unchanged.

    Args:
        csv_path (str): Path to the input CSV file.
        replacement_dict (dict): Dictionary where keys are old names and values are new names.
        output_path (str, optional): Path to save the updated CSV. If None, overwrites the input CSV.
        columns (str or iterable of str, optional): Columns the replacement is
            applied to. Defaults to 'team1', 'team2' and 'toss_winner'.

    Raises:
        ValueError: If any requested column is missing from the CSV. Nothing
            is written to disk in that case.
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(columns, str):
        columns = [columns]
    else:
        columns = list(columns)

    # Read the CSV
    df = pd.read_csv(csv_path)

    # Check every requested column before changing anything
    for column in columns:
        if column not in df.columns:
            raise ValueError(f"Column '{column}' not found in CSV.")

    # map() yields NaN for names that are not dictionary keys;
    # fillna() then restores the original value for those rows
    for column in columns:
        df[column] = df[column].map(replacement_dict).fillna(df[column])

    # Save the updated CSV
    if output_path is None:
        output_path = csv_path  # overwrite
    df.to_csv(output_path, index=False)

    print(f"Replacement done! File saved at: {output_path}")


In [2]:
# Location of the processed dataset. Later cells reuse `csv_path` and
# overwrite this file in place when they are called without an output path.
csv_path = '/Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/data/processed_dataset.csv'

# Load the dataset and preview its first rows
df = pd.read_csv(csv_path)
df.head() 

Unnamed: 0,match_id,inning,total_runs,is_wicket,over,team1,team2,toss_winner,venue,batting_strength,bowling_strength,target_score
0,335982,1,1.760701,-1.472561,0.161779,Royal Challengers Bengaluru,Kolkata Knight Riders,Royal Challengers Bengaluru,M Chinnaswamy Stadium,-0.203053,0.357183,1.760701
1,335983,1,2.3221,-0.52684,0.161779,Punjab Kings,Chennai Super Kings,Chennai Super Kings,"Punjab Cricket Association Stadium, Mohali",-0.510395,1.251168,2.3221
2,335984,1,-1.139859,0.891741,0.161779,Delhi Capitals,Rajasthan Royals,Rajasthan Royals,Feroz Shah Kotla,-1.226504,0.12373,-1.139859
3,335985,1,-0.017061,0.418881,0.161779,Mumbai Indians,Royal Challengers Bengaluru,Mumbai Indians,Wankhede Stadium,0.838779,-0.238281,-0.017061
4,335986,1,-1.732446,1.837462,-0.785538,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,Eden Gardens,0.384641,-1.730324,-1.732446


In [3]:
import numpy as np 
# Sorted distinct values of the 'team1' column (np.unique converts the Series itself)
np.unique(df['team1'])

array(['Chennai Super Kings', 'Delhi Capitals', 'Gujarat Lions',
       'Gujarat Titans', 'Kochi Tuskers Kerala', 'Kolkata Knight Riders',
       'Lucknow Super Giants', 'Mumbai Indians', 'Pune Warriors',
       'Punjab Kings', 'Rajasthan Royals', 'Rising Pune Supergiant',
       'Rising Pune Supergiants', 'Royal Challengers Bengaluru',
       'Sunrisers Hyderabad'], dtype=object)

In [4]:
# Mapping applied to the team columns: key = name as found in the data,
# value = name it is rewritten to.
replacement = {
    'Royal Challengers Bangalore': 'Royal Challengers Bengaluru',
    'Kings XI Punjab': 'Punjab Kings',
    'Deccan Chargers': 'Sunrisers Hyderabad', 
    'Delhi Daredevils' : 'Delhi Capitals',
    # NOTE(review): both 'Gujarat Lions' and 'Gujarat Titans' appear in the
    # team1 values listed by the previous cell, so this entry merges two
    # existing labels rather than renaming one. Presumably intentional, but
    # confirm the merge and its direction (Titans -> Lions).
    'Gujarat Titans' : 'Gujarat Lions'
}

# No output_path is given, so the CSV at csv_path is overwritten in place.
replace_team_names(csv_path, replacement)

Replacement done! File saved at: /Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/data/processed_dataset.csv


In [5]:
import pandas as pd

def drop_elements_in_column(csv_path, column_name, elements_to_drop, output_path=None):
    """
    Remove the rows of a CSV whose value in one column is in a list of unwanted values.

    Args:
        csv_path (str): Path to the input CSV file.
        column_name (str): Column whose values are checked.
        elements_to_drop (list): Values that cause a row to be removed.
        output_path (str, optional): Where to save the filtered CSV. If None,
            the input CSV is overwritten.

    Raises:
        ValueError: If `column_name` is not a column of the CSV.
    """
    frame = pd.read_csv(csv_path)

    # Fail early when the requested column is absent
    if column_name not in frame.columns:
        raise ValueError(f"Column '{column_name}' not found in the CSV.")

    # Keep only the rows whose value is NOT one of the unwanted elements
    keep_mask = ~frame[column_name].isin(elements_to_drop)
    kept_rows = frame.loc[keep_mask]

    # Write to the requested destination, falling back to the input file
    destination = csv_path if output_path is None else output_path
    kept_rows.to_csv(destination, index=False)

    print(f"Dropped elements {elements_to_drop} from column '{column_name}'. File saved at: {destination}")


In [6]:
# Team names whose rows are removed from the dataset
elements = [
    'Pune Warriors',
    'Rising Pune Supergiants',
    'Rising Pune Supergiant',
    'Kochi Tuskers Kerala',
]

# Filter on each team column in turn; every call re-reads the CSV at
# csv_path and overwrites it with the filtered rows.
for team_column in ('team1', 'team2', 'toss_winner'):
    drop_elements_in_column(csv_path, team_column, elements)


Dropped elements ['Pune Warriors', 'Rising Pune Supergiants', 'Rising Pune Supergiant', 'Kochi Tuskers Kerala'] from column 'team1'. File saved at: /Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/data/processed_dataset.csv
Dropped elements ['Pune Warriors', 'Rising Pune Supergiants', 'Rising Pune Supergiant', 'Kochi Tuskers Kerala'] from column 'team2'. File saved at: /Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/data/processed_dataset.csv
Dropped elements ['Pune Warriors', 'Rising Pune Supergiants', 'Rising Pune Supergiant', 'Kochi Tuskers Kerala'] from column 'toss_winner'. File saved at: /Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/data/processed_dataset.csv


In [8]:
# Re-read the processed dataset from disk (the same file the earlier cells
# overwrite) and preview it. The frame is bound to `csv`, which the venue
# listing below reads from.
csv = pd.read_csv('/Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/data/processed_dataset.csv')
csv.head()

Unnamed: 0,match_id,inning,total_runs,is_wicket,over,team1,team2,toss_winner,venue,batting_strength,bowling_strength,target_score
0,335982,1,1.760701,-1.472561,0.161779,Royal Challengers Bengaluru,Kolkata Knight Riders,Royal Challengers Bengaluru,M Chinnaswamy Stadium,-0.203053,0.357183,1.760701
1,335983,1,2.3221,-0.52684,0.161779,Punjab Kings,Chennai Super Kings,Chennai Super Kings,"Punjab Cricket Association Stadium, Mohali",-0.510395,1.251168,2.3221
2,335984,1,-1.139859,0.891741,0.161779,Delhi Capitals,Rajasthan Royals,Rajasthan Royals,Feroz Shah Kotla,-1.226504,0.12373,-1.139859
3,335985,1,-0.017061,0.418881,0.161779,Mumbai Indians,Royal Challengers Bengaluru,Mumbai Indians,Wankhede Stadium,0.838779,-0.238281,-0.017061
4,335986,1,-1.732446,1.837462,-0.785538,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,Eden Gardens,0.384641,-1.730324,-1.732446


In [12]:
# Sorted distinct venue labels in the processed dataset
np.unique(csv['venue'])

array(['Arun Jaitley Stadium', 'Arun Jaitley Stadium, Delhi',
       'Barabati Stadium', 'Barsapara Cricket Stadium, Guwahati',
       'Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow',
       'Brabourne Stadium', 'Brabourne Stadium, Mumbai', 'Buffalo Park',
       'De Beers Diamond Oval', 'Dr DY Patil Sports Academy',
       'Dr DY Patil Sports Academy, Mumbai',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam',
       'Dubai International Cricket Stadium', 'Eden Gardens',
       'Eden Gardens, Kolkata', 'Feroz Shah Kotla', 'Green Park',
       'Himachal Pradesh Cricket Association Stadium',
       'Himachal Pradesh Cricket Association Stadium, Dharamsala',
       'Holkar Cricket Stadium', 'JSCA International Stadium Complex',
       'Kingsmead', 'M Chinnaswamy Stadium',
       'M Chinnaswamy Stadium, Bengaluru', 'M.Chinnaswamy Stadium',
       'MA Chidambaram Stadium', 'MA Ch

# Stadium name correction

In [18]:
# Switch to the matches file. This rebinds the notebook-level `csv_path`
# and `df`, so cells below that read either name now refer to matches.csv.
csv_path = '/Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/dags/matches.csv'

# Load the matches table and preview its first rows
df = pd.read_csv(csv_path)
df.head() 

Unnamed: 0,match_id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,2007/08,Bangalore,2008-04-18,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335983,2007/08,Chandigarh,2008-04-19,League,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri
2,335984,2007/08,Delhi,2008-04-19,League,MF Maharoof,Feroz Shah Kotla,Delhi Daredevils,Rajasthan Royals,Rajasthan Royals,bat,Delhi Daredevils,wickets,9.0,130.0,20.0,N,,Aleem Dar,GA Pratapkumar
3,335985,2007/08,Mumbai,2008-04-20,League,MV Boucher,Wankhede Stadium,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,Royal Challengers Bangalore,wickets,5.0,166.0,20.0,N,,SJ Davis,DJ Harper
4,335986,2007/08,Kolkata,2008-04-20,League,DJ Hussey,Eden Gardens,Kolkata Knight Riders,Deccan Chargers,Deccan Chargers,bat,Kolkata Knight Riders,wickets,5.0,111.0,20.0,N,,BF Bowden,K Hariharan


In [20]:
# Number of distinct venue labels before normalisation
np.unique(df['venue']).size

58

In [26]:
import pandas as pd

def replace_team_names(df, replacement_dict, output_path=None, columns=('venue',)):
    """
    Replace values in DataFrame columns using a dictionary and save the result as CSV.

    Despite its name, by default this version only rewrites the 'venue'
    column. Every value that is a key of `replacement_dict` is swapped for
    the corresponding dictionary value; other values are left unchanged.

    Args:
        df (pd.DataFrame): Frame to update. It is modified in place.
        replacement_dict (dict): Dictionary where keys are old names and values are new names.
        output_path (str, optional): Path to save the updated CSV. If None,
            the module-level (notebook) variable `csv_path` is used when it
            is defined.
        columns (str or iterable of str, optional): Columns the replacement is
            applied to. Defaults to 'venue' only.

    Raises:
        ValueError: If a requested column is missing from `df`, or if
            `output_path` is None and no global `csv_path` is available.
            `df` is left unmodified and nothing is written in either case.
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(columns, str):
        columns = [columns]
    else:
        columns = list(columns)

    # Resolve the destination BEFORE touching df, so a missing path cannot
    # leave the caller with a modified frame that was never saved.
    if output_path is None:
        # Explicit lookup of the notebook-level fallback instead of an
        # implicit reference to an undeclared global.
        output_path = globals().get('csv_path')
        if output_path is None:
            raise ValueError(
                "output_path was not given and no global 'csv_path' is defined."
            )

    # Check every requested column before changing anything
    for column in columns:
        if column not in df.columns:
            raise ValueError(f"Column '{column}' not found in CSV.")

    # map() yields NaN for values that are not dictionary keys;
    # fillna() then restores the original value for those rows
    for column in columns:
        df[column] = df[column].map(replacement_dict).fillna(df[column])

    # Save the updated CSV
    df.to_csv(output_path, index=False)

    print(f"Replacement done! File saved at: {output_path}")


In [27]:
# Sorted distinct venue labels in the matches table
np.unique(df['venue'])

array(['Arun Jaitley Stadium', 'Arun Jaitley Stadium, Delhi',
       'Barabati Stadium', 'Barsapara Cricket Stadium, Guwahati',
       'Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow',
       'Brabourne Stadium', 'Brabourne Stadium, Mumbai', 'Buffalo Park',
       'De Beers Diamond Oval', 'Dr DY Patil Sports Academy',
       'Dr DY Patil Sports Academy, Mumbai',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam',
       'Dubai International Cricket Stadium', 'Eden Gardens',
       'Eden Gardens, Kolkata', 'Feroz Shah Kotla', 'Green Park',
       'Himachal Pradesh Cricket Association Stadium',
       'Himachal Pradesh Cricket Association Stadium, Dharamsala',
       'Holkar Cricket Stadium', 'JSCA International Stadium Complex',
       'Kingsmead', 'M Chinnaswamy Stadium',
       'M Chinnaswamy Stadium, Bengaluru', 'M.Chinnaswamy Stadium',
       'MA Chidambaram Stadium', 'MA Ch

In [28]:
# Venue normalisation table: key = venue label as found in the data,
# value = the label it is rewritten to. Several keys map to the same value
# so that spelling variants of one ground collapse into a single label.
replacement = {
    'Arun Jaitley Stadium': 'Arun Jaitley Stadium, Delhi',
	# NOTE(review): 'Odissa' looks like a misspelling (presumably 'Odisha').
	# The string is written to the CSV, so confirm downstream use before changing it.
	'Barabati Stadium': 'Barabati Stadium, Odissa',
	'Brabourne Stadium': 'Brabourne Stadium, Mumbai', 
    'Buffalo Park': 'Buffalo Park, South Africa',
	'De Beers Diamond Oval': 'De Beers Diamond Oval, South Africa', 
    'Dr DY Patil Sports Academy': 'Dr DY Patil Sports Academy, Mumbai',
	'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam',
	'Eden Gardens': 'Eden Gardens, Kolkata', 
    'Feroz Shah Kotla': 'Feroz Shah Kotla, Delhi',
    'Green Park': 'Green Park, Kanpur',
	'Himachal Pradesh Cricket Association Stadium': 'Himachal Pradesh Cricket Association Stadium, Dharamsala',
	'Holkar Cricket Stadium': 'Holkar Cricket Stadium, Indore', 
    'JSCA International Stadium Complex' : 'JSCA International Stadium Complex, Ranchi',
	'Kingsmead': 'Kingsmead, South Africa', 
    'M Chinnaswamy Stadium': 'M Chinnaswamy Stadium, Bengaluru',
	'M.Chinnaswamy Stadium': 'M Chinnaswamy Stadium, Bengaluru',
	'MA Chidambaram Stadium': 'MA Chidambaram Stadium, Chepauk, Chennai', 
    'MA Chidambaram Stadium, Chepauk': 'MA Chidambaram Stadium, Chepauk, Chennai',
	'Maharashtra Cricket Association Stadium': 'Maharashtra Cricket Association Stadium, Pune',
	# NOTE(review): verify this city against the `city` column of the
	# matching rows in matches.csv; it may not be Guwahati.
	'Nehru Stadium': 'Nehru Stadium, Guwahati',
	'New Wanderers Stadium': 'New Wanderers Stadium, South Africa', 
    'Newlands': 'Newlands, South Africa', 
    'OUTsurance Oval': 'OUTsurance Oval, South Africa',
	'Punjab Cricket Association IS Bindra Stadium': 'Punjab Cricket Association IS Bindra Stadium, Mohali',
	'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh': 'Punjab Cricket Association IS Bindra Stadium, Mohali',
	'Punjab Cricket Association Stadium, Mohali': 'Punjab Cricket Association IS Bindra Stadium, Mohali',
	'Rajiv Gandhi International Stadium': 'Rajiv Gandhi International Stadium, Uppal, Hyderabad',
	'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi International Stadium, Uppal, Hyderabad',
	'Saurashtra Cricket Association Stadium': 'Saurashtra Cricket Association Stadium, Rajkot', 
    'Sawai Mansingh Stadium':'Sawai Mansingh Stadium, Jaipur',
	'Shaheed Veer Narayan Singh International Stadium': 'Shaheed Veer Narayan Singh International Stadium, New Raipur',
	'Sheikh Zayed Stadium': 'Zayed Cricket Stadium, Abu Dhabi',
	"St George's Park": "St George's Park, South Africa", 
    # Mapped onto a different venue label rather than given a city suffix;
    # presumably the same ground under another name (confirm).
    'Subrata Roy Sahara Stadium': 'Maharashtra Cricket Association Stadium, Pune',
	'SuperSport Park':'SuperSport Park, South Africa', 
	'Wankhede Stadium': 'Wankhede Stadium, Mumbai',
}
# Uses the DataFrame-based replace_team_names defined just above, which
# rewrites the 'venue' column of `df` in place. No output_path is passed,
# so the result is saved to the notebook-level `csv_path`.
replace_team_names(df, replacement)

Replacement done! File saved at: /Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/dags/matches.csv


In [29]:
# Re-read the saved matches file and list its distinct venue labels (sorted)
f = pd.read_csv('/Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/dags/matches.csv')
np.unique(f['venue'])

array(['Arun Jaitley Stadium, Delhi', 'Barabati Stadium, Odissa',
       'Barsapara Cricket Stadium, Guwahati',
       'Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow',
       'Brabourne Stadium, Mumbai', 'Buffalo Park, South Africa',
       'De Beers Diamond Oval, South Africa',
       'Dr DY Patil Sports Academy, Mumbai',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam',
       'Dubai International Cricket Stadium', 'Eden Gardens, Kolkata',
       'Feroz Shah Kotla, Delhi', 'Green Park, Kanpur',
       'Himachal Pradesh Cricket Association Stadium, Dharamsala',
       'Holkar Cricket Stadium, Indore',
       'JSCA International Stadium Complex, Ranchi',
       'Kingsmead, South Africa', 'M Chinnaswamy Stadium, Bengaluru',
       'MA Chidambaram Stadium, Chepauk, Chennai',
       'Maharaja Yadavindra Singh International Cricket Stadium, Mullanpur',
       'Maharashtra Cricket Association Stadium, Pune',
       'Narendra Modi Stadium, A

In [30]:
# Number of distinct venue labels after normalisation
np.unique(f['venue']).size

38

# Drift


In [1]:
import pandas as pd


# Load the processed IPL data
df = pd.read_csv('/Users/nandhakishorecs/Documents/IITM/Jan_2025/DA5402/DA5402_AI_Application/dags/ipl_data_processed.csv')

# Boolean mask: True for rows where 'Lucknow Super Giants' is exactly the
# value of team1, team2 or toss_winner
lsg_columns = ['team1', 'team2', 'toss_winner']
mask = df[lsg_columns].eq('Lucknow Super Giants').any(axis=1)

# Keep only the rows where the team is involved and store them as the drift set.
# The DataFrame index is written as the first CSV column (to_csv default).
df_lsg = df.loc[mask].copy()
df_lsg.to_csv('drift.csv')