In [None]:
import pandas as pd

# STEP 1: Load data
df = pd.read_csv("top_50_2020.csv")

# STEP 2: Normalise the raw artist string, then split it.
# The separators are rewritten BEFORE the split: previously "&" was turned into
# "," only after the rows had been exploded, so "A & B" survived as the single
# name "A , B" and could never match a key of the gender dictionary.
ARTIST_COL = "Artist Name(s)"
df[ARTIST_COL] = (
    df[ARTIST_COL]
    .str.replace(r"\(.*?\)", "", regex=True)                # drop parenthetical notes
    .str.replace(r"\bfeat\.", ",", case=False, regex=True)  # "A feat. B" -> "A , B"
    .str.replace("&", ",", regex=False)                     # "A & B"     -> "A , B"
    .str.split(",")
)

# STEP 3: Explode artists into multiple rows (one row per track/artist pair)
df = df.explode(ARTIST_COL)

# STEP 4: Clean artist names. Stripping happens after every removal/split so no
# stray whitespace is left; empty fragments simply find no match in the lookup.
# Title case gives uniform names that line up with the title-cased dictionary keys.
df[ARTIST_COL] = df[ARTIST_COL].str.strip().str.title()

# STEP 5: Gender mapping dictionary (expanded from your dataset).
# ChatGPT was used to write this part of the code.
# Keys are written in title case because artist names are title-cased before lookup.
# Each singer appears exactly once (a repeated "Chinmayi" key was removed).
gender_dict = {
    "Anirudh Ravichander": "Male",
    "Dhanush": "Male",
    "Shreya Ghoshal": "Female",
    "Sid Sriram": "Male",
    "Jonita Gandhi": "Female",
    "Hariharan": "Male",
    "Sunidhi Chauhan": "Female",
    "Karthik": "Male",
    "Chinmayi": "Female",
    "Bombay Jayashri": "Female",
    "S. P. Balasubrahmanyam": "Male",
    "Neeti Mohan": "Female",
    "Vijay": "Male",
    "Mahalakshmi Iyer": "Female",
    "Suchitra": "Female",
    "Krishnakumar Kunnath": "Male",
    "Yogi B": "Male",
    "Malini": "Female",
    "Sam C.S.": "Male",
    "Sathyaprakash": "Male",
    "Deepak Blue": "Male",
    "Pooja Av": "Female",
    "Haricharan": "Male",
    "Arjun Chandy": "Male",
    "Suzanne D'Mello": "Female",
    "Blaaze": "Male",
    "Murtuza Khan": "Male",
    "Qadir Khan": "Male",
    "Alisha Thomas": "Female",
    "Badshah": "Male",
    "Vijay Prakash": "Male",
    "Shweta Mohan": "Female",
    "Alaap Raju": "Male",
    "Prashanthini": "Female",
    "Emcee Jesz": "Male",
    "Sri Charan": "Male",
    "Madhan Karky": "Male",
    "Abhay Jodhpurkar": "Male",
    "Saindhavi": "Female",
    "Shakthisree Gopalan": "Female",
    "Dhee": "Female",
    "Arunraja Kamaraj": "Male",
    "Kavita Krishnamurthy": "Female",
    "Naresh Iyer": "Male",
    "Tipu": "Male",
    "A.R. Rahman": "Male",
    "Devan Ekambaram": "Male",
    "Clinton Cerejo": "Male",
    "Dominique Cerejo": "Female",
    "P. Jayachandran": "Male",
    "Justin Prabhakaran": "Male",
    "Nivas K Prasanna": "Male",
    "Vivek - Mervin": "Male",
    "Mervin Solomon": "Male",
    "D. Imman": "Male",
    "Harihara Sudhan": "Male",
    "Unnikrishnan": "Male",
    "Vishal Chandrashekhar": "Male",
    "Datto Radha Ravi": "Male",
    "Siddharth": "Male",
    "Anthony Daasan": "Male",
    "Marana Gana Viji": "Male",
    "Naveen Madhav": "Male",
    "Aaryan Dinesh Kanagaratnam": "Male",
    "Darbuka Siva": "Male",
    "Kailash Kher": "Male",
}

# STEP 6: Map gender
# Dictionary lookup per cleaned artist name; names without an entry in
# gender_dict come back as missing values.
artist_names = df["Artist Name(s)"]
df["gender"] = artist_names.map(gender_dict)

# STEP 7: Classify songs by singer gender
def classify_gender(gender_list):
    """Collapse the per-artist gender labels of one track into a single label.

    Args:
        gender_list: Iterable of gender labels belonging to one track. Missing
            values (NaN/None) are ignored.

    Returns:
        The shared label when all known labels agree, "Duet" when more than one
        distinct label is present, and "Unknown" when no label is known.
    """
    known = {label for label in gender_list if pd.notna(label)}
    if not known:
        return "Unknown"
    if len(known) > 1:
        return "Duet"
    # Exactly one distinct label is left.
    return next(iter(known))

# One row per (track, album): keep the highest popularity and collapse the
# per-artist gender labels into a single label via classify_gender.
track_groups = df.groupby(["Track Name", "Album Name"], as_index=False)
per_track = track_groups.agg({"Popularity": "max", "gender": classify_gender})
df_summary = per_track.rename(columns={"gender": "singer_gender_type"})

# STEP 8: Save final result
df_summary.to_excel("top_50_2020_classified.xlsx", index=False)

print("✅ Done! Output saved as top_50_2020_classified.xlsx")


✅ Done! Output saved as top_50_2020_classified.xlsx


In [11]:
import pandas as pd

# STEP 1: Load the CSV
df = pd.read_csv("top_50_2021.csv")

# STEP 2: Normalise the raw artist string, then split multiple artists.
# The separators are rewritten BEFORE the split: previously "&" was turned into
# "," only after the rows had been exploded, so "A & B" survived as the single
# name "A , B" and could never match a key of the gender dictionary.
ARTIST_COL = "Artist Name(s)"
df[ARTIST_COL] = (
    df[ARTIST_COL]
    .str.replace(r"\(.*?\)", "", regex=True)                # drop parenthetical notes
    .str.replace(r"\bfeat\.", ",", case=False, regex=True)  # "A feat. B" -> "A , B"
    .str.replace("&", ",", regex=False)                     # "A & B"     -> "A , B"
    .str.split(",")
)

# STEP 3: Explode into multiple rows (one row per track/artist pair)
df = df.explode(ARTIST_COL)

# STEP 4: Clean artist names. The strip now runs last, after all removals and
# the split, so no stray whitespace is left to break the dictionary lookup.
# Title case lines the names up with the title-cased dictionary keys.
df[ARTIST_COL] = df[ARTIST_COL].str.strip().str.title()

# STEP 5: Gender dictionary (updated)
# Keys are written in title case because artist names are title-cased before lookup.
# Base roster (the same singers as in the 2020 cell's dictionary).
gender_dict = {
    "Anirudh Ravichander": "Male",
    "Dhanush": "Male",
    "Shreya Ghoshal": "Female",
    "Sid Sriram": "Male",
    "Jonita Gandhi": "Female",
    "Hariharan": "Male",
    "Sunidhi Chauhan": "Female",
    "Karthik": "Male",
    "Chinmayi": "Female",
    "Bombay Jayashri": "Female",
    "S. P. Balasubrahmanyam": "Male",
    "Neeti Mohan": "Female",
    "Vijay": "Male",
    "Mahalakshmi Iyer": "Female",
    "Suchitra": "Female",
    "Krishnakumar Kunnath": "Male",
    "Yogi B": "Male",
    "Malini": "Female",
    "Sam C.S.": "Male",
    "Sathyaprakash": "Male",
    "Deepak Blue": "Male",
    "Pooja Av": "Female",
    "Haricharan": "Male",
    "Arjun Chandy": "Male",
    "Suzanne D'Mello": "Female",
    "Blaaze": "Male",
    "Murtuza Khan": "Male",
    "Qadir Khan": "Male",
    "Alisha Thomas": "Female",
    "Badshah": "Male",
    "Vijay Prakash": "Male",
    "Shweta Mohan": "Female",
    "Alaap Raju": "Male",
    "Prashanthini": "Female",
    "Emcee Jesz": "Male",
    "Sri Charan": "Male",
    "Madhan Karky": "Male",
    "Abhay Jodhpurkar": "Male",
    "Saindhavi": "Female",
    "Shakthisree Gopalan": "Female",
    "Dhee": "Female",
    "Arunraja Kamaraj": "Male",
    "Kavita Krishnamurthy": "Female",
    "Naresh Iyer": "Male",
    "Tipu": "Male",
    "A.R. Rahman": "Male",
    "Devan Ekambaram": "Male",
    "Clinton Cerejo": "Male",
    "Dominique Cerejo": "Female",
    "P. Jayachandran": "Male",
    "Justin Prabhakaran": "Male",
    "Nivas K Prasanna": "Male",
    "Vivek - Mervin": "Male",
    "Mervin Solomon": "Male",
    "D. Imman": "Male",
    "Harihara Sudhan": "Male",
    "Unnikrishnan": "Male",
    "Vishal Chandrashekhar": "Male",
    "Datto Radha Ravi": "Male",
    "Siddharth": "Male",
    "Anthony Daasan": "Male",
    "Marana Gana Viji": "Male",
    "Naveen Madhav": "Male",
    "Aaryan Dinesh Kanagaratnam": "Male",
    "Darbuka Siva": "Male",
    "Kailash Kher": "Male",
}

# Singers appended on top of the base roster for this cell.
gender_dict.update({
    "Bjorn Surrao": "Male",
    "Therukural Arivu": "Male",
    "Manasi": "Female",
    "Ananya Bhat": "Female",
    "Yuvan Shankar Raja": "Male",
    "Sanjith Hegde": "Male",
    "G.V. Prakash": "Male",
    "Aditi Shankar": "Female",
})

# STEP 6: Map gender
# Dictionary lookup per cleaned artist name; names without an entry in
# gender_dict come back as missing values.
artist_names = df["Artist Name(s)"]
df["gender"] = artist_names.map(gender_dict)

# STEP 7: Group and classify gender type per track
def classify_gender(gender_list):
    """Collapse the per-artist gender labels of one track into a single label.

    Args:
        gender_list: Iterable of gender labels belonging to one track. Missing
            values (NaN/None) are ignored.

    Returns:
        The shared label when all known labels agree, "Duet" when more than one
        distinct label is present, and "Unknown" when no label is known.
    """
    known = {label for label in gender_list if pd.notna(label)}
    if not known:
        return "Unknown"
    if len(known) > 1:
        return "Duet"
    # Exactly one distinct label is left.
    return next(iter(known))

# One row per track name: keep the highest popularity and collapse the
# per-artist gender labels into a single label via classify_gender.
track_groups = df.groupby(["Track Name"], as_index=False)
per_track = track_groups.agg({"Popularity": "max", "gender": classify_gender})
df_summary = per_track.rename(columns={"gender": "singer_gender_type"})

# STEP 8: Save to Excel
df_summary.to_excel("top_50_2021_classified.xlsx", index=False)
print("✅ 2021 processed using Track Name and saved as top_50_2021_classified.xlsx")


✅ 2021 processed using Track Name and saved as top_50_2021_classified.xlsx


In [12]:
import pandas as pd

# STEP 1: Load the file
df = pd.read_csv("top_50_2022.csv")

# STEP 2: Normalise separators, then split and explode artist names.
# The separators are rewritten BEFORE the split: previously "&" was turned into
# "," only after the rows had been exploded, so "A & B" survived as the single
# name "A , B" and could never match a key of the gender dictionary.
ARTIST_COL = "Artist Name(s)"
df[ARTIST_COL] = (
    df[ARTIST_COL]
    .str.replace(r"\(.*?\)", "", regex=True)                # drop parenthetical notes
    .str.replace(r"\bfeat\.", ",", case=False, regex=True)  # "A feat. B" -> "A , B"
    .str.replace("&", ",", regex=False)                     # "A & B"     -> "A , B"
    .str.split(",")
)
df = df.explode(ARTIST_COL)

# STEP 3: Clean artist names. The strip now runs last, after all removals and
# the split, so no stray whitespace is left to break the dictionary lookup.
# Title case lines the names up with the title-cased dictionary keys.
df[ARTIST_COL] = df[ARTIST_COL].str.strip().str.title()

# STEP 4: Gender dictionary (2020 + 2021 + updated 2022 artists)
# Keys are written in title case because artist names are title-cased before lookup.
# Roster carried over from the 2020 and 2021 cells.
gender_dict = {
    "Anirudh Ravichander": "Male",
    "Dhanush": "Male",
    "Shreya Ghoshal": "Female",
    "Sid Sriram": "Male",
    "Jonita Gandhi": "Female",
    "Hariharan": "Male",
    "Sunidhi Chauhan": "Female",
    "Karthik": "Male",
    "Chinmayi": "Female",
    "Bombay Jayashri": "Female",
    "S. P. Balasubrahmanyam": "Male",
    "Neeti Mohan": "Female",
    "Vijay": "Male",
    "Mahalakshmi Iyer": "Female",
    "Suchitra": "Female",
    "Krishnakumar Kunnath": "Male",
    "Yogi B": "Male",
    "Malini": "Female",
    "Sam C.S.": "Male",
    "Sathyaprakash": "Male",
    "Deepak Blue": "Male",
    "Pooja Av": "Female",
    "Haricharan": "Male",
    "Arjun Chandy": "Male",
    "Suzanne D'Mello": "Female",
    "Blaaze": "Male",
    "Murtuza Khan": "Male",
    "Qadir Khan": "Male",
    "Alisha Thomas": "Female",
    "Badshah": "Male",
    "Vijay Prakash": "Male",
    "Shweta Mohan": "Female",
    "Alaap Raju": "Male",
    "Prashanthini": "Female",
    "Emcee Jesz": "Male",
    "Sri Charan": "Male",
    "Madhan Karky": "Male",
    "Abhay Jodhpurkar": "Male",
    "Saindhavi": "Female",
    "Shakthisree Gopalan": "Female",
    "Dhee": "Female",
    "Arunraja Kamaraj": "Male",
    "Kavita Krishnamurthy": "Female",
    "Naresh Iyer": "Male",
    "Tipu": "Male",
    "A.R. Rahman": "Male",
    "Devan Ekambaram": "Male",
    "Clinton Cerejo": "Male",
    "Dominique Cerejo": "Female",
    "P. Jayachandran": "Male",
    "Justin Prabhakaran": "Male",
    "Nivas K Prasanna": "Male",
    "Vivek - Mervin": "Male",
    "Mervin Solomon": "Male",
    "D. Imman": "Male",
    "Harihara Sudhan": "Male",
    "Unnikrishnan": "Male",
    "Vishal Chandrashekhar": "Male",
    "Datto Radha Ravi": "Male",
    "Siddharth": "Male",
    "Anthony Daasan": "Male",
    "Marana Gana Viji": "Male",
    "Naveen Madhav": "Male",
    "Aaryan Dinesh Kanagaratnam": "Male",
    "Darbuka Siva": "Male",
    "Kailash Kher": "Male",
    "Bjorn Surrao": "Male",
    "Therukural Arivu": "Male",
    "Manasi": "Female",
    "Ananya Bhat": "Female",
    "Yuvan Shankar Raja": "Male",
    "Sanjith Hegde": "Male",
    "G.V. Prakash": "Male",
    "Aditi Shankar": "Female",
}

# New in 2022
gender_dict.update({
    "Sean Roldan": "Male",
    "Rakshita Suresh": "Female",
    "Sreehari": "Male",
    "Gana Bala": "Male",
    "Sivaangi": "Female",
    "S. Janaki": "Female",
    "Shilpa Rao": "Female",
    "Vishal Mishra": "Male",
    "Vijay Yesudas": "Male",
})

# STEP 5: Map genders
# Dictionary lookup per cleaned artist name; names without an entry in
# gender_dict come back as missing values.
artist_names = df["Artist Name(s)"]
df["gender"] = artist_names.map(gender_dict)

# STEP 6: Classify songs by gender type
def classify_gender(gender_list):
    """Collapse the per-artist gender labels of one track into a single label.

    Args:
        gender_list: Iterable of gender labels belonging to one track. Missing
            values (NaN/None) are ignored.

    Returns:
        The shared label when all known labels agree, "Duet" when more than one
        distinct label is present, and "Unknown" when no label is known.
    """
    known = {label for label in gender_list if pd.notna(label)}
    if not known:
        return "Unknown"
    if len(known) > 1:
        return "Duet"
    # Exactly one distinct label is left.
    return next(iter(known))

# One row per track name: keep the highest popularity and collapse the
# per-artist gender labels into a single label via classify_gender.
track_groups = df.groupby("Track Name", as_index=False)
per_track = track_groups.agg({"Popularity": "max", "gender": classify_gender})
df_summary = per_track.rename(columns={"gender": "singer_gender_type"})

# STEP 7: Save to Excel
df_summary.to_excel("top_50_2022_classified.xlsx", index=False)
print("✅ 2022 processed and saved as top_50_2022_classified.xlsx")


✅ 2022 processed and saved as top_50_2022_classified.xlsx


In [13]:
import pandas as pd

# STEP 1: Load the CSV
df = pd.read_csv("top_50_2023.csv")

# STEP 2: Normalise separators, then split and explode multiple artist names.
# The separators are rewritten BEFORE the split: previously "&" was turned into
# "," only after the rows had been exploded, so "A & B" survived as the single
# name "A , B" and could never match a key of the gender dictionary.
ARTIST_COL = "Artist Name(s)"
df[ARTIST_COL] = (
    df[ARTIST_COL]
    .str.replace(r"\(.*?\)", "", regex=True)                # drop parenthetical notes
    .str.replace(r"\bfeat\.", ",", case=False, regex=True)  # "A feat. B" -> "A , B"
    .str.replace("&", ",", regex=False)                     # "A & B"     -> "A , B"
    .str.split(",")
)
df = df.explode(ARTIST_COL)

# STEP 3: Clean artist names. The strip now runs last, after all removals and
# the split, so no stray whitespace is left to break the dictionary lookup.
# Title case lines the names up with the title-cased dictionary keys.
df[ARTIST_COL] = df[ARTIST_COL].str.strip().str.title()

# STEP 4: Gender dictionary (including 2023 updates)
# Keys are written in title case because artist names are title-cased before lookup.
gender_dict = {
    # Reused from previous years
    "Anirudh Ravichander": "Male",
    "Sid Sriram": "Male",
    "Shreya Ghoshal": "Female",
    "Jonita Gandhi": "Female",
    "Dhanush": "Male",
    "A.R. Rahman": "Male",
    "Karthik": "Male",
    "Sunidhi Chauhan": "Female",
    "S. P. Balasubrahmanyam": "Male",
    "Neeti Mohan": "Female",
    "Vijay": "Male",
    "Chinmayi": "Female",
    "Yuvan Shankar Raja": "Male",
    "Shweta Mohan": "Female",
    "Sanjith Hegde": "Male",
    "Rakshita Suresh": "Female",
    "Sean Roldan": "Male",
    "Sivaangi": "Female",
    "Saindhavi": "Female",
    "Haricharan": "Male",
    "Vishal Mishra": "Male",
    "Aditi Shankar": "Female",
    "Manasi": "Female",
    "G.V. Prakash": "Male",
    "Gana Bala": "Male",
    "Therukural Arivu": "Male",
    "Bjorn Surrao": "Male",
    "Ananya Bhat": "Female",
    "Vijay Yesudas": "Male",

    # ✅ New 2023 additions (based on recent Tamil hits)
    "Priya Mali": "Female",
    "Kapil Kapilan": "Male",
    "Shilpa Rao": "Female",
    "Aditya Rk": "Male",
    "Shashaa Tirupati": "Female",
    "Kharesma Ravichandran": "Female",
    "Sreekanth Hariharan": "Male",
    "Shabir": "Male",
    "Deepthi Suresh": "Female",
    "Shakthisree Gopalan": "Female",
    "Dhee": "Female",
    "Arivu": "Male",
    "Sathya Prakash": "Male",
    "Vivek Siva": "Male",
    "Santhosh Narayanan": "Male",

    # Restored from the 2020-2022 dictionaries of this notebook. These singers
    # had been dropped here, so any of them appearing in the 2023 list would
    # have been left without a gender and pushed towards "Unknown".
    "Hariharan": "Male",
    "Bombay Jayashri": "Female",
    "Mahalakshmi Iyer": "Female",
    "Suchitra": "Female",
    "Krishnakumar Kunnath": "Male",
    "Yogi B": "Male",
    "Malini": "Female",
    "Sam C.S.": "Male",
    "Sathyaprakash": "Male",
    "Deepak Blue": "Male",
    "Pooja Av": "Female",
    "Arjun Chandy": "Male",
    "Suzanne D'Mello": "Female",
    "Blaaze": "Male",
    "Murtuza Khan": "Male",
    "Qadir Khan": "Male",
    "Alisha Thomas": "Female",
    "Badshah": "Male",
    "Vijay Prakash": "Male",
    "Alaap Raju": "Male",
    "Prashanthini": "Female",
    "Emcee Jesz": "Male",
    "Sri Charan": "Male",
    "Madhan Karky": "Male",
    "Abhay Jodhpurkar": "Male",
    "Arunraja Kamaraj": "Male",
    "Kavita Krishnamurthy": "Female",
    "Naresh Iyer": "Male",
    "Tipu": "Male",
    "Devan Ekambaram": "Male",
    "Clinton Cerejo": "Male",
    "Dominique Cerejo": "Female",
    "P. Jayachandran": "Male",
    "Justin Prabhakaran": "Male",
    "Nivas K Prasanna": "Male",
    "Vivek - Mervin": "Male",
    "Mervin Solomon": "Male",
    "D. Imman": "Male",
    "Harihara Sudhan": "Male",
    "Unnikrishnan": "Male",
    "Vishal Chandrashekhar": "Male",
    "Datto Radha Ravi": "Male",
    "Siddharth": "Male",
    "Anthony Daasan": "Male",
    "Marana Gana Viji": "Male",
    "Naveen Madhav": "Male",
    "Aaryan Dinesh Kanagaratnam": "Male",
    "Darbuka Siva": "Male",
    "Kailash Kher": "Male",
    "Sreehari": "Male",
    "S. Janaki": "Female",
}

# STEP 5: Map gender to each artist
# Dictionary lookup per cleaned artist name; names without an entry in
# gender_dict come back as missing values.
artist_names = df["Artist Name(s)"]
df["gender"] = artist_names.map(gender_dict)

# STEP 6: Classify by gender type
def classify_gender(gender_list):
    """Collapse the per-artist gender labels of one track into a single label.

    Args:
        gender_list: Iterable of gender labels belonging to one track. Missing
            values (NaN/None) are ignored.

    Returns:
        The shared label when all known labels agree, "Duet" when more than one
        distinct label is present, and "Unknown" when no label is known.
    """
    known = {label for label in gender_list if pd.notna(label)}
    if not known:
        return "Unknown"
    if len(known) > 1:
        return "Duet"
    # Exactly one distinct label is left.
    return next(iter(known))

# One row per track name: keep the highest popularity and collapse the
# per-artist gender labels into a single label via classify_gender.
track_groups = df.groupby("Track Name", as_index=False)
per_track = track_groups.agg({"Popularity": "max", "gender": classify_gender})
df_summary = per_track.rename(columns={"gender": "singer_gender_type"})

# STEP 7: Save to Excel
df_summary.to_excel("top_50_2023_classified.xlsx", index=False)
print("✅ 2023 file processed and saved as top_50_2023_classified.xlsx")


✅ 2023 file processed and saved as top_50_2023_classified.xlsx


In [14]:
import pandas as pd

# STEP 1: Load the CSV
df = pd.read_csv("top_50_2024.csv")

# STEP 2: Normalise separators, then split and explode artist names.
# The separators are rewritten BEFORE the split: previously "&" was turned into
# "," only after the rows had been exploded, so "A & B" survived as the single
# name "A , B" and could never match a key of the gender dictionary.
ARTIST_COL = "Artist Name(s)"
df[ARTIST_COL] = (
    df[ARTIST_COL]
    .str.replace(r"\(.*?\)", "", regex=True)                # drop parenthetical notes
    .str.replace(r"\bfeat\.", ",", case=False, regex=True)  # "A feat. B" -> "A , B"
    .str.replace("&", ",", regex=False)                     # "A & B"     -> "A , B"
    .str.split(",")
)
df = df.explode(ARTIST_COL)

# STEP 3: Clean artist names. The strip now runs last, after all removals and
# the split, so no stray whitespace is left to break the dictionary lookup.
# Title case lines the names up with the title-cased dictionary keys.
df[ARTIST_COL] = df[ARTIST_COL].str.strip().str.title()

# STEP 4: Gender dictionary (includes 2020 to 2024 artists)
# Keys are written in title case because artist names are title-cased before lookup.
gender_dict = {
    "Anirudh Ravichander": "Male",
    "Sid Sriram": "Male",
    "Shreya Ghoshal": "Female",
    "Jonita Gandhi": "Female",
    "Dhanush": "Male",
    "A.R. Rahman": "Male",
    "Karthik": "Male",
    "Sunidhi Chauhan": "Female",
    "S. P. Balasubrahmanyam": "Male",
    "Neeti Mohan": "Female",
    "Vijay": "Male",
    "Chinmayi": "Female",
    "Yuvan Shankar Raja": "Male",
    "Shweta Mohan": "Female",
    "Sanjith Hegde": "Male",
    "Rakshita Suresh": "Female",
    "Sean Roldan": "Male",
    "Sivaangi": "Female",
    "Saindhavi": "Female",
    "Haricharan": "Male",
    "Vishal Mishra": "Male",
    "Aditi Shankar": "Female",
    "Manasi": "Female",
    "G.V. Prakash": "Male",
    "Gana Bala": "Male",
    "Therukural Arivu": "Male",
    "Bjorn Surrao": "Male",
    "Ananya Bhat": "Female",
    "Vijay Yesudas": "Male",
    "Shashaa Tirupati": "Female",
    "Shabir": "Male",
    "Priya Mali": "Female",
    "Kapil Kapilan": "Male",
    "Aditya Rk": "Male",
    "Shilpa Rao": "Female",
    "Kharesma Ravichandran": "Female",
    "Sreekanth Hariharan": "Male",
    "Deepthi Suresh": "Female",
    "Shakthisree Gopalan": "Female",
    "Dhee": "Female",
    "Arivu": "Male",
    "Sathya Prakash": "Male",
    "Santhosh Narayanan": "Male",
    "Vivek Siva": "Male",

    # Restored from the 2020-2022 dictionaries of this notebook. The header
    # promises 2020 to 2024 coverage, but these singers had been dropped, so any
    # of them appearing in the 2024 list would have been left without a gender.
    "Hariharan": "Male",
    "Bombay Jayashri": "Female",
    "Mahalakshmi Iyer": "Female",
    "Suchitra": "Female",
    "Krishnakumar Kunnath": "Male",
    "Yogi B": "Male",
    "Malini": "Female",
    "Sam C.S.": "Male",
    "Sathyaprakash": "Male",
    "Deepak Blue": "Male",
    "Pooja Av": "Female",
    "Arjun Chandy": "Male",
    "Suzanne D'Mello": "Female",
    "Blaaze": "Male",
    "Murtuza Khan": "Male",
    "Qadir Khan": "Male",
    "Alisha Thomas": "Female",
    "Badshah": "Male",
    "Vijay Prakash": "Male",
    "Alaap Raju": "Male",
    "Prashanthini": "Female",
    "Emcee Jesz": "Male",
    "Sri Charan": "Male",
    "Madhan Karky": "Male",
    "Abhay Jodhpurkar": "Male",
    "Arunraja Kamaraj": "Male",
    "Kavita Krishnamurthy": "Female",
    "Naresh Iyer": "Male",
    "Tipu": "Male",
    "Devan Ekambaram": "Male",
    "Clinton Cerejo": "Male",
    "Dominique Cerejo": "Female",
    "P. Jayachandran": "Male",
    "Justin Prabhakaran": "Male",
    "Nivas K Prasanna": "Male",
    "Vivek - Mervin": "Male",
    "Mervin Solomon": "Male",
    "D. Imman": "Male",
    "Harihara Sudhan": "Male",
    "Unnikrishnan": "Male",
    "Vishal Chandrashekhar": "Male",
    "Datto Radha Ravi": "Male",
    "Siddharth": "Male",
    "Anthony Daasan": "Male",
    "Marana Gana Viji": "Male",
    "Naveen Madhav": "Male",
    "Aaryan Dinesh Kanagaratnam": "Male",
    "Darbuka Siva": "Male",
    "Kailash Kher": "Male",
    "Sreehari": "Male",
    "S. Janaki": "Female",

    # ✅ Add any 2024-specific new singers here as you encounter them
    "Varsha S Krishnan": "Female",
    "Armaan Malik": "Male",
    "Anuv Jain": "Male",
    "Sireesha Bhagavatula": "Female",
    "Akhil": "Male",
}

# STEP 5: Map gender
# Dictionary lookup per cleaned artist name; names without an entry in
# gender_dict come back as missing values.
artist_names = df["Artist Name(s)"]
df["gender"] = artist_names.map(gender_dict)

# STEP 6: Classify gender type per track
def classify_gender(gender_list):
    """Collapse the per-artist gender labels of one track into a single label.

    Args:
        gender_list: Iterable of gender labels belonging to one track. Missing
            values (NaN/None) are ignored.

    Returns:
        The shared label when all known labels agree, "Duet" when more than one
        distinct label is present, and "Unknown" when no label is known.
    """
    known = {label for label in gender_list if pd.notna(label)}
    if not known:
        return "Unknown"
    if len(known) > 1:
        return "Duet"
    # Exactly one distinct label is left.
    return next(iter(known))

# One row per track name: keep the highest popularity and collapse the
# per-artist gender labels into a single label via classify_gender.
track_groups = df.groupby("Track Name", as_index=False)
per_track = track_groups.agg({"Popularity": "max", "gender": classify_gender})
df_summary = per_track.rename(columns={"gender": "singer_gender_type"})

# STEP 7: Save final file
df_summary.to_excel("top_50_2024_classified.xlsx", index=False)
print("✅ 2024 processed and saved as top_50_2024_classified.xlsx")


✅ 2024 processed and saved as top_50_2024_classified.xlsx
