In [25]:
import pandas as pd
import os
import glob

# Path to the folder containing your CSV files
folder_path = './'  # Replace with your actual folder path

# Use glob to get all CSV files in the folder
csv_files = glob.glob(os.path.join(folder_path, '*.csv'))

# Create an empty list to store individual dataframes
dfs = []

# Expected columns based on your screenshot
expected_columns = ['Artist', 'Title', 'Album', 'Year', 'Date', 'Lyric']

# Loop through each CSV file, read it properly, and append to the list
for csv in csv_files:
    # Read with explicit delimiter and column names
    df = pd.read_csv(csv, delimiter=',')
    
    # Check if dataframe has the expected columns
    # If not, try to fix or skip this file
    if not all(col in df.columns for col in expected_columns):
        print(f"Warning: {csv} doesn't have all expected columns. Attempting to fix...")
        # Try reading with different parameters
        df = pd.read_csv(csv, delimiter='\t')  # Try tab delimiter
        
        # If still missing columns, skip this file
        if not all(col in df.columns for col in expected_columns):
            print(f"Skipping {csv} due to incompatible structure")
            continue
    
    # Only keep the expected columns
    df = df[expected_columns]
    
    # Remove any rows with NaN values in critical columns
    df = df.dropna(subset=['Artist', 'Title'])
    
    dfs.append(df)
    print(f"Processed: {csv}")

# Concatenate all dataframes in the list
combined_df = pd.concat(dfs, ignore_index=True)

# Check for and remove duplicate entries
combined_df = combined_df.drop_duplicates()

# Save the combined dataframe to a new CSV file
output_path = os.path.join(folder_path, 'combined_artists.csv')
combined_df.to_csv(output_path, index=False)

print(f"Combined CSV saved to: {output_path}")
print(f"Total rows in combined file: {len(combined_df)}")

# Display the first few rows to verify
print("\nFirst 5 rows of the combined file:")
print(combined_df.head())

Processed: ./DuaLipa.csv
Processed: ./ArianaGrande.csv
Processed: ./CharliePuth.csv
Processed: ./Drake.csv
Processed: ./BTS.csv
Processed: ./BillieEilish.csv
Processed: ./CardiB.csv
Processed: ./Eminem.csv
Processed: ./LadyGaga.csv
Processed: ./combined_artists.csv
Processed: ./NickiMinaj.csv
Processed: ./Beyonce.csv
Processed: ./Maroon5.csv
Processed: ./EdSheeran.csv
Processed: ./JustinBieber.csv
Processed: ./TaylorSwift.csv
Processed: ./SelenaGomez.csv
Processed: ./ColdPlay.csv
Processed: ./Rihanna.csv
Processed: ./KatyPerry.csv
Processed: ./PostMalone.csv
Processed: ./Khalid.csv
Combined CSV saved to: ./combined_artists.csv
Total rows in combined file: 6027

First 5 rows of the combined file:
     Artist                  Title             Album    Year        Date  \
0  Dua Lipa              New Rules          Dua Lipa  2017.0  2017-06-02   
1  Dua Lipa        Don’t Start Now  Future Nostalgia  2019.0  2019-11-01   
2  Dua Lipa                  IDGAF          Dua Lipa  2017.0  2017-

In [29]:
# Path to your combined CSV file
combined_file = 'combined_artists.csv'  # Update path if it's in a different folder

# Read the combined CSV file
df = pd.read_csv(combined_file)

# Display the first 5 rows
print("First 5 rows of combined_artists.csv:")
df.head()

First 5 rows of combined_artists.csv:


Unnamed: 0,Artist,Title,Album,Year,Date,Lyric
0,Dua Lipa,New Rules,Dua Lipa,2017.0,2017-06-02,one one one one one talkin' in my sleep at n...
1,Dua Lipa,Don’t Start Now,Future Nostalgia,2019.0,2019-11-01,if you don't wanna see me did a full 80 craz...
2,Dua Lipa,IDGAF,Dua Lipa,2017.0,2017-06-02,you call me all friendly tellin' me how much y...
3,Dua Lipa,Blow Your Mind (Mwah),Dua Lipa,2016.0,2016-08-26,i know it's hot i know we've got something tha...
4,Dua Lipa,Be the One,Dua Lipa,2015.0,2015-10-30,i see the moon i see the moon i see the moon o...


In [31]:
df.tail(10)

Unnamed: 0,Artist,Title,Album,Year,Date,Lyric
6017,Khalid,OTW (BURNS Version),,2018.0,2018-06-28,khalid put it in drive i'll be outside i'll be...
6018,Khalid,​wildflower (rough),K3*,2020.0,2020-03-23,mm yeah mm yeah mm yeah yeah spend my days c...
6019,Khalid,Better (noclue? Remix),,2018.0,2018-12-21,love to see you shine in the night like the di...
6020,Khalid,Raining In Miami,,,,lyrics for this song have yet to be released p...
6021,Khalid,Young dumb,,2017.0,2017-02-02,so you're still thinking of me just like i kno...
6022,Khalid,Khalid - Vertigo (Tradução Português),,2018.0,2018-10-28,será que é melhor apenas acreditar nas teorias...
6023,Khalid,Better (Miles Away Remix),,2018.0,2018-12-12,i'm not really drunk i never get that fucked u...
6024,Khalid,Khalid - Better (Official Music Video),,2018.0,2018-05-07,users considering it's a virus or malware must...
6025,Khalid,Perfect Lover,,,,lyrics for this song have yet to be released p...
6026,Khalid,Better (Rennie! Remix),,2019.0,2019-02-01,love to see you shine in the night like the di...


In [37]:
df.iloc[3000]['Lyric']

"big boi pure players pure players   big boi slowly as sir luscious step up like a qdawg atown's up deuces down it's the new call and you can dial 888cutsomething i'm your kinky operator when you wanna fuck something ain't no dial tones just milestones of pleasure to reach to each his own i'm a stone cold aquarian freak humanitarian barbarian but under the sheets it's the player from the point and your girl from the d   beyoncé are you infatuated with me i could end your curiosity if you don't think i'm too rude here's your chance to make your move i sit and wait for nobody people say that my my style is so crazy i think you like it baby do you want to get nasty i dare you to undress me   beyoncé baby let me know if you wanna roll with a hip hop star bamboos on pants down low i'm a rock star baby let me know if you wanna ride with a hip hop star i'm a rock star   beyoncé do i blow you away do i stimulate your mind would you taste my love if i give you time i have none to waste with you

In [39]:
df.iloc[3000]['Artist']

'Beyoncé'