### Prueba extract spotify

In [1]:
import os
import pandas as pd


def extract_spotify_data() -> pd.DataFrame:
    """
    Extract data from the Spotify dataset CSV file.

    Returns:
        pd.DataFrame: DataFrame containing the data from spotify_dataset.csv
    """
    try:
        os.chdir("../../Workshop_002")
    except FileNotFoundError:
        print("""
            FileNotFoundError - The directory may not exist or you are not located in the specified path.
            """)
        raise 

    csv_path = "data/spotify_dataset.csv"
    if not os.path.exists(csv_path):
        print(f"""
            FileNotFoundError - The file {csv_path} was not found in the current directory: {os.getcwd()}
            """)
        raise FileNotFoundError(f"File {csv_path} not found")

    try:
        df = pd.read_csv(csv_path, encoding="utf-8")
    except Exception as e:
        print(f"""
            Error reading the CSV file: {str(e)}
            """)
        raise

    return df

In [2]:
df = extract_spotify_data()
df.head()

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,...,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,...,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,...,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,...,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,...,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


### Prueba dataset grammys

In [1]:
import os
import json
import pandas as pd
from sqlalchemy import create_engine
from typing import Dict, Any


def extract_grammy_data():

    """ Extract data of the grammy dataset from the database."""

    try:
        os.chdir("../../Workshop_002")
    except FileNotFoundError:
        print("""
            FileNotFoundError - The directory may not exist or you are not located in the specified path.
            """)
    os.chdir("..")
    print(os.getcwd())

    with open("Workshop_002/credentials.json", "r", encoding="utf-8") as file:
        credentials = json.load(file)

    db_host = credentials["db_host"]
    db_name = credentials["db_name"]
    db_user = credentials["db_user"]
    db_password = credentials["db_password"]
    
    engine = create_engine(f"postgresql://{db_user}:{db_password}@{db_host}:5432/{db_name}")
    query = "SELECT * FROM grammys_raw_data"
    with engine.connect() as conn:
        df = pd.read_sql(sql=query, con=conn.connection)
    
    return df

In [2]:
df=extract_grammy_data()
df.head()

c:\Users\natym\Desktop


  df = pd.read_sql(sql=query, con=conn.connection)


Unnamed: 0,year,title,published_at,updated_at,category,nominee,artist,workers,img,winner
0,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,Bad Guy,Billie Eilish,"Finneas O'Connell, producer; Rob Kinelski & Fi...",https://www.grammy.com/sites/com/files/styles/...,True
1,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,"Hey, Ma",Bon Iver,"BJ Burton, Brad Cook, Chris Messina & Justin V...",https://www.grammy.com/sites/com/files/styles/...,True
2,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,7 rings,Ariana Grande,"Charles Anderson, Tommy Brown, Michael Foster ...",https://www.grammy.com/sites/com/files/styles/...,True
3,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,Hard Place,H.E.R.,"Rodney “Darkchild” Jerkins, producer; Joseph H...",https://www.grammy.com/sites/com/files/styles/...,True
4,2019,62nd Annual GRAMMY Awards (2019),2020-05-19T05:10:28-07:00,2020-05-19T05:10:28-07:00,Record Of The Year,Talk,Khalid,"Disclosure & Denis Kosiak, producers; Ingmar C...",https://www.grammy.com/sites/com/files/styles/...,True


In [3]:
def drop_null_values(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop rows with null values from the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with null values dropped.
    """
    logging.info("Dropping rows with null values from the DataFrame.")
    return df.dropna().reset_index(drop=True)

In [4]:
import logging
import pandas as pd
drop_null_values(df)

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.4610,...,-6.746,0,0.1430,0.0322,0.000001,0.3580,0.7150,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.420,0.1660,...,-17.235,1,0.0763,0.9240,0.000006,0.1010,0.2670,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.3590,...,-9.734,1,0.0557,0.2100,0.000000,0.1170,0.1200,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,...,-18.515,1,0.0363,0.9050,0.000071,0.1320,0.1430,181.740,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.4430,...,-9.681,1,0.0526,0.4690,0.000000,0.0829,0.1670,119.949,4,acoustic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113994,113995,2C3TZjDRiAzdyViavDJ217,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Sleep My Little Boy,21,384999,False,0.172,0.2350,...,-16.393,1,0.0422,0.6400,0.928000,0.0863,0.0339,125.995,5,world-music
113995,113996,1hIz5L4IB9hN3WRYPOCGPw,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Water Into Light,22,385000,False,0.174,0.1170,...,-18.318,0,0.0401,0.9940,0.976000,0.1050,0.0350,85.239,4,world-music
113996,113997,6x8ZfSoqDjuNa5SVP5QjvX,Cesária Evora,Best Of,Miss Perfumado,22,271466,False,0.629,0.3290,...,-10.895,0,0.0420,0.8670,0.000000,0.0839,0.7430,132.378,4,world-music
113997,113998,2e6sXL2bYv4bSz6VTdnfLs,Michael W. Smith,Change Your World,Friends,41,283893,False,0.587,0.5060,...,-10.889,1,0.0297,0.3810,0.000000,0.2700,0.4130,135.960,4,world-music


In [5]:


def categorize_energy(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the energy of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized energy.
    """
    logging.info("Categorizing the energy of tracks in the DataFrame.")
    bins=[0, 0.3, 0.7, 1]
    labels=['Low', 'Medium', 'High']
    df['energy'] = pd.cut(df['energy'], bins=bins, labels=labels)
    return df.reset_index(drop=True)

In [6]:
categorize_energy(df)

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,Medium,...,-6.746,0,0.1430,0.0322,0.000001,0.3580,0.7150,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.420,Low,...,-17.235,1,0.0763,0.9240,0.000006,0.1010,0.2670,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,Medium,...,-9.734,1,0.0557,0.2100,0.000000,0.1170,0.1200,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,Low,...,-18.515,1,0.0363,0.9050,0.000071,0.1320,0.1430,181.740,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,Medium,...,-9.681,1,0.0526,0.4690,0.000000,0.0829,0.1670,119.949,4,acoustic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113995,113995,2C3TZjDRiAzdyViavDJ217,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Sleep My Little Boy,21,384999,False,0.172,Low,...,-16.393,1,0.0422,0.6400,0.928000,0.0863,0.0339,125.995,5,world-music
113996,113996,1hIz5L4IB9hN3WRYPOCGPw,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Water Into Light,22,385000,False,0.174,Low,...,-18.318,0,0.0401,0.9940,0.976000,0.1050,0.0350,85.239,4,world-music
113997,113997,6x8ZfSoqDjuNa5SVP5QjvX,Cesária Evora,Best Of,Miss Perfumado,22,271466,False,0.629,Medium,...,-10.895,0,0.0420,0.8670,0.000000,0.0839,0.7430,132.378,4,world-music
113998,113998,2e6sXL2bYv4bSz6VTdnfLs,Michael W. Smith,Change Your World,Friends,41,283893,False,0.587,Medium,...,-10.889,1,0.0297,0.3810,0.000000,0.2700,0.4130,135.960,4,world-music


In [7]:
def categorize_danceability(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the danceability of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized danceability.
    """
    logging.info("Categorizing the danceability of tracks in the DataFrame.")
    bins = [0, 0.3, 0.6, 1]
    labels = ['Low', 'Medium', 'High']
    df['danceability'] = pd.cut(df['danceability'], bins=bins, labels=labels)
    return df.reset_index(drop=True)

In [8]:
categorize_danceability(df)

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,High,Medium,...,-6.746,0,0.1430,0.0322,0.000001,0.3580,0.7150,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,Medium,Low,...,-17.235,1,0.0763,0.9240,0.000006,0.1010,0.2670,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,Medium,Medium,...,-9.734,1,0.0557,0.2100,0.000000,0.1170,0.1200,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,Low,Low,...,-18.515,1,0.0363,0.9050,0.000071,0.1320,0.1430,181.740,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,High,Medium,...,-9.681,1,0.0526,0.4690,0.000000,0.0829,0.1670,119.949,4,acoustic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113995,113995,2C3TZjDRiAzdyViavDJ217,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Sleep My Little Boy,21,384999,False,Low,Low,...,-16.393,1,0.0422,0.6400,0.928000,0.0863,0.0339,125.995,5,world-music
113996,113996,1hIz5L4IB9hN3WRYPOCGPw,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Water Into Light,22,385000,False,Low,Low,...,-18.318,0,0.0401,0.9940,0.976000,0.1050,0.0350,85.239,4,world-music
113997,113997,6x8ZfSoqDjuNa5SVP5QjvX,Cesária Evora,Best Of,Miss Perfumado,22,271466,False,High,Medium,...,-10.895,0,0.0420,0.8670,0.000000,0.0839,0.7430,132.378,4,world-music
113998,113998,2e6sXL2bYv4bSz6VTdnfLs,Michael W. Smith,Change Your World,Friends,41,283893,False,Medium,Medium,...,-10.889,1,0.0297,0.3810,0.000000,0.2700,0.4130,135.960,4,world-music


In [9]:
def categorize_popularity(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the popularity of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized popularity.
    """
    logging.info("Categorizing the popularity of tracks in the DataFrame.")
    bins = [0, 30, 60, 80, 100]
    labels = ['Low', 'Medium', 'High', 'Very High']
    df['popularity'] = pd.cut(df['popularity'], bins=bins, labels=labels)
    return df.reset_index(drop=True)


In [10]:
categorize_popularity(df)

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,High,230666,False,High,Medium,...,-6.746,0,0.1430,0.0322,0.000001,0.3580,0.7150,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,Medium,149610,False,Medium,Low,...,-17.235,1,0.0763,0.9240,0.000006,0.1010,0.2670,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,Medium,210826,False,Medium,Medium,...,-9.734,1,0.0557,0.2100,0.000000,0.1170,0.1200,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,High,201933,False,Low,Low,...,-18.515,1,0.0363,0.9050,0.000071,0.1320,0.1430,181.740,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,Very High,198853,False,High,Medium,...,-9.681,1,0.0526,0.4690,0.000000,0.0829,0.1670,119.949,4,acoustic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113995,113995,2C3TZjDRiAzdyViavDJ217,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Sleep My Little Boy,Low,384999,False,Low,Low,...,-16.393,1,0.0422,0.6400,0.928000,0.0863,0.0339,125.995,5,world-music
113996,113996,1hIz5L4IB9hN3WRYPOCGPw,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Water Into Light,Low,385000,False,Low,Low,...,-18.318,0,0.0401,0.9940,0.976000,0.1050,0.0350,85.239,4,world-music
113997,113997,6x8ZfSoqDjuNa5SVP5QjvX,Cesária Evora,Best Of,Miss Perfumado,Low,271466,False,High,Medium,...,-10.895,0,0.0420,0.8670,0.000000,0.0839,0.7430,132.378,4,world-music
113998,113998,2e6sXL2bYv4bSz6VTdnfLs,Michael W. Smith,Change Your World,Friends,Medium,283893,False,Medium,Medium,...,-10.889,1,0.0297,0.3810,0.000000,0.2700,0.4130,135.960,4,world-music


In [13]:
def categorize_loudness(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the loudness of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized loudness.
    """
    logging.info("Categorizing the loudness of tracks in the DataFrame.")
    df['is_loud'] = df['loudness'] > -5
    return df.reset_index(drop=True)

In [14]:
categorize_loudness(df)

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre,is_loud
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,High,230666,False,High,Medium,...,0,0.1430,0.0322,0.000001,0.3580,0.7150,87.917,4,acoustic,False
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,Medium,149610,False,Medium,Low,...,1,0.0763,0.9240,0.000006,0.1010,0.2670,77.489,4,acoustic,False
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,Medium,210826,False,Medium,Medium,...,1,0.0557,0.2100,0.000000,0.1170,0.1200,76.332,4,acoustic,False
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,High,201933,False,Low,Low,...,1,0.0363,0.9050,0.000071,0.1320,0.1430,181.740,3,acoustic,False
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,Very High,198853,False,High,Medium,...,1,0.0526,0.4690,0.000000,0.0829,0.1670,119.949,4,acoustic,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113995,113995,2C3TZjDRiAzdyViavDJ217,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Sleep My Little Boy,Low,384999,False,Low,Low,...,1,0.0422,0.6400,0.928000,0.0863,0.0339,125.995,5,world-music,False
113996,113996,1hIz5L4IB9hN3WRYPOCGPw,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Water Into Light,Low,385000,False,Low,Low,...,0,0.0401,0.9940,0.976000,0.1050,0.0350,85.239,4,world-music,False
113997,113997,6x8ZfSoqDjuNa5SVP5QjvX,Cesária Evora,Best Of,Miss Perfumado,Low,271466,False,High,Medium,...,0,0.0420,0.8670,0.000000,0.0839,0.7430,132.378,4,world-music,False
113998,113998,2e6sXL2bYv4bSz6VTdnfLs,Michael W. Smith,Change Your World,Friends,Medium,283893,False,Medium,Medium,...,1,0.0297,0.3810,0.000000,0.2700,0.4130,135.960,4,world-music,False


In [15]:
def mapping_genre(df: pd.DataFrame) -> pd.DataFrame:
    """
    Map genre names into broader categories to a better use of the data.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with mapped genre names.
    """
    logging.info("Mapping genre names in the DataFrame.")
    genre_mapping = {
      'Rock': ['alt-rock', 'alternative', 'grunge', 'hard-rock', 'psych-rock', 'rock', 'rock-n-roll','rockabilly', 'indie', 'garage', 'j-rock'],
      'Metal': ['black-metal', 'death-metal', 'heavy-metal', 'metal', 'metalcore', 'grindcore','industrial'],
      'Punk': ['punk', 'punk-rock', 'emo'],
      'Pop': ['pop', 'power-pop', 'synth-pop', 'k-pop', 'j-pop', 'cantopop', 'mandopop','indie-pop', 'british', 'swedish'],
      'Film/Show Music': ['pop-film', 'disney', 'show-tunes', 'anime'],
      'Electronic': ['electronic', 'electro', 'idm', 'trip-hop'],
      'Dance': ['dance', 'club', 'edm'],
      'House': ['house', 'deep-house', 'chicago-house', 'progressive-house', 'detroit-techno','j-dance'],
      'Techno': ['techno', 'minimal-techno'],
      'Bass Music': ['dubstep', 'drum-and-bass', 'dub', 'breakbeat', 'hardstyle'],
      'Hip-Hop': ['hip-hop', 'r-n-b'],
      'Reggae/Dancehall': ['reggae', 'dancehall', 'reggaeton'],
      'Jazz': ['jazz', 'groove'],
      'Blues': ['blues', 'bluegrass', 'honky-tonk'],
      'Soul/Funk': ['soul', 'funk', 'gospel'],
      'Country': ['country'],
      'Folk': ['folk', 'singer-songwriter'],
      'Latin': ['latin', 'latino', 'salsa', 'samba', 'pagode', 'sertanejo', 'brazil', 'mpb','tango', 'spanish', 'forro'],
      'World Music': ['afrobeat', 'indian', 'iranian', 'malay', 'turkish', 'french', 'german','world-music'],
      'Classical': ['classical', 'opera', 'piano'],
      'Instrumental': ['acoustic', 'guitar', 'new-age'],
      'Ambient/Chill': ['ambient', 'chill', 'sleep', 'study'],
      'Mood': ['happy', 'sad', 'romance'],
      'Children': ['children', 'kids'],'Comedy/Novelty': ['comedy'],'Disco': ['disco'],'Goth': ['goth'],'Ska': ['ska'],'Party': ['party'],'J-Idol': ['j-idol']
    }
    genre_category_mapping = {genre: category for category, genres in genre_mapping.items() for genre in genres}
    df["track_genre"] = df["track_genre"].map(genre_category_mapping)
    return df.reset_index(drop=True)


In [16]:
mapping_genre(df)

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre,is_loud
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,High,230666,False,High,Medium,...,0,0.1430,0.0322,0.000001,0.3580,0.7150,87.917,4,Instrumental,False
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,Medium,149610,False,Medium,Low,...,1,0.0763,0.9240,0.000006,0.1010,0.2670,77.489,4,Instrumental,False
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,Medium,210826,False,Medium,Medium,...,1,0.0557,0.2100,0.000000,0.1170,0.1200,76.332,4,Instrumental,False
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,High,201933,False,Low,Low,...,1,0.0363,0.9050,0.000071,0.1320,0.1430,181.740,3,Instrumental,False
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,Very High,198853,False,High,Medium,...,1,0.0526,0.4690,0.000000,0.0829,0.1670,119.949,4,Instrumental,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113995,113995,2C3TZjDRiAzdyViavDJ217,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Sleep My Little Boy,Low,384999,False,Low,Low,...,1,0.0422,0.6400,0.928000,0.0863,0.0339,125.995,5,World Music,False
113996,113996,1hIz5L4IB9hN3WRYPOCGPw,Rainy Lullaby,#mindfulness - Soft Rain for Mindful Meditatio...,Water Into Light,Low,385000,False,Low,Low,...,0,0.0401,0.9940,0.976000,0.1050,0.0350,85.239,4,World Music,False
113997,113997,6x8ZfSoqDjuNa5SVP5QjvX,Cesária Evora,Best Of,Miss Perfumado,Low,271466,False,High,Medium,...,0,0.0420,0.8670,0.000000,0.0839,0.7430,132.378,4,World Music,False
113998,113998,2e6sXL2bYv4bSz6VTdnfLs,Michael W. Smith,Change Your World,Friends,Medium,283893,False,Medium,Medium,...,1,0.0297,0.3810,0.000000,0.2700,0.4130,135.960,4,World Music,False


### Transform spotify

In [7]:
""" Transform Spotify data for analysis. """

import pandas as pd
import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)


def delete_unnecessary_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Delete unnecessary columns from the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with specified columns deleted.
    """
    logging.info("Deleting unnecessary columns from the DataFrame.")
    return df.drop(columns=['Unnamed: 0'], errors='ignore')


def drop_null_values(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop rows with null values from the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with null values dropped.
    """
    logging.info("Dropping rows with null values from the DataFrame.")
    return df.dropna().reset_index(drop=True)


def drop_duplicated_values(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop duplicated rows from the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with duplicated rows dropped.
    """
    logging.info("Dropping duplicated rows from the DataFrame.")
    return df.drop_duplicates().reset_index(drop=True)


def drop_duplicates_id(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop duplicated rows based on the 'id' column from the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with duplicated rows based on 'id' dropped.
    """
    logging.info("Dropping duplicated rows based on the 'id' column from the DataFrame.")
    return df.drop_duplicates(subset=['track_id']).reset_index(drop=True)


def mapping_genre(df: pd.DataFrame) -> pd.DataFrame:
    """
    Map genre names into broader categories to a better use of the data.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with mapped genre names.
    """
    logging.info("Mapping genre names in the DataFrame.")
    genre_mapping = {
      'Rock': ['alt-rock', 'alternative', 'grunge', 'hard-rock', 'psych-rock', 'rock', 'rock-n-roll','rockabilly', 'indie', 'garage', 'j-rock'],
      'Metal': ['black-metal', 'death-metal', 'heavy-metal', 'metal', 'metalcore', 'grindcore','industrial'],
      'Punk': ['punk', 'punk-rock', 'emo'],
      'Pop': ['pop', 'power-pop', 'synth-pop', 'k-pop', 'j-pop', 'cantopop', 'mandopop','indie-pop', 'british', 'swedish'],
      'Film/Show Music': ['pop-film', 'disney', 'show-tunes', 'anime'],
      'Electronic': ['electronic', 'electro', 'idm', 'trip-hop'],
      'Dance': ['dance', 'club', 'edm'],
      'House': ['house', 'deep-house', 'chicago-house', 'progressive-house', 'detroit-techno','j-dance'],
      'Techno': ['techno', 'minimal-techno'],
      'Bass Music': ['dubstep', 'drum-and-bass', 'dub', 'breakbeat', 'hardstyle'],
      'Hip-Hop': ['hip-hop', 'r-n-b'],
      'Reggae/Dancehall': ['reggae', 'dancehall', 'reggaeton'],
      'Jazz': ['jazz', 'groove'],
      'Blues': ['blues', 'bluegrass', 'honky-tonk'],
      'Soul/Funk': ['soul', 'funk', 'gospel'],
      'Country': ['country'],
      'Folk': ['folk', 'singer-songwriter'],
      'Latin': ['latin', 'latino', 'salsa', 'samba', 'pagode', 'sertanejo', 'brazil', 'mpb','tango', 'spanish', 'forro'],
      'World Music': ['afrobeat', 'indian', 'iranian', 'malay', 'turkish', 'french', 'german','world-music'],
      'Classical': ['classical', 'opera', 'piano'],
      'Instrumental': ['acoustic', 'guitar', 'new-age'],
      'Ambient/Chill': ['ambient', 'chill', 'sleep', 'study'],
      'Mood': ['happy', 'sad', 'romance'],
      'Children': ['children', 'kids'],'Comedy/Novelty': ['comedy'],'Disco': ['disco'],'Goth': ['goth'],'Ska': ['ska'],'Party': ['party'],'J-Idol': ['j-idol']
    }
    genre_category_mapping = {genre: category for category, genres in genre_mapping.items() for genre in genres}
    df["track_genre"] = df["track_genre"].map(genre_category_mapping)
    return df.reset_index(drop=True)


def drop_duplicates_by_content(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop duplicated rows ignoring the "track_name" and "artist" columns of the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with duplicated rows based on content dropped.
    """
    subset_cols = [col for col in df.columns if col not in ["track_id", "album_name"]]
    logging.info("Dropping duplicated rows based on the content of the DataFrame.")
    return df.drop_duplicates(subset=subset_cols).reset_index(drop=True)


def keep_more_popular(df: pd.DataFrame) -> pd.DataFrame:
    """
    Keep the most popular track for each artist in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with the most popular track for each artist.
    """
    logging.info("Keeping the most popular track for each artist in the DataFrame.")
    idx = df.groupby(['track_name', 'artists'])['popularity'].idxmax()
    return df.loc[idx].reset_index(drop=True)


def change_duration(df: pd.DataFrame) -> pd.DataFrame:
    """
    Change the duration of tracks in the DataFrame from milliseconds to minutes.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with duration in minutes.
    """
    logging.info("Changing the duration of tracks from milliseconds to minutes.")
    df['duration_min'] = df['duration_ms'] / 60000
    df.drop(columns=['duration_ms'], inplace=True, errors='ignore')
    return df.reset_index(drop=True)


def categorize_popularity(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the popularity of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized popularity.
    """
    logging.info("Categorizing the popularity of tracks in the DataFrame.")
    bins = [0, 30, 60, 80, 100]
    labels = ['Low', 'Medium', 'High', 'Very High']
    df['popularity'] = pd.cut(df['popularity'], bins=bins, labels=labels)
    return df.reset_index(drop=True)


def categorize_danceability(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the danceability of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized danceability.
    """
    logging.info("Categorizing the danceability of tracks in the DataFrame.")
    bins = [0, 0.3, 0.6, 1]
    labels = ['Low', 'Medium', 'High']
    df['danceability'] = pd.cut(df['danceability'], bins=bins, labels=labels)
    return df.reset_index(drop=True)

def categorize_energy(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the energy of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized energy.
    """
    logging.info("Categorizing the energy of tracks in the DataFrame.")
    bins=[0, 0.3, 0.7, 1]
    labels=['Low', 'Medium', 'High']
    df['energy'] = pd.cut(df['energy'], bins=bins, labels=labels)
    return df.reset_index(drop=True)


def categorize_duration(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the duration of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized duration.
    """
    logging.info("Categorizing the duration of tracks in the DataFrame.")
    bins=[0, 2, 3.5, 5, 10, 20]
    labels=['Very Short', 'Short', 'Average', 'Long', 'Very Long']
    df['duration_min'] = pd.cut(df['duration_min'], bins=bins, labels=labels)
    return df.reset_index(drop=True)


def categorize_valence(df: pd.DataFrame) -> pd.DataFrame:
    """
    Categorize the valence of tracks in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with categorized valence.
    """
    logging.info("Categorizing the valence of tracks in the DataFrame.")
    bins=[0, 0.2, 0.4, 0.6, 0.8, 1]
    labels=['Very Sad','Sad','Neutral','Happy','Very Happy']
    df['valence'] = pd.cut(df['valence'], bins=bins, labels=labels)
    return df.reset_index(drop=True)


def create_boolean(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create boolean columns for the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with boolean columns.
    """
    logging.info("Creating boolean columns in the DataFrame.")
    df['is_loud'] = df['loudness'] > -5
    df['is_live'] = df['liveness'] > 0.8
    return df.reset_index(drop=True)


def delete_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Delete specified columns from the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with specified columns deleted.
    """
    logging.info("Deleting specified columns from the DataFrame.")
    return df.drop(columns=['loudness', 'liveness','key', 'mode', 'time_signature', 'tempo', "speechiness", "acousticness", "instrumentalness"], errors='ignore')


def transform_spotify_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Transform the Spotify data for analysis.

    Args:
        df (pd.DataFrame): The DataFrame to transform.

    Returns:
        pd.DataFrame: The transformed DataFrame.
    """
    logging.info("Starting transformation of Spotify data.")
    df = delete_unnecessary_columns(df)
    df = drop_null_values(df)
    df = drop_duplicated_values(df)
    df = drop_duplicates_id(df)
    df = mapping_genre(df)
    df = drop_duplicates_by_content(df)
    df = keep_more_popular(df)
    df = change_duration(df)
    df = categorize_popularity(df)
    df = categorize_danceability(df)
    df = categorize_energy(df)
    df = categorize_duration(df)
    df = categorize_valence(df)
    df = create_boolean(df)
    df = delete_columns(df)
    df = delete_unnecessary_columns(df)

    logging.info("Transformation of Spotify data completed.")
    return df.reset_index(drop=True)

In [8]:
transform_spotify_data(df)

2025-04-10 03:04:06,652 - INFO - Starting transformation of Spotify data.
2025-04-10 03:04:06,655 - INFO - Deleting unnecessary columns from the DataFrame.
2025-04-10 03:04:06,677 - INFO - Dropping rows with null values from the DataFrame.
2025-04-10 03:04:06,771 - INFO - Dropping duplicated rows from the DataFrame.
2025-04-10 03:04:06,923 - INFO - Dropping duplicated rows based on the 'id' column from the DataFrame.
2025-04-10 03:04:06,957 - INFO - Mapping genre names in the DataFrame.
2025-04-10 03:04:07,007 - INFO - Dropping duplicated rows based on the content of the DataFrame.
2025-04-10 03:04:07,102 - INFO - Keeping the most popular track for each artist in the DataFrame.
2025-04-10 03:04:07,254 - INFO - Changing the duration of tracks from milliseconds to minutes.
2025-04-10 03:04:07,298 - INFO - Categorizing the popularity of tracks in the DataFrame.
2025-04-10 03:04:07,325 - INFO - Categorizing the danceability of tracks in the DataFrame.
2025-04-10 03:04:07,354 - INFO - Categ

Unnamed: 0,track_id,artists,album_name,track_name,popularity,explicit,danceability,energy,valence,track_genre,duration_min,is_loud,is_live
0,0fROT4kK5oTm8xO8PX6EJF,Rilès,!I'll Be Back!,!I'll Be Back!,Medium,True,High,Medium,Happy,World Music,Short,False,False
1,1hH0t381PIXmUVWyG1Vj3p,Brian Hyland,The Bashful Blond,"""A"" You're Adorable",Medium,False,High,Medium,Very Happy,Rock,Short,False,False
2,1B45DvGMoFWdbAEUH2qliG,Little Apple Band,The Favorite Songs Of Sesame Street,"""C"" IS FOR COOKIE",Medium,False,Medium,High,Very Happy,Children,Very Short,False,False
3,73lXf5if6MWVWnsgXhK8bd,Little Apple Band,Sesame Street and Friends,"""C"" is for Cookie",Low,False,High,Medium,Happy,Children,Very Short,False,False
4,0jmz4aHEIBCRgrcV2xEkwB,Traditional;Sistine Chapel Choir;Massimo Palom...,Classical Christmas,"""Christe, Redemptor omnium""",,False,Low,Low,Very Sad,Ambient/Chill,Average,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
81338,2oVHb8wyg6oC2iNpGBNvx1,HEIZE,Hotel del Luna (Original Television Soundtrack...,내 맘을 볼 수 있나요,High,False,Medium,Low,Very Sad,Pop,Average,False,False
81339,4kIpBfvK44bxqX7zo8K1oP,Gaho,ITAEWON CLASS (Original Television Soundtrack)...,시작,High,False,Medium,High,Neutral,Pop,Short,True,False
81340,4mHc7LUlO3k6AXeFV2EiJK,Yiruma,Yiruma Official Album 'Piano Therapy' (The Ori...,약속 (Piano Solo),Medium,False,Medium,Low,Very Sad,Instrumental,Short,False,False
81341,0tQesiSZJQOdHeAC7r59us,GODA,One Punch Man (Original Soundtrack),원펀맨 Theme - Sad Theme,Medium,False,Medium,Medium,Neutral,Classical,Average,False,False


### Transform grammy

In [11]:
""" Transform grammys data for analysis. """

import pandas as pd
import logging
import re

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)


def delete_nominee_nulls(df: pd.DataFrame) -> pd.DataFrame:
    """Delete rows with null values in the 'nominee' column.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with null values removed.
    """
    logging.info("Deleting rows with null values in the 'nominee' column")
    return df.dropna(subset=['nominee'])


def delete_nulls_in_nonuseful(df: pd.DataFrame) -> pd.DataFrame:
    """Delete rows with null values in non-useful categories.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with null values removed.
    """
    logging.info("Deleting rows with null values in non-useful categories")
    categories_non_useful = [
    'Best Small Ensemble Performance (With or Without Conductor)',
    'Best Classical Vocal Performance',
    'Best Classical Vocal Soloist Performance',
    'Best Classical Performance - Instrumental Soloist or Soloists (With or Without Orchestra)',
    'Best Classical Performance - Vocal Soloist',
    'Best Performance - Instrumental Soloist or Soloists (With or Without Orchestra)',
    'Best Classical Performance - Vocal Soloist (With or Without Orchestra)'
    ]

    filter = (
        (df['artist'].isnull()) &
        (df['workers'].isnull()) &
        (df['category'].isin(categories_non_useful))
    )
    df = df[~filter]
    return df.reset_index(drop=True)


def impute_artist(df: pd.DataFrame) -> pd.DataFrame:
    """Impute missing values in the 'artist' column using the 'nominee' column.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with missing values imputed.
    """
    logging.info("Imputing missing values in the 'artist' column")
    condition = df['artist'].isnull() & df['workers'].isnull()
    df.loc[condition, 'artist'] = df.loc[condition, 'nominee']
    return df.reset_index(drop=True)

def imput_parenthesis_artists(df: pd.DataFrame) -> pd.DataFrame:
    """Impute missing values in the 'artist' column from the artist in the parenthesis in the 'workers' column.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with missing values imputed.
    """
    logging.info("Imputing missing values in the 'artist' column from the parenthesis")

    def extract_artist(workers):
        match = re.search(r'\((.*?)\)', workers)
        if match:
            return match.group(1)
        return None

    df["artist"] = (df.apply(lambda row:extract_artist(row["workers"])
            if pd.isna(row["artist"])
                else row["artist"], axis=1))
    return df.reset_index(drop=True)


def impute_artists_role(df: pd.DataFrame) -> pd.DataFrame:
    """Impute missing values in the 'artist' column using the roles that appear in the 'workers' column.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with missing values imputed.
    """
    logging.info("Imputing missing values in the 'artist' column using the 'role' column")

    def extract_artist(workers):
        if pd.isnull(workers):
            return None
        rule = re.match(r"([^,;]+), (soloist|composer|conductor|artist)", workers)
        if rule:
            return rule.group(1).strip()
        rule = re.match(r"(.+?(Featuring|&| and ).*?)(;|,|$)", workers, re.IGNORECASE)
        if rule:
            return rule.group(1).strip()
        return workers.strip()
    df['artist'] = df['artist'].fillna(df['workers'].apply(extract_artist))
    return df.reset_index(drop=True)


def replace_values(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the value of '(Various Artists)' in the artist column for the value 'Various Artists' in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with specified values replaced.
    """
    logging.info("Replacing specific values in the DataFrame")
    df['artist'] = df['artist'].replace({'(Various Artists)': 'Various Artists'})
    return df.reset_index(drop=True)


def change_column_name(df: pd.DataFrame) -> pd.DataFrame:
    """Change the name of a column in the DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with renamed columns.
    """
    logging.info("Changing column names")
    df = df.rename(columns={'winner': 'nominated'})
    return df.reset_index(drop=True)


def delete_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Delete unused columns from the DataFrame.
    Args:
        df (pd.DataFrame): The DataFrame to modify.

    Returns:
        pd.DataFrame: The modified DataFrame with specified columns deleted.
    """
    logging.info("Deleting columns")
    df = df.drop(columns=['published_at', 'updated_at', 'img', 'workers'], axis=1)
    return df.reset_index(drop=True)


def transform_grammy_data(df: pd.DataFrame) -> pd.DataFrame:
    """Transform the Grammy data for analysis.

    Args:
        df (pd.DataFrame): The DataFrame to transform.

    Returns:
        pd.DataFrame: The transformed DataFrame.
    """
    logging.info("Transforming Grammy data")
    df = delete_nominee_nulls(df)
    df = delete_nulls_in_nonuseful(df)
    df = impute_artist(df)
    df = imput_parenthesis_artists(df)
    df = impute_artists_role(df)
    df = replace_values(df)
    df = change_column_name(df)
    df = delete_columns(df)
    df['decade'] = (df['year'] // 10) * 10
    return df.reset_index(drop=True)

In [12]:
transform_grammy_data(df)

2025-04-10 04:04:28,904 - INFO - Transforming Grammy data
2025-04-10 04:04:28,905 - INFO - Deleting rows with null values in the 'nominee' column
2025-04-10 04:04:28,918 - INFO - Deleting rows with null values in non-useful categories
2025-04-10 04:04:28,932 - INFO - Imputing missing values in the 'artist' column
2025-04-10 04:04:28,936 - INFO - Imputing missing values in the 'artist' column from the parenthesis
2025-04-10 04:04:28,983 - INFO - Imputing missing values in the 'artist' column using the 'role' column
2025-04-10 04:04:28,996 - INFO - Replacing specific values in the DataFrame
2025-04-10 04:04:29,004 - INFO - Changing column names
2025-04-10 04:04:29,007 - INFO - Deleting columns


Unnamed: 0,year,title,category,nominee,artist,nominated,decade
0,2019,62nd Annual GRAMMY Awards (2019),Record Of The Year,Bad Guy,Billie Eilish,True,2010
1,2019,62nd Annual GRAMMY Awards (2019),Record Of The Year,"Hey, Ma",Bon Iver,True,2010
2,2019,62nd Annual GRAMMY Awards (2019),Record Of The Year,7 rings,Ariana Grande,True,2010
3,2019,62nd Annual GRAMMY Awards (2019),Record Of The Year,Hard Place,H.E.R.,True,2010
4,2019,62nd Annual GRAMMY Awards (2019),Record Of The Year,Talk,Khalid,True,2010
...,...,...,...,...,...,...,...
4787,1958,1st Annual GRAMMY Awards (1958),Best Classical Performance - Instrumentalist (...,Tchaikovsky: Piano Concerto No. 1 In B Flat Mi...,Symphony Of The Air Orchestra,True,1950
4788,1958,1st Annual GRAMMY Awards (1958),Best Classical Performance - Instrumentalist (...,Segovia Golden Jubilee,Andres Segovia,True,1950
4789,1958,1st Annual GRAMMY Awards (1958),Best Classical Performance - Chamber Music (In...,Beethoven: Quartet 130,"Alvin Dinkin, Paul Shure, Eleanor Aller Slatki...",True,1950
4790,1958,1st Annual GRAMMY Awards (1958),Best Classical Performance - Vocal Soloist (Wi...,Operatic Recital,Operatic Recital,True,1950
