In [1]:
# prompt: mount drive

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!git clone https://github.com/minzwon/sota-music-tagging-models.git
!cd sota-music-tagging-models && pip install -r requirements.txt

Cloning into 'sota-music-tagging-models'...
remote: Enumerating objects: 331, done.[K
remote: Counting objects: 100% (91/91), done.[K
remote: Compressing objects: 100% (35/35), done.[K
remote: Total 331 (delta 62), reused 56 (delta 56), pack-reused 240[K
Receiving objects: 100% (331/331), 436.62 MiB | 25.04 MiB/s, done.
Resolving deltas: 100% (151/151), done.
Updating files: 100% (60/60), done.
Collecting audioread==2.1.8 (from -r requirements.txt (line 1))
  Downloading audioread-2.1.8.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting essentia==2.1b6.dev184 (from -r requirements.txt (line 2))
  Downloading essentia-2.1b6.dev184.tar.gz (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[0mDiscarding [4;34mhttps://files.pythonhosted.org/packages/c2/83/3d3ce935032c72db531c7456fffe090dfce1d702aa8123873fdf049f0621/essenti

In [3]:
!pip install cog

Collecting cog
  Downloading cog-0.9.7-py3-none-any.whl (96 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/96.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.0/96.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting fastapi<0.99.0,>=0.75.2 (from cog)
  Downloading fastapi-0.98.0-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydantic<2,>=1.9 (from cog)
  Downloading pydantic-1.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Collecting structlog<25,>=20 (from cog)
  Downloading structlog-24.1.0-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Col

In [4]:
import sys
import tempfile
from pathlib import Path
import os
import torch
import librosa
import numpy as np
from torch.autograd import Variable
import matplotlib.pyplot as plt

import cog

model_dir = Path("/content/sota-music-tagging-models/training")
sys.path.insert(0, str(model_dir))

import model

SAMPLE_RATE = 16000
DATASET = "mtat"
MODEL_NAMES = {
    "Self-attention": "attention",
    "CRNN": "crnn",
    "FCN": "fcn",
    "Harmonic CNN": "hcnn",
    "MusicNN": "musicnn",
    "Sample-level CNN": "sample",
    "Sample-level CNN + Squeeze-and-excitation": "se",
}


class Predictor(cog.BasePredictor):
    def setup(self):
        if torch.cuda.is_available():
            self.device = torch.device("cuda:0")
        else:
            self.device = torch.device("cpu")

        self.models = {
            "fcn": model.FCN().to(self.device),
            "musicnn": model.Musicnn(dataset=DATASET).to(self.device),
            "crnn": model.CRNN().to(self.device),
            "sample": model.SampleCNN().to(self.device),
            "se": model.SampleCNNSE().to(self.device),
            "attention": model.CNNSA().to(self.device),
            "hcnn": model.HarmonicCNN().to(self.device),
        }
        self.input_lengths = {
            "fcn": 29 * 16000,
            "musicnn": 3 * 16000,
            "crnn": 29 * 16000,
            "sample": 59049,
            "se": 59049,
            "attention": 15 * 16000,
            "hcnn": 5 * 16000,
        }

        for key, mod in self.models.items():
            filename = os.path.join("/content/sota-music-tagging-models/models", DATASET, key, "best_model.pth")
            state_dict = torch.load(filename, map_location=self.device)
            if "spec.mel_scale.fb" in state_dict.keys():
                mod.spec.mel_scale.fb = state_dict["spec.mel_scale.fb"]
            mod.load_state_dict(state_dict)

        self.tags = np.load("/content/sota-music-tagging-models/split/mtat/tags.npy")

    # @cog.Input("input", type=Path, help="Input audio file")
    # @cog.Input(
    #     "variant",
    #     type=str,
    #     default="Harmonic CNN",
    #     options=MODEL_NAMES.keys(),
    #     help="Model variant",
    # )
    # @cog.Input(
    #     "output_format",
    #     type=str,
    #     default="Visualization",
    #     options=["Visualization", "JSON"],
    #     help="Output either a bar chart visualization or a JSON blob",
    # )
    def predict(self, input, variant, output_format):
        key = MODEL_NAMES[variant]
        model = self.models[key].eval()
        input_length = self.input_lengths[key]
        signal, _ = librosa.core.load(str(input), sr=SAMPLE_RATE)
        length = len(signal)
        hop = length // 2 - input_length // 2
        # print("length, input_length", length, input_length)
        # x = torch.zeros(1, input_length)
        x = torch.zeros(1, input_length).to(self.device)  # Ensure x is on the same device

        x[0] = torch.Tensor(signal[hop : hop + input_length]).unsqueeze(0)
        x = Variable(x)
        # print("x.max(), x.min(), x.mean()", x.max(), x.min(), x.mean())
        # asdf()
        out = model(x)
        result = dict(zip(self.tags, out[0].detach().cpu().numpy().tolist()))

        if output_format == "JSON":
            return result

        result_list = list(sorted(result.items(), key=lambda x: x[1]))
        plt.figure(figsize=[5, 10])
        plt.barh(
            np.arange(len(result_list)), [r[1] for r in result_list], align="center"
        )
        plt.yticks(np.arange(len(result_list)), [r[0] for r in result_list])
        plt.tight_layout()

        out_path = Path(tempfile.mkdtemp()) / "out.png"
        plt.savefig(out_path)
        return out_path



In [5]:
from pathlib import Path
path = Path("/content/drive/MyDrive/PBL7/Data/audio/23_03_2024/1FcM70fl2rKgfwL8DSSrHv.mp3")
predictor = Predictor()
predictor.setup()
result = predictor.predict(input=path,
                  variant="Harmonic CNN",
                  output_format="JSON")



In [6]:
# prompt: sort dict result by values

sorted_result = dict(sorted(result.items(), key=lambda x: x[1], reverse=True))
threshold = 0.1
filtered_keys = [key for key, value in sorted_result.items() if value > threshold]
print(filtered_keys)


['guitar', 'slow', 'solo', 'classical']


In [7]:
top_items = dict(sorted(sorted_result.items(), key=lambda x: x[1], reverse=True)[:10])
top_items

{'guitar': 0.987239420413971,
 'slow': 0.3532073497772217,
 'solo': 0.12823598086833954,
 'classical': 0.11691346019506454,
 'strings': 0.08882374316453934,
 'soft': 0.0693880170583725,
 'harp': 0.04544632509350777,
 'quiet': 0.034976597875356674,
 'no vocals': 0.033945873379707336,
 'no vocal': 0.03231186419725418}

In [8]:
top_keys = sorted(sorted_result, key=lambda x: sorted_result[x], reverse=True)[:10]
top_keys

['guitar',
 'slow',
 'solo',
 'classical',
 'strings',
 'soft',
 'harp',
 'quiet',
 'no vocals',
 'no vocal']

In [9]:
sorted_result

{'guitar': 0.987239420413971,
 'slow': 0.3532073497772217,
 'solo': 0.12823598086833954,
 'classical': 0.11691346019506454,
 'strings': 0.08882374316453934,
 'soft': 0.0693880170583725,
 'harp': 0.04544632509350777,
 'quiet': 0.034976597875356674,
 'no vocals': 0.033945873379707336,
 'no vocal': 0.03231186419725418,
 'no voice': 0.02278067357838154,
 'country': 0.019333036616444588,
 'piano': 0.012956622056663036,
 'harpsichord': 0.008719569072127342,
 'female': 0.006932004354894161,
 'drums': 0.0058342465199530125,
 'vocal': 0.005269770510494709,
 'classic': 0.005238550715148449,
 'woman': 0.004195552784949541,
 'female vocal': 0.004030252806842327,
 'fast': 0.003451228141784668,
 'singing': 0.0033275603782385588,
 'sitar': 0.003051752457395196,
 'vocals': 0.002477383939549327,
 'beat': 0.0016379287699237466,
 'female voice': 0.0016359976725652814,
 'indian': 0.001629218808375299,
 'flute': 0.001542694866657257,
 'voice': 0.001498732017353177,
 'pop': 0.0014255172573029995,
 'ambient'

In [10]:
import pandas as pd
df_spotify = pd.read_csv("/content/drive/MyDrive/PBL7/Data/tracks_metadata.csv")
df_spotify

Unnamed: 0,track_title,track_url,track_album,genre,track_id,preview_mp3,duration,artist_name,artist_url,release_date,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,...Baby One More Time,https://open.spotify.com/track/3MjUtNVVq3C8Fn0...,...Baby One More Time (Digital Deluxe Version),Pop,3MjUtNVVq3C8Fn0MP3zhXa,https://p.scdn.co/mp3-preview/174e01719c3b06ee...,3:31,Britney Spears,https://open.spotify.com/artist/26dSoYclwsYLMA...,1999-01-12T00:00:00Z,...,0.0,-5.745,0.0,0.0307,0.20200,0.000131,0.4430,0.9070,92.960,4.0
1,DJ Got Us Fallin' In Love (feat. Pitbull),https://open.spotify.com/track/4356Typ82hUiFAy...,Raymond v Raymond (Expanded Edition),Pop,4356Typ82hUiFAynbLYbPn,https://p.scdn.co/mp3-preview/1619c7342806281d...,3:40,USHER,https://open.spotify.com/artist/23zg3TcAtWQy7J...,2010-03-30T00:00:00Z,...,7.0,-3.398,0.0,0.1090,0.03380,0.000000,0.0820,0.6540,119.963,4.0
2,People,https://open.spotify.com/track/26b3oVLrRUaaybJ...,People,Pop,26b3oVLrRUaaybJulow9kz,https://p.scdn.co/mp3-preview/abcbc9adf10ae490...,3:4,Libianca,https://open.spotify.com/artist/7kjSuFGKhLm8b5...,2022-12-06T00:00:00Z,...,10.0,-7.621,0.0,0.0678,0.55100,0.000013,0.1020,0.6930,124.357,5.0
3,Somebody That I Used To Know,https://open.spotify.com/track/1qDrWA6lyx8cLEC...,Making Mirrors,Pop,1qDrWA6lyx8cLECdZE7TV7,https://p.scdn.co/mp3-preview/36e1e9984c1a58cb...,4:4,Gotye,https://open.spotify.com/artist/2AsusXITU8P25d...,2011-01-01T00:00:00Z,...,0.0,-6.932,1.0,0.0371,0.54800,0.000115,0.0989,0.7480,129.059,4.0
4,Murder On The Dancefloor,https://open.spotify.com/track/4tKGFmENO69tZR9...,Read My Lips,Pop,4tKGFmENO69tZR9ahgZu48,https://p.scdn.co/mp3-preview/340bdf7b29f2725f...,3:50,Sophie Ellis-Bextor,https://open.spotify.com/artist/2cBh5lVMg222FF...,2002-06-06T00:00:00Z,...,1.0,-5.281,0.0,0.0299,0.00234,0.000026,0.3120,0.8870,117.310,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8319,Concerto for Flute and String Orchestra: I. An...,https://open.spotify.com/track/1kfmtzWgmbZWcyA...,Concerto for Flute and String Orchestra: I. An...,classical,1kfmtzWgmbZWcyARriIyoc,https://p.scdn.co/mp3-preview/3661ee9f9648b2ec...,6:39,André Jolivet,https://open.spotify.com/artist/5RLjTTgn3q1gx0...,2024-05-03T00:00:00Z,...,4.0,-26.549,0.0,0.0444,0.97300,0.412000,0.0683,0.0819,135.197,4.0
8320,Wedding in the Carpathians (1938): Allegro viv...,https://open.spotify.com/track/7yGgY0UAHyeKTzV...,Piano Concerto/ Wedding in the carpathians,classical,7yGgY0UAHyeKTzVLAnPKKA,https://p.scdn.co/mp3-preview/108b39a48ddd6f4c...,1:20,Paul Constantinescu,https://open.spotify.com/artist/0DKEKOqm05e1gO...,2024-05-03T00:00:00Z,...,5.0,-28.691,1.0,0.0428,0.97600,0.273000,0.7050,0.1140,107.951,3.0
8321,"Sonata for Violin & Cello, M. 73: III. Lent",https://open.spotify.com/track/0wPblZFusgHJYrt...,"Sonata for Violin & Cello, M. 73: III. Lent",classical,0wPblZFusgHJYrt2rcEDHw,https://p.scdn.co/mp3-preview/fd98ee8e7d5d5520...,6:25,Maurice Ravel,https://open.spotify.com/artist/17hR0sYHpx7VYT...,2024-05-03T00:00:00Z,...,9.0,-21.796,0.0,0.0497,0.98600,0.884000,0.0706,0.0373,106.614,4.0
8322,"Partita No. 1 in B-Flat Major, BWV 825: I. Pre...",https://open.spotify.com/track/5QRPfmI6oWCXuKv...,Bach: Six Partitas,classical,5QRPfmI6oWCXuKvSZLvMBl,https://p.scdn.co/mp3-preview/639b5d212b7d4970...,2:8,Johann Sebastian Bach,https://open.spotify.com/artist/5aIqB5nVVvmFsv...,2024-05-03T00:00:00Z,...,9.0,-21.912,1.0,0.0402,0.78500,0.932000,0.2130,0.6980,179.638,4.0


In [17]:
df_spotify_cover_art = df_spotify["cover_art"].iloc[:4010]


In [18]:
df_spotify_cover_art

0       https://i.scdn.co/image/ab67616d00001e028e4986...
1       https://i.scdn.co/image/ab67616d00001e0286b0c9...
2       https://i.scdn.co/image/ab67616d00001e02fc342f...
3       https://i.scdn.co/image/ab67616d00001e028ac576...
4       https://i.scdn.co/image/ab67616d00001e02b73615...
                              ...                        
4005    https://i.scdn.co/image/ab67616d00001e02d46a9d...
4006    https://i.scdn.co/image/ab67616d00001e02e71dd1...
4007    https://i.scdn.co/image/ab67616d00001e0272d893...
4008    https://i.scdn.co/image/ab67616d00001e0235243e...
4009    https://i.scdn.co/image/ab67616d00001e022143db...
Name: cover_art, Length: 4010, dtype: object

In [22]:
df_spotify_autotags = pd.read_csv("/content/drive/MyDrive/PBL7/Data/tracks_metadata_autotags.csv")
df_spotify_autotags.insert(loc=10, column='cover_art', value=df_spotify_cover_art)
df_spotify_autotags.columns

Index(['track_title', 'track_url', 'track_album', 'genre', 'track_id',
       'preview_mp3', 'duration', 'artist_name', 'artist_url', 'release_date',
       'cover_art', 'is_explicit', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'time_signature', 'tags'],
      dtype='object')

In [23]:
df_spotify_autotags.to_csv("/content/drive/MyDrive/PBL7/Data/tracks_metadata_autotags.csv", index=False)


In [24]:
# # Merge 2 DataFrame theo cột 'track_id', sử dụng left join để giữ tất cả các hàng từ df_spotify
df_merged = df_spotify.merge(df_spotify_autotags[['track_id', 'tags']], on='track_id', how='left')
df_merged

Unnamed: 0,track_title,track_url,track_album,genre,track_id,preview_mp3,duration,artist_name,artist_url,release_date,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,tags
0,...Baby One More Time,https://open.spotify.com/track/3MjUtNVVq3C8Fn0...,...Baby One More Time (Digital Deluxe Version),Pop,3MjUtNVVq3C8Fn0MP3zhXa,https://p.scdn.co/mp3-preview/174e01719c3b06ee...,3:31,Britney Spears,https://open.spotify.com/artist/26dSoYclwsYLMA...,1999-01-12T00:00:00Z,...,-5.745,0.0,0.0307,0.20200,0.000131,0.4430,0.9070,92.960,4.0,"{'female': 0.5753094553947449, 'pop': 0.418311..."
1,DJ Got Us Fallin' In Love (feat. Pitbull),https://open.spotify.com/track/4356Typ82hUiFAy...,Raymond v Raymond (Expanded Edition),Pop,4356Typ82hUiFAynbLYbPn,https://p.scdn.co/mp3-preview/1619c7342806281d...,3:40,USHER,https://open.spotify.com/artist/23zg3TcAtWQy7J...,2010-03-30T00:00:00Z,...,-3.398,0.0,0.1090,0.03380,0.000000,0.0820,0.6540,119.963,4.0,"{'pop': 0.5264065861701965, 'techno': 0.394188..."
2,People,https://open.spotify.com/track/26b3oVLrRUaaybJ...,People,Pop,26b3oVLrRUaaybJulow9kz,https://p.scdn.co/mp3-preview/abcbc9adf10ae490...,3:4,Libianca,https://open.spotify.com/artist/7kjSuFGKhLm8b5...,2022-12-06T00:00:00Z,...,-7.621,0.0,0.0678,0.55100,0.000013,0.1020,0.6930,124.357,5.0,"{'female': 0.5619944334030151, 'woman': 0.4503..."
3,Somebody That I Used To Know,https://open.spotify.com/track/1qDrWA6lyx8cLEC...,Making Mirrors,Pop,1qDrWA6lyx8cLECdZE7TV7,https://p.scdn.co/mp3-preview/36e1e9984c1a58cb...,4:4,Gotye,https://open.spotify.com/artist/2AsusXITU8P25d...,2011-01-01T00:00:00Z,...,-6.932,1.0,0.0371,0.54800,0.000115,0.0989,0.7480,129.059,4.0,"{'guitar': 0.43328985571861267, 'pop': 0.28120..."
4,Murder On The Dancefloor,https://open.spotify.com/track/4tKGFmENO69tZR9...,Read My Lips,Pop,4tKGFmENO69tZR9ahgZu48,https://p.scdn.co/mp3-preview/340bdf7b29f2725f...,3:50,Sophie Ellis-Bextor,https://open.spotify.com/artist/2cBh5lVMg222FF...,2002-06-06T00:00:00Z,...,-5.281,0.0,0.0299,0.00234,0.000026,0.3120,0.8870,117.310,4.0,"{'pop': 0.49464142322540283, 'techno': 0.42997..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8319,Concerto for Flute and String Orchestra: I. An...,https://open.spotify.com/track/1kfmtzWgmbZWcyA...,Concerto for Flute and String Orchestra: I. An...,classical,1kfmtzWgmbZWcyARriIyoc,https://p.scdn.co/mp3-preview/3661ee9f9648b2ec...,6:39,André Jolivet,https://open.spotify.com/artist/5RLjTTgn3q1gx0...,2024-05-03T00:00:00Z,...,-26.549,0.0,0.0444,0.97300,0.412000,0.0683,0.0819,135.197,4.0,
8320,Wedding in the Carpathians (1938): Allegro viv...,https://open.spotify.com/track/7yGgY0UAHyeKTzV...,Piano Concerto/ Wedding in the carpathians,classical,7yGgY0UAHyeKTzVLAnPKKA,https://p.scdn.co/mp3-preview/108b39a48ddd6f4c...,1:20,Paul Constantinescu,https://open.spotify.com/artist/0DKEKOqm05e1gO...,2024-05-03T00:00:00Z,...,-28.691,1.0,0.0428,0.97600,0.273000,0.7050,0.1140,107.951,3.0,
8321,"Sonata for Violin & Cello, M. 73: III. Lent",https://open.spotify.com/track/0wPblZFusgHJYrt...,"Sonata for Violin & Cello, M. 73: III. Lent",classical,0wPblZFusgHJYrt2rcEDHw,https://p.scdn.co/mp3-preview/fd98ee8e7d5d5520...,6:25,Maurice Ravel,https://open.spotify.com/artist/17hR0sYHpx7VYT...,2024-05-03T00:00:00Z,...,-21.796,0.0,0.0497,0.98600,0.884000,0.0706,0.0373,106.614,4.0,
8322,"Partita No. 1 in B-Flat Major, BWV 825: I. Pre...",https://open.spotify.com/track/5QRPfmI6oWCXuKv...,Bach: Six Partitas,classical,5QRPfmI6oWCXuKvSZLvMBl,https://p.scdn.co/mp3-preview/639b5d212b7d4970...,2:8,Johann Sebastian Bach,https://open.spotify.com/artist/5aIqB5nVVvmFsv...,2024-05-03T00:00:00Z,...,-21.912,1.0,0.0402,0.78500,0.932000,0.2130,0.6980,179.638,4.0,


In [25]:
from pathlib import Path

predictor = Predictor()
predictor.setup()

def autotag(file_path : str, model_name : str) :

  result = predictor.predict(input=Path(file_path),
                    variant=model_name,
                    output_format="JSON")
  sorted_result = dict(sorted(result.items(), key=lambda x: x[1], reverse=True))

  # get list of keys
  # top_keys = sorted(sorted_result, key=lambda x: sorted_result[x], reverse=True)[:10]

  # get dict of items
  top_items = dict(sorted(sorted_result.items(), key=lambda x: x[1], reverse=True)[:10])

  return top_items




In [26]:
from datetime import datetime

dates = ['17_04_2024','24_04_2024','01_05_2024','08_05_2024']


In [27]:
import os
from tqdm import tqdm


AUDIO_PATH = "/content/drive/MyDrive/PBL7/Data/audio"
# df = df_spotify.copy()

# if 'tags' not in df.columns:
#     df['tags'] = None

folders = os.listdir(AUDIO_PATH)

for folder in tqdm(folders) :
  if folder in dates :
    print(folder)
    path = os.path.join(AUDIO_PATH,folder)
    files = os.listdir(path)
    print(path)
    for file in files :
      file_path = os.path.join(path,file)
      track_id = file.split(".")[0]

      # if df_merged.loc[df_merged['track_id'] == track_id, 'tags'].item() is not None :
      tags_list = autotag(file_path, model_name="Harmonic CNN")

      df_merged.loc[df_merged['track_id'] == track_id, 'tags'] = str(tags_list)



  0%|          | 0/8 [00:00<?, ?it/s]

17_04_2024
/content/drive/MyDrive/PBL7/Data/audio/17_04_2024


 62%|██████▎   | 5/8 [03:09<01:53, 37.92s/it]

24_04_2024
/content/drive/MyDrive/PBL7/Data/audio/24_04_2024


 75%|███████▌  | 6/8 [06:05<02:16, 68.49s/it]

01_05_2024
/content/drive/MyDrive/PBL7/Data/audio/01_05_2024


 88%|████████▊ | 7/8 [07:22<01:10, 70.57s/it]

08_05_2024
/content/drive/MyDrive/PBL7/Data/audio/08_05_2024


100%|██████████| 8/8 [09:17<00:00, 69.67s/it]


In [28]:
df_merged['tags']

0       {'female': 0.5753094553947449, 'pop': 0.418311...
1       {'pop': 0.5264065861701965, 'techno': 0.394188...
2       {'female': 0.5619944334030151, 'woman': 0.4503...
3       {'guitar': 0.43328985571861267, 'pop': 0.28120...
4       {'pop': 0.49464142322540283, 'techno': 0.42997...
                              ...                        
8319    {'flute': 0.9036129117012024, 'classical': 0.7...
8320    {'slow': 0.37244266271591187, 'electronic': 0....
8321    {'classical': 0.5282531380653381, 'strings': 0...
8322    {'classical': 0.788891077041626, 'harpsichord'...
8323    {'classical': 0.525346577167511, 'slow': 0.335...
Name: tags, Length: 8324, dtype: object

In [30]:
df_merged.to_csv("/content/drive/MyDrive/PBL7/Data/tracks_metadata_autotags.csv", index=False)

In [29]:
# prompt: check rows with column 'tags' is null in df_merged

df_merged[df_merged['tags'].isnull()]


Unnamed: 0,track_title,track_url,track_album,genre,track_id,preview_mp3,duration,artist_name,artist_url,release_date,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,tags
