In [30]:
import pandas as pd
from pathlib import Path
from datetime import datetime
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import tqdm
from sklearn.model_selection import KFold

In [2]:
BASE_DATE = datetime.strptime("2025-01-03", '%Y-%m-%d').timestamp()
print(BASE_DATE)

1735858800.0


In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

Device: cpu


  return torch._C._cuda_getDeviceCount() > 0


# DATA

In [4]:
tracks_raw_data = pd.read_json(Path('../data_v2/tracks_artists.jsonl'), lines=True)
artists_raw_data = pd.read_json(Path('../data_v2/artists.jsonl'), lines=True)

# NORMALIZE DATES
tracks_raw_data["release_date"] = (pd.to_datetime(tracks_raw_data["release_date"], format='mixed').apply(lambda x: x.timestamp())).div(BASE_DATE)

# NORMALIZE DURATION
tracks_raw_data["duration_ms"] = tracks_raw_data["duration_ms"].div(tracks_raw_data["duration_ms"].max())

# NORMALIZE TEMPO
tracks_raw_data["tempo"] = tracks_raw_data["tempo"].div(tracks_raw_data["tempo"].max())

# EXPLICITE ENCODING
tracks_raw_data["explicit"] = tracks_raw_data["explicit"].apply(lambda x: [0, 1] if x else [1, 0])

# processs ARTIST HASH

def postprocess_hash_to_list(x):
    str_x = str(x)
    if len(str_x) < 8:
        str_x = "0" * (8 - len(str_x)) + str_x
    return [int(x) for x in str_x]

tracks_raw_data["id_artist_hash"] = tracks_raw_data["id_artist_hash"].apply(postprocess_hash_to_list)


In [5]:
class TracksDataset(Dataset):
    def __init__(self, tracks_data: pd.DataFrame):
        self.data = tracks_data

    def __len__(self):
        return len(self.data)

    def get_item(self, idx):
        return self.data.iloc[idx].values

    def __getitem__(self, idx):
        unpacked_data = []
        for data in self.data.iloc[idx].drop("id_track").values:
            if type(data) != list:
                unpacked_data.append(data)
            else:
                unpacked_data += data
        return torch.Tensor(unpacked_data)

In [14]:
BATCH_SIZE = 32
NUMBER_OF_INPUTS = 47

In [15]:
tracks_raw_dataloader = DataLoader(TracksDataset(tracks_raw_data), batch_size=BATCH_SIZE, shuffle=True)

# Code

In [34]:
def cross_validate_model(model_class: nn.Module, learning_rate, loss_module, dataset: Dataset, number_of_epochs: int = 10, number_of_splits: int = 5):
    kfold = KFold(n_splits=number_of_splits, shuffle=True)
    for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
        model = model_class(NUMBER_OF_INPUTS).to(device)
        print(f"\nFold {fold}")

        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
        train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_ids)
        test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=test_ids)
        progress_bar = tqdm.tqdm(range(number_of_epochs), total=number_of_epochs, desc="Epoch")

        for _ in progress_bar:
            agregated_loss = 0
            for data in train_loader:

                data = data.to(device)

                preds = model(data)
                loss = loss_module(preds, data)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                agregated_loss += loss.item()
            progress_bar.set_postfix({"Train loss": agregated_loss / len(train_loader)})
            model.train()
            scheduler.step()

        model.eval()
        test_loss = 0
        for data in test_loader:
            data = data.to(device)
            preds = model(data)
            loss = loss_module(preds, data)
            test_loss += loss.item()
        print(f"Test loss: {test_loss / len(test_loader)}")

# Modele

In [16]:
class Encoder(nn.Module):

    def __init__(self, hidden_dim: int):
        super().__init__()
        self.fc1 = nn.Linear(NUMBER_OF_INPUTS, 64)
        self.act1 = nn.Softsign()
        self.fc5 = nn.Linear(64, hidden_dim)

    def forward(self, x):
        x = self.act1(self.fc1(x))
        x = self.fc5(x)
        return x

In [17]:
class Decoder(nn.Module):

    def __init__(self, hidden_dim: int):
        super().__init__()
        self.fc1 = nn.Linear(hidden_dim, 64)
        self.act1 = nn.Softsign()
        self.fc5 = nn.Linear(64, NUMBER_OF_INPUTS)

    def forward(self, x):
        x = self.act1(self.fc1(x))
        x = self.fc5(x)
        return x

In [18]:
class Autoencoder(nn.Module):

    def __init__(self,
                 latent_dim: int,
                 encoder_class : object = Encoder,
                 decoder_class : object = Decoder):
        super().__init__()
        self.encoder = encoder_class(latent_dim)
        self.decoder = decoder_class(latent_dim)

    def forward(self, x):
        """
        The forward function takes in an image and returns the reconstructed image
        """
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat

# Training

In [22]:
model = Autoencoder(32).to(device)

In [23]:
NUMBER_OF_EPOCHS = 10
LEARNING_RATE = 0.001


optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
loss_module = nn.L1Loss()
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [27]:
model.train()
agregated_loss = 0
for epoch in tqdm.tqdm(range(NUMBER_OF_EPOCHS), total=NUMBER_OF_EPOCHS, desc="Epoch"):
    agregated_loss = 0
    for data in tracks_raw_dataloader:

        data = data.to(device)

        preds = model(data)
        loss = loss_module(preds, data)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        agregated_loss += loss.item()
    model.train()
    scheduler.step()

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 100%|██████████| 10/10 [01:29<00:00,  8.97s/it]


In [35]:
cross_validate_model(Autoencoder, 0.0005, nn.L1Loss(), TracksDataset(tracks_raw_data), 5, 5)


Fold 0


Epoch: 100%|██████████| 5/5 [00:38<00:00,  7.77s/it, Train loss=0.262]


Test loss: 0.2283819721522906

Fold 1


Epoch:  80%|████████  | 4/5 [00:39<00:09,  9.94s/it, Train loss=0.389]


KeyboardInterrupt: 