In [3]:
import numpy as np
import json
import torch

In [None]:
# Things to do:
# Glean a small portion of the data (10k?), each one arbitrarily treating one song as the target and removing that from the playlist, applying a scaler first.
# Design RNN (first do a basic NN off of summary) to accept variable number of inputs and get one output of a number of variables
# train RNN. RNN does not get used to pick the songs

In [4]:
dtype = torch.FloatTensor
with open("../data/playlist_data.json") as jsonfile:
    playlist_data = json.load(jsonfile)
with open("../data/track_audio_data.json") as jsonfile:
    track_data = json.load(jsonfile)

In [17]:
# Am going to need every track, only 10k playlists.
# Still should make a dict of tracks in playlists to see how many appear.
import random
sample_size = 10000
playlist_samples = random.sample(playlist_data, sample_size)
# Use pid to get summaries

In [6]:
ORD_FEATURES = ["danceability", "energy", "loudness", "mode", "speechiness",
            "acousticness", "liveness", "valence", "tempo", "duration_ms"]

CAT_FEATURES = {
    "time_signature": ["0", "1", "3", "4", "5"],
    "key": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"]
}

def playlist_summary(playlist):
    total_tracks = playlist["num_tracks"]
    audio_summary = dict.fromkeys(ORD_FEATURES, 0)
    for feat, values in CAT_FEATURES.items():
        for value in values:
            audio_summary[f"{feat}_{value}"] = 0
    for track_num in playlist["tracks"]:
        try:
            track_audio_feats = track_data[str(track_num)]
        except KeyError:
            continue

        if track_audio_feats is None:
            total_tracks -= 1
            continue
        for feat, val in track_audio_feats.items():
            if feat in CAT_FEATURES:
                audio_summary[f"{feat}_{val}"] += 1
            elif feat in ORD_FEATURES:
                audio_summary[feat] += val
            
    for feat in audio_summary.keys():
        audio_summary[feat] = round(audio_summary[feat] / total_tracks, 5)
    return audio_summary

def single_track_summary(track):
    audio_summary = dict.fromkeys(ORD_FEATURES, 0)
    for feat, values in CAT_FEATURES.items():
        for value in values:
            audio_summary[f"{feat}_{value}"] = 0
    track_audio_feats = track_data[str(track)]
    for feat, val in track_audio_feats.items():
        if feat in CAT_FEATURES:
            audio_summary[f"{feat}_{val}"] += 1
        elif feat in ORD_FEATURES:
            audio_summary[feat] += val
    return audio_summary


In [7]:
def playlist_x_y(playlist):
    masked_playlist = dict(playlist)
    masked_playlist['tracks'] = playlist['tracks'].copy()
    y_track = masked_playlist['tracks'].pop(random.randrange(playlist['num_tracks']))
    masked_playlist['num_tracks'] -= 1
    return playlist_summary(masked_playlist), single_track_summary(y_track)

In [8]:
sample_ids = [p['pid'] for p in playlist_samples]
sample_tracks = [p['tracks'] for p in playlist_samples]
sample_summaries = [playlist_summary(p) for p in playlist_samples]

In [18]:
from sklearn.model_selection import train_test_split

x_list, y_list = [], []
for sample in playlist_samples:
    x, y = playlist_x_y(sample)
    x_list.append(list(x.values()))
    y_list.append(list(y.values()))

sample_x = np.array(x_list, dtype=np.float64)
sample_y = np.array(y_list, dtype=np.float64)
num_feats = sample_y.shape[1]

# scaler = StandardScaler()
# scaled_x = scaler.fit_transform(sample_x)
# scaled_y = scaler.transform(sample_y)

train_x, valid_x, train_y, valid_y = train_test_split(sample_x, sample_y)

# Use inverse_transform when applying

In [43]:
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

class PlaylistNet(nn.Module):

    def __init__(self):
        super(PlaylistNet, self).__init__()
        
        self.input_layer = nn.Linear(num_feats, 50, dtype=torch.float64)
        self.output_layer = nn.Linear(50, num_feats, dtype=torch.float64)
    
    def forward(self, x):
        x = torch.sigmoid(self.input_layer(x))
        # x = torch.relu(self.hidden_layer(x))
        x = self.output_layer(x)
        return x

class PlaylistDataset(Dataset):
    def __init__(self, playlists, targets):
        self.playlists = playlists
        self.targets = targets
    
    def __len__(self):
        return len(self.playlists)
    
    def __getitem__(self, idx):
        return self.playlists[idx], self.targets[idx]

model = PlaylistNet()

loss_fn = torch.nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

train_data = PlaylistDataset(train_x, train_y)
valid_data = PlaylistDataset(valid_x, valid_y)

train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=100, shuffle=True)

In [44]:
epochs = 5
for epoch in range(epochs):
    model.train(True)
    running_loss = 0.0

    for i, data in enumerate(train_loader):
        inputs, targets = data

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
    print(f"Training loss at epoch {epoch + 1}: {running_loss}")
    model.train(False)

    running_vloss = 0.0

    for i, data in enumerate(valid_loader):
        inputs, targets = data
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        running_vloss += loss.item()
    print(f"Validation loss at epoch {epoch + 1}: {running_vloss}")

Training loss at epoch 1: 657111.1436185674
Validation loss at epoch 1: 218775.5338209781
Training loss at epoch 2: 657058.910320662
Validation loss at epoch 2: 218760.53778420188
Training loss at epoch 3: 657014.0586577792
Validation loss at epoch 3: 218745.63040480617
Training loss at epoch 4: 656969.4306808783
Validation loss at epoch 4: 218730.74463707945
Training loss at epoch 5: 656924.6452942094
Validation loss at epoch 5: 218715.81163485823


In [45]:
model.eval()
values = list(playlist_summary(playlist_samples[0]).values())
print(values)
x = torch.tensor(values, dtype=torch.float64)
print(model(x).tolist())


[0.59042, 0.72268, -6.26432, 0.60526, 0.05313, 0.13094, 0.18722, 0.46711, 113.19379, 239733.26316, 0.0, 0.0, 0.07895, 0.92105, 0.0, 0.07895, 0.23684, 0.05263, 0.0, 0.10526, 0.07895, 0.05263, 0.10526, 0.05263, 0.15789, 0.02632, 0.05263]
[0.6293630483626644, 0.6752987499242384, -6.506638996377977, 0.9971395727775517, 0.0590685016063939, 0.10381959210899479, 0.1287376637102075, 0.47131849530970976, 39.60992694265453, 39.27887272922309, -0.030177136103844773, -0.003578005534715392, 0.0037794819342839917, 0.9876365290202315, 0.011548265363761315, -0.015178420940060972, -0.026062852202577383, 0.017181676312142263, 0.007820147387078022, 0.00028877383282177593, 0.015417293462005766, -0.011544718988168463, 0.008132012049267438, -0.030710337618963898, -0.003371229428191125, 0.00582371096691925, 0.030470169090965574]
