In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import lightning as L
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
from tqdm import tqdm  
import time 
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np

This file uses LSTM model to predict the following week's features. The training process is using the 2022's data to train and the data of 2023 to validate(Same dataset as Holt-Winters)

In [3]:
df=pd.read_csv(r'C:\Users\Windows\Desktop\Spotify_Dataset_V3.csv',delimiter=';')
df_noURL=df.iloc[:,0:-1]
url=df.iloc[:,-1]
# Creat a daily average data of each feature
df_nodup = df_noURL.loc[:,['Title','Artists','Date','Danceability','Energy','Loudness','Speechiness','Acousticness','Instrumentalness','Valence']]
df_nodup = df_nodup.drop_duplicates(subset=['Title','Artists','Date'])
df_nodup = df_nodup.drop('Artists', axis=1)
df_nodup = df_nodup.iloc[::-1]
Date = [];Mean_dance=[];Mean_energy=[];Mean_loud=[];Mean_speech=[];Mean_acoustic=[];Mean_instru=[];Mean_valence=[]
Var_dance=[];Var_energy=[];Var_loud=[];Var_speech=[];Var_acoustic=[];Var_instru=[];Var_valence=[]
i=199;k=0
while (df_nodup.shape[0]-i)>0:
    date=datetime.strptime(df_nodup.iloc[i,1],'%d/%m/%Y').strftime('%Y-%m-%d')
    if i>200:
        if date == Date[-1]:
            k=k+1
            i=i+1
            continue
    Date.append(date)
    df_mean=df_nodup.iloc[i-199-k:i,2:].mean()
    df_var=df_nodup.iloc[i-199-k:i,2:].var()
    Mean_dance.append(df_mean.iloc[0]);Var_dance.append(df_var.iloc[0])
    Mean_energy.append(df_mean.iloc[1]);Var_energy.append(df_var.iloc[1])
    Mean_loud.append(df_mean.iloc[2]);Var_loud.append(df_var.iloc[2])
    Mean_speech.append(df_mean.iloc[3]);Var_speech.append(df_var.iloc[3])
    Mean_acoustic.append(df_mean.iloc[4]);Var_acoustic.append(df_var.iloc[4])
    Mean_instru.append(df_mean.iloc[5]);Var_instru.append(df_var.iloc[5])
    Mean_valence.append(df_mean.iloc[6]);Var_valence.append(df_var.iloc[6])
    i+=200
    k=0


In [4]:
## Instead of coding an LSTM by hand, let's see what we can do with PyTorch's nn.LSTM()
class LightningLSTM(L.LightningModule):

    def __init__(self): # __init__() is the class constructor function, and we use it to initialize the Weights and Biases.
        
        super().__init__() # initialize an instance of the parent class, LightningModule.

        L.seed_everything(seed=41)
        
        ## input_size = number of features (or variables) in the data. In our example
        ##              we only have a single feature (value)
        ## hidden_size = this determines the dimension of the output
        ##               in other words, if we set hidden_size=1, then we have 1 output node
        ##               if we set hiddeen_size=50, then we hve 50 output nodes (that can then be 50 input
        ##               nodes to a subsequent fully connected neural network.
        self.lstm = nn.LSTM(input_size=1, hidden_size=1) 
         

    def forward(self, input):
        ## transpose the input vector
        for input_i in range(input.size(0)):
            input_trans = input[input_i].view(len(input[input_i]),1)
            lstm_out, temp = self.lstm(input_trans)
            ## lstm_out has the short-term memories for all inputs. We make our prediction with the last one
            if input_i==0:
                prediction=lstm_out[-1]
            else:
                prediction=torch.cat((prediction,lstm_out[-1]),0)
        #prediction = prediction
        return prediction
        
        
    def configure_optimizers(self): # this configures the optimizer we want to use for backpropagation.
        return Adam(self.parameters(), lr=0.001) ## we'll just go ahead and set the learning rate to 0.1

  
    def training_step(self, batch, batch_idx): # take a step during gradient descent.
        input_i, label_i = batch # collect input
        output_i = self.forward(input_i[0]) # run input through the neural network
        loss = ((output_i - label_i)**2).mean() ## loss = mean squared residual
        
        ###################
        ##
        ## Logging the loss and the predicted values so we can evaluate the training
        ##
        ###################
        self.log("train_loss", loss)
        '''
        if (label_i == 0):
            self.log("out_0", output_i)
        else:
            self.log("out_1", output_i)
        '''
        return loss

In [5]:
def LSTM_weekpredict(data, model):
    """
        Returns mse of weeks' prediction
    """
    # errors array
    mse_all=[]
    for j in range(len(data[0])-37):
        predict=np.array([])
        if len(data[0])==2:
            input_data=[data[0][0][j:j+30],data[0][1][j:j+30]]
        else:
            input_data=data[0][j:j+30]
        for i in range(7):
            pred = model(torch.tensor([input_data])).detach()
            input_data = np.append(input_data,pred)
            predict = np.append(predict,pred)
            float32_array = np.array(input_data).astype(np.float32)
            # Convert back to a list
            input_data = float32_array.tolist()
        mse=round(np.power((np.array(data[0][j+30:j+37])-predict),2).mean(),7)
        mse_all.append(mse)
    return np.array(mse_all).mean()  

In [6]:
Daily_mean = {'Mean_dance': Mean_dance,'Mean_energy':Mean_energy,'Mean_speech':Mean_speech,'Mean_acoustic':Mean_acoustic,'Mean_instru':Mean_instru,'Mean_valence':Mean_valence}
mse={}
for j in list(Daily_mean.keys()):
    Train_data=[]
    data = Daily_mean[j]
    for i in range(31):
        train=data[1822+10+i:2296-119-31+10+i]
        Train_data.append(train)#2022.1.1-2022.11.20-2022.12.20
    Test_data=data[2296-119-31+10:2296-119+10]#2022.11.21-2022.12.21(Prevent chiristmas)
    #Train_test_data=Mean_dance[1822:2306-119]#2022.1.1-2022.12.31
    Validate_data_input=data[2306-119:]#2023.1.1-2023.5.22                     
    #Result_data=Mean_dance[2306-119:]#2023.1.1-2023.5.29 the data from model.result contains 7 days of pure prediction
    float32_array = np.array(Train_data).astype(np.float32)
    # Convert back to a list
    Train_data = float32_array.tolist()
    float32_array = np.array(Test_data).astype(np.float32)
    # Convert back to a list
    Test_data = float32_array.tolist()
    float32_array = np.array(Validate_data_input).astype(np.float32)
    # Convert back to a list
    Validate_data_input = float32_array.tolist()
    ## create the training data for the neural network.
    inputs = torch.tensor(Train_data)
    labels = torch.tensor(Test_data)
    dataset = TensorDataset(inputs.unsqueeze(0), labels.unsqueeze(0))
    #dataset = TensorDataset(inputs, labels) 
    dataloader = DataLoader(dataset)
    model = LightningLSTM() # First, make model from the class

    ## print out the name and value for each parameter
    print("Before optimization, the parameters are...")
    for name, param in model.named_parameters():
        print(name, param.data)
    
    trainer = L.Trainer(max_epochs=2000)#log_every_n_steps=2
    
    trainer.fit(model, train_dataloaders=dataloader)
    
    print("After optimization, the parameters are...")
    for name, param in model.named_parameters():
        print(name, param.data)
    print('\nMse in weeks calculation')
    mse[j]=(LSTM_weekpredict([Validate_data_input],model))
    print(mse[j])

Seed set to 41


Before optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.5271],
        [-0.5468],
        [ 0.6011],
        [-0.6616]])
lstm.weight_hh_l0 tensor([[-0.4701],
        [ 0.5440],
        [-0.7436],
        [ 0.4904]])
lstm.bias_ih_l0 tensor([0.6089, 0.2714, 0.1792, 0.3866])
lstm.bias_hh_l0 tensor([ 0.7565,  0.0814, -0.7200,  0.9227])


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
D:\Anaconda\envs\pytorch\lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0       

Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2000` reached.


After optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.3075],
        [-0.2849],
        [ 0.8593],
        [-0.4408]])
lstm.weight_hh_l0 tensor([[-0.0626],
        [ 0.9761],
        [-0.4540],
        [ 0.8971]])
lstm.bias_ih_l0 tensor([0.8290, 0.5353, 0.4384, 0.6003])
lstm.bias_hh_l0 tensor([ 0.9765,  0.3452, -0.4608,  1.1365])

Mse in weeks calculation


Seed set to 41
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


5.545803571428571e-05
Before optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.5271],
        [-0.5468],
        [ 0.6011],
        [-0.6616]])
lstm.weight_hh_l0 tensor([[-0.4701],
        [ 0.5440],
        [-0.7436],
        [ 0.4904]])
lstm.bias_ih_l0 tensor([0.6089, 0.2714, 0.1792, 0.3866])
lstm.bias_hh_l0 tensor([ 0.7565,  0.0814, -0.7200,  0.9227])


Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2000` reached.


After optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.3153],
        [-0.2989],
        [ 0.8647],
        [-0.4414]])
lstm.weight_hh_l0 tensor([[-0.0676],
        [ 0.9626],
        [-0.4663],
        [ 0.8938]])
lstm.bias_ih_l0 tensor([0.8210, 0.5228, 0.4446, 0.5936])
lstm.bias_hh_l0 tensor([ 0.9685,  0.3327, -0.4546,  1.1297])

Mse in weeks calculation


Seed set to 41
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


6.017767857142857e-05
Before optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.5271],
        [-0.5468],
        [ 0.6011],
        [-0.6616]])
lstm.weight_hh_l0 tensor([[-0.4701],
        [ 0.5440],
        [-0.7436],
        [ 0.4904]])
lstm.bias_ih_l0 tensor([0.6089, 0.2714, 0.1792, 0.3866])
lstm.bias_hh_l0 tensor([ 0.7565,  0.0814, -0.7200,  0.9227])


Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2000` reached.


After optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.6654],
        [-0.6732],
        [ 0.9095],
        [-0.7896]])
lstm.weight_hh_l0 tensor([[-0.3539],
        [ 0.6479],
        [-0.8711],
        [ 0.5966]])
lstm.bias_ih_l0 tensor([0.4686, 0.1434, 0.4842, 0.2562])
lstm.bias_hh_l0 tensor([ 0.6162, -0.0466, -0.4150,  0.7923])

Mse in weeks calculation


Seed set to 41
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


4.432321428571429e-05
Before optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.5271],
        [-0.5468],
        [ 0.6011],
        [-0.6616]])
lstm.weight_hh_l0 tensor([[-0.4701],
        [ 0.5440],
        [-0.7436],
        [ 0.4904]])
lstm.bias_ih_l0 tensor([0.6089, 0.2714, 0.1792, 0.3866])
lstm.bias_hh_l0 tensor([ 0.7565,  0.0814, -0.7200,  0.9227])


Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2000` reached.


After optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.4158],
        [-0.4826],
        [ 0.9383],
        [-0.4981]])
lstm.weight_hh_l0 tensor([[-0.1754],
        [ 0.7067],
        [-0.6968],
        [ 0.7737]])
lstm.bias_ih_l0 tensor([0.7046, 0.3153, 0.5053, 0.4703])
lstm.bias_hh_l0 tensor([ 0.8522,  0.1252, -0.3939,  1.0065])

Mse in weeks calculation


Seed set to 41
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


0.0013220151785714282
Before optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.5271],
        [-0.5468],
        [ 0.6011],
        [-0.6616]])
lstm.weight_hh_l0 tensor([[-0.4701],
        [ 0.5440],
        [-0.7436],
        [ 0.4904]])
lstm.bias_ih_l0 tensor([0.6089, 0.2714, 0.1792, 0.3866])
lstm.bias_hh_l0 tensor([ 0.7565,  0.0814, -0.7200,  0.9227])


Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2000` reached.


After optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.7007],
        [-0.6990],
        [ 0.8799],
        [-0.8166]])
lstm.weight_hh_l0 tensor([[-0.3480],
        [ 0.6531],
        [-0.8958],
        [ 0.6014]])
lstm.bias_ih_l0 tensor([0.4351, 0.1190, 0.4573, 0.2314])
lstm.bias_hh_l0 tensor([ 0.5827, -0.0711, -0.4419,  0.7675])

Mse in weeks calculation


Seed set to 41
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


3.6089285714285714e-06
Before optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.5271],
        [-0.5468],
        [ 0.6011],
        [-0.6616]])
lstm.weight_hh_l0 tensor([[-0.4701],
        [ 0.5440],
        [-0.7436],
        [ 0.4904]])
lstm.bias_ih_l0 tensor([0.6089, 0.2714, 0.1792, 0.3866])
lstm.bias_hh_l0 tensor([ 0.7565,  0.0814, -0.7200,  0.9227])


Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2000` reached.


After optimization, the parameters are...
lstm.weight_ih_l0 tensor([[-0.3612],
        [-0.3521],
        [ 0.8885],
        [-0.4924]])
lstm.weight_hh_l0 tensor([[-0.0914],
        [ 0.9270],
        [-0.5238],
        [ 0.8631]])
lstm.bias_ih_l0 tensor([0.7757, 0.4684, 0.4677, 0.5414])
lstm.bias_hh_l0 tensor([ 0.9233,  0.2783, -0.4314,  1.0775])

Mse in weeks calculation
6.552946428571429e-05


In [9]:
#mse_all={'Mean_dance': mse[0],'Mean_energy':mse[1],'Mean_speech':mse[2],'Mean_acoustic':mse[3],'Mean_instru':mse[4],'Mean_valence':mse[5]}
mse

{'Mean_dance': 5.545803571428571e-05,
 'Mean_energy': 6.017767857142857e-05,
 'Mean_speech': 4.432321428571429e-05,
 'Mean_acoustic': 0.0013220151785714282,
 'Mean_instru': 3.6089285714285714e-06,
 'Mean_valence': 6.552946428571429e-05}