In [None]:
# Imports

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sklearn
import catboost

In [None]:
# Loading Data
#
# Read every CSV from the local data/ directory in a single statement.
# The i-th path below is bound to the i-th name on the left-hand side.

(
    train,
    songs,
    song_labels,
    test,
    save_for_later,
    dummy_submission,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        'data/train.csv',
        'data/songs.csv',
        'data/song_labels.csv',
        'data/test.csv',
        'data/save_for_later.csv',
        'data/dummy_submission.csv',
    )
]

In [None]:
# Split the labelled rows into a 90% training part and a validation part,
# using a fixed random_state so the split is the same on every run.
# NOTE: `train` is rebound to the 90% part here, so running this cell a second
# time without reloading the data splits the already-reduced frame again.
from sklearn.model_selection import train_test_split

train, validation = train_test_split(
    train,
    random_state=69,
    train_size=0.9,
)

In [None]:
# Distinct customer IDs that occur in the training split, in order of first appearance.
customer_id_list = pd.unique(train['customer_id'])

In [None]:
# Distinct song IDs that occur in the training split, in order of first appearance.
song_id_list = pd.unique(train['song_id'])

In [None]:
# Hyper-parameters of the latent-factor model
learning_rate = 5e-4  # step size applied to every gradient update
iters = 100           # number of gradient-descent iterations
dim = 10              # length of each customer / song latent vector
reg = 0.1             # coefficient of the weight term added to each gradient
seed = 69             # seed for the weight initialisation below

# Initialization
#
# A dedicated, seeded generator makes the starting weights (and therefore the
# whole training run) reproducible after Restart & Run All, and leaves the
# global NumPy random state untouched.
rng = np.random.default_rng(seed)

# One latent vector per training customer, drawn uniformly from [-0.01, 0.01).
customer_weights = {
    customer_id: rng.uniform(-0.01, 0.01, dim) for customer_id in customer_id_list
}

# One latent vector per training song, drawn uniformly from [0, 0.01)
# (non-negative, unlike the customer vectors).
song_weights = {
    song_id: rng.uniform(0, 0.01, dim) for song_id in song_id_list
}

In [None]:
# Gradient buffers: one vector of length `dim` per training customer and per
# training song. They are plain placeholders -- the training cell assigns
# every entry before it is read -- so they start at zero instead of being
# filled with random noise from the global RNG.
customer_gradients = {customer_id: np.zeros(dim) for customer_id in customer_id_list}

song_gradients = {song_id: np.zeros(dim) for song_id in song_id_list}

In [None]:
# GroupBy handles over the training rows, used to fetch all rows that belong
# to a single customer or to a single song via .get_group(...).
song_groups = train.groupby(by='song_id')
customer_groups = train.groupby(by='customer_id')

In [None]:
# Full-batch gradient descent for the latent-factor model.
#
# A score is estimated as dot(customer_weights[c], song_weights[s]). Each
# iteration (1) computes every customer gradient and every song gradient from
# the current weights, (2) applies all updates, and (3) prints the mean
# squared error on the training and validation rows.


def _predict(frame):
    """Return the dot-product score estimate for every row of `frame`.

    `frame` must have `customer_id` and `song_id` columns. An ID without a
    learned vector (it never occurred in `train`) is mapped to a zero vector,
    so its estimate is 0.0 instead of the lookup raising KeyError.
    """
    unseen = np.zeros(dim)
    customer_matrix = np.array(
        [customer_weights.get(c, unseen) for c in frame['customer_id'].to_numpy()]
    ).reshape(-1, dim)  # reshape keeps the (rows, dim) shape for an empty frame
    song_matrix = np.array(
        [song_weights.get(s, unseen) for s in frame['song_id'].to_numpy()]
    ).reshape(-1, dim)
    return (customer_matrix * song_matrix).sum(axis=1)


# Loop-invariant data, extracted once instead of on every iteration.
y_train = train['score'].to_numpy()
y_val = validation['score'].to_numpy()

# customer -> (songs that customer scored, the corresponding scores)
customer_ratings = {}
for customer in customer_id_list:
    group = customer_groups.get_group(customer)
    customer_ratings[customer] = (group['song_id'].to_numpy(), group['score'].to_numpy())

# song -> (customers who scored that song, the corresponding scores)
song_ratings = {}
for song in song_id_list:
    group = song_groups.get_group(song)
    song_ratings[song] = (group['customer_id'].to_numpy(), group['score'].to_numpy())

for i in range(iters):
    # Finding customer gradients
    for customer in customer_id_list:
        songlist, y = customer_ratings[customer]
        customer_weight = customer_weights[customer]
        # Rows of `temp` are the latent vectors of the songs this customer scored.
        temp = np.stack([song_weights[s] for s in songlist])
        residual = np.matmul(temp, customer_weight) - y
        gradient = np.matmul(temp.T, residual) + reg * customer_weight
        customer_gradients[customer] = gradient

    # Finding song gradients (still using the not-yet-updated customer weights)
    for song in song_id_list:
        customerlist, y = song_ratings[song]
        song_weight = song_weights[song]
        # Rows of `temp` are the latent vectors of the customers who scored this song.
        temp = np.stack([customer_weights[c] for c in customerlist])
        residual = np.matmul(temp, song_weight) - y
        gradient = np.matmul(temp.T, residual) + reg * song_weight
        song_gradients[song] = gradient

    # Apply all updates only after every gradient has been computed.
    for customer in customer_id_list:
        customer_weights[customer] = customer_weights[customer] - learning_rate * customer_gradients[customer]

    for song in song_id_list:
        song_weights[song] = song_weights[song] - learning_rate * song_gradients[song]

    # Mean squared error on both splits with the updated weights.
    estimates_train = _predict(train)
    estimates_validation = _predict(validation)
    train_error = np.mean(np.square(y_train - estimates_train))
    val_error = np.mean(np.square(y_val - estimates_validation))
    print(f'iter {i} train {train_error} val {val_error}')
    

In [None]:
# Number of training rows per customer (pandas lists the most frequent first).
train['customer_id'].value_counts()

In [None]:
# Scratch check: show the current latent vector of one hard-coded customer ID.
# Raises KeyError if this ID is not among the customers of the training split.
customer_weights['F11620']

In [None]:
# Scratch check: value the loop variable `customer` was left with by the
# most recently executed cell that looped over customers.
customer

In [None]:
# Scratch check: the last DataFrame bound to `group` (leftover state from the
# training cell, which assigns it from a .get_group(...) call).
group

In [None]:
# Scratch check: all training rows of one hard-coded customer ID.
# Raises KeyError if this ID is not present in the training split.
customer_groups.get_group('K29715')

In [None]:
# Scratch check: song IDs of the last customer handled by the
# customer-gradient loop of the training cell (leftover variable).
songlist

In [None]:
# Scratch check: the last score array bound to `y` in the training cell
# (leftover variable; it is reassigned in both gradient loops).
y

In [None]:
# Scratch check: shape of the last stacked weight matrix `temp` left over from
# the training cell; expected to be (number of rows in the group, dim).
temp.shape

In [None]:
# Scratch check: shape of temp @ customer_weight, using whatever values
# `temp` and `customer_weight` held when the training cell finished.
np.matmul(temp, customer_weight).shape

In [None]:
# Scratch check: shape of the leftover score array `y`; it has to match the
# shape printed by the matmul check for the subtraction in the gradient to work.
y.shape

In [None]:
# Scratch check: shape of the data term of the gradient,
# temp.T @ (temp @ customer_weight - y), evaluated on the leftover variables.
np.squeeze(np.matmul(temp.T, np.matmul(temp, customer_weight)-y)).shape