In [3]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
import os
import time
import datetime
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from keras import optimizers 
from keras import regularizers
from keras import initializers
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, LeakyReLU, BatchNormalization, Activation, Softmax
from keras.callbacks import TensorBoard, CSVLogger
from keras.constraints import maxnorm
from keras.utils import to_categorical
from keras import backend as K
import pandas as pd

# Sort Data

In [3]:
!unzip -uq '/content/gdrive/MyDrive/msdchallenge/taste_profile_song_to_tracks.txt.zip' -d '/content/song_data'
!unzip -uq '/content/gdrive/MyDrive/msdchallenge/kaggle_visible_evaluation_triplets.zip' -d '/content/eval_data'

In [4]:
songmetadata = pd.read_csv(r'/content/gdrive/MyDrive/mil_song_subset/song_data.csv')
othersongdata = pd.read_fwf(r'/content/gdrive/MyDrive/mil_song_subset/10000.txt')

In [5]:
song_df= pd.DataFrame(songmetadata)
song_df.head()

Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


In [6]:
othersongdata.columns = ['user_id','song_id','listen_count']
song_df = pd.merge(othersongdata, song_df.drop_duplicates(['song_id']), on="song_id", how ="left")

In [7]:
song_grouped = pd.DataFrame(song_df.groupby('song_id')['listen_count'].count())

In [8]:
def popular_playlist():
  song_grouped = song_df.groupby(['title']).agg({"listen_count":"count"}).sort_values(['listen_count'],ascending = False)
  return song_grouped.head(10)

In [None]:
songs_init1 = pd.read_csv(r'/content/gdrive/MyDrive/song_corr_fin.csv')
#songs_init1 = pd.read_csv(r'/content/gdrive/MyDrive/mil_song_subset/song_corr.csv')
#songs_init2 = pd.read_csv(r'/content/gdrive/MyDrive/mil_song_subset/song_corr2.csv')
#songs_init1 = pd.concat([songs_init1, songs_init2])
#songs_init1.to_csv('/content/gdrive/MyDrive/mil_song_subset/song_corr_fin.csv')

In [None]:
songs_init1.head()

In [None]:
songs_init1.set_index('user_id')

# CRFS

In [None]:
def song_based(song_id, no):
  predictor_song_ratings = songs_init1[song_id]
  similar_songs = songs_init1.corrwith(predictor_song_ratings)
  corr_listened_song = pd.DataFrame(similar_songs, columns = ['pearsonR'])
  corr_listened_song.dropna(inplace = True)
  predictor_corr_summary = corr_listened_song.join(song_grouped['listen_count'])
  final_recommended_songs = predictor_corr_summary[predictor_corr_summary.pearsonR < 0.9999]
  #final_recommended_songs = final_recommended_songs.rename(columns={'index': 'song_id'})
  final_recommended_songs.sort_values('pearsonR', ascending = False)
  final_recommended_songs = final_recommended_songs.reset_index()
  final_recommended_songs = final_recommended_songs.rename(columns={'index': 'song_id'})
  song_df_one = song_df.drop(['listen_count'], axis=1)
  similar_songs = pd.merge(final_recommended_songs, song_df_one.drop_duplicates(["song_id"]), on="song_id", how="left")
  similar_songs = similar_songs.sort_values('pearsonR', ascending = False)
  return similar_songs.head(no)

In [None]:
trial = song_based('SOFVZRE12A8C139783', 10)

In [None]:
trial

Unnamed: 0,song_id,pearsonR,listen_count,user_id,title,release,artist_name,year
4159,SOKMHKY12AF72AB079,0.274954,1375,a58de017cbeda1763ea002fe027ed41b4ed53109,How You Remind Me,FETENHITS - New Party Rock (set),Nickelback,2000
7714,SOTRDVJ12A8C13975E,0.243712,370,1a849df9dabb15845eb932d46d81e2fd77176786,Savin' Me,NOW (That's What I Call Music) 22,Nickelback,2005
2499,SOGIDHJ12AB01800D1,0.205616,712,1a849df9dabb15845eb932d46d81e2fd77176786,Never Gonna Be Alone (Album Version),Dark Horse,Nickelback,2008
8792,SOWQHUP12AB017C658,0.190493,87,6386d3ccb5d611599a9351bedb379dc4928922dc,Never Say Never,The Fray,The Fray,2009
9174,SOXQBCW12AB018704A,0.186946,67,c6c57f27fed3cf897daa4f830a0e2a17e65ba77d,Own Little World,ROUNDERS,Celldweller,2000
9673,SOZALSM12A8C13594C,0.186602,111,74d965061c841c6271fdc7d025055816f68bb257,Don't Stop Dancing,Greatest Hits,Creed,0
20,SOABHYV12A6D4F6D0F,0.183492,308,4208d4ac45e7caab7167a4ea6d34e759a6b9a1fc,Shadow Of The Day (Album Version),Minutes To Midnight,Linkin Park,2007
2414,SOGDKEK12AF72A193B,0.182734,313,fc604c906ec7bb98c494ef7659b0d3dc75503812,Higher,Greatest Hits,Creed,0
397,SOBADLL12A6D4F8FEA,0.173155,171,4bd925e2dc2205d8b885bca02a88852fd9c49af5,If You're Not The One,Gotta Get Thru This,Daniel Bedingfield,2002
7301,SOSOAWT12A8AE48C81,0.161633,259,fe76c9d535c5834e4a9b91c13e29be6460cb79c4,Never Again,Silver Side Up,Nickelback,2001


In [None]:
predictor_song_ratings = songs_init1['SOFVZRE12A8C139783']

In [None]:
predictor_song_ratings

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
37944    0.0
37945    0.0
37946    0.0
37947    0.0
37948    0.0
Name: SOFVZRE12A8C139783, Length: 37949, dtype: float64

In [None]:
predictor_song_ratings[predictor_song_ratings>= 1]

146      1.0
161      1.0
225      1.0
286      6.0
288      1.0
        ... 
37470    1.0
37477    1.0
37535    1.0
37726    1.0
37851    1.0
Name: SOFVZRE12A8C139783, Length: 519, dtype: float64

In [None]:
similar_songs = songs_init1.corrwith(predictor_song_ratings)

In [None]:
similar_songs

SOAAAGQ12A8C1420C8    0.003195
SOAACPJ12A81C21360   -0.001724
SOAACSG12AB018DC80   -0.000898
SOAAEJI12AB0188AB5   -0.000252
SOAAFAC12A67ADF7EB    0.000331
                        ...   
SOZZTNF12A8C139916   -0.002836
SOZZVWB12AB0189C30   -0.001614
SOZZWZV12A67AE140F   -0.000948
SOZZYAO12A6701FF36   -0.000133
SOZZZPV12A8C1444B5   -0.001724
Length: 10000, dtype: float64

In [None]:
corr_listened_song = pd.DataFrame(similar_songs, columns = ['pearsonR'])

In [None]:
corr_listened_song.dropna(inplace = True)

In [None]:
corr_listened_song.head()

Unnamed: 0,pearsonR
SOAAAGQ12A8C1420C8,0.003195
SOAACPJ12A81C21360,-0.001724
SOAACSG12AB018DC80,-0.000898
SOAAEJI12AB0188AB5,-0.000252
SOAAFAC12A67ADF7EB,0.000331


In [None]:
predictor_corr_summary = corr_listened_song.join(song_grouped['listen_count'])

In [None]:
predictor_corr_summary = predictor_corr_summary.sort_values('pearsonR', ascending = False)

In [None]:
final_recommended_songs = predictor_corr_summary[predictor_corr_summary.pearsonR < 0.9999]

In [None]:
final_recommended_songs.sort_values('pearsonR', ascending = False)

Unnamed: 0,pearsonR,listen_count
SOKMHKY12AF72AB079,0.274954,1375
SOTRDVJ12A8C13975E,0.243712,370
SOGIDHJ12AB01800D1,0.205616,712
SOWQHUP12AB017C658,0.190493,87
SOXQBCW12AB018704A,0.186946,67
...,...,...
SOFWMNN12A8C13C3F2,-0.003622,406
SOREBOB12A6D4F7986,-0.003658,484
SODEYDM12A58A77072,-0.003664,691
SONAZWN12A8151C957,-0.003720,468


In [None]:
final_recommended_songs = final_recommended_songs.reset_index()

In [None]:
final_recommended_songs = final_recommended_songs.rename(columns={'index': 'song_id'})

In [None]:
song_df_one = song_df.drop(['listen_count'], axis=1)

In [None]:
song_df_one.head()

Unnamed: 0,user_id,song_id,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,Stronger,Graduation,Kanye West,2007
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,Constellations,In Between Dreams,Jack Johnson,2005
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODDNQT12A6D4F5F7E,Apuesta Por El Rock 'N' Roll,Antología Audiovisual,Héroes del Silencio,2007


In [None]:
final_recommended_songs.head()

Unnamed: 0,song_id,pearsonR,listen_count
0,SOKMHKY12AF72AB079,0.274954,1375
1,SOTRDVJ12A8C13975E,0.243712,370
2,SOGIDHJ12AB01800D1,0.205616,712
3,SOWQHUP12AB017C658,0.190493,87
4,SOXQBCW12AB018704A,0.186946,67


In [None]:
similar_songs = pd.merge(final_recommended_songs, song_df_one.drop_duplicates(["song_id"]), on="song_id", how="left")


In [None]:
similar_songs = similar_songs.sort_values('pearsonR', ascending = False)

In [None]:
similar_songs.head()

Unnamed: 0,song_id,pearsonR,listen_count,user_id,title,release,artist_name,year
0,SOKMHKY12AF72AB079,0.274954,1375,a58de017cbeda1763ea002fe027ed41b4ed53109,How You Remind Me,FETENHITS - New Party Rock (set),Nickelback,2000
1,SOTRDVJ12A8C13975E,0.243712,370,1a849df9dabb15845eb932d46d81e2fd77176786,Savin' Me,NOW (That's What I Call Music) 22,Nickelback,2005
2,SOGIDHJ12AB01800D1,0.205616,712,1a849df9dabb15845eb932d46d81e2fd77176786,Never Gonna Be Alone (Album Version),Dark Horse,Nickelback,2008
3,SOWQHUP12AB017C658,0.190493,87,6386d3ccb5d611599a9351bedb379dc4928922dc,Never Say Never,The Fray,The Fray,2009
4,SOXQBCW12AB018704A,0.186946,67,c6c57f27fed3cf897daa4f830a0e2a17e65ba77d,Own Little World,ROUNDERS,Celldweller,2000
