In [1]:
import os
import datetime

#For data Handling
import pandas as pd
import numpy as np

#For Visualizations
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 
%matplotlib inline

#Ignore Warnings
import warnings
warnings.filterwarnings('ignore')

#Progreebar
from tqdm import tqdm

# For transformations and predictions
from scipy.optimize import curve_fit
from yellowbrick.target import FeatureCorrelation
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans 
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

# For scoring
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score,mean_absolute_error

# For validation
from sklearn.model_selection import train_test_split

# Song Recommender

In [2]:
df = pd.read_csv('data/data.csv')

#Remove the Square Brackets from the artists

df["artists"]=df["artists"].str.replace("[", "")
df["artists"]=df["artists"].str.replace("]", "")
df["artists"]=df["artists"].str.replace("'", "")

def normalize_column(col):
    """
    col - column in the dataframe which needs to be normalized
    """
    max_d = df[col].max()
    min_d = df[col].min()
    df[col] = (df[col] - min_d)/(max_d - min_d)
    
#Normalize allnumerical columns so that min value is 0 and max value is 1
num_types = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
num = df.select_dtypes(include=num_types)
        
for col in num.columns:
    normalize_column(col)
    
#perform Kmeans CLustering
km = KMeans(n_clusters=25)
pred = km.fit_predict(num)
df['pred'] = pred
normalize_column('pred')

#Song Recommender
class Song_Recommender():
    """
    Neighbourhood Based Collborative Filterng REcoomendation System using similarity Metrics
    Manhattan Distance is calculated for all songs and Recommend Songs that are similar to it based on any given song
    """
    def __init__(self, data):
        self.data_ = data
    
    #function which returns recommendations, we can also choose the amount of songs to be recommended
    def get_recommendations(self, song_name, n_top):
        distances = []
        #choosing the given song_name and dropping it from the data
        song = self.data_[(self.data_.name.str.lower() == song_name.lower())].head(1).values[0]
        rem_data = self.data_[self.data_.name.str.lower() != song_name.lower()]
        for r_song in tqdm(rem_data.values):
            dist = 0
            for col in np.arange(len(rem_data.columns)):
                #indeces of non-numerical columns(id,Release date,name,artists)
                if not col in [3,8,14,16]:
                    #calculating the manhettan distances for each numerical feature
                    dist = dist + np.absolute(float(song[col]) - float(r_song[col]))
            distances.append(dist)
        rem_data['distance'] = distances
        #sorting our data to be ascending by 'distance' feature
        rem_data = rem_data.sort_values('distance')
        columns = ['artists', 'name']
        return rem_data[columns][:n_top]


#Instantiate recommender class
recommender = Song_Recommender(df)

#Get recommendations 'Red Roses (feat. Landon Cube)' song
recommender.get_recommendations('Red Roses (feat. Landon Cube)', 5)

100%|███████████████████████████████████████████████████████████████████████| 170652/170652 [00:12<00:00, 13153.75it/s]


Unnamed: 0,artists,name
38257,"Post Malone, Halsey, Future",Die For Me (feat. Future & Halsey)
57027,"Vince Staples, 6LACK, Mereba","Yo Love - From ""Queen & Slim: The Soundtrack"""
107211,The Weeknd,Initiation
17412,"Kid Cudi, MGMT, Ratatat",Pursuit Of Happiness (Nightmare)
38093,Bazzi,3:15


In [3]:
recommender.get_recommendations('Sorry', 5)

100%|████████████████████████████████████████████████████████████████████████| 170643/170643 [00:17<00:00, 9969.33it/s]


Unnamed: 0,artists,name
77582,Memphis Slim,Pigalle Love
128788,"""The Highway Q.C.s""",Somewhere To Lay My Head
159423,Ramsey Lewis Trio,Snowfall
96554,Ornette Coleman,Eventually - Mono
126072,EOR,Vele


In [4]:
recommender.get_recommendations('Confident', 5)

100%|███████████████████████████████████████████████████████████████████████| 170649/170649 [00:13<00:00, 12779.70it/s]


Unnamed: 0,artists,name
17533,Aventura,Por un Segundo
17295,Crystal Castles,Crimewave
123339,"J Balvin, Farruko",6 AM
92185,JAWNY,Honeypie
15746,Jagged Edge,Promise


In [4]:
recommender.get_recommendations('Just the way you are', 8)

100%|████████████████████████████████████████████████████████████████████████| 170643/170643 [00:22<00:00, 7672.36it/s]


Unnamed: 0,artists,name
13222,Don McLean,American Pie
11395,The Isley Brothers,"Groove with You, Pts. 1 & 2"
14929,Bryan Adams,Have You Ever Really Loved A Woman?
133035,Yumi Matsutoya,守ってあげたい
30486,José José,Mientras Llueve
54697,Yuri,Maldita Primavera
10966,Firefall,Cinderella
71377,Stephen Speaks,Passenger Seat (Acoustic)


In [5]:
recommender.get_recommendations('Tik Tok', 2)

100%|███████████████████████████████████████████████████████████████████████| 170652/170652 [00:15<00:00, 10817.33it/s]


Unnamed: 0,artists,name
18058,Train,Drive By
18100,Flo Rida,Good Feeling


In [6]:
recommender.get_recommendations('Devil in a New Dress', 10)

100%|████████████████████████████████████████████████████████████████████████| 170652/170652 [00:17<00:00, 9928.37it/s]


Unnamed: 0,artists,name
18638,"Travis Scott, Kacy Hill",90210 (feat. Kacy Hill)
37133,"Tyler, The Creator",Answer
55016,Rihanna,Russian Roulette
17688,"Kanye West, Kid Cudi, Raekwon",Gorgeous
170443,Hozier,No Plan
90978,Catfish and the Bottlemen,Cocoon
17926,Kirko Bangz,Drank in My Cup
107695,Highly Suspect,Bloodfeather
37544,"Meek Mill, Chris Brown, Nicki Minaj",All Eyes on You (feat. Chris Brown & Nicki Minaj)
153278,Plies,Plenty Money


In [7]:
recommender.get_recommendations('Haal Kaisa hai janab ka', 20)

100%|████████████████████████████████████████████████████████████████████████| 170652/170652 [00:24<00:00, 6888.74it/s]


Unnamed: 0,artists,name
46110,Johnny Cash,Bad News
97708,Sarah Vaughan,Bye-Bye
113886,Jr. Walker & The All Stars,Hot Cha
112855,Dueto America,El Venadito - Remastered
6566,Ella Fitzgerald,Oh Lady Be Good
26438,姚莉,"人生就是戲 (電影""入室佳人""插曲)"
63452,"Doris Day, André Previn, André Previn Trio",Control Yourself (with The André Previn Trio)
115897,Joan Baez,Fountain Of Sorrow
6768,"Pete Seeger, The Song Swappers",Roll the Union On
63517,Willie Nelson,Three Days


In [9]:
recommender.get_recommendations('Crazy', 10)

100%|███████████████████████████████████████████████████████████████████████| 170634/170634 [00:16<00:00, 10644.79it/s]


Unnamed: 0,artists,name
29047,America,Daisy Jane
29635,America,Daisy Jane
8486,Louis Armstrong,Moon River
7483,Roger Williams,Winter Wonderland
44689,Sarah Vaughan,Pennies From Heaven
7658,Ella Fitzgerald,The Christmas Song
81535,Cliff Richard,Visions - 2005 Remaster
6862,Billie Holiday,Good Morning Heartache
6991,The Platters,I'm Sorry
7855,Dave Van Ronk,Tell Old Bill


In [10]:
recommender.get_recommendations('Confident', 5)

100%|███████████████████████████████████████████████████████████████████████| 170649/170649 [00:15<00:00, 10717.66it/s]


Unnamed: 0,artists,name
17533,Aventura,Por un Segundo
17295,Crystal Castles,Crimewave
123339,"J Balvin, Farruko",6 AM
15746,Jagged Edge,Promise
74823,Raymix,Oye Mujer


In [11]:
recommender.get_recommendations('Kya Mujhe Pyaar hai', 5)

100%|███████████████████████████████████████████████████████████████████████| 170652/170652 [00:16<00:00, 10289.69it/s]


Unnamed: 0,artists,name
53669,Thalia,No Me Enseñaste
106625,"Lee ""Scratch"" Perry",Having A Party
56396,Arsenal Efectivo,Vida Peligrosa
53263,LeAnn Rimes,Can't Fight The Moonlight
17307,Jazmine Sullivan,Bust Your Windows
