In [21]:
import csv
from math import *

class MusicSimilarity:
    """Load a CSV of track features and compare tracks or artists.

    Typical usage is ``load_dataset()`` followed by ``get_artist_music()`` and
    ``get_music_features()``, after which ``generate_music_similarity()`` and
    ``generate_artists_similarity()`` print four metrics (Euclidean distance,
    Manhattan distance, cosine similarity, Jaccard similarity).

    All loaded data is stored per instance, and every loader rebuilds its
    table from scratch, so calling a loader twice (for example by re-running
    a notebook cell) never duplicates rows.
    """

    # Default CSV path; a different path can be passed to the constructor.
    csv_file = 'Music.csv'

    # Columns kept from the CSV when loading the dataset.
    dataset_attributes = [
        'acousticness',
        'artists',
        'danceability',
        'energy',
        'id',
        'liveness',
        'loudness',
        'name',
        'popularity',
        'speechiness',
        'tempo',
        'valence'
    ]

    # Columns copied into the artist -> track lookup table.
    artist_music_attributes = [
        'artists',
        'name',
        'id',
    ]

    # Columns copied into the per-track feature table ('id' is the key,
    # every other column is converted to float when compared).
    music_features_attributes = [
        'id',
        'acousticness',
        'danceability',
        'energy',
        'liveness',
        'loudness',
        'popularity',
        'speechiness',
        'tempo',
        'valence'
    ]

    def __init__(self, csv_file=None):
        """Create an empty instance.

        Args:
            csv_file: optional path of the CSV to load; when omitted the
                class-level default ``csv_file`` is used.
        """
        if csv_file is not None:
            self.csv_file = csv_file

        # Instance-level containers: class-level lists would be shared by
        # every instance and would keep growing across calls.
        self.dataset = []
        self.artist_music = []
        self.music_features = []

    @staticmethod
    def _project(rows, attributes):
        """Return copies of ``rows`` restricted to the keys in ``attributes``."""
        return [
            {key: row[key] for key in row if key in attributes}
            for row in rows
        ]

    @staticmethod
    def _first_row(rows):
        """Return the first row for the progress message, or None when empty."""
        return rows[0] if rows else None

    def load_dataset(self):
        """Read ``self.csv_file`` into ``self.dataset`` (a list of dicts).

        Only the columns named in ``dataset_attributes`` are kept; values stay
        as the strings read from the file. Any previously loaded rows are
        replaced.
        """
        # newline='' is what the csv module expects; the context manager
        # guarantees the file handle is closed even if parsing fails.
        with open(self.csv_file, encoding='utf8', newline='') as handle:
            self.dataset = self._project(csv.DictReader(handle), self.dataset_attributes)

        print('[Dataset Loaded Successfully] \n********************************\n Row 0 =>', self._first_row(self.dataset), '\n')

    def get_artist_music(self):
        """Rebuild ``self.artist_music`` from ``self.dataset``."""
        self.artist_music = self._project(self.dataset, self.artist_music_attributes)

        print('[Generating Artist Music] \n********************************\n Row 0 =>', self._first_row(self.artist_music), '\n')

    def get_music_features(self):
        """Rebuild ``self.music_features`` from ``self.dataset``."""
        self.music_features = self._project(self.dataset, self.music_features_attributes)

        print('[Generating Music Features] \n********************************\n Row 0 =>', self._first_row(self.music_features), '\n')

    def euclidean_distance(self, x, y):
        """Return the Euclidean (L2) distance between ``x`` and ``y``."""
        return sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))

    def manhattan_distance(self, x, y):
        """Return the Manhattan (L1) distance between ``x`` and ``y``."""
        return sum(abs(a - b) for a, b in zip(x, y))

    def square_rooted(self, x):
        """Return the Euclidean norm of ``x`` rounded to 3 decimals."""
        return round(sqrt(sum([a * a for a in x])), 3)

    def cosine_similarity(self, x, y):
        """Return the cosine similarity of ``x`` and ``y`` rounded to 3 decimals.

        The norms are used at full precision; rounding them first (as
        ``square_rooted`` does) distorts the ratio for small-magnitude vectors.
        If either vector has zero norm the similarity is undefined and 0.0 is
        returned instead of raising ``ZeroDivisionError``.
        """
        numerator = sum(a * b for a, b in zip(x, y))
        denominator = sqrt(sum(a * a for a in x)) * sqrt(sum(b * b for b in y))
        if denominator == 0:
            return 0.0
        return round(numerator / float(denominator), 3)

    def jaccard_similarity(self, x, y):
        """Return |set(x) & set(y)| / |set(x) | set(y)|.

        Raises:
            ZeroDivisionError: if both ``x`` and ``y`` are empty.
        """
        left, right = set(x), set(y)
        return len(left & right) / float(len(left | right))

    def get_music_values(self, music_id):
        """Return the numeric features of a track as a list of floats.

        The first row of ``self.music_features`` whose ``'id'`` equals
        ``music_id`` is used; every other column is converted with ``float``.

        Returns:
            The feature list, or ``None`` when no row has that id.
        """
        for row in self.music_features:
            if row['id'] == music_id:
                return [float(row[key]) for key in row if key != 'id']
        return None

    def _print_similarities(self, x, y):
        """Print both vectors followed by the four similarity metrics."""
        print('\nValues: ', x, y)

        print('[Euclidean Distance] :', self.euclidean_distance(x, y))
        print('[Manhattan Distance] :', self.manhattan_distance(x, y))
        print('[Cosine Similarity] :', self.cosine_similarity(x, y))
        print('[Jaccard Similarity] :', self.jaccard_similarity(x, y))

    def generate_music_similarity(self, music_id_1, music_id_2):
        """Print the similarity metrics between two tracks.

        Raises:
            ValueError: if either id is not present in ``self.music_features``.
        """
        x = self.get_music_values(music_id_1)
        y = self.get_music_values(music_id_2)

        # Fail with a clear message instead of an opaque TypeError from zip().
        for music_id, values in ((music_id_1, x), (music_id_2, y)):
            if values is None:
                raise ValueError('Unknown music id: {!r}'.format(music_id))

        self._print_similarities(x, y)

    def mean(self, data):
        """Return the column-wise mean of a list of equal-length vectors."""
        return [float(sum(col)) / len(col) for col in zip(*data)]

    def get_artists_values(self, artists_name):
        """Return the mean feature vector over all tracks of an artist.

        ``artists_name`` must match the ``'artists'`` column exactly. Track ids
        that have no row in ``self.music_features`` are skipped.

        Returns:
            The averaged feature list, or ``[]`` when the artist has no tracks.
        """
        music_ids = [
            row['id'] for row in self.artist_music
            if row['artists'] == artists_name
        ]

        values_list = []
        for music_id in music_ids:
            values = self.get_music_values(music_id)
            if values is not None:
                values_list.append(values)

        return self.mean(values_list)

    def generate_artists_similarity(self, artists_name_1, artists_name_2):
        """Print the similarity metrics between two artists' mean features.

        Raises:
            ValueError: if no tracks are found for either artist.
        """
        x = self.get_artists_values(artists_name_1)
        y = self.get_artists_values(artists_name_2)

        # An empty vector means the artist string matched nothing.
        for artists_name, values in ((artists_name_1, x), (artists_name_2, y)):
            if not values:
                raise ValueError('No tracks found for artists: {!r}'.format(artists_name))

        self._print_similarities(x, y)

In [23]:
def main():
    """Run the demo: load the data, then compare two tracks and two artists."""

    # Task (a): load the dataset and derive both lookup tables from it.
    similarity = MusicSimilarity()
    for prepare in (
        similarity.load_dataset,
        similarity.get_artist_music,
        similarity.get_music_features,
    ):
        prepare()

    # Task (b): report similarities.

    # Track-to-track comparison, addressed by track id.
    print('Music Similarity\n*****************************\n')
    similarity.generate_music_similarity(
        '6KbQ3uYMLKb5jDxLF7wYDD',
        '6L63VW0PibdM1HDSBoqnoM',
    )

    # Artist-to-artist comparison; names must match the 'artists' column text.
    print('Artist Similarity\n*****************************\n')
    similarity.generate_artists_similarity(
        "['Carl Woitschach']",
        "['Robert Schumann', 'Vladimir Horowitz']",
    )

In [24]:
# Entry point: run the demo only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()

[Dataset Loaded Successfully] 
********************************
 Row 0 => {'acousticness': '0.995', 'artists': "['Carl Woitschach']", 'danceability': '0.708', 'energy': '0.195', 'id': '6KbQ3uYMLKb5jDxLF7wYDD', 'liveness': '0.151', 'loudness': '-12.428', 'name': 'Singende Bataillone 1. Teil', 'popularity': '0', 'speechiness': '0.0506', 'tempo': '118.469', 'valence': '0.779'} 

[Generating Artist Music] 
********************************
 Row 0 => {'artists': "['Carl Woitschach']", 'id': '6KbQ3uYMLKb5jDxLF7wYDD', 'name': 'Singende Bataillone 1. Teil'} 

[Generating Music Features] 
********************************
 Row 0 => {'acousticness': '0.995', 'danceability': '0.708', 'energy': '0.195', 'id': '6KbQ3uYMLKb5jDxLF7wYDD', 'liveness': '0.151', 'loudness': '-12.428', 'popularity': '0', 'speechiness': '0.0506', 'tempo': '118.469', 'valence': '0.779'} 

Music Similarity
*****************************


Values:  [0.995, 0.708, 0.195, 0.151, -12.428, 0.0, 0.0506, 118.469, 0.779] [0.604, 0.749,