In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction import DictVectorizer
# NOTE(review): this module-level vectorizer is not referenced by any other cell shown here; Dataset creates its own.
vec=DictVectorizer()
import spotipy 
from spotipy.oauth2 import SpotifyClientCredentials
from sklearn.svm import SVC

# Read the Spotify API credentials from a local text file. Element 0 is used as
# the client id and element 1 as the client secret when the spotipy client is created.
with open('login.txt') as f: 
    data = f.readlines()  # each line keeps its trailing newline; it is stripped where used
#cid=data[0].strip()
#secret=data[1].strip()

In [2]:
# Show the directory the notebook is running from (relative paths such as
# login.txt and the CSV files are resolved against it).
print('Esperamos trabalhar no diretório', os.getcwd(), sep='\n')

Esperamos trabalhar no diretório
C:\Users\JUAN\Documents\GitHub\TuneMyMood


In [1]:
class Dataset:
    """Song table plus the feature matrix and labels derived from it.

    Attributes set by ``__init__``:
        df: the songs as a DataFrame, with ``loudness`` replaced by its
            absolute value.
        dict: one ``{column: value}`` record per row, restricted to the
            selected feature columns.
        ans: values of the last column of ``df`` as a list (used as labels).
        vec: the ``DictVectorizer`` fitted on ``dict``.
        train_songs: dense array returned by ``vec`` for ``dict``.
    """

    # Feature columns are selected by position, so they depend on column order.
    # CSV source: columns 1..13 (presumably column 0 is the index written by
    # save_csv — TODO confirm against the CSV files).
    _CSV_FEATURE_COLUMNS = slice(1, 14)
    # In-memory frame: these twelve positions.
    _FRAME_FEATURE_COLUMNS = [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13]

    def __init__(self, df2):
        """Build the dataset from a CSV path or from an existing DataFrame.

        Args:
            df2: either a string path to a CSV file, or a DataFrame. The
                table must contain a ``loudness`` column, and its last
                column is taken as the label column.
        """
        if isinstance(df2, str):
            self.df = pd.read_csv(df2)
            feature_columns = self._CSV_FEATURE_COLUMNS
        else:
            # Work on a copy so the caller's DataFrame is left untouched.
            self.df = df2.copy()
            feature_columns = self._FRAME_FEATURE_COLUMNS

        # Presumably needed because MultinomialNB (see Trainer) expects
        # non-negative feature values — TODO confirm.
        self.df["loudness"] = abs(self.df["loudness"])
        self.dict = self.df.iloc[:, feature_columns].to_dict('records')

        self.ans = self.df[self.df.columns[-1]].tolist()
        self.vec = DictVectorizer()
        self.train_songs = self.vec.fit_transform(self.dict).toarray()

    def save_csv(self, csv_name):
        """Write ``df`` to ``csv_name`` as UTF-8 encoded CSV."""
        self.df.to_csv(csv_name, encoding='utf-8')

    def merge_df(self, other):
        """Append the rows of ``other.df`` to ``self.df``.

        NOTE(review): only ``df`` is replaced; ``dict``, ``ans``, ``vec`` and
        ``train_songs`` still describe the rows present before the merge.
        """
        self.df = pd.concat([self.df, other.df])

class User:
    """Small wrapper around a spotipy client that downloads playlists as DataFrames.

    Attributes:
        credentials_manager: the ``SpotifyClientCredentials`` built from ``client``.
        sp: the ``spotipy.Spotify`` client used for every request.
        playlists: dict mapping the name given to ``playlist_downloader`` to
            the DataFrame it produced.
    """

    def __init__(self, client):
        """Create the Spotify client.

        Args:
            client: sequence whose first element is the client id and whose
                second element is the client secret. Surrounding whitespace
                (e.g. the newline kept by ``readlines``) is stripped.
        """
        self.credentials_manager = SpotifyClientCredentials(
            client_id=client[0].strip(),
            client_secret=client[1].strip(),
        )
        self.sp = spotipy.Spotify(client_credentials_manager=self.credentials_manager)
        self.sp.trace = False
        self.playlists = {}

    def playlist_downloader(self, name, userid, playlist_id):
        """Download a playlist's tracks together with their audio features.

        Args:
            name: key under which the resulting frame is stored in ``self.playlists``.
            userid: id of the user who owns the playlist.
            playlist_id: id of the playlist.

        Returns:
            DataFrame with one row per track: the audio-feature columns
            followed by ``song_title`` and ``artist`` (first listed artist).
            The same object is stored in ``self.playlists[name]``.
        """
        playlist = self.sp.user_playlist(user=userid, playlist_id=playlist_id)
        # NOTE(review): only the items embedded in this response are read; no
        # further pages are requested — confirm long playlists are not needed.
        songs = playlist["tracks"]["items"]

        ids = []
        artists = []
        song_title = []
        for entry in songs:
            track = entry.get("track")
            # Entries without a track object or without a track id cannot be
            # looked up for audio features, so they are skipped.
            if not track or not track.get("id"):
                continue
            ids.append(track["id"])
            artists.append(track["artists"][0]["name"])
            song_title.append(track["name"])

        features = self.sp.audio_features(ids)
        df_features = pd.DataFrame(features)
        series_artist = pd.Series(artists, name='artist')
        series_title = pd.Series(song_title, name="song_title")
        df = pd.concat([df_features, series_title, series_artist], axis=1)
        self.playlists[name] = df

        return df


class Trainer():
    """Keeps MultinomialNB and SVC models, and their evaluation results, per key."""

    def __init__(self):
        # Every dict is keyed by the name passed to add_data / train / evaluate.
        self.data = dict()
        self.nb = dict()
        self.svc = dict()
        self.results_nb = dict()
        self.results_svc = dict()

    def add_data(self, genre, data):
        """Store ``data`` under ``genre`` in ``self.data``."""
        self.data.update({genre: data})

    def train(self, name, dataset):
        """Fit a fresh MultinomialNB and a fresh SVC on ``dataset``.

        Args:
            name: key under which both fitted models are stored.
            dataset: object exposing ``train_songs`` (features) and ``ans`` (labels).

        Returns:
            A fixed confirmation message.
        """
        bayes_model = MultinomialNB()
        vector_model = SVC()
        features, labels = dataset.train_songs, dataset.ans
        self.nb[name] = bayes_model.fit(features, labels)
        self.svc[name] = vector_model.fit(features, labels)
        return "The new dataset was trained and saved"

    def check_score(self, name, new_songs):
        """Print the score of the SVC model, then of the NB model, on ``new_songs``."""
        for template, models in (
            ("This is the SVC score: {}", self.svc),
            ("This is the NB score: {}", self.nb),
        ):
            score = models[name].score(new_songs.train_songs, new_songs.ans)
            print(template.format(score))

    def evaluate(self, name, new_songs):
        """Predict every song in ``new_songs`` and store the titles per outcome.

        A prediction equal to 0 puts the song in the 'disliked' list, any
        other value in the 'liked' list. Results are saved in
        ``self.results_nb[name]`` and ``self.results_svc[name]``.
        ``new_songs.df`` is replaced by a copy with a fresh 0..n-1 index so
        rows can be addressed by prediction position.

        Returns:
            A fixed confirmation message.
        """
        new_songs.df = new_songs.df.reset_index(drop=True)

        def describe(position):
            # "<title> : <artist>" for the row at this position.
            return new_songs.df['song_title'][position] + ' : ' + new_songs.df['artist'][position]

        nb_outcome = {'liked': [], 'disliked': []}
        svc_outcome = {'liked': [], 'disliked': []}

        nb_res = self.nb[name].predict(new_songs.train_songs)
        svc_res = self.svc[name].predict(new_songs.train_songs)

        for position, nb_prediction in enumerate(nb_res):
            nb_bucket = 'disliked' if nb_prediction == 0 else 'liked'
            nb_outcome[nb_bucket].append(describe(position))

            svc_bucket = 'disliked' if svc_res[position] == 0 else 'liked'
            svc_outcome[svc_bucket].append(describe(position))

        self.results_nb[name] = nb_outcome
        self.results_svc[name] = svc_outcome

        return "The result of the evaluation is now saved"

        

In [4]:
# Despite its name, `test` is the dataset the models are fitted on below.
test=Dataset("trained.csv")

# Second CSV; used below only for scoring.
new_songs=Dataset("trained2.csv")

ash=Trainer()

# Fit both classifiers and store them under the key "test".
ash.train("test",test)

# Print the SVC and NB scores on the second dataset.
ash.check_score("test",new_songs)



This is the SVC score: 0.3
This is the NB score: 0.43333333333333335


In [5]:
# Create the Spotify client using the credentials read from login.txt.
client=User(data)
# Download the playlist used as the "not funk" examples; the frame is also cached as client.playlists["paradox spiral"].
not_funk=client.playlist_downloader("paradox spiral",'12122112252','4INhsCJeBocKPzB79iPbMk')

In [6]:
# Download the playlist used as the "funk" examples; the frame is also cached as client.playlists["funk_juan"].
is_funk=client.playlist_downloader("funk_juan","12144879613","4cZ6B4WujgwaQP2D2vbOn8")

In [7]:
# Label the examples: 1 = funk, 0 = not funk.
# These frames are the same objects stored in client.playlists, so the label column is added there too.
is_funk["ans"]=1
not_funk["ans"]=0


In [8]:
# Stack both labelled playlists into one new frame (each frame's own row labels are kept; no ignore_index).
funk_concat=pd.concat([not_funk,is_funk])



In [9]:
# Derive features and labels from the labelled frame.
funk_training=Dataset(funk_concat)

# Fit both classifiers and store them under the key "funk".
ash.train("funk",funk_training)


'The new dataset was trained and saved'

In [10]:
# Download two further playlists to score the "funk" models on:
# one used as funk examples (cached as client.playlists["funk"]) ...
is_funk_test= client.playlist_downloader("funk","spotify","37i9dQZF1DWYYeOy9vs7I5")
# ... and one used as not-funk examples (cached as client.playlists["blank banshee"]).
not_funk_test= client.playlist_downloader("blank banshee","11128845131","6zQDJx3HwQKACmjTku7tdW")

In [11]:
# Label the evaluation playlists the same way as the training ones (1 = funk, 0 = not funk).
# The assignments also affect the frames cached in client.playlists, which are the same objects.
is_funk_test["ans"]=1
not_funk_test["ans"]=0

# Stack both into one new frame for scoring.
test_funk_concat=pd.concat([is_funk_test,not_funk_test])

In [12]:
# Derive features and labels for the evaluation songs.
test_funk=Dataset(test_funk_concat)

In [13]:
# Print the SVC and NB scores of the "funk" models on the evaluation songs.
ash.check_score("funk",test_funk)

This is the SVC score: 0.5098039215686274
This is the NB score: 0.4411764705882353


In [14]:
# Store each model's liked/disliked song lists in ash.results_nb['funk'] and ash.results_svc['funk'];
# the cell output is only the confirmation string returned by evaluate.
ash.evaluate('funk',test_funk)

'The result of the evaluation is now saved'

In [15]:
# Training data for the "fred" models: the chillstep/trap/future playlist is
# the liked class (1) and the already downloaded "funk" playlist the disliked
# class (0).
# Labels are set on copies via .assign, so the frames cached in
# client.playlists (client.playlists["funk"] is the same object as
# is_funk_test) keep the labels given to them in earlier cells.
fred_liked = client.playlist_downloader(
    "chillstep/trap/future", '12122168529', "5DeNP7MuwXgAHF2qgyOM4v"
).assign(ans=1)
fred_disliked = client.playlists["funk"].assign(ans=0)

fred_dt = pd.concat([fred_liked, fred_disliked])

fred_tastes = Dataset(fred_dt)

# Fit both classifiers and store them under the key 'fred'.
ash.train('fred', fred_tastes)


'The new dataset was trained and saved'

In [16]:
# Evaluation data for the "fred" models: "paradox spiral" is the liked class
# (1) and the newly downloaded "soul touching" playlist the disliked class (0).
# Labels are set on copies via .assign: client.playlists["paradox spiral"] is
# the same object as not_funk, whose label 0 must survive for the funk cells.
fred_mood_disliked = client.playlist_downloader(
    "soul touching", "lucasvaz97", "00LBQEoraAgSitcCnbndWi"
).assign(ans=0)
fred_mood_liked = client.playlists["paradox spiral"].assign(ans=1)

fred_r = pd.concat([fred_mood_liked, fred_mood_disliked])

fred_mood = Dataset(fred_r)

# Print the SVC and NB scores of the 'fred' models on this data.
ash.check_score('fred', fred_mood)

This is the SVC score: 0.7874015748031497
This is the NB score: 0.1968503937007874


In [17]:
# Trainer.evaluate performs this same index reset itself, so this line is redundant but harmless.
fred_mood.df=fred_mood.df.reset_index(drop=True)

# Store each model's liked/disliked song lists in ash.results_nb['fred'] and ash.results_svc['fred'].
ash.evaluate('fred',fred_mood)


[]