In [1]:
from platform import python_version
print('A versão usada neste projeto foi', python_version())

A versão usada neste projeto foi 3.9.12


In [3]:
import findspark
findspark.init()

In [4]:
# Imports
import os
import time
import random
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.ml.feature import Normalizer, StandardScaler

### Etapa 1: Conexão entre o Spark e  streaming de dados no Kafka

In [5]:
# Endereço do servidor Kafka
SERVER = 'localhost:9092'

In [6]:
# Nome do tópico
TOPIC = 'projspotfy'

Precisamos agora concetar o Spark no Kafka, para isso precisamos de alguns conectores. Esses conectores estão disponíveis através de arquivos .jar

https://mvnrepository.com/artifact/org.apache.spark/spark-sql-kafka-0-10_2.12/3.2.1

https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients/2.1.1

https://mvnrepository.com/artifact/org.apache.spark/spark-token-provider-kafka-0-10_2.13/3.3.2

https://mvnrepository.com/artifact/org.apache.commons/commons-pool2/2.8.0

https://mvnrepository.com/artifact/org.apache.spark/spark-token-provider-kafka-0-10_2.12/3.1.2

In [12]:
# Conectores do Spark para o Apache Kafka

spark_jars =  ("{},{},{},{},{}".format(os.getcwd() + "/jars/spark-sql-kafka-0-10_2.12-3.2.1.jar",  
                                       os.getcwd() + "/jars/kafka-clients-2.1.1.jar", 
                                       os.getcwd() + "/jars/spark-streaming-kafka-0-10-assembly_2.12-3.3.2.jar", 
                                       os.getcwd() + "/jars/commons-pool2-2.8.0.jar",  
                                       os.getcwd() + "/jars/spark-token-provider-kafka-0-10_2.12-3.1.2.jar"))

In [10]:
# Inicializa sessão Spark

spark_session = SparkSession \
        .builder \
        .config("spark.jars", spark_jars) \
        .appName("Project_Spotfy") \
        .getOrCreate()

23/04/23 10:02:15 WARN Utils: Your hostname, mor-Inspiron-3501 resolves to a loopback address: 127.0.1.1; using 192.168.0.217 instead (on interface wlp0s20f3)
23/04/23 10:02:15 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
23/04/23 10:02:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


23/04/23 10:02:16 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
23/04/23 10:02:16 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.
23/04/23 10:02:16 WARN Utils: Service 'SparkUI' could not bind on port 4042. Attempting port 4043.
23/04/23 10:02:16 WARN Utils: Service 'SparkUI' could not bind on port 4043. Attempting port 4044.
23/04/23 10:02:16 WARN Utils: Service 'SparkUI' could not bind on port 4044. Attempting port 4045.
23/04/23 10:02:16 WARN Utils: Service 'SparkUI' could not bind on port 4045. Attempting port 4046.
23/04/23 10:02:16 WARN Utils: Service 'SparkUI' could not bind on port 4046. Attempting port 4047.


In [11]:
spark_session.sparkContext.setLogLevel("ERROR")

In [15]:
# Usamos o Spark Streaming para leitura do streaming de dados do Kafka e salvamos em um dataframe
df = spark_session \
        .readStream \
        .format("kafka") \
        .option("kafka.bootstrap.servers", SERVER) \
        .option("subscribe", TOPIC) \
        .option("startingOffsets", "latest") \
        .load()

In [16]:
# Selecionamos a coluna timestamp como string e salvamos em um novo dataframe
df1  = df.selectExpr('CAST(value AS STRING)', 'timestamp')

In [17]:
# Definimos o schema com o nome de cada coluna e o tipo de dado
def_schema = "order_id INT, id STRING, name STRING, popularity INT, duration_ms DOUBLE, " \
             + "artists STRING, id_artists STRING, release_date STRING, " \
             + "danceability DOUBLE,energy DOUBLE, key INT, loudness DOUBLE, " \
             + "mode INT,speechiness DOUBLE," \
             + "acousticness DOUBLE, instrumentalness DOUBLE, liveness DOUBLE, " \
             + "valence DOUBLE, tempo DOUBLE, time_signature DOUBLE"

In [19]:
# Selecionamos o streaming de dados de acordo com o schema e salvamos em um novo dataframe

df2 = df1.select(from_csv(col("value"), def_schema).alias("song"), "timestamp")

In [20]:
# Criamos uma view na memória do Spark e visualizamos o schema
df3 = df2.select("song.*", "timestamp")  
df3.createOrReplaceTempView("df3_View");
df3.printSchema()

root
 |-- order_id: integer (nullable = true)
 |-- id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- popularity: integer (nullable = true)
 |-- duration_ms: double (nullable = true)
 |-- artists: string (nullable = true)
 |-- id_artists: string (nullable = true)
 |-- release_date: string (nullable = true)
 |-- danceability: double (nullable = true)
 |-- energy: double (nullable = true)
 |-- key: integer (nullable = true)
 |-- loudness: double (nullable = true)
 |-- mode: integer (nullable = true)
 |-- speechiness: double (nullable = true)
 |-- acousticness: double (nullable = true)
 |-- instrumentalness: double (nullable = true)
 |-- liveness: double (nullable = true)
 |-- valence: double (nullable = true)
 |-- tempo: double (nullable = true)
 |-- time_signature: double (nullable = true)
 |-- timestamp: timestamp (nullable = true)



In [21]:
# Selecionamos os dados com as músicas do stream
musicas_stream = spark_session.sql("SELECT * FROM df3_View")

In [22]:
# Criamos o stream de dados no Spark Streaming
musicas_stream_spark = musicas_stream \
        .writeStream \
        .trigger(processingTime = '5 seconds') \
        .outputMode("append") \
        .option("truncate", "false") \
        .format("memory") \
        .queryName("tabela_spark") \
        .start()

musicas_stream_spark.awaitTermination(1)

False

                                                                                

In [23]:
# Selecionamos as músicas da tabela de stream do Spark
spark_songs = spark_session.sql("SELECT * FROM tabela_spark")

                                                                                

In [24]:
# Agora podemos visualizar o stream em tempo real como tabela do Spark
spark_songs.show(5)

+--------+--------------------+--------------------+----------+-----------+------------------+--------------------+------------+------------+------+---+--------+----+-----------+------------+----------------+--------+-------+-------+--------------+--------------------+
|order_id|                  id|                name|popularity|duration_ms|           artists|          id_artists|release_date|danceability|energy|key|loudness|mode|speechiness|acousticness|instrumentalness|liveness|valence|  tempo|time_signature|           timestamp|
+--------+--------------------+--------------------+----------+-----------+------------------+--------------------+------------+------------+------+---+--------+----+-----------+------------+----------------+--------+-------+-------+--------------+--------------------+
|     477|6HAlldXFSCLDJXRUg...|            Changing|        26|   267341.0|              Ruru|   GqsHOipAGtytofWjo|  2018-03-15|        0.69| 0.339|  0| -12.625|   1|      0.205|       0.437

In [25]:
# Podemos visualizar apenas algumas colunas, por exemplo
spark_songs.select('order_id', 'id', 'name', 'popularity', 'duration_ms', 'artists').show(5)

+--------+--------------------+--------------------+----------+-----------+------------------+
|order_id|                  id|                name|popularity|duration_ms|           artists|
+--------+--------------------+--------------------+----------+-----------+------------------+
|     477|6HAlldXFSCLDJXRUg...|            Changing|        26|   267341.0|              Ruru|
|     478|5z0u74IwEWVfWsf6P...|      In The Morning|         0|   212869.0|         JayPrince|
|     479|3R7JC1tdrm65S5J2g...|For the Last Time...|        32|   190000.0|   RINISnaggleOwky|
|     480|3eutwyiT2pMTeZ08t...|             Classix|         3|   250285.0|PersonaLaAveBaraka|
|     481|5eZBVbruQGQJ8MFLr...|            Van Gogh|         0|   187768.0|           IrieLee|
+--------+--------------------+--------------------+----------+-----------+------------------+
only showing top 5 rows



In [68]:
# Contagem de músicas extraídas em tempo real
spark_songs.count()

3922

Neste ponto esperamos um período para mais dados sejam coletados pelo streaming. Lembrando que quanto mais dados melhor o modelo.

## Etapa 2: Extraindo dados do Spotfy (preferências do usuário)

In [29]:
# https://pypi.org/project/spotipy/
# !pip install -q spotipy


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


                                                                                

In [30]:
# Imports
import os
import ujson
import spotipy
import spotipy.util
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.feature import StandardScaler
from pyspark.ml.clustering import KMeans
from pyspark.ml.evaluation import ClusteringEvaluator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [31]:
# Chaves da API do Spotify
os.environ["SPOTIPY_CLIENT_ID"] = 'inserir o clientid aqui'
os.environ["SPOTIPY_CLIENT_SECRET"] = 'inserir o client secret aqui'
os.environ["SPOTIPY_REDIRECT_URI"] = 'http://localhost:7777/callback'

In [32]:
# Escopo de extração das preferências do usuário
scope = 'user-library-read'

In [33]:
# Username no Spotify
username = 'marcioo.r@hotmail.com'

In [34]:
# Criação do tokhen de acesso

token = spotipy.util.prompt_for_user_token(username, scope)

                                                                                

In [35]:
# Cria o objeto de autenticação
spotipy_obj = spotipy.Spotify(auth=token)

In [37]:
# Extrai músicas da lista de favoritos do usuário

saved_tracks = spotipy_obj.current_user_saved_tracks(limit= 50)

In [38]:
# Número de músicas extraídas
n_tracks = saved_tracks['total']
print('Total de Tracks: %d ' % n_tracks)

Total de Tracks: 154 


In [39]:
# Função para extrair os atributos da lista de músicas do usuário
def select_features(track_response):
    return {        
        'id': str(track_response['track']['id']),
        'name': str(track_response['track']['name']),
        'artists': [artist['name'] for artist in track_response['track']['artists']],
        'popularity': track_response['track']['popularity']
    }

In [40]:
# Aplica a função
tracks = [select_features(track) for track in saved_tracks['items']]

In [41]:
# Extrai os atributos das músicas preferidas do usuário
while saved_tracks['next']:
    saved_tracks = spotipy_obj.next(saved_tracks)
    tracks.extend([select_features(track) for track in saved_tracks['items']])

In [42]:
# Criamos o dataframe do pandas
df_tracks = pd.DataFrame(tracks)
pd.set_option('display.max_rows', len(tracks))
df_tracks['artists'] = df_tracks['artists'].apply(lambda artists: artists[0])

In [43]:
df_tracks.head(10)

Unnamed: 0,id,name,artists,popularity
0,5vdp5UmvTsnMEMESIF2Ym7,Another One Bites The Dust - Remastered 2011,Queen,84
1,0nLiqZ6A27jJri2VCalIUs,Nothing Else Matters,Metallica,82
2,0qgrrDnUUhyxpxbBznUnzg,18 and Life,Skid Row,71
3,6rkeaQRCWZxwkjhyqgxjXi,Mouth for War,Pantera,63
4,4PtZE0h5oyPhCtPjg3NeYQ,No Excuses,Alice In Chains,65
5,7LRMbd3LEoV5wZJvXT1Lwb,T.N.T.,AC/DC,80
6,2OErSh4oVVAUll0vHxdr6p,Bigmouth Strikes Again - 2011 Remaster,The Smiths,72
7,2tAeN2TKlQLOoSPXtARzBV,Eruption - 2015 Remaster,Van Halen,66
8,0G21yYKMZoHa30cYVi1iA8,Welcome To The Jungle,Guns N' Roses,84
9,1aECQTfv5SO42lU4fvrdJJ,Surrender,Elvis Presley,54


In [44]:
# Dicionário para os atributos de áudio
audio_features = {}

In [45]:
# Extrai os atributos de áudio
for idd in df_tracks['id'].tolist():
    audio_features[idd] = spotipy_obj.audio_features(idd)[0]

In [46]:
# Adicionamos os atributos de áudio ao dataframe
df_tracks['acousticness'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['acousticness'])
df_tracks['speechiness'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['speechiness'])
df_tracks['key'] = df_tracks['id'].apply(lambda idd: str(audio_features[idd]['key']))
df_tracks['liveness'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['liveness'])
df_tracks['instrumentalness'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['instrumentalness'])
df_tracks['energy'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['energy'])
df_tracks['tempo'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['tempo'])
df_tracks['loudness'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['loudness'])
df_tracks['danceability'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['danceability'])
df_tracks['valence'] = df_tracks['id'].apply(lambda idd: audio_features[idd]['valence'])

In [47]:
df_tracks.head()

Unnamed: 0,id,name,artists,popularity,acousticness,speechiness,key,liveness,instrumentalness,energy,tempo,loudness,danceability,valence
0,5vdp5UmvTsnMEMESIF2Ym7,Another One Bites The Dust - Remastered 2011,Queen,84,0.112,0.161,5,0.163,0.312,0.528,109.967,-6.472,0.933,0.754
1,0nLiqZ6A27jJri2VCalIUs,Nothing Else Matters,Metallica,82,0.0458,0.0263,4,0.075,6e-06,0.362,142.352,-11.218,0.553,0.158
2,0qgrrDnUUhyxpxbBznUnzg,18 and Life,Skid Row,71,0.0023,0.028,1,0.112,1.5e-05,0.648,90.435,-11.101,0.466,0.305
3,6rkeaQRCWZxwkjhyqgxjXi,Mouth for War,Pantera,63,4e-06,0.0722,8,0.051,0.000614,0.954,156.899,-7.006,0.329,0.627
4,4PtZE0h5oyPhCtPjg3NeYQ,No Excuses,Alice In Chains,65,0.0206,0.0323,3,0.07,0.00062,0.838,117.454,-6.013,0.527,0.721


In [48]:
# Selecionamos uma música randomicamente
musica_randomica = random. randint(0,len(df_tracks)-1)
df_musica_randomica = df_tracks.head(musica_randomica)[-1:]
df_musica_randomica

Unnamed: 0,id,name,artists,popularity,acousticness,speechiness,key,liveness,instrumentalness,energy,tempo,loudness,danceability,valence
27,3tXmhimGAseS9JI2de0iF8,L'uomo dell'armonica,Ennio Morricone,36,0.754,0.0388,9,0.0907,0.44,0.362,66.78,-8.879,0.101,0.0387


In [49]:
# Músicas do streaming do Spark
spark_songs.show(5)

+--------+--------------------+--------------------+----------+-----------+------------------+--------------------+------------+------------+------+---+--------+----+-----------+------------+----------------+--------+-------+-------+--------------+--------------------+
|order_id|                  id|                name|popularity|duration_ms|           artists|          id_artists|release_date|danceability|energy|key|loudness|mode|speechiness|acousticness|instrumentalness|liveness|valence|  tempo|time_signature|           timestamp|
+--------+--------------------+--------------------+----------+-----------+------------------+--------------------+------------+------------+------+---+--------+----+-----------+------------+----------------+--------+-------+-------+--------------+--------------------+
|     477|6HAlldXFSCLDJXRUg...|            Changing|        26|   267341.0|              Ruru|   GqsHOipAGtytofWjo|  2018-03-15|        0.69| 0.339|  0| -12.625|   1|      0.205|       0.437

In [50]:
# Não precisamos mais dessas colunas
spark_songs = spark_songs.drop('order_id', 
                               'mode', 
                               'release_date', 
                               'id_artists',
                               'time_signature', 
                               'duration_ms',
                               'timestamp')

In [54]:
# Cria o dataframe com a música escolhida randomicamente
df_sp = spark_session.createDataFrame(df_musica_randomica)


In [55]:
# Concatena músicas do streaming do Spark com a música do Spotify
df = spark_songs.union(df_sp)

In [56]:
df.show(5)

+--------------------+--------------------+----------+------------------+------------+------+---+--------+-----------+------------+----------------+--------+-------+-------+
|                  id|                name|popularity|           artists|danceability|energy|key|loudness|speechiness|acousticness|instrumentalness|liveness|valence|  tempo|
+--------------------+--------------------+----------+------------------+------------+------+---+--------+-----------+------------+----------------+--------+-------+-------+
|6HAlldXFSCLDJXRUg...|            Changing|        26|              Ruru|        0.69| 0.339|  0| -12.625|      0.205|       0.437|           0.836|  0.0522|  0.575|157.854|
|5z0u74IwEWVfWsf6P...|      In The Morning|         0|         JayPrince|       0.791| 0.916|  4|  -4.364|     0.0676|      0.0316|         6.32E-6|  0.0318|  0.537|100.003|
|3R7JC1tdrm65S5J2g...|For the Last Time...|        32|   RINISnaggleOwky|       0.821| 0.387|  7| -10.859|      0.109|       0.296

## Pré-Processamento dos Dados

In [57]:
# Preparamos o VectorAssembler
vetor = VectorAssembler(inputCols = ['danceability',
                                     'energy',
                                     'loudness',
                                     'speechiness',
                                     'acousticness',
                                     'instrumentalness',
                                     'liveness',
                                     'valence',
                                     'tempo'], 
                        outputCol = 'song_features')

In [58]:
# Descartamos valores inválidos
assembled = vetor.setHandleInvalid("skip").transform(df)

In [59]:
# Preparamos o padronizador
std = StandardScaler(inputCol = 'song_features', outputCol = 'standardized')

In [60]:
# Treinamos o padronizador
scale = std.fit(assembled)

                                                                                

In [61]:
# Dataframe com dados padronizados
df = scale.transform(assembled)
df.show(5)

+--------------------+--------------------+----------+------------------+------------+------+---+--------+-----------+------------+----------------+--------+-------+-------+--------------------+--------------------+
|                  id|                name|popularity|           artists|danceability|energy|key|loudness|speechiness|acousticness|instrumentalness|liveness|valence|  tempo|       song_features|        standardized|
+--------------------+--------------------+----------+------------------+------------+------+---+--------+-----------+------------+----------------+--------+-------+-------+--------------------+--------------------+
|6HAlldXFSCLDJXRUg...|            Changing|        26|              Ruru|        0.69| 0.339|  0| -12.625|      0.205|       0.437|           0.836|  0.0522|  0.575|157.854|[0.69,0.339,-12.6...|[3.95481777348146...|
|5z0u74IwEWVfWsf6P...|      In The Morning|         0|         JayPrince|       0.791| 0.916|  4|  -4.364|     0.0676|      0.0316|     

## Machine Learning com Aprendizado Não Supervisionado

In [62]:
# Cria o objeto do modelo
objeto_KMeans = KMeans(featuresCol = 'standardized', k = 3)

In [63]:
# Treina o modelo
modelo_KMeans = objeto_KMeans.fit(df)

In [64]:
# Previsões do modelo
df_output = modelo_KMeans.transform(df)

## Sistema de Recomendação

In [69]:
# Classe
class RecoSystem():
    
    # Método construtor
    def __init__(self, data):
        self.data_ = data
    
    # Método de recomendação
    def Recomm(self, nome_musica, amount = 1):
        
        # Lista para as distâncias
        distancias = []
        
        # Seleciona a música
        song = self.data_[(self.data_.name.str.lower() == nome_musica.lower())].head(1).values[0]
        res_dt = self.data_[self.data_.name.str.lower() != nome_musica.lower()]
        
        # Loop para o cálculo das distâncias
        for i_song in tqdm(res_dt.values):
            
            # Inicializa a distância
            distancia = 0
            
            # Loop para calcular a distância
            for col in np.arange(len(res_dt.columns)):
                if not col in [0,1,2,14]:
                    distancia = distancia + np.absolute(float(song[col]) - float(i_song[col]))
            
            # Adiciona na lista de distâncias
            distancias.append(distancia)
        
        res_dt['distance'] = distancias
        res_dt = res_dt.sort_values('distance')
        
        columns = ['id','name', 
                   'artists', 
                   'acousticness', 
                   'liveness', 
                   'instrumentalness', 
                   'energy', 
                   'danceability', 
                   'valence']
        
        return res_dt[columns][:amount]

In [71]:
# Nomes das colunas
datalabel = df_output.select('id',
                             'name',
                             'artists',
                             'danceability',
                             'energy',
                             'key',
                             'loudness',
                             'speechiness',
                             'acousticness',
                             'instrumentalness',
                             'liveness',
                             'valence',
                             'tempo',
                             'prediction')

In [72]:
# Dataset final
df_final = datalabel.toPandas()
df_final.drop(df_final[df_final['artists'] == '0'].index, inplace = True)
df_final.drop_duplicates(inplace = True)
df_final.drop(df_final[df_final['danceability'] == 0.0000].index, inplace = True)
df_final.drop(df_final[df_final['liveness'] == 0.000].index, inplace = True)
df_final.drop(df_final[df_final['instrumentalness'] == 0.000000].index, inplace = True)
df_final.drop(df_final[df_final['energy'] == 0.0000].index, inplace = True)
df_final.drop(df_final[df_final['danceability'] == 0.000].index, inplace = True)
df_final.drop(df_final[df_final['valence'] == 0.000].index, inplace = True)

In [73]:
df_final.shape

(3387, 14)

In [74]:
df_final.sample(5)

Unnamed: 0,id,name,artists,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,prediction
856,1jOdvyyjgPZNj1Rdv140zl,Like the First Time,ErinRae,0.532,0.497,2,-9.189,0.0244,0.473,0.0474,0.102,0.34,76.478,0
2236,3DVNhZJWQL65Zri5oDEBwF,Bliss,Biyo,0.595,0.456,6,-9.66,0.0561,0.177,0.00176,0.148,0.564,79.938,1
2649,6h2gMMiPIiPLWe4oTQr6Zr,Devon Loch,FutureHaunts,0.363,0.913,7,-3.703,0.0476,0.0169,0.559,0.124,0.433,144.317,1
1154,4fbVsIdbvl8QnzRaF67DhM,Lead,AndreaBelfi,0.471,0.49,1,-16.178,0.0345,0.226,0.912,0.117,0.0327,91.977,0
3077,2axmlxzcjgAdspldlkNKmu,Dirty Blue,Swings,0.34,0.46,7,-8.26,0.0324,0.0925,0.177,0.243,0.0695,121.392,1


In [75]:
# Cria o objeto
reco_obj = RecoSystem(df_final)

In [76]:
musica = df_musica_randomica['name'].tolist()[0]
print(musica)

L'uomo dell'armonica


In [77]:
# Executa a recomendação
recomendacao = reco_obj.Recomm(musica)

100%|████████████████████████████████████| 3386/3386 [00:00<00:00, 53327.90it/s]


In [78]:
# Extrai a música randômica da lista de favoritos do Spotify
y = df_musica_randomica[['id','name', 
                         'artists',  
                         'acousticness', 
                         'liveness', 
                         'instrumentalness', 
                         'energy', 
                         'danceability', 
                         'valence']]

In [79]:
# Concatena a recomendação com a música randômica da lista de favoritos do Spotify
recomendacao = pd.concat([recomendacao, y])

In [81]:
# Salva a recomendação em disco
recomendacao.to_csv('recomendacoes/recomendacao.csv')

In [83]:
# Carrega o arquivo do disco
df_reco = (spark_session.read.format("csv").options(header = "true").load("recomendacoes/recomendacao.csv"))

In [84]:
# Recomendação de música
df_reco.show()

+---+--------------------+--------------------+---------------+------------+--------+----------------+------+------------+-------+
|_c0|                  id|                name|        artists|acousticness|liveness|instrumentalness|energy|danceability|valence|
+---+--------------------+--------------------+---------------+------------+--------+----------------+------+------------+-------+
|459|7khpPruHJK39VTBUQ...|            Stranger|     MildOrange|       0.412|   0.109|           0.113| 0.491|       0.334|  0.452|
| 27|3tXmhimGAseS9JI2d...|L'uomo dell'armonica|Ennio Morricone|       0.754|  0.0907|            0.44| 0.362|       0.101| 0.0387|
+---+--------------------+--------------------+---------------+------------+--------+----------------+------+------------+-------+



### Fim do Projeto