In [None]:
# `requests` performs the Spotify HTTP calls made later in this notebook.
# Only a missing package is caught here: a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit and mislabel any unrelated import failure as
# "package not installed".
try:
    import requests
except ImportError:
    print("If you read this message, it means you need to install the package requests by using pip3 install requests --user")
import numpy as np
import json
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Audio as play_wave
from IPython.display import display
import getpass
from sklearn.decomposition import PCA
markers=",ov^8sp*hDPX"

csv_data="../data/CFdata.csv"

In [None]:

# Ask for the token interactively so that it is never stored in the notebook.
# A copy-pasted token can carry stray whitespace or a trailing newline, which
# would end up inside the Authorization header, so it is stripped first.
token = getpass.getpass("Please copy-paste your token from here https://developer.spotify.com/console/get-audio-features-several-tracks/ \n").strip()
if not token:
    # Fail here with a clear message rather than on the first API call.
    raise ValueError("The Spotify token is empty: run this cell again and paste a valid token")
# Header sent with every request to the Spotify Web API below.
headers = {"Authorization": "Bearer %s" % token}

# Data preparation
## Import CSV downloaded from google drive with Pandas
Pandas is a wonderful library that allows us to simply import and work with structured data.
For example, saving the Google Spreadsheet as a csv, we can then import it with a single command

In [None]:
# Read the comma-separated export into a DataFrame and render it as a rich table.
df = pd.read_csv(csv_data, sep=",")
display(df)

## Separate people's names from tracks and rates
Now, let's put people's names in a list and the tracks' names and artists in a dataframe, and let's export the rates to a matrix

In [None]:
# The first two columns describe each track; every column from index 3 onwards
# is one person's ratings (column index 2 is not used by either slice).
tracks = df.iloc[:, :2]
people = list(df.columns[3:])
# Ratings as a float32 matrix: one row per track, one column per person.
rates = df.iloc[:, 3:].to_numpy().astype(np.float32)
print(people, rates, sep="\n")
display(tracks)


# Get the ids of the tracks on Spotify and import their features
In order to have a content-based description of the tracks, we can use Spotify's APIs to get audio features.
First, we use the search apis to look for Spotify ids for each track

In [None]:
# Resolve every (artist, song) pair to a Spotify track id via the search endpoint.
ids = []
url = "https://api.spotify.com/v1/search"

for i, track in tracks.iterrows():
    params = {"q": "%s %s" % (track.Artist, track.Songs), "type": "track"}
    # Without a timeout, requests waits indefinitely if the server never answers.
    req = requests.get(url=url, params=params, headers=headers, timeout=30)
    # Stop on any HTTP error and show the response body (e.g. an expired token).
    assert req.status_code == 200, req.content
    items = req.json()["tracks"]["items"]
    if not items:
        # Same exception type the plain items[0] would raise, but it names the
        # query that could not be resolved.
        raise IndexError("Spotify search returned no track for %r" % params["q"])
    # The first hit returned by the search is taken as the match.
    ids.append(items[0]["id"])

print(ids)

Then, we use audio-features with the set of ids to download the features and put them in another dataframe.
We then consider only those columns that are real features, as seen below

In [None]:
# Download the audio features of all tracks with a single batched request.
url = "https://api.spotify.com/v1/audio-features"
params = {"ids": ",".join(ids)}
# Without a timeout, requests waits indefinitely if the server never answers.
req = requests.get(url=url, params=params, headers=headers, timeout=30)
# Same status check as the search cell: show the response body on an HTTP error
# instead of failing later with a KeyError on the JSON payload.
assert req.status_code == 200, req.content
audio_features = req.json()["audio_features"]
# NOTE(review): the endpoint is expected to return null for ids it cannot
# resolve; confirm against the API reference. Such entries are reported here
# rather than being passed on to the DataFrame constructor.
missing_ids = [track_id for track_id, fts in zip(ids, audio_features) if fts is None]
assert not missing_ids, "No audio features returned for ids: %s" % missing_ids
df_features = pd.DataFrame(audio_features)


In [None]:
# Columns of the Spotify answer that are kept as features, in plotting order.
columns_fts = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'tempo',
    'valence',
]
# Plain array with one row per track and one column per selected feature.
features = df_features[columns_fts].values

# Let's summarize

In [None]:
# Recap of the sizes of the data gathered so far.
print("We have %d people who rated %d songs" % (len(people), len(tracks)))
print("Indeed, the shape of our rating matrix is %s" % (rates.shape,))
print("For each track we extracted %d features from Spotify leading to a matrix shaped %s" % (len(columns_fts), features.shape))

# Principal Component Analysis (PCA)
## Content-based PCA
Now what can we do about it? First, we can use Principal Component Analysis to reduce the feature space of the songs from 8 dimensions to 2 dimensions, chosen as the two directions that best fit the data in features.

Before doing anything, let's choose a song and guess: name two songs in the list that are similar in your opinion

In [None]:
# Fit a 2-component, whitened PCA on the audio features and keep the projection
# of every track onto those two components.
pca_ft = PCA(whiten=True, n_components=2)
features_2 = pca_ft.fit_transform(features)

In [None]:
# Scatter every song in the plane of the two principal components of its audio features.
plt.figure(figsize=(8,8))
for i, track in tracks.iterrows():
    # Plot the PCA projection (features_2); the raw `features` columns 0 and 1
    # are just acousticness and danceability, not the principal components.
    plt.scatter(features_2[i,0], features_2[i,1], s=100, alpha=0.7, marker=markers[i%len(markers)], label=track.Songs)
plt.legend()
plt.xlabel("Principal component 1")
plt.ylabel("Principal component 2")
range_x = features_2[:,0].max() - features_2[:,0].min()
# Widen the x-axis well past the right-most point (presumably to leave room for the legend).
plt.xlim([features_2[:,0].min() - range_x/10, features_2[:,0].max() + 1.5*range_x])
plt.show()

How do the different features contribute to each component?

In [None]:
# Draw the weight of every audio feature in each principal component.
plt.figure(figsize=(16, 2))
# Transposed, each column is one component, so plot() draws one line per component.
plt.plot(pca_ft.components_.T)
plt.xticks(range(len(columns_fts)), columns_fts)
plt.show()

## Context-based PCA
Actually, we have another way to describe songs: we can benefit from people's rates.
Let's apply PCA to the rates, and let's summarize the songs in 2 dimensions

Can you first guess what will appear?

In [None]:
# Fit a 2-component, whitened PCA on the rating matrix (rows are tracks) and
# keep the projection of every track onto those two components.
pca_trackpeople = PCA(whiten=True, n_components=2)
tracks_2 = pca_trackpeople.fit_transform(rates)

In [None]:
# Scatter every song in the plane of the two principal components of the ratings.
plt.figure(figsize=(8, 8))
for idx, row in tracks.iterrows():
    # Cycle through the marker shapes so neighbouring songs are distinguishable.
    marker_shape = markers[idx % len(markers)]
    plt.scatter(
        tracks_2[idx, 0],
        tracks_2[idx, 1],
        s=100,
        marker=marker_shape,
        alpha=0.7,
        label=row.Songs,
    )
plt.legend()
first_component = tracks_2[:, 0]
range_x = first_component.max() - first_component.min()
# The x-axis is padded by a tenth of the range on the left and 1.5 ranges on the right.
plt.xlim([first_component.min() - range_x / 10, first_component.max() + 1.5 * range_x])
plt.show()

How do the different PEOPLE contribute to each component?

In [None]:
# Draw the weight of every person's ratings in each principal component.
plt.figure(figsize=(16, 2))
# Transposed, each column is one component, so plot() draws one line per component.
plt.plot(pca_trackpeople.components_.T)
plt.xticks(range(len(people)), people)
plt.show()

# YOUR TURN
**But we can also invert the process. How well do listening habits describe people? Can we draw people in a 2D world?**