# Final Product

In [7]:
import numpy as np
import matplotlib.pylab as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC
from sklearn.decomposition import PCA
import statsmodels.api as sm
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

In [8]:
def create_dataframe(playlist, artist='Taylor Swift'):
    '''Build the feature table and the artist labels for a Spotify playlist.

    Every page of the playlist is fetched from the Spotify Web API. A track is
    labelled 1 when its first listed artist equals ``artist`` and 0 otherwise.
    The int/float audio features of the track become the feature columns
    (the 'mode' column is dropped). Playlist entries without a track object or
    without audio features are skipped.

    @param playlist: Playlist link (the id is the last path segment, without
        the query string)
    @param artist: Artist searching for in the playlist

    @return features: dataset with all of the features
    @return labels: 0/1 'Artist' column, row-aligned with features

    @raise ValueError: if no track of the playlist yields audio features
    '''
    import os

    # create the spotipy client
    # NOTE(review): credentials should not live in source control. The standard
    # spotipy environment variables take precedence; the literals below are kept
    # only as a fallback so existing runs keep working - rotate and remove them.
    cid = os.environ.get('SPOTIPY_CLIENT_ID', 'e1d252163c854439adf89ee1872895ef')
    secret = os.environ.get('SPOTIPY_CLIENT_SECRET', '196af4bb9d674ad7942fb1c2f57f55fb')
    client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

    playlist_URI = playlist.split("/")[-1].split("?")[0]

    # retrieve the playlist once, following pagination so that playlists longer
    # than a single page are not silently truncated
    tracks = []
    page = sp.playlist_tracks(playlist_URI)
    while page:
        # entries may carry no track object; skip those
        tracks.extend(item.get("track") for item in page["items"] if item.get("track"))
        page = sp.next(page) if page.get("next") else None

    # one record per track keeps the label and the features aligned row by row
    records = []
    batch_size = 100  # documented maximum number of ids per audio_features call
    for start in range(0, len(tracks), batch_size):
        batch = tracks[start:start + batch_size]
        batch_features = sp.audio_features([track["uri"] for track in batch]) or []
        for track, audio in zip(batch, batch_features):
            if not audio:
                # no audio analysis available for this track
                continue
            record = {'Artist': int(track["artists"][0]["name"] == artist)}
            # keep strictly numeric features (exact int/float, as before)
            record.update(
                (key, value) for key, value in audio.items()
                if type(value) in (int, float)
            )
            records.append(record)

    if not records:
        raise ValueError(f'No tracks with audio features found in playlist {playlist!r}')

    # extract features and labels
    df = pd.DataFrame(records)
    features = df.drop(columns=['Artist', 'mode'])
    labels = df['Artist']
    return features, labels

In [9]:
def playlist_logit_model(playlist_link = 'https://open.spotify.com/playlist/3Q2GsAzMUmMcIV2TGNqleu?si=9801a625a0d64160', artist = 'Taylor Swift'):
    '''Fit a logistic regression that predicts whether a track is by the artist.

    @param playlist_link: Training playlist
    @param artist: name of the artist searching for

    @return the fitted statsmodels Logit results object
    '''
    X, y = create_dataframe(playlist_link, artist)

    # an intercept column is always added to the design matrix
    design = sm.add_constant(X, has_constant='add')
    return sm.Logit(y, design).fit()

In [22]:
def count_artist(playlist_link, artist = 'Taylor Swift', training_link = 'https://open.spotify.com/playlist/3Q2GsAzMUmMcIV2TGNqleu?si=9801a625a0d64160', threshold = 0.5):
    '''Estimate how many tracks of a playlist are by the given artist.

    A logistic regression is fitted on the training playlist and then applied to
    the audio features of ``playlist_link``. Tracks whose predicted probability
    is at least ``threshold`` are counted.

    @param playlist_link: Playlist to analyse
    @param artist: name of the artist searching for
    @param training_link: Training playlist used to fit the model
    @param threshold: minimum predicted probability for a track to be counted

    @return number of tracks predicted to be by the artist (int)
    '''
    print(f'Counting how many times {artist} is in your playlist...')
    # logistic model
    result = playlist_logit_model(training_link, artist)

    # build the features of the playlist to analyse; its labels are not needed,
    # but the requested artist is forwarded instead of a hard-coded name
    features, _ = create_dataframe(playlist_link, artist)
    predict = result.predict(sm.add_constant(features, has_constant='add'))

    # count how many predictions reach the threshold
    return int((predict >= threshold).sum())

In [26]:
# Example run: count the tracks predicted to be by the default artist
# ('Taylor Swift') in this playlist, using the default training playlist.
count_artist('https://open.spotify.com/playlist/0AtieNHc2NZuEI7XYVP6rK?si=46f1d623c63e4b5e')

Counting how many times Taylor Swift is in your playlist...
         Current function value: 0.359173
         Iterations: 35




10

Some limitations:
- The input playlist cannot be one made by Spotify.