Create TF-IDF Vectorizer

In [1]:
import pandas as pd
df = pd.read_csv('spotify_and_clean_lyrics.csv')


In [2]:
x_features = pd.read_csv('x_unigram_features.csv')


In [3]:
#x_features = x_features.drop(['Unnamed: 0'], axis=1)
x_feat_cols = x_features.columns
print(x_feat_cols)

Index(['Unnamed: 0', 'aa', 'abandon', 'abide', 'ability', 'ablaze', 'able',
       'aboard', 'absence', 'absent',
       ...
       'west coast rap', 'western swing', 'witch house', 'world.1',
       'world fusion', 'worship.1', 'yacht rock', 'yoga', 'zen', 'zeuhl'],
      dtype='object', length=5522)


In [4]:
#label encoder and decoder
y_labels = pd.read_csv('y_label_categorized.csv')
y_labels = y_labels.drop(['Unnamed: 0'], axis=1)

# y_labels.drop(columns=['Unnamed: 0'], inplace=True)
consolidated_labels = []

# Loop through each row in the DataFrame
for index, row in y_labels.iterrows():
    # Find the column with the non-zero value (1)
    label = row.idxmax()
    # Append the label to the list
    consolidated_labels.append(label)

# Create a new column 'label' in the DataFrame with the consolidated labels

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
consolidated_encoded = le.fit_transform(consolidated_labels)

In [5]:
df[df['processed_lyrics'].isnull()]
#remove the rows with nans in df['clean_lyrics']
df = df[df['processed_lyrics'].notnull()]
df.reset_index(drop=True, inplace=True)

In [6]:
#drop Unnamed: 0 column
# x_features = x_features.drop(['Unnamed: 0'], axis=1)
x_feat_cols = x_features.columns
x_feat_cols

Index(['Unnamed: 0', 'aa', 'abandon', 'abide', 'ability', 'ablaze', 'able',
       'aboard', 'absence', 'absent',
       ...
       'west coast rap', 'western swing', 'witch house', 'world.1',
       'world fusion', 'worship.1', 'yacht rock', 'yoga', 'zen', 'zeuhl'],
      dtype='object', length=5522)

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd


def tfidf(df, ngram_range, max_features):
    if(max_features == 0):
        tfidf = TfidfVectorizer(ngram_range=ngram_range)
    else:
        tfidf = TfidfVectorizer(ngram_range=ngram_range, max_features=max_features)
    tfidf_matrix = tfidf.fit_transform(df['clean_lyrics'])
    return tfidf


x_tfidf_unigram = tfidf(df, (1,1), 5000)


In [8]:
#use better-profanity to filter out profanity
from better_profanity import profanity

def filter_profanity(text):
    profanity.load_censor_words()
    text = profanity.censor(text, '')
    return text

In [9]:

import re
import string
def clean_text(text):
    punct = '“’'
    # remove numbers
    text_nonum = re.sub(r'\d+', '', text)
    # remove punctuations and convert characters to lower case
    text_nopunct = ""
    for char in text_nonum:
        if char not in string.punctuation and char not in punct:
            text_nopunct += char.lower()
        else:    
            text_nopunct += " "
    # substitute multiple whitespace with single whitespace
    # Also, removes leading and trailing whitespaces
    text_no_doublespace = re.sub('\s+', ' ', text_nopunct).strip()
    return text_no_doublespace
    

In [10]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')


import contractions
from nltk.tokenize import word_tokenize

def tokenize(text):
    text = contractions.fix(text)
    text = clean_text(text)
    tokens = word_tokenize(text)
    return tokens

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nidhi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\nidhi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [11]:
from nltk.corpus import stopwords

def stop_words_input(input_text):
    
    stop_words = set(stopwords.words('english'))

    filtered_text = []
    for w in input_text: 
        w = w.strip()
        if w not in stop_words and w != "s":
            filtered_text.append(w)
    return filtered_text

In [12]:

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()


def lemmatize(input_text):
    lemma = []
    for w in input_text:
        #this is so jank
        #ideally we would have to do POS tagging and then pass the actual tag to the function
        w = lemmatizer.lemmatize(w,  'v')
        w = lemmatizer.lemmatize(w,  'a')
        w = lemmatizer.lemmatize(w,  'r')
        lemma.append(lemmatizer.lemmatize(w,  'n'))
    return lemma
  


In [13]:
import nltk

def array_to_string(input_array):
    words = set(nltk.corpus.words.words())

    text = ""
    for w in input_array:
        #english language !
        if(w in words):
            text += w + " "
    
    return text

    

In [14]:
def process_text(text):
    tokens = tokenize(text)
    remove_stop = stop_words_input(tokens)
    lemmatized = lemmatize(tokens) 
    text = array_to_string(tokens)
    text = filter_profanity(text)
    return text

Get Lyrics from Artist Name and Song Title

In [15]:

import requests
from bs4 import BeautifulSoup
import os
import re

def scrape_lyrics(artist, song):
    GENIUS_API_TOKEN = 'osvPFwS1hvo1Y0oUI659YobdX11IWnS-B8-3uEw-SxaIllHqzmdBtCRoH8jzXlNh'
    artist = re.sub('[^a-zA-Z0-9\s]', '', artist)
    song = re.sub('[^a-zA-Z0-9\s]', '', song)

    artistname = artist.replace(' ', '-')
    songname = song.replace(' ', '-')
    url = 'https://genius.com/' + artistname + '-' + songname + '-lyrics'
    print(url)
    page = requests.get(url)
    html = BeautifulSoup(page.text, 'html.parser')
    lyrics1 = html.find('div', class_='lyrics')
    lyrics2 = html.find('div', class_='Lyrics__Container-sc-1ynbvzw-5 Dzxov')

    
    if lyrics1:
        lyrics = lyrics1.get_text(separator=' ', strip=True)
    elif lyrics2:
        lyrics = lyrics2.get_text(separator=' ', strip=True)
    elif lyrics1 == lyrics2:
        return None
    
    #new line is missing from lyrics
    return lyrics
    

In [16]:
def get_model_word_features(lyrics):
  processed_lyrics = process_text(lyrics)
  lyric_list = []
  lyric_list.append(processed_lyrics)

  vectors = x_tfidf_unigram.transform(lyric_list)
  feature_names = x_tfidf_unigram.get_feature_names_out()
  dense = vectors.todense()
  denselist = dense.tolist()
  x_input = pd.DataFrame(denselist, columns=feature_names)
  return x_input


Get spotify features

In [17]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [18]:
def get_spotify_features(track, artist):

  client_id ='3699ed94db30435e8de48bab33770cab'
  client_secret = 'a65a1e99bdee45b5a5d8699a38618c63'
  client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
  sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

  # Search for a track
  track_name = track
  artist_name = artist
  results = sp.search(q='track:"{}" artist:"{}"'.format(track_name, artist_name), type='track', limit=1)

  genres = pd.read_csv('dummy_genres.csv')
  #dont know why rock-n-roll is here but not in x_features
  genres = genres.drop(columns=['Unnamed: 0', 'rock-n-roll', 'sad', 'turntablism'])
  genre_columns = genres.columns


  result = sp.search(artist)
  artist_uri = result['tracks']['items'][0]['artists'][0]['uri']

  # Get the artist object
  artist = sp.artist(artist_uri)

  artist_genres = artist['genres']
  print(artist_genres)
  for genre in artist_genres:
    if genre in genre_columns:
      genres[genre] = 1.0

  # Extract track details
  if results['tracks']['items']:
      track = results['tracks']['items'][0]
      track_id = track['id']
      track_name = track['name']
      artist_name = track['artists'][0]['name']
      audio_features = sp.audio_features([track_id])[0]
      audio_feature_columns = ['valence_tags', 'arousal_tags', 'dominance_tags', 'danceability', 'energy_1', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
      # Create a DataFrame to store audio features
      #dominance is not 
      df = pd.DataFrame({'track_name': [track_name],
                        'artist_name': [artist_name],
                         'dominance_tags': 5.310701476955552,
                         'arousal_tags': 4.272828654445337,
                        'danceability': [audio_features['danceability']],
                        'energy_1': [audio_features['energy']],
                        'loudness': [audio_features['loudness']],
                        'speechiness': [audio_features['speechiness']],
                        'acousticness': [audio_features['acousticness']],
                        'instrumentalness': [audio_features['instrumentalness']],
                        'liveness': [audio_features['liveness']],
                        'valence': [audio_features['valence']],
                         'valence_tags':[audio_features['valence']],
                         'key': [audio_features['key']],
                       'mode': [audio_features['mode']],
                        'tempo': [audio_features['tempo']]})
      
      return pd.concat([df[audio_feature_columns], genres], axis=1)

  else:
      print('No matching track found.')
      #return empty dataframe

      return pd.DataFrame()


Pipeline Officially Starts!

In [19]:
import pickle

# Load the pickled model from file
with open('rf_binary_relevance.pkl', 'rb') as file:
    cat_model = pickle.load(file)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [20]:
y_label_cols = y_labels.columns
y_label_cols

Index(['uplifting', 'pessimistic', 'romantic', 'playful', 'spiritual',
       'introspective', 'dramatic', 'intense', 'nostalgic', 'dark', 'lively',
       'calm', 'sophisticated', 'whimsical', 'sarcastic', 'atmospheric',
       'energetic', 'naturalistic', 'sensual', 'philosophical', 'emotional',
       'angry', 'delicate', 'melancholic', 'humorous', 'soothing',
       'reflective', 'dissonant', 'mysterious'],
      dtype='object')

In [21]:
def get_labels(prediction):
    
    prediction = prediction.todense()
    prediction = prediction.tolist()
    top_three = sorted(prediction[0], reverse=True)[:3]
    top_three
    #find indices of top 3 predictions
    top_three_indices = []
    print(prediction[0])
    for i in top_three:
        print(i)
        if(i < 0.1):
            break
        top_three_indices.append(prediction[0].index(i))
        prediction[0][prediction[0].index(i)] = -1
    # print(top_three_indices)
    # print(top_three)
    #get labels of top 3 predictions
    top_three_labels = []
    for i in top_three_indices:
        top_three_labels.append(y_label_cols[i])
    return top_three_labels

In [None]:
#import BERT_lyrics_params.pth


In [23]:
#ignore warnings
import warnings
warnings.filterwarnings('ignore')

while True:

  artist_name = input("Enter Artist Name: ")
  song_title = input("Enter Song Title: ")
  lyrics = scrape_lyrics(artist_name, song_title)

  if lyrics == None:
    print("Couldn't find lyrics. Sorry!")
    continue
  else:
    x_text_features = get_model_word_features(lyrics)

  
  x_audio_features = get_spotify_features(song_title, artist_name)
  x_audio_features

  if(x_audio_features.empty):
    print("Couldn't find spotify features. Sorry!")

  else:
    x_complete = pd.concat([x_text_features, x_audio_features], axis=1)
    #set column names to x_feat_cols
    # print(x_complete.columns)
   # x_complete.insert(0, 'Unnamed: 0', 45000)
    #remove first element from x_feat_cols
    x_feat_cols_new = x_feat_cols[1:]
    x_complete.columns = x_feat_cols_new
    
    x_complete
    #add column at position 0 called Unnamed: 0 with value 0
    
    prediction = cat_model.predict_proba(x_complete)
    print(prediction)
    #print top 3 predictions and their probabilities
   
    print(get_labels(prediction))
    # `#decode prediction
    # print("The song is: ", prediction[0])
    

  (0, 0)	0.13
  (0, 1)	0.22
  (0, 2)	0.15
  (0, 3)	0.03
  (0, 4)	0.11
  (0, 5)	0.02
  (0, 6)	0.08
  (0, 7)	0.12
  (0, 8)	0.11
  (0, 9)	0.17
  (0, 10)	0.01
  (0, 11)	0.1
  (0, 12)	0.02
  (0, 13)	0.01
  (0, 14)	0.12
  (0, 15)	0.04
  (0, 16)	0.03
  (0, 17)	0.01
  (0, 18)	0.07
  (0, 20)	0.25
  (0, 21)	0.13
  (0, 22)	0.04
  (0, 23)	0.16
  (0, 24)	0.04
  (0, 25)	0.09
  (0, 26)	0.04
  (0, 27)	0.01
[0.13, 0.22, 0.15, 0.03, 0.11, 0.02, 0.08, 0.12, 0.11, 0.17, 0.01, 0.1, 0.02, 0.01, 0.12, 0.04, 0.03, 0.01, 0.07, 0.0, 0.25, 0.13, 0.04, 0.16, 0.04, 0.09, 0.04, 0.01, 0.0]
0.25
0.22
0.17
['emotional', 'pessimistic', 'dark']
https://genius.com/ed-sheeran-perfect-lyrics
['pop', 'uk pop']


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent work

  (0, 0)	0.1
  (0, 1)	0.22
  (0, 2)	0.12
  (0, 3)	0.04
  (0, 4)	0.07
  (0, 5)	0.02
  (0, 6)	0.04
  (0, 7)	0.07
  (0, 8)	0.07
  (0, 9)	0.16
  (0, 10)	0.01
  (0, 11)	0.09
  (0, 12)	0.02
  (0, 13)	0.02
  (0, 14)	0.1
  (0, 15)	0.03
  (0, 16)	0.03
  (0, 17)	0.01
  (0, 18)	0.06
  (0, 20)	0.22
  (0, 21)	0.15
  (0, 22)	0.03
  (0, 23)	0.22
  (0, 24)	0.08
  (0, 25)	0.11
  (0, 26)	0.09
  (0, 27)	0.01
[0.1, 0.22, 0.12, 0.04, 0.07, 0.02, 0.04, 0.07, 0.07, 0.16, 0.01, 0.09, 0.02, 0.02, 0.1, 0.03, 0.03, 0.01, 0.06, 0.0, 0.22, 0.15, 0.03, 0.22, 0.08, 0.11, 0.09, 0.01, 0.0]
0.22
0.22
0.22
['pessimistic', 'emotional', 'melancholic']
https://genius.com/ed-sheeran-perfect-lyrics
['pop', 'uk pop']


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent work

  (0, 0)	0.1
  (0, 1)	0.22
  (0, 2)	0.12
  (0, 3)	0.04
  (0, 4)	0.07
  (0, 5)	0.02
  (0, 6)	0.04
  (0, 7)	0.07
  (0, 8)	0.07
  (0, 9)	0.16
  (0, 10)	0.01
  (0, 11)	0.09
  (0, 12)	0.02
  (0, 13)	0.02
  (0, 14)	0.1
  (0, 15)	0.03
  (0, 16)	0.03
  (0, 17)	0.01
  (0, 18)	0.06
  (0, 20)	0.22
  (0, 21)	0.15
  (0, 22)	0.03
  (0, 23)	0.22
  (0, 24)	0.08
  (0, 25)	0.11
  (0, 26)	0.09
  (0, 27)	0.01
[0.1, 0.22, 0.12, 0.04, 0.07, 0.02, 0.04, 0.07, 0.07, 0.16, 0.01, 0.09, 0.02, 0.02, 0.1, 0.03, 0.03, 0.01, 0.06, 0.0, 0.22, 0.15, 0.03, 0.22, 0.08, 0.11, 0.09, 0.01, 0.0]
0.22
0.22
0.22
['pessimistic', 'emotional', 'melancholic']
https://genius.com/ed-sheeran--shape-of-you-lyrics
Couldn't find lyrics. Sorry!
https://genius.com/train-marry-me-lyrics
['pop', 'talent show', 'uk pop']


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent work

  (0, 0)	0.06
  (0, 1)	0.16
  (0, 2)	0.09
  (0, 3)	0.03
  (0, 4)	0.12
  (0, 5)	0.03
  (0, 6)	0.03
  (0, 7)	0.06
  (0, 8)	0.12
  (0, 9)	0.05
  (0, 11)	0.07
  (0, 12)	0.02
  (0, 13)	0.02
  (0, 14)	0.13
  (0, 15)	0.05
  (0, 16)	0.01
  (0, 17)	0.04
  (0, 18)	0.07
  (0, 20)	0.2
  (0, 21)	0.07
  (0, 23)	0.18
  (0, 24)	0.03
  (0, 25)	0.09
  (0, 26)	0.07
  (0, 28)	0.02
[0.06, 0.16, 0.09, 0.03, 0.12, 0.03, 0.03, 0.06, 0.12, 0.05, 0.0, 0.07, 0.02, 0.02, 0.13, 0.05, 0.01, 0.04, 0.07, 0.0, 0.2, 0.07, 0.0, 0.18, 0.03, 0.09, 0.07, 0.0, 0.02]
0.2
0.18
0.16
['emotional', 'melancholic', 'pessimistic']
https://genius.com/train-marry-me-lyrics
['pop', 'talent show', 'uk pop']


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent work

  (0, 0)	0.04
  (0, 1)	0.14
  (0, 2)	0.1
  (0, 3)	0.03
  (0, 4)	0.14
  (0, 5)	0.03
  (0, 6)	0.05
  (0, 7)	0.04
  (0, 8)	0.12
  (0, 9)	0.06
  (0, 10)	0.04
  (0, 11)	0.08
  (0, 13)	0.04
  (0, 14)	0.13
  (0, 15)	0.12
  (0, 17)	0.02
  (0, 18)	0.05
  (0, 20)	0.22
  (0, 21)	0.05
  (0, 22)	0.02
  (0, 23)	0.2
  (0, 24)	0.04
  (0, 25)	0.11
  (0, 26)	0.07
  (0, 28)	0.02
[0.04, 0.14, 0.1, 0.03, 0.14, 0.03, 0.05, 0.04, 0.12, 0.06, 0.04, 0.08, 0.0, 0.04, 0.13, 0.12, 0.0, 0.02, 0.05, 0.0, 0.22, 0.05, 0.02, 0.2, 0.04, 0.11, 0.07, 0.0, 0.02]
0.22
0.2
0.14
['emotional', 'melancholic', 'pessimistic']
https://genius.com/whitney-houston-i-will-always-love-you-lyrics
['dance pop']


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent work

  (0, 0)	0.03
  (0, 1)	0.15
  (0, 2)	0.11
  (0, 4)	0.03
  (0, 5)	0.03
  (0, 6)	0.04
  (0, 7)	0.05
  (0, 8)	0.08
  (0, 9)	0.15
  (0, 11)	0.05
  (0, 12)	0.02
  (0, 13)	0.02
  (0, 14)	0.21
  (0, 15)	0.03
  (0, 17)	0.01
  (0, 18)	0.04
  (0, 20)	0.11
  (0, 21)	0.05
  (0, 22)	0.03
  (0, 23)	0.11
  (0, 24)	0.08
  (0, 25)	0.02
  (0, 26)	0.08
  (0, 28)	0.02
[0.03, 0.15, 0.11, 0.0, 0.03, 0.03, 0.04, 0.05, 0.08, 0.15, 0.0, 0.05, 0.02, 0.02, 0.21, 0.03, 0.0, 0.01, 0.04, 0.0, 0.11, 0.05, 0.03, 0.11, 0.08, 0.02, 0.08, 0.0, 0.02]
0.21
0.15
0.15
['sarcastic', 'pessimistic', 'dark']
https://genius.com/selena-gomez-the-scene-tell-me-something-i-dont-know-lyrics
Couldn't find lyrics. Sorry!
https://genius.com/ed-sheeran-thinking-out-loud-lyrics
['pop', 'uk pop']


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent work

  (0, 0)	0.08
  (0, 1)	0.12
  (0, 2)	0.11
  (0, 3)	0.03
  (0, 4)	0.11
  (0, 5)	0.01
  (0, 6)	0.03
  (0, 7)	0.11
  (0, 8)	0.08
  (0, 9)	0.13
  (0, 11)	0.04
  (0, 12)	0.03
  (0, 13)	0.03
  (0, 14)	0.18
  (0, 15)	0.04
  (0, 16)	0.03
  (0, 17)	0.02
  (0, 18)	0.04
  (0, 19)	0.01
  (0, 20)	0.16
  (0, 21)	0.08
  (0, 22)	0.02
  (0, 23)	0.12
  (0, 24)	0.06
  (0, 25)	0.03
  (0, 26)	0.08
  (0, 28)	0.01
[0.08, 0.12, 0.11, 0.03, 0.11, 0.01, 0.03, 0.11, 0.08, 0.13, 0.0, 0.04, 0.03, 0.03, 0.18, 0.04, 0.03, 0.02, 0.04, 0.01, 0.16, 0.08, 0.02, 0.12, 0.06, 0.03, 0.08, 0.0, 0.01]
0.18
0.16
0.13
['sarcastic', 'emotional', 'dark']
https://genius.com/elton-john-your-song-lyrics
['glam rock', 'mellow gold', 'piano rock', 'rock']


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent work

  (0, 0)	0.05
  (0, 1)	0.24
  (0, 2)	0.12
  (0, 3)	0.02
  (0, 4)	0.01
  (0, 5)	0.06
  (0, 6)	0.07
  (0, 7)	0.01
  (0, 8)	0.11
  (0, 9)	0.16
  (0, 10)	0.04
  (0, 11)	0.07
  (0, 12)	0.03
  (0, 13)	0.05
  (0, 14)	0.16
  (0, 15)	0.05
  (0, 16)	0.03
  (0, 17)	0.02
  (0, 18)	0.09
  (0, 20)	0.25
  (0, 21)	0.07
  (0, 23)	0.17
  (0, 24)	0.05
  (0, 25)	0.07
  (0, 26)	0.03
[0.05, 0.24, 0.12, 0.02, 0.01, 0.06, 0.07, 0.01, 0.11, 0.16, 0.04, 0.07, 0.03, 0.05, 0.16, 0.05, 0.03, 0.02, 0.09, 0.0, 0.25, 0.07, 0.0, 0.17, 0.05, 0.07, 0.03, 0.0, 0.0]
0.25
0.24
0.17
['emotional', 'pessimistic', 'melancholic']
https://genius.com/--lyrics
Couldn't find lyrics. Sorry!
https://genius.com/--lyrics
Couldn't find lyrics. Sorry!
https://genius.com/--lyrics
Couldn't find lyrics. Sorry!
https://genius.com/--lyrics
Couldn't find lyrics. Sorry!
https://genius.com/--lyrics
Couldn't find lyrics. Sorry!


In [None]:
#sparse matrix to dense matrix

predict


[1, 4, 4]
[0.22, 0.16, 0.16]


In [None]:
for i in range(len(x_feat_cols)):
    print(x_feat_cols[i], x_complete.columns[i])

abandon aa
abide abandon
ability abide
ablaze ability
able ablaze
aboard able
absence aboard
absent absence
absolute absent
absolutely absolute
absorb absolutely
abstract absorb
absurd abstract
abuse absurd
aby abuse
accept aby
access accept
accident access
accord accident
account accord
accuse account
ace accuse
ache ace
achieve ache
acid achieve
acre acid
across acre
act across
actin act
action actin
activate action
actor activate
actress actor
actual actress
actually actual
ad actually
add ad
addict add
addiction addict
address addiction
adieu address
adjust adieu
admire adjust
admit admire
adore admit
adrenaline adore
advance adrenaline
advantage advance
adventure advantage
advice adventure
aeroplane advice
afar aeroplane
affair afar
affect affair
affection affect
afford affection
afloat afford
afraid afloat
afterglow afraid
afterlife afterglow
afternoon afterlife
age afternoon
agent age
agnus agent
ago agnus
agony ago
agora agony
agree agora
agua agree
ah agua
aha ah
ahead aha
ai 

In [None]:
x_feat_cols

Index(['abandon', 'abide', 'ability', 'ablaze', 'able', 'aboard', 'absence',
       'absent', 'absolute', 'absolutely',
       ...
       'west coast rap', 'western swing', 'witch house', 'world.1',
       'world fusion', 'worship.1', 'yacht rock', 'yoga', 'zen', 'zeuhl'],
      dtype='object', length=5520)