In [1]:
#Importing Necessary Libraries

#Data Manipulation/Calculation Libraries
import pandas as pd
import numpy as np
import math

#Visualisation
import matplotlib.pyplot as plt
import seaborn as sns

#NLP 
import nltk
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from fuzzywuzzy import fuzz
from nltk import word_tokenize
from scipy.stats import skew, kurtosis
from scipy.spatial.distance import cosine, cityblock, jaccard, canberra, euclidean, minkowski, braycurtis
from tqdm import tqdm_notebook

#ML Models
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import Imputer
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix  
from sklearn.metrics import accuracy_score



In [2]:
# Read the lyrics table and discard the 'Unnamed: 0' column that comes along
# with the CSV, then preview the first rows.
data = pd.read_csv("FINAL_v1.csv").drop(columns=['Unnamed: 0'])
data.head()

Unnamed: 0,album_title,artist,track_title,lyrics
0,Girl Code,City Girls,What We Doin',"(The 90's), Yup, haha, House super clean (Clea..."
1,Girl Code,City Girls,Season (Ft. Lil Baby),"It's that season (Y'all know what time it is),..."
2,Girl Code,City Girls,Broke Boy,"If you a broke boy, stay the fuck out my way, ..."
3,Girl Code,City Girls,Clout Chasin',"Yung Miami, ho, JT on the track, Told that lil..."
4,Girl Code,City Girls,Intro (#FREEJT),"You have pre-paid call, You will not be charge..."


In [3]:
# Make every lyrics value a str so clean_text can call .lower() on each row.
# Missing values are blanked first: astype(str) on its own would turn NaN
# into the literal text 'nan', which would then survive cleaning as a token.
data['lyrics'] = data['lyrics'].fillna('').astype(str)

In [4]:
# Show the raw lyrics of the row labelled 0 (single label-based lookup
# instead of chained indexing).
data.loc[0, 'lyrics']

"(The 90's), Yup, haha, House super clean (Clean), fridge on full (Full), Baby with the grandmom, bitch I'm good, We could party all night, where we goin'? What we doin'?, Party all night, where we goin'? What we doin'?, House super clean (Clean), fridge on full (Full), Baby with the grandmom, bitch I'm good, We could party all night, where we goin'? What we doin'?, Party all night, where we goin'? What we doin'?, Girl, it's Friday night, We ain't kicked it in a while on a Friday night, It's time to step out on this Friday night, You actin' like an old lady, Girl, you probably right, Let me look up in the closet, see what I could wear, Check bank account, lemme see what I could spare, Baby daddy might trip, but I don't even care, You don't ever go out, he been out all year, Girl, you gon' get a bitch beat up, Child, please, what time we gon' meet up?, His soft ass ain't gon' do shit, But talk like he always do, bitch, You ain't nothin' but trouble, sho' is, You my dawg, and I love you,

In [5]:
def clean_text(text):
    """Turn a raw lyrics string into a list of cleaned word tokens.

    Processing order:
      1. lower-case the text;
      2. lemmatize each whitespace-separated word as a verb (pos="v");
      3. delete every run of digits;
      4. tokenize with NLTK's word_tokenize;
      5. keep only purely alphabetic tokens (str.isalpha);
      6. drop NLTK's English stop words.

    Parameters
    ----------
    text : str
        Raw lyrics. Must be a string, since .lower() is called on it.

    Returns
    -------
    list of str
        The remaining tokens, in their original order (duplicates kept).
    """
    # Local import so the function does not rely on `re` being imported in
    # another cell.
    import re

    # Convert to lower case
    text = text.lower()
    # Lemmatize word by word, treating every word as a verb
    lemma = WordNetLemmatizer()
    normalized = " ".join(lemma.lemmatize(word, pos="v") for word in text.split())
    # Remove digits. This must be a regex substitution: str.replace('\d+', '')
    # looks for the literal characters "\d+" and therefore never removed
    # anything. Note that a token such as "mp3" now becomes "mp" and passes
    # the isalpha filter below instead of being discarded whole.
    normalized = re.sub(r'\d+', '', normalized)
    # Trim leading/trailing whitespace
    normalized = normalized.strip()
    # Tokenize and keep only tokens made up entirely of letters
    tokens = word_tokenize(normalized)
    cleaned = [word for word in tokens if word.isalpha()]
    # Build the set of English stop words (e.g. "at", "the") that don't
    # contribute to meaning and remove them from the token list
    stop_words = set(stopwords.words('english'))
    words = [w for w in cleaned if w not in stop_words]
    return words

In [6]:
# Run clean_text over every lyrics string and store the resulting token
# lists in a new 'lyrics_clean' column.
data['lyrics_clean'] = data["lyrics"].apply(clean_text)

In [13]:
# Raise pandas' display limits so wide/long frames are not truncated, then
# show the first 1000 rows (positional slice).
pd.options.display.max_rows = 10000
pd.options.display.max_columns = 500
data.iloc[:1000]

Unnamed: 0,album_title,artist,track_title,lyrics,lyrics_clean
0,Girl Code,City Girls,What We Doin',"(The 90's), Yup, haha, House super clean (Clea...","[yup, haha, house, super, clean, clean, fridge..."
1,Girl Code,City Girls,Season (Ft. Lil Baby),"It's that season (Y'all know what time it is),...","[season, know, time, niggas, cut, check, reaso..."
2,Girl Code,City Girls,Broke Boy,"If you a broke boy, stay the fuck out my way, ...","[break, boy, stay, fuck, way, niggas, lyin, di..."
3,Girl Code,City Girls,Clout Chasin',"Yung Miami, ho, JT on the track, Told that lil...","[yung, miami, ho, jt, track, tell, lil, bitch,..."
4,Girl Code,City Girls,Intro (#FREEJT),"You have pre-paid call, You will not be charge...","[call, charge, call, call, jatavia, inmate, fe..."
5,Girl Code,City Girls,Drip,"Drip-drip-drip on a hundred, Y'all hoes hate, ...","[hundred, hoe, hate, hundred, hoe, hate, hundr..."
6,Girl Code,City Girls,Trap Star,"I got that million-dollar pussy (Yeah), And a ...","[get, pussy, yeah, nigga, yeah, get, budget, y..."
7,Girl Code,City Girls,Give It a Try (Ft. Jacquees),"Ohh, ahh, Ohh, ahh, Ohh, ahh, You better never...","[ohh, ahh, ohh, ahh, ohh, ahh, better, never, ..."
8,Girl Code,City Girls,Twerk (Ft. Cardi B),"(I want a slim, fine woman with some twerk wit...","[want, slim, fine, woman, twerk, throw, twerk,..."
9,Girl Code,City Girls,Panties and Bra,"When you come to the spot (Come to the spot), ...","[come, spot, come, spot, takin, takin, open, d..."


In [18]:
# Read the Spotify audio-feature table, discard the 'Unnamed: 0' column, and
# rename 'album'/'name' to the column names used by the lyrics table so the
# two frames share join keys.
spotify = (
    pd.read_csv("spotify_selected_data.csv")
    .drop(columns=['Unnamed: 0'])
    .rename(columns={'album': 'album_title', 'name': 'track_title'})
)
spotify.head()

Unnamed: 0,album_title,track_number,id,track_title,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity
0,Girl Code,1,4Vw3GouohCgu8hgGYdl8m9,Intro (#FREEJT),spotify:track:4Vw3GouohCgu8hgGYdl8m9,0.126,0.664,0.637,0.0,0.378,-4.597,0.417,98.892,0.0393,34
1,Girl Code,2,0RRm4OS5ymfZryXBuj0G2m,Twerk (feat. Cardi B),spotify:track:0RRm4OS5ymfZryXBuj0G2m,0.121,0.788,0.842,0.00112,0.166,-4.006,0.142,190.077,0.481,70
2,Girl Code,3,3A2yGHWIzmGEIolwonU69h,Act Up,spotify:track:3A2yGHWIzmGEIolwonU69h,0.0167,0.938,0.638,0.0,0.111,-4.713,0.189,97.075,0.313,75
3,Girl Code,4,6gqvCJJY8Sh4opRxXVO7A7,Season (feat. Lil Baby),spotify:track:6gqvCJJY8Sh4opRxXVO7A7,0.00964,0.594,0.695,0.0,0.157,-4.634,0.311,138.833,0.408,50
4,Girl Code,5,3E7jehnxsH4xZAuxDQg4zC,Broke Boy,spotify:track:3E7jehnxsH4xZAuxDQg4zC,0.00397,0.782,0.69,3e-06,0.175,-3.677,0.0682,102.997,0.317,41


In [25]:
# Inner-join lyrics and Spotify features on exact (track_title, album_title)
# matches; rows without an identical pair in both frames are dropped.
merged = data.merge(spotify, how='inner', on=['track_title', 'album_title'])

In [28]:
# (rows, columns) of the joined frame — a quick check of how many tracks
# survived the inner join.
merged.shape

(148, 18)

In [27]:
# Display the joined lyrics + Spotify features frame.
merged

Unnamed: 0,album_title,artist,track_title,lyrics,lyrics_clean,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity
0,17,XXXTENTACION,Depression & Obsession,"Na, na, na, na, na, Na, na, na, na, na, na, Na...","[na, na, na, na, na, na, na, na, na, na, na, n...",3,2H3wWrnO758y0fPH7Ilerg,spotify:track:2H3wWrnO758y0fPH7Ilerg,0.585,0.769,0.156,0.015,0.102,-10.7,0.035,105.95,0.527,73
1,17,XXXTENTACION,Revenge,"I think I, I think I finally, Found a way to f...","[think, think, finally, find, way, forgive, mi...",5,5TXDeTFVRVY7Cvt0Dw4vWW,spotify:track:5TXDeTFVRVY7Cvt0Dw4vWW,0.782,0.746,0.251,0.00203,0.106,-16.169,0.259,139.999,0.18,76
2,17,XXXTENTACION,Ayala (Outro),"Ooh, oh-oh, oh-oh, She showed me fake love, ca...","[ooh, show, fake, love, ca, forget, hurt, make...",11,5xbrhx2tMMcN68IT3cJbhS,spotify:track:5xbrhx2tMMcN68IT3cJbhS,0.737,0.57,0.285,0.0133,0.108,-14.125,0.0381,83.485,0.0973,65
3,17,XXXTENTACION,Orlando,"The pain in my heart just won't end, The words...","[pain, heart, wo, end, word, find, seem, compa...",10,05bnEv2dpFzmGVLGdjD9UP,spotify:track:05bnEv2dpFzmGVLGdjD9UP,0.859,0.57,0.0279,0.0892,0.254,-22.282,0.0588,84.902,0.28,68
4,17,XXXTENTACION,The Explanation,"17. A collection of nightmares, thoughts, and ...","[collection, nightmares, thoughts, situations,...",1,0VJYhKhrHgJ7fgjGaC2k07,spotify:track:0VJYhKhrHgJ7fgjGaC2k07,0.461,0.597,0.113,0.000393,0.148,-34.475,0.954,81.311,0.313,58
5,17,XXXTENTACION,Jocelyn Flores,"I know you so well, so well, I mean, I can do ...","[know, well, well, mean, anything, know, somew...",2,7m9OqQk4RVRkw9JJdeAw96,spotify:track:7m9OqQk4RVRkw9JJdeAw96,0.469,0.872,0.391,4e-06,0.297,-9.144,0.242,134.021,0.437,87
6,17,XXXTENTACION,Save Me,"Up there (Yeah), Who do I have?, Heaven and he...","[yeah, heaven, hell, friend, friend, wo, shed,...",6,2UwbhMie1EAYuTZ0QXeMwl,spotify:track:2UwbhMie1EAYuTZ0QXeMwl,0.307,0.731,0.204,0.0109,0.0989,-11.314,0.0369,142.141,0.191,71
7,17,XXXTENTACION,Dead Inside (Interlude),"Dead inside, Spend a lot of time stuck in this...","[dead, inside, spend, lot, time, stick, head, ...",7,2vQwlW8H1jFveGNXwZfJCV,spotify:track:2vQwlW8H1jFveGNXwZfJCV,0.973,0.461,0.0279,0.00226,0.169,-21.992,0.0412,88.388,0.354,65
8,17,XXXTENTACION,Carry On,"How did you get here?, I'm drunk and confused,...","[get, drink, confused, try, patient, yeah, hig...",9,2yZax79pOrYuyIFVW2cZY2,spotify:track:2yZax79pOrYuyIFVW2cZY2,0.768,0.735,0.197,0.000276,0.296,-13.378,0.537,147.899,0.383,76
9,1989,Taylor Swift,Wildest Dreams,"He said, ""Let's get out of this town, Drive ou...","[said, let, get, town, drive, city, away, crow...",9,3fVnlF4pGqWI9flVENcT28,spotify:track:3fVnlF4pGqWI9flVENcT28,0.0709,0.553,0.664,0.0056,0.106,-7.417,0.0741,140.06,0.467,73
