In [3]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.externals import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

data = pd.read_pickle("darklyrics/clustered_lyrics.pkl")
original_data = pd.read_pickle("darklyrics/data_filtered_original.pkl")
km = joblib.load('darklyrics/doc_cluster_big.pkl')
original_data.head()

Unnamed: 0,album,artist,name,year,lyrics
3,Dissolve Into Nothingness,angara damana,Blind Sheep,2012,[Hopeless time Everything Falls Ideas Died One...
4,Dissolve Into Nothingness,angara damana,A New Age Of Darkness And Blood,2012,"[Somewhere Near An Empty Space, Where The Inau..."
5,Dissolve Into Nothingness,angara damana,Contradiction & Repetition,2012,"[In This Endless Hedge, Where Is The Way Of Sa..."
6,Dissolve Into Nothingness,angara damana,A Truth Hidden In The Womb,2012,[the troops of darkness fly high in the sky to...
7,Dissolve Into Nothingness,angara damana,The Ado Of Wisdom,2012,"[I Am Standing In A Bright Circle, I See An Of..."


In [4]:
# Build vocabulary of existing clusters
vectorizer = TfidfVectorizer(ngram_range=(1, 1), stop_words='english', min_df=25, max_df=0.5)
x = vectorizer.fit_transform(data['lyrics_string'])
vocabulary = vectorizer.get_feature_names()

In [5]:
# New data
initial_lyrics = """
behold the vast and formless black below surges the human maelstrom swarming worthless 
breaths asphyxiate in themselves churches drown beneath our skyscrapers 
quelled by a vicious elite
a glorious caste of true industry
no gods, no time, no life
zenith of the universe
and yet, nadir of hell
mesmerize, entrancing metropolis
black psychedelia"""

new_data = {
    "artist": "new",
    "cluster": 0,
    "cluster_labels": "",
    "x": 0,
    "y": 0
}

brackets_regex = re.compile('\[.*?\]')
apostrophe_regex = re.compile("[']")
punctuation_regex = re.compile('[^0-9a-zA-Z ]+')
double_space_regex = re.compile('\s+')

lyrics = initial_lyrics.lower()
lyrics = brackets_regex.sub("", lyrics)
lyrics = apostrophe_regex.sub("", lyrics)
lyrics = punctuation_regex.sub("", lyrics)
lyrics = double_space_regex.sub(" ", lyrics)

lyrics = lyrics.split()
lyrics = [word for word in lyrics if word in vocabulary]
lyrics = ' '.join(lyrics)

new_data['lyrics_string'] = lyrics
new_data = pd.DataFrame(new_data, index=[len(data)])
data = data.append(new_data)
data

Unnamed: 0,artist,cluster,cluster_labels,lyrics_string,x,y
0,'68,0,gimme maybe aint hey lying gonna shes class ba...,take heart next room make scene make sure aint...,-0.329639,0.565931
1,(sic)monic,1,shall existence self control reality fucking m...,revelation resurrections human blood leak woun...,-0.273161,-0.203549
2,+\-,27,shall wind stars shadows sorrow silent rain mo...,looked wide open sky window often nights augus...,-0.569364,-0.359816
3,...aaaarrghh...,18,ja ei ni da se kun ir si el na min en je ty er...,hi lmeyi diledin mi ruhunun derinliklerinden g...,-0.810511,-0.245691
4,...and oceans,27,shall wind stars shadows sorrow silent rain mo...,fanfare chants trumpets loud proclaim world ho...,-0.573531,0.149318
5,.calibre,19,shall fucking evil people eternal reality self...,breath hurricane breath hurricane breath hurri...,-0.406454,0.613611
6,.crrust,7,youve change forget save fucking wrong better ...,let try discomfort let others things well see ...,-0.621415,0.141845
7,100 demons,25,fucking fuck shit youve fucked better sick cha...,dont even tell already know fucking beast insi...,-0.041817,-0.541671
8,1000 funerals,17,thy shall thou thee lord thine ancient forth e...,didnt tell secrets seems fool unfaithful one c...,-0.596968,0.420211
9,11 as in adversaries,21,forest trees revolt ancient spirits spheres ar...,tell difference looking scene shooting stars f...,-0.731733,-0.382420


In [6]:
vectorizer = TfidfVectorizer(ngram_range=(1, 1), stop_words='english', min_df=25, max_df=0.5)
x = vectorizer.fit_transform(data['lyrics_string'])
features = vectorizer.get_feature_names()
predictions = km.predict(x)
predictions

array([ 0,  1, 27, ..., 27, 19, 19], dtype=int32)

In [7]:
cluster = predictions[-1]
cluster_labels = data[data.cluster == cluster].iloc[0]['cluster_labels']
print cluster
print cluster_labels

19
shall fucking evil people eternal reality self existence gods escape shadows final control chaos stars minds destruction space endless future sea faith fuck void great grave sight suffering sorrow spirit


In [8]:
artist_sample = data[(data.cluster == cluster) & (data.artist != 'new')].sample(5)
artist_list = [row['artist'] for i, row in artist_sample.iterrows()]
print artist_list
song_sample = pd.DataFrame()
for i in range(0,5):
    song_sample = song_sample.append(original_data[original_data.artist == artist_list[i]].sample(1))
song_sample.head()

[u'kongh', u'the few against many', u'shen', u'godstomper', u'h\xe4vok \xfcnit']


Unnamed: 0,album,artist,name,year,lyrics
5911,Sole Creation,kongh,Skymning,2013,[Enclosed in a cloud shielding the outside off...
54805,Sot,the few against many,One With The Shadow,2009,"[[Music by C. Älvestam, Lyrics by M. Stanne], ..."
39399,Shapes,shen,Downfall - Zaratan,2013,"[Craving power, never satisfied,, Island gaine..."
18677,A Killer Combo (GODSTOMPER / MAGRUDERGRIND),godstomper,Family Crisis,2005,"[Home life is a battleground, Tragedy to be fo..."
11285,h.IV+ (Hoarse/Industrial/Viremia),hävok ünit,Man vs. Flesh [Structured Suicide],2007,"[Inside the void I will give myself control, I..."


In [12]:
song_sample.iloc[1]['lyrics']

[u'[Music by C. \xc4lvestam, Lyrics by M. Stanne]',
 u'It comes at you from both ends',
 u'the narrowing splinter',
 u'Of fading Life',
 u'What comes of a fear so infinte',
 u'In harrowing nights',
 u'Of The Howling Dark',
 u'It festers Inside your shadow',
 u'Everpresent and overpowering',
 u'This sickly fascination',
 u'has become the last addiction',
 u'The struggle continues',
 u'with the detachment of lipe',
 u'through proxy',
 u'life has stepped back',
 u'from a body in constant motion',
 u'the shadows seen through',
 u'transparent skin',
 u'An apparition deep in those eyes',
 u'So close you can taste it',
 u'But is this all all you could ask for?',
 u'Lay down your life',
 u'The Power of death commands you',
 u'The ultimate Lie',
 u'The Power of death commands you',
 u'Give in to the deal',
 u'Become one with the Shadow',
 u'It comes at you from both ends',
 u'the narrowing splinter',
 u'Of fading Life',
 u'What comes of a fear so infinte',
 u'In harrowing nights',
 u'Of The How