Step 1: predict moods:
    
    - Strategy 1:
    
        X1 - audio feature --> Y1 - probabilities of moods
        X2 - lyrics feature --> Y2 - probabilities of moods
        Y = mean(Y1, Y2)
        
    - Strategy 2:
        X- audio + lyrics ---> Y - probabilities of moods

Step 2: predict genre:

    - Strategy 1:
    
        X1 - audio feature --> Y1 - probabilities of genres
        X2 - moods --> Y2 - probabilities of genres
        Y = mean(Y1, Y2)
        
    - Strategy 2:
        X- audio + moods ---> Y - probabilities of genres

Step 3: similarity:

    - moods: jaccard_similarity_score
    - genre: cosine_similarity 
    - audio feature: cosine similarity

In [52]:
import pandas as pd
import numpy as np
import pickle

from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler

from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

from sklearn.pipeline import Pipeline
from sklearn.base import clone

import matplotlib.pylab as plt
import seaborn as sns
sns.set_style("whitegrid")

% matplotlib inline

In [53]:
df = pickle.load(open('audio_lyrics_moods.pickle', 'rb'))

In [54]:
df.head()

Unnamed: 0,key,energy,liveliness,tempo,speechiness,acousticness,instrumentalness,time_signature,duration,loudness,valence,danceability,mode,time_signature_confidence,tempo_confidence,key_confidence,mode_confidence,lyrics_features,moods
0,11.0,0.912744,0.083704,132.069,0.293137,0.005423,1e-06,0.0,4.0,218.30667,-3.89,0.752186,0.72692,0.552,0.541,1.0,1.0,oppa gangnam style gangnam style najeneun ttas...,[energetic]
1,6.0,0.745704,0.119955,100.008,0.046255,0.02623,0.012727,1.0,4.0,235.06086,-7.687,0.351282,0.691817,0.737,0.634,0.796,1.0,late ve ve lose sleep dream thing babi ve ve p...,[happy]
2,5.0,0.709932,0.231455,130.03,0.121741,0.036662,0.0,0.0,4.0,232.46104,-5.15,0.37439,0.704729,0.565,0.565,0.743,1.0,parti rock yeah woo let s parti rock hous toni...,"[happy, aggressive]"
3,3.0,0.705822,0.053292,126.009,0.126016,0.001966,0.0,0.0,4.0,194.09333,-3.898,0.592798,0.875137,0.004,0.114,1.0,0.742,alagamun lan weh wakun heya hanun gon alagamun...,"[energetic, happy]"
4,3.0,0.741757,0.072774,129.985,0.051255,0.096732,0.000474,0.0,4.0,285.42667,-5.86,0.58563,0.730711,0.271,0.324,0.822,1.0,j lo s new generat mr worldwid parti peopl flo...,[energetic]


In [17]:
np.random.seed(99)

## Audio

In [55]:
audio_moods = df.drop('lyrics_features', axis = 1)
audio_moods.dropna(how = 'any', inplace = True)
audio_moods.reset_index(drop = True, inplace = True)
audio_moods.head()

Unnamed: 0,key,energy,liveliness,tempo,speechiness,acousticness,instrumentalness,time_signature,duration,loudness,valence,danceability,mode,time_signature_confidence,tempo_confidence,key_confidence,mode_confidence,moods
0,11.0,0.912744,0.083704,132.069,0.293137,0.005423,1e-06,0.0,4.0,218.30667,-3.89,0.752186,0.72692,0.552,0.541,1.0,1.0,[energetic]
1,6.0,0.745704,0.119955,100.008,0.046255,0.02623,0.012727,1.0,4.0,235.06086,-7.687,0.351282,0.691817,0.737,0.634,0.796,1.0,[happy]
2,5.0,0.709932,0.231455,130.03,0.121741,0.036662,0.0,0.0,4.0,232.46104,-5.15,0.37439,0.704729,0.565,0.565,0.743,1.0,"[happy, aggressive]"
3,3.0,0.705822,0.053292,126.009,0.126016,0.001966,0.0,0.0,4.0,194.09333,-3.898,0.592798,0.875137,0.004,0.114,1.0,0.742,"[energetic, happy]"
4,3.0,0.741757,0.072774,129.985,0.051255,0.096732,0.000474,0.0,4.0,285.42667,-5.86,0.58563,0.730711,0.271,0.324,0.822,1.0,[energetic]


In [56]:
mlb_audio = MultiLabelBinarizer()

In [57]:
y_audio = mlb_audio.fit_transform(audio_moods['moods'])
y_audio

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]])

In [58]:
y_audio.shape

(30296, 9)

In [59]:
mlb_audio.classes_

array(['aggressive', 'classy', 'earthy', 'energetic', 'funky', 'happy',
       'sad', 'seductive', 'trippy'], dtype=object)

In [60]:
X_1 = audio_moods.drop('moods', axis = 1)
X_1.head()

Unnamed: 0,key,energy,liveliness,tempo,speechiness,acousticness,instrumentalness,time_signature,duration,loudness,valence,danceability,mode,time_signature_confidence,tempo_confidence,key_confidence,mode_confidence
0,11.0,0.912744,0.083704,132.069,0.293137,0.005423,1e-06,0.0,4.0,218.30667,-3.89,0.752186,0.72692,0.552,0.541,1.0,1.0
1,6.0,0.745704,0.119955,100.008,0.046255,0.02623,0.012727,1.0,4.0,235.06086,-7.687,0.351282,0.691817,0.737,0.634,0.796,1.0
2,5.0,0.709932,0.231455,130.03,0.121741,0.036662,0.0,0.0,4.0,232.46104,-5.15,0.37439,0.704729,0.565,0.565,0.743,1.0
3,3.0,0.705822,0.053292,126.009,0.126016,0.001966,0.0,0.0,4.0,194.09333,-3.898,0.592798,0.875137,0.004,0.114,1.0,0.742
4,3.0,0.741757,0.072774,129.985,0.051255,0.096732,0.000474,0.0,4.0,285.42667,-5.86,0.58563,0.730711,0.271,0.324,0.822,1.0


In [61]:
X_audio = np.array(X_1)
X_audio.shape

(30296, 17)

In [62]:
audio_train_test_list = train_test_split(X_audio, y_audio, test_size = 0.33)

In [23]:
def optimizing (estimator, param_grid, train_test_list, n_jobs):
    
    output = {}
    X_train, X_test, y_train, y_test = train_test_list
    
    grid = GridSearchCV(estimator, param_grid, refit = True, n_jobs = n_jobs)
    grid.fit(X_train, y_train) 
    
    output['estimator'] = grid.best_estimator_
    output['params'] = grid.best_params_
    output['prediction'] = grid.predict(X_test)
    
    return output


def estimator_searching (init_classifiers, param_grids, train_test_list, n_jobs):
    
    classifiers = {method : {} for method in init_classifiers.keys()}
    
    for method in classifiers.keys():
        
        estimator = init_classifiers [method]
        param_grid = param_grids [method]
        
        classifiers[method] = optimizing(estimator, param_grid, train_test_list, n_jobs)
    
    return classifiers

def show_selected_params (classifiers, train_test_list):
        
    y_true = train_test_list[3]
        
    for model, result in classifiers.items():
        print("    Accuracy in {} model: {}".format(model, accuracy_score(y_true, result['prediction'])))
        print(" ...with selected params: {} \n".format(result['params']))

    print("\n")

In [65]:
audio_init_classifiers = {
        'rfc': Pipeline([ ('scaling', StandardScaler()), ('clf', RandomForestClassifier()) ])
    }

audio_param_grids = {    
    'rfc': 
    {
        'clf__n_estimators': [5, 10, 100],
        'clf__min_samples_split': [2, 3, 4, 5, 10],
        'clf__max_features': ['sqrt', 'log2', 'auto']
    }
}

In [66]:
audio_rfc = estimator_searching(audio_init_classifiers, audio_param_grids, audio_train_test_list, n_jobs=2)

In [67]:
audio_rfc

{'rfc': {'estimator': Pipeline(memory=None,
       steps=[('scaling', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
              max_depth=None, max_features='sqrt', max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
      ...n_jobs=1,
              oob_score=False, random_state=None, verbose=0,
              warm_start=False))]),
  'params': {'clf__max_features': 'sqrt',
   'clf__min_samples_split': 10,
   'clf__n_estimators': 5},
  'prediction': array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         ..., 
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.]])}}

In [68]:
print(classification_report(audio_train_test_list[3], audio_rfc['rfc']['prediction']))

             precision    recall  f1-score   support

          0       0.59      0.25      0.35      1621
          1       0.40      0.01      0.03       440
          2       0.52      0.34      0.41      3090
          3       0.43      0.21      0.28      2751
          4       0.45      0.09      0.15      1042
          5       0.40      0.05      0.10      1314
          6       0.44      0.16      0.23      2436
          7       0.39      0.04      0.08      1346
          8       0.52      0.21      0.30      1899

avg / total       0.47      0.19      0.26     15939



In [69]:
rfc = RandomForestClassifier(n_estimators= 100, class_weight='balanced')
rfc.fit(audio_train_test_list[0], audio_train_test_list[2])

rfc_prediction = rfc.predict(audio_train_test_list[1])

In [64]:
print(classification_report(audio_train_test_list[3], rfc_prediction))

             precision    recall  f1-score   support

          0       0.68      0.23      0.34      1621
          1       0.50      0.00      0.00       440
          2       0.61      0.32      0.42      3090
          3       0.57      0.12      0.20      2751
          4       0.47      0.04      0.08      1042
          5       0.37      0.02      0.03      1314
          6       0.59      0.07      0.13      2436
          7       0.40      0.01      0.02      1346
          8       0.63      0.16      0.25      1899

avg / total       0.56      0.14      0.21     15939



In [70]:
import pickle

pickle.dump(rfc, open('audio_predict_moods.rfc.pickle', 'wb'))

## Lyrics

In [72]:
lyrics_moods = df.loc[:, ['lyrics_features', 'moods']].copy()
lyrics_moods.dropna(how='any', inplace = True)
lyrics_moods.reset_index(drop = True, inplace = True)
lyrics_moods.head()

Unnamed: 0,lyrics_features,moods
0,oppa gangnam style gangnam style najeneun ttas...,[energetic]
1,late ve ve lose sleep dream thing babi ve ve p...,[happy]
2,parti rock yeah woo let s parti rock hous toni...,"[happy, aggressive]"
3,alagamun lan weh wakun heya hanun gon alagamun...,"[energetic, happy]"
4,j lo s new generat mr worldwid parti peopl flo...,[energetic]


In [73]:
mlb_lyrics = MultiLabelBinarizer()

y_lyrics = mlb_lyrics.fit_transform(lyrics_moods['moods'])

In [74]:
y_lyrics.shape

(36733, 9)

In [75]:
lyrics_train_test_list = train_test_split(lyrics_moods['lyrics_features'], y_lyrics, test_size = 0.3)

In [87]:
lyrics_pipeline = Pipeline([ ('tf_idf', TfidfVectorizer()), ('clf', RandomForestClassifier(n_estimators=100)) ])

In [88]:
lyrics_pipeline.fit(lyrics_train_test_list[0], lyrics_train_test_list[2])

Pipeline(memory=None,
     steps=[('tf_idf', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
...n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False))])

In [89]:
prediction = lyrics_pipeline.predict(lyrics_train_test_list[1])

In [91]:
print(classification_report(lyrics_train_test_list[3], prediction))

             precision    recall  f1-score   support

          0       0.28      0.01      0.01      1715
          1       0.35      0.01      0.02       584
          2       0.56      0.03      0.06      3446
          3       0.61      0.06      0.11      3083
          4       0.44      0.01      0.03      1075
          5       0.47      0.02      0.03      1473
          6       0.39      0.01      0.02      2648
          7       0.34      0.01      0.03      1487
          8       0.36      0.01      0.02      2144

avg / total       0.45      0.02      0.04     17655



In [81]:
from sklearn.metrics import jaccard_similarity_score

In [100]:
print(jaccard_similarity_score(lyrics_train_test_list[3][66], prediction[66]))

0.777777777778


In [96]:
pickle.dump(lyrics_pipeline, open('lyrics_predict_moods.rfc.pickle', 'wb'))

In [98]:
print(jaccard_similarity_score(['happy', 'aggressive'], ['trippy', 'aggressive']))

0.5


In [99]:
print(jaccard_similarity_score(['happy', 'aggressive'], ['trippy', 'happy']))

0.0
