# Import

In [41]:
import random
import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score

from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation,Embedding
from keras.layers import LSTM
from keras_preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

## Process Data

In [25]:
df_train = pd.read_csv('data/train.csv', sep=',')
df_test = pd.read_csv('data/test.csv', sep=',')
df_train.head()

Unnamed: 0,lyric,class
0,Can't drink without thinkin' about you,1
1,Now Lil Pump flyin' private jet (Yuh),0
2,"No, matter fact, you ain't help me when I had ...",0
3,"And you could find me, I ain't hidin'",0
4,From the way you talk to the way you move,1


In [26]:
df_test.head()

Unnamed: 0,id,lyric
0,0,Now they know my name wherever I go
1,1,"If your girl don't get it poppin', put me on y..."
2,2,"P1 cleaner than your church shoes, ah"
3,3,"Bodies start to drop, ayy (Hit the floor)"
4,4,I don't look to the sky no mo'


In [6]:
target_map = {1:'pop', 0:'rap'}
df_train['genre'] = df_train['class'].map(target_map)
df_train.head()

Unnamed: 0,lyric,class,genre
0,Can't drink without thinkin' about you,1,pop
1,Now Lil Pump flyin' private jet (Yuh),0,rap
2,"No, matter fact, you ain't help me when I had ...",0,rap
3,"And you could find me, I ain't hidin'",0,rap
4,From the way you talk to the way you move,1,pop


In [None]:
df_train['lyric']=df_train['lyric'].str.replace(',','')
df_test['lyric']=df_test['lyric'].str.replace(',','')

In [27]:
train_text = df_train.lyric
train_label = df_train['class']

test_text = df_test.lyric

## Build Features

In [13]:
token = Tokenizer(num_words=4000)
token.fit_on_texts(train_text)
x_train_seq = token.texts_to_sequences(train_text)
x_train = pad_sequences(x_train_seq, maxlen = 400)
x_train = np.array(x_train)
y_train = np.array(train_label).reshape(-1,1)

In [28]:
x_test_seq = token.texts_to_sequences(test_text)
x_test = pad_sequences(x_test_seq, maxlen = 400)
x_test = np.array(x_test)


## Train Model

In [14]:
model = Sequential()
model.add(Embedding(output_dim=32,input_dim = 4000,input_length=400))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='sigmoid'))

2023-02-21 05:29:52.058797: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-21 05:29:52.059115: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-21 05:29:52.059193: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2023-02-21 05:29:52.059252: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2023-02-21 05:29:52.059307: W tensorflow/c

In [15]:
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 400, 32)           128000    
                                                                 
 dropout (Dropout)           (None, 400, 32)           0         
                                                                 
 lstm (LSTM)                 (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 256)               8448      
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 1)                 257       
                                                                 
Total params: 145,025
Trainable params: 145,025
Non-trai

In [33]:
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])
train_history = model.fit(x_train,y_train,batch_size=400,epochs=10,verbose=2,
                         validation_split=0.2)train_history = model.fit(x_train,y_train,batch_size=400,epochs=10,verbose=2,
                         validation_split=0.2)

In this example, y_true is a 1D array of true binary labels (0 or 1), and y_pred is a 1D array of predicted probabilities, which should be in the range [0, 1]. The roc_auc_score() function calculates the AUC by computing the ROC (Receiver Operating Characteristic) curve and then integrating the area under the curve. The resulting AUC value is a number between 0 and 1, where higher values indicate better performance.

Note that the roc_auc_score() function assumes that the predicted probabilities are for the positive class (i.e., class 1). If your model is a binary classifier that predicts the probability of the negative class (i.e., class 0), you can invert the predictions by subtracting them from 1 before passing them to roc_auc_score()

## Evaluate

### AUC

In [45]:
prediction = model.predict(x_test)

# you can choose a threshold to convert it to labels
threshold = 0.5
predicted_labels = (prediction > threshold).astype(int)

# after you label for this dataset by manual, you have a real label, it can used to evaluate your model
y_true = [random.randint(0, 1) for _ in prediction]  
auc = roc_auc_score(y_true, prediction)
print('AUC:', auc)


AUC: 0.5139649447453156


### Loss & Accuracy

In [46]:
# or use directly model to evaluate
scores = model.evaluate(x_test, predicted_labels, verbose=0)
print('Validation loss:', scores[0])
print('Validation accuracy:', scores[1])

Validation loss: 0.3367266356945038
Validation accuracy: 0.0
