# Load in extracted features

In [1]:
# open the every file in a directory and print the first line of each file
import os
import sys
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN


In [35]:
# load in the features
features_train = np.array([])
for file in os.listdir('features_train'):
    with open('features_train/' + file) as f:
        df = pd.read_csv(f)
        features_train = np.column_stack((features_train, df.values)) if features_train.size else df.values

# load the labels
train = pd.read_csv('train.csv')
y_train = train['Genre']
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
# normalize the data
features_train = preprocessing.scale(features_train)

# Load in test Data


In [36]:
features_test = np.array([])
for file in os.listdir('features_test'):
    with open('features_test/' + file) as f:
        df = pd.read_csv(f)
        features_test = np.column_stack((features_test, df.values)) if features_test.size else df.values

# scale the test data
features_test = preprocessing.scale(features_test)


# Logistic Regression

In [37]:

logreg = LogisticRegression(max_iter=300)
logreg.fit(features_train, y_train)
predictions = logreg.predict(features_train)
# Calculate the accuracy
accuracy = accuracy_score(y_train, predictions)
print("Logistic Regression accuracy:", accuracy)


Logistic Regression accuracy: 0.885


# Predict with Logistic Regression and save to CSV

In [38]:

# run the model on the test data
predictions = logreg.predict(features_test)
predictions = label_encoder.inverse_transform(predictions)
# save the predictions to a csv file
df = pd.DataFrame({'ID': os.listdir('test'), 'genre': predictions})
df.to_csv('predictions.csv', index=False)

# Neural Network

In [47]:


model = Sequential()
model.add(Dense(80, activation='relu', input_shape = (65,), kernel_regularizer='l2', bias_regularizer='l2', activity_regularizer='l2'))

model.add(Dense(256, activation='relu'))

model.add(Dense(128, activation='relu'))

model.add(Dense(60, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(features_train, y_train, epochs=25, batch_size=28, validation_split=0.2, verbose=2, shuffle=True, steps_per_epoch=10, validation_steps=10, validation_batch_size=28, validation_freq=1, max_queue_size=10, workers=1, use_multiprocessing=False)

loss, accuracy = model.evaluate(features_train, y_train)
print('Neural Network accuracy:', accuracy)

Epoch 1/25


10/10 - 1s - loss: 4.5502 - accuracy: 0.1857 - val_loss: 3.8550 - val_accuracy: 0.2500 - 629ms/epoch - 63ms/step
Epoch 2/25
10/10 - 0s - loss: 3.4267 - accuracy: 0.3393 - 33ms/epoch - 3ms/step
Epoch 3/25
10/10 - 0s - loss: 2.5496 - accuracy: 0.5072 - 32ms/epoch - 3ms/step
Epoch 4/25
10/10 - 0s - loss: 2.2185 - accuracy: 0.5500 - 36ms/epoch - 4ms/step
Epoch 5/25
10/10 - 0s - loss: 2.0472 - accuracy: 0.5833 - 33ms/epoch - 3ms/step
Epoch 6/25
10/10 - 0s - loss: 1.8739 - accuracy: 0.6143 - 24ms/epoch - 2ms/step
Epoch 7/25
10/10 - 0s - loss: 1.7985 - accuracy: 0.6304 - 28ms/epoch - 3ms/step
Epoch 8/25
10/10 - 0s - loss: 1.6791 - accuracy: 0.6679 - 32ms/epoch - 3ms/step
Epoch 9/25
10/10 - 0s - loss: 1.6207 - accuracy: 0.6714 - 31ms/epoch - 3ms/step
Epoch 10/25
10/10 - 0s - loss: 1.4687 - accuracy: 0.7319 - 27ms/epoch - 3ms/step
Epoch 11/25
10/10 - 0s - loss: 1.3974 - accuracy: 0.7286 - 31ms/epoch - 3ms/step
Epoch 12/25
10/10 - 0s - loss: 1.4062 - accuracy: 0.6993 - 25ms/epoch - 2ms/step
Epoc

# Predict with Neural Network and save to CSV

In [48]:
predictions = model.predict(features_test)
predictions = np.argmax(predictions, axis=1)
predictions = label_encoder.inverse_transform(predictions)
print(predictions)

df = pd.DataFrame({'ID': os.listdir('test'), 'genre': predictions})
df.to_csv('predictions.csv', index=False)

1/7 [===>..........................] - ETA: 0s

['metal' 'pop' 'country' 'metal' 'metal' 'rock' 'rock' 'classical' 'blues'
 'jazz' 'disco' 'hiphop' 'hiphop' 'disco' 'reggae' 'blues' 'blues' 'rock'
 'pop' 'hiphop' 'jazz' 'disco' 'classical' 'hiphop' 'jazz' 'classical'
 'country' 'blues' 'rock' 'blues' 'country' 'classical' 'blues' 'rock'
 'metal' 'pop' 'reggae' 'country' 'country' 'country' 'pop' 'metal'
 'country' 'pop' 'jazz' 'disco' 'country' 'pop' 'rock' 'disco' 'blues'
 'hiphop' 'hiphop' 'rock' 'hiphop' 'reggae' 'pop' 'rock' 'metal' 'country'
 'classical' 'country' 'country' 'country' 'hiphop' 'metal' 'hiphop'
 'jazz' 'country' 'blues' 'jazz' 'metal' 'classical' 'metal' 'country'
 'hiphop' 'country' 'rock' 'rock' 'rock' 'pop' 'disco' 'reggae' 'country'
 'rock' 'country' 'classical' 'disco' 'blues' 'hiphop' 'reggae' 'jazz'
 'country' 'metal' 'hiphop' 'classical' 'rock' 'country' 'pop' 'disco'
 'reggae' 'country' 'pop' 'metal' 'classical' 'pop' 'disco' 'pop' 'rock'
 'classical' 'hiphop' 'country' 'blues' 'classical' 'country' 'reg