# Part 3 - Neural Network based Natural Language Processing

# Imports

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.sequence import pad_sequences


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.metrics import classification_report, confusion_matrix

from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier



# Preprocessing

In [3]:
# Load the tab-separated dataset from the Kaggle input directory.
# read_table uses a tab delimiter by default.
dfx = pd.read_table("/kaggle/input/maestro-midi-notes-tsv/out.tsv")

In [4]:
# Work on a copy so the frame loaded from disk (dfx) stays untouched.
df = dfx.copy()

# Extract features (notes) and labels (composers).
# The 'notes' column is still in its raw on-disk form at this point; it is
# parsed and split into train/test sets in the cells below, so no split is
# performed here (a split on the unparsed values would just be overwritten).
features = df['notes'].tolist()
# Every column other than 'notes' is treated as part of the label matrix.
labels = df.drop('notes', axis=1)


In [5]:
import ast

# Each 'notes' entry is a string holding a Python literal; evaluate every one
# back into the object it encodes and collect the results in a plain list.
features = [ast.literal_eval(raw_notes) for raw_notes in df['notes']]

In [6]:
# Hold out 20% of the parsed sequences (and their labels) for testing;
# the fixed random_state keeps the partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    random_state=42,
    test_size=0.2,
)

In [7]:
# Padding
# Both splits are padded at the end ('post') to the length of the longest
# *training* sequence, so train and test arrays share the same width.
max_length = len(max(X_train, key=len))
X_train_padded, X_test_padded = (
    pad_sequences(sequences, maxlen=max_length, padding='post')
    for sequences in (X_train, X_test)
)

# Shuffle and Split
# Five random 80/20 partitions with a fixed seed.
cv = ShuffleSplit(test_size=0.2, n_splits=5, random_state=42)

# RNN & LSTM

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, SimpleRNN, Bidirectional

# Append a trailing axis of size 1 so the arrays become
# (samples, timesteps, 1), i.e. one feature per timestep.
X_train_padded_ = np.expand_dims(X_train_padded, axis=-1)
X_test_padded_ = np.expand_dims(X_test_padded, axis=-1)

## Simple RNN

In [19]:
# Size the softmax layer from the label matrix instead of a hard-coded 60,
# so the model always matches the targets it is trained on.
# NOTE(review): assumes y_train has one column per class (one-hot) — confirm.
num_classes = y_train.shape[1]

# One SimpleRNN layer over the (timesteps, 1) padded sequences, followed by a
# softmax classifier with one unit per class.
rnn_model = Sequential([
    SimpleRNN(64, input_shape=X_train_padded_.shape[1:]),
    Dense(num_classes, activation='softmax'),
])

# Not used in this cell (the optimizer is selected by name below); the import
# is kept in case later cells rely on `Adam` being in scope.
from keras.optimizers import Adam

rnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train: 20% of the training data is held back by Keras for validation.
rnn_model.fit(X_train_padded_, y_train, epochs=3, batch_size=256, validation_split=0.2)

Epoch 1/3


I0000 00:00:1713678797.863871     367 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4s/step - accuracy: 0.6362 - loss: 0.6114 - val_accuracy: 0.6404 - val_loss: 0.5312
Epoch 2/3
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3s/step - accuracy: 0.6731 - loss: 0.4235 - val_accuracy: 0.6424 - val_loss: 0.4665
Epoch 3/3
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3s/step - accuracy: 0.6852 - loss: 0.3654 - val_accuracy: 0.6732 - val_loss: 0.3857


<keras.src.callbacks.history.History at 0x7ebbbce77b50>

## LSTM

In [30]:
# LSTM model
# Size the softmax layer from the label matrix instead of a hard-coded 60,
# so the model always matches the targets it is trained on.
# NOTE(review): assumes y_train has one column per class (one-hot) — confirm.
num_classes = y_train.shape[1]

# One LSTM layer over the (timesteps, 1) padded sequences, followed by a
# softmax classifier with one unit per class.
lstm_model = Sequential([
    LSTM(64, input_shape=X_train_padded_.shape[1:]),
    Dense(num_classes, activation='softmax'),
])

lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train: 20% of the training data is held back by Keras for validation.
lstm_model.fit(X_train_padded_, y_train, epochs=3, batch_size=128, validation_split=0.2)

Epoch 1/3


I0000 00:00:1713678797.863871     367 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4s/step - accuracy: 0.7120 - loss: 0.2714 - val_accuracy: 0.6856 - val_loss: 0.3312
Epoch 2/3
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3s/step - accuracy: 0.7158 - loss: 0.2445 - val_accuracy: 0.6924 - val_loss: 0.3245
Epoch 3/3
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3s/step - accuracy: 0.7206 - loss: 0.2365 - val_accuracy: 0.6887 - val_loss: 0.3387


<keras.src.callbacks.history.History at 0x7ebbbce77e90>