In [None]:
import tensorflow as tf
import numpy as np
import os
import pickle
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import datasets, layers, models


In [None]:
dataset_dir = './music_dataset/'
pickle_file_path = os.path.join(dataset_dir, 'mel_chroma_pic_waug.pickle')

In [None]:
# unpack the pickle file
pickle_in = open(pickle_file_path, "rb")
test_data = pickle.load(pickle_in)

In [None]:
# map each raga str to an int
raga_to_int = {
    'Sankarabaranam': 0,
    'Reethigowla': 1,
    'Kalyani': 2,
    'Shanmukhapriya': 3,
    'SindhuBhairavi': 4,
    'Sri': 5,
    'Surutti': 6,
    'Thodi': 7,
    'PoorviKalyani': 8,
    'Mohanam': 9,
    'Madhyamavathi': 10,
    'Kharaharapriya': 11,
    'Keeravani': 12,
    'Kapi': 13,
    'Kambhodhi': 14,
    'Kamas': 15, 
    'Jaganmohini': 16,
    'Hamsadhwani': 17,
    'Bilahari': 18,
    'Bhairavi': 19
}


In [None]:
# OK so now just create a random network and see how it does
# split up into test and train data
X = []
y = []
for val in test_data:
  X.append(val[0])
  y.append(raga_to_int[val[1]])

del test_data




In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                   random_state=104, 
                                   test_size=0.50, 
                                   shuffle=True)
X_train =np.array(X_train)
X_test =np.array(X_test)
y_train =np.array(y_train)
y_test =np.array(y_test)

In [15]:
print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'y_test shape: {y_test.shape}')

X_train shape: (782, 12, 2584)
X_test shape: (782, 12, 2584)
y_train shape: (782,)
y_test shape: (782,)


In [19]:
"""model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(12, 2584)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(20)
])"""

# use RNN
#X_train = X_train.reshape(782, 76, 408)
#X_test = X_test.reshape(782, 76, 408)

model = keras.Sequential()
# Add an Embedding layer expecting input vocab of size 1000, and
# output embedding dimension of size 64.
model.add(layers.Embedding(input_dim=1000, output_dim=64))

# Add a LSTM layer with 128 internal units.
#model.add(layers.LSTM(128))

# Add a Dense layer with 10 units.
model.add(layers.Dense(20))

In [20]:
predictions = model(X_train[:1]).numpy()
predictions

array([[[[ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         [ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         [ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         ...,
         [ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         [ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         [ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696]],

        [[ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         [ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         [ 0.01422721,  0.00372917, -0.05990169, ...,  0.03028311,
          -0.00171646, -0.02276696],
         ...,
         [ 0.01422721,  0.00372917

In [21]:
tf.nn.softmax(predictions).numpy()


array([[[[0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         ...,
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746]],

        [[0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.04870746],
         ...,
         [0.0505431 , 0.05001527, 0.0469319 , ..., 0.05136117,
          0.04974365, 0.0

In [22]:
#loss_fn = tf.keras.losses.categorical_crossentropy(from_logits=True)


In [23]:

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [24]:
model.fit(X_train, y_train, epochs=25)


Epoch 1/25




ValueError: ignored

In [None]:
# might have started to overfit on trarin accuracy after 1 epoch???
# try testing it 
model.evaluate(X_test,  y_test, verbose=2)


In [None]:
# ok so now, get the actual features of the dataset (Spectral centroid, spectral bandwidth, mel-spectrogram, Mel freq cepstral coeff, chroma)
# Get those into a pickle file, and see how it performs on the same neural network. If accuracy is low, then either modify network or add more data samples