<a href="https://colab.research.google.com/github/mahekmistry24/AI-assistant-for-your-desktop/blob/main/Musical_Instruments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
import os
import numpy as np
import math, random
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio

In [8]:
# Root folder of the dataset; each instrument has its own sub-folder of recordings.
# Kept in one place so that moving the data only needs a single edit.
DATA_DIR = "/content/Musical_Instrument_Data"

sitar_files = f"{DATA_DIR}/sitar_files"
violin_files = f"{DATA_DIR}/violin_files"
mohanveena_files = f"{DATA_DIR}/mohanveena_files"

# Pre-Processing

In [9]:
class AudioProcessing():

  """
  Stateless helpers that turn an audio file into a fixed-size mel spectrogram.

  Every method works on a ``(data, sampling_rate)`` tuple in which ``data`` is a
  2-D tensor laid out as (channels, samples).
  Tensors are like multi-dimensional arrays with a uniform type
  """

  @staticmethod
  def open(audio_file_path):
    """
    open method is used to load the audio file.

    Returns the ``(data, sampling_rate)`` tuple produced by ``torchaudio.load``.
    """
    data, sampling_rate = torchaudio.load(audio_file_path)
    return (data, sampling_rate)

  @staticmethod
  def rechannel(audio_file, new_channel):
    """
    rechannel method: signals can either be mono or stereo. This method is used to get all
    our signals in the same dimensions.
    Link for difference between mono/stereo : https://www.rowkin.com/blogs/rowkin/mono-vs-stereo-sound-whats-the-big-difference

    audio_file  : ``(data, sampling_rate)`` tuple
    new_channel : number of channels the result must have (1 = mono, 2 = stereo)

    Returns the input tuple unchanged when it already has ``new_channel`` channels,
    otherwise a new ``(data, sampling_rate)`` tuple with exactly ``new_channel`` channels.
    Raises ValueError when ``new_channel`` is smaller than 1.
    """
    if (new_channel < 1):
      raise ValueError("new_channel must be at least 1")

    data, sampling_rate = audio_file
    num_channels = data.shape[0]

    if (num_channels == new_channel):
      return audio_file

    if (num_channels > new_channel):
      # too many channels: keep the leading ones (stereo to mono keeps the first channel)
      resig = data[:new_channel, :]
    else:
      # too few channels: repeat the existing ones in order until there are enough
      # (mono to stereo duplicates the single channel)
      repeats = -(-new_channel // num_channels)   # ceiling division
      resig = data.repeat(repeats, 1)[:new_channel, :]

    return ((resig, sampling_rate))

  @staticmethod
  def resample(audio, new_sampling_rate):
    """
    resampling method: our audio signals have different sampling rates as well. Hence, we need
    to standardise the sampling rate, because different sampling rates result in different
    array sizes for the same duration.

    audio             : ``(data, sampling_rate)`` tuple
    new_sampling_rate : target sampling rate

    Returns the input tuple unchanged when the rate already matches, otherwise
    ``(resampled_data, new_sampling_rate)``.
    """
    data, sampling_rate = audio

    if (sampling_rate == new_sampling_rate):
      return audio

    # Resample acts on the last (time) dimension, so one transform handles every channel
    resig = torchaudio.transforms.Resample(sampling_rate, new_sampling_rate)(data)

    return ((resig, new_sampling_rate))

  @staticmethod
  def pad_trunc(audio, max_ms):
    """
    pad_trunc method: Our audio files are bound to be of different lengths of time. This also
    needs to be standardised. This method either extends the length by padding with silence
    (Zero Padding) or reduces the length by truncating.

    audio  : ``(data, sampling_rate)`` tuple
    max_ms : target length in milliseconds

    Returns ``(data, sampling_rate)`` where data has exactly ``sampling_rate//1000 * max_ms``
    samples per channel.
    """
    data, sampling_rate = audio
    num_rows, data_len = data.shape
    # NOTE: the integer division happens first, so the rate is rounded down to whole kHz
    # (44100 Hz is treated as 44 samples per ms). Left as is on purpose: changing the clip
    # length may change the spectrogram width that the rest of the notebook hard-codes.
    max_len = sampling_rate//1000 * max_ms

    if (data_len > max_len):
      # truncate to given length
      data = data[:,:max_len]

    elif (data_len < max_len):
      # split the missing samples randomly between the start and the end of the audio
      # (uses the global `random` state, so the split differs from call to call unless seeded)
      pad_begin_len = random.randint(0, max_len - data_len)
      pad_end_len = max_len - data_len - pad_begin_len

      # Pad with 0s - Zero Padding
      pad_begin = torch.zeros((num_rows, pad_begin_len))
      pad_end = torch.zeros((num_rows, pad_end_len))

      data = torch.cat((pad_begin, data, pad_end), 1)

    return (data, sampling_rate)

  @staticmethod
  def spectro_gram(aud, n_mels=64, n_fft=1024, hop_len=None):
    """
    spectrogram method: convert the signal into a mel spectrogram expressed in decibels.
    Link for short explanation: https://colab.research.google.com/drive/1UgxygdrBfq7UGjhTCc9oupA-CyKFGhGa#scrollTo=733XclBe9Vgn

    aud     : ``(data, sampling_rate)`` tuple
    n_mels  : number of mel bands
    n_fft   : FFT size
    hop_len : hop length forwarded to ``MelSpectrogram`` (None lets torchaudio pick its default)

    Returns the decibel-scaled spectrogram tensor.
    """
    sig,sr = aud
    top_db = 80   # passed to AmplitudeToDB as its top_db argument

    # spec has shape [channel, n_mels, time], where channel is mono, stereo etc
    spec = transforms.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)

    # Convert to decibels
    spec = transforms.AmplitudeToDB(top_db=top_db)(spec)
    return (spec)



In [48]:
# Standard audio format shared by every clip; data_processing reads these at call time
new_channel = 2                   #making all stereo sounds
new_sampling_rate = 44100         #permanently setting a standard rate
duration = 6000                   #setting a standard audio length of 6s, 6000ms


def data_processing(folder):
  """
  Convert every audio file directly inside `folder` into a mel spectrogram.

  Each file is loaded, resampled to `new_sampling_rate`, rechanneled to `new_channel`
  channels, padded/truncated to `duration` milliseconds and finally turned into a
  spectrogram with the AudioProcessing helpers.

  folder : path of the folder of one musical instrument, for ex: sitar

  Returns a list with one spectrogram tensor per file, ordered by file name.
  """
  spectrograms = []
  # os.listdir gives no ordering guarantee; sorting keeps the sample order reproducible
  for file_name in sorted(os.listdir(folder)):
    file_path = os.path.join(folder, file_name)

    # skip sub-folders (for ex: a .ipynb_checkpoints directory) - they cannot be loaded as audio
    if not os.path.isfile(file_path):
      continue

    audio = AudioProcessing.open(file_path)
    resampled_audio = AudioProcessing.resample(audio, new_sampling_rate)
    rechanneled_audio = AudioProcessing.rechannel(resampled_audio, new_channel)
    padded_audio = AudioProcessing.pad_trunc(rechanneled_audio, duration)
    spectro_gram = AudioProcessing.spectro_gram(padded_audio, n_mels=64, n_fft=1024, hop_len=None)
    spectrograms.append(spectro_gram)
  return spectrograms

In [11]:
# One stacked numpy array per instrument; the class id is fixed by the order used below.

# cls id = 0
violin_spectrograms = data_processing(violin_files)
violin_arr_list = [spec.numpy() for spec in violin_spectrograms]
violin_arr = np.array(violin_arr_list)
print(violin_arr.shape)

# cls id = 1
mohanveena_spectrograms = data_processing(mohanveena_files)
mohanveena_arr_list = [spec.numpy() for spec in mohanveena_spectrograms]
mohanveena_arr = np.array(mohanveena_arr_list)
print(mohanveena_arr.shape)

# cls id = 2
sitar_spectrograms = data_processing(sitar_files)
sitar_arr_list = [spec.numpy() for spec in sitar_spectrograms]
sitar_arr = np.array(sitar_arr_list)
print(sitar_arr.shape)

  s = torchaudio.io.StreamReader(src, format, None, buffer_size)


(6, 2, 64, 516)
(10, 2, 64, 516)
(10, 2, 64, 516)


In [12]:
# One class id per clip, sized from the arrays themselves so the labels can never
# drift out of sync with the number of files in each folder.
y1 = np.zeros(len(violin_arr))         # violin     -> 0
y2 = np.ones(len(mohanveena_arr))      # mohanveena -> 1
y3 = np.full(len(sitar_arr), 2)        # sitar      -> 2
print(y1.shape)
print(y2.shape)
print(y3.shape)
y = np.concatenate((y1,y2,y3), axis=0)
print(y.shape)
# column vector (m, 1); -1 lets numpy work out the number of rows
y = y.reshape(-1,1)
print(y.shape)

(6,)
(10,)
(10,)
(26,)
(26, 1)


In [13]:
# stack the three instruments along the sample axis, in class-id order (0, 1, 2)
class_arrays = (violin_arr, mohanveena_arr, sitar_arr)
x = np.concatenate(class_arrays, axis=0)
print(x.shape)

(26, 2, 64, 516)


# Artificial Neural Networks

In [14]:
from keras import models, layers

In [15]:
# number of values in one flattened spectrogram: channels * mel bands * time frames
n_features = 2*64*516

# Fully connected classifier: flattened spectrogram -> 512 -> 128 -> 3 class probabilities.
# The input size is declared once through an explicit Input layer; only the first layer of a
# Sequential model needs it, so the hidden layers no longer carry an input_shape argument.
network_model = models.Sequential()
network_model.add(layers.Input(shape=(n_features,)))
network_model.add(layers.Dense(512, activation="leaky_relu"))
network_model.add(layers.Dense(128, activation="relu"))
network_model.add(layers.Dense(3, activation="softmax"))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# print the layer-by-layer overview (output shapes and parameter counts) of the network
network_model.summary()

In [17]:
# Adam optimiser with cross-entropy on the one-hot labels; accuracy is reported while training
network_model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [18]:
# flatten every spectrogram into one feature row: (m, ...) -> (m, nx);
# the sizes are taken from x itself instead of being hard-coded
x = x.reshape(len(x), -1)
# fixed rescale by 1/255 (a constant factor, not a per-feature standardisation);
# the prediction cells apply the same factor to new clips
x = x.astype(float)/255
print(x.shape)
print(y.shape)

(26, 66048)
(26, 1)


In [19]:
# show the class ids (one row per clip) before they are one-hot encoded
y


array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.],
       [2.]])

In [20]:
##preprocessing the labels data
from tensorflow.keras.utils import to_categorical

#one hot encoding; num_classes is pinned to the 3 instruments so the label width always
#matches the 3-unit softmax output, even if one class has no samples
y = to_categorical(y, num_classes=3)


In [21]:
# show the labels after one-hot encoding (one column per class)
y

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])

In [22]:
# x is (m, nx) and y is (m, no of classes); both must agree on m before training
print(x.shape, y.shape, sep="\n")

(26, 66048)
(26, 3)


In [23]:
# train for 15 epochs on every clip; no validation data is passed, so the accuracy
# printed per epoch is measured on the training clips themselves
network_model.fit(x, y, epochs=15)

Epoch 1/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.3077 - loss: 1.1058
Epoch 2/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.3846 - loss: 6.1595
Epoch 3/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.3846 - loss: 13.4928
Epoch 4/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.6154 - loss: 2.4725
Epoch 5/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step - accuracy: 0.6154 - loss: 2.7969
Epoch 6/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.6154 - loss: 3.1144
Epoch 7/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.6538 - loss: 2.2553
Epoch 8/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - accuracy: 0.8846 - loss: 0.3432
Epoch 9/15
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x79d861351ca0>

# LDA


In [24]:
import sklearn
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

In [25]:
print(x.shape)   # (m, nx): one row per clip, one column per feature - the layout scikit-learn expects

(26, 66048)


In [26]:
# Integer class ids for scikit-learn (y holds one-hot vectors by now), sized from the
# arrays themselves so they always match the number of clips per instrument.
y_1 = np.zeros(len(violin_arr))         # violin     -> 0
y_2 = np.ones(len(mohanveena_arr))      # mohanveena -> 1
y_3 = np.full(len(sitar_arr), 2)        # sitar      -> 2
print(y_1.shape)
print(y_2.shape)
print(y_3.shape)
y_ = np.concatenate((y_1,y_2,y_3), axis=0)
print(y_.shape)
# column vector (m, 1); -1 lets numpy work out the number of rows
y_ = y_.reshape(-1,1)
print(y_.shape)

(6,)
(10,)
(10,)
(26,)
(26, 1)


In [27]:
# tabular views of the feature matrix and of the integer labels
x_df, y_df = pd.DataFrame(x), pd.DataFrame(y_)

In [28]:
# first rows of the feature table (one column per flattened spectrogram value)
x_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,66038,66039,66040,66041,66042,66043,66044,66045,66046,66047
0,0.058776,-0.037989,-0.061371,-0.104502,-0.101425,-0.044822,-0.016779,-0.032939,-0.059585,-0.055484,...,-0.150176,-0.150176,-0.150176,-0.150176,-0.150176,-0.150176,-0.150176,-0.150176,-0.150176,-0.150176
1,0.025032,-0.090484,-0.087138,-0.092207,-0.076259,-0.079505,-0.069672,-0.117736,-0.098156,-0.097636,...,-0.155297,-0.155297,-0.155297,-0.155297,-0.155297,-0.155297,-0.155297,-0.155297,-0.155297,-0.147607
2,0.073489,-0.068629,-0.057619,-0.117497,-0.046673,-0.052807,-0.056299,-0.100292,-0.044439,-0.036422,...,-0.162598,-0.162598,-0.162598,-0.162598,-0.162598,-0.162598,-0.162598,-0.162598,-0.162598,-0.162598
3,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,...,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303,-0.19303
4,0.004726,-0.031086,-0.06044,-0.077444,-0.079415,-0.038561,-0.046101,-0.074131,-0.065917,-0.065552,...,-0.160752,-0.160752,-0.160752,-0.160752,-0.160752,-0.160752,-0.160752,-0.160752,-0.160752,-0.15178


In [29]:
# first rows of the label table
y_df.head()

Unnamed: 0,0
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [30]:
LDA = lda(n_components =2)   # number of discriminant axes to keep: classes - 1 == 3 - 1 == 2

In [31]:
# scikit-learn expects 1-D labels; flattening the single label column avoids the
# column-vector conversion warning raised when a (m, 1) frame is passed
LDA.fit(x_df, y_df.values.ravel())

  y = column_or_1d(y, warn=True)


In [32]:
z = LDA.transform(x_df)  # projection of every clip onto the 2 discriminant axes (not statistical z-scores)
z

array([[-2.16483512, -0.81585845],
       [-3.31971084, -2.41994537],
       [-3.01657874, -1.83687416],
       [-0.58103568,  0.31325782],
       [-2.50636491, -2.47219242],
       [-2.63602415, -1.23451243],
       [ 1.6998619 , -0.78779024],
       [ 1.22095651,  0.0909287 ],
       [ 2.11177483, -0.5089531 ],
       [ 0.83503866,  0.10734957],
       [ 2.08864976, -0.07283334],
       [ 2.89741995, -0.62998811],
       [ 3.36585067, -2.12102241],
       [ 3.49081587, -0.66673914],
       [ 1.29896878,  0.88171403],
       [ 3.81908612, -0.36077106],
       [-0.79276098, -0.30940174],
       [-0.95519162,  0.20147901],
       [-1.33466304,  3.2734305 ],
       [-1.29662993,  1.77693745],
       [-2.82664839,  0.98257703],
       [ 0.13023893,  2.04228641],
       [-0.89288545, -0.12022049],
       [-0.81306595,  1.18718378],
       [ 0.89444248,  1.10383523],
       [-0.71670967,  2.39612292]])

In [33]:
# predict on the same clips the model was fitted on (training-set predictions)
y_pred = LDA.predict(x_df)
y_pred

array([0., 0., 0., 2., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2.])

In [34]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [35]:
# rows are the true classes, columns the predicted ones
cm = confusion_matrix(y_df,y_pred)
print(cm)

[[ 5  0  1]
 [ 0 10  0]
 [ 0  0 10]]


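LDA before PCA: accuracy of LDA fitted directly on the flattened spectrograms, measured on the same clips it was fitted on.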

In [36]:
accuracy_score(y_df ,y_pred)    # 25/26 = 0.96, measured on the clips the model was fitted on

0.9615384615384616

# PCA


In [37]:
from sklearn.decomposition import PCA

In [38]:
# keep only the first principal component of the flattened spectrograms
pca = PCA(n_components=1)
pca.fit(x_df)

print(f"pca.components_:\n{pca.components_}")  # principal axis (eigenvector)
print(f"\n\npca.explained_variance_:\n{pca.explained_variance_}")    # variance along that axis (eigenvalue)
print(f"\n\npca.explained_variance_ratio_:\n{pca.explained_variance_ratio_}") # share of the total variance it explains
z_scores = pca.transform(x_df)
print(f"\n\nz_scores:\n{z_scores}")   # coordinate of every clip along the component (not statistical z-scores)

pca.components_:
[[0.00862502 0.00504649 0.0046949  ... 0.00136462 0.00136462 0.0018374 ]]


pca.explained_variance_:
[59.74638107]


pca.explained_variance_ratio_:
[0.31934678]


z_scores:
[[ 9.06086007e+00]
 [ 1.12885525e+01]
 [ 1.26704144e+01]
 [-5.07426605e+00]
 [ 1.33004010e+01]
 [ 1.01067173e+01]
 [ 3.31310199e+00]
 [-1.10137718e+01]
 [-1.14654181e+01]
 [-5.82026387e+00]
 [-1.78881591e+00]
 [ 4.54272095e-02]
 [-1.01957583e+01]
 [-1.54970208e+01]
 [-5.48871494e-02]
 [-1.04758975e+01]
 [ 3.42328587e+00]
 [-2.28555659e+00]
 [ 2.57897171e+00]
 [-4.81058350e-01]
 [ 3.02116874e+00]
 [ 5.42736270e+00]
 [-1.06417626e+00]
 [ 7.13382683e-01]
 [-3.02971023e-03]
 [ 2.70274207e-01]]


In [39]:
# wrap the single PCA component in a DataFrame so it can be used as the new feature table
new_x = pd.DataFrame(z_scores)
new_x

Unnamed: 0,0
0,9.06086
1,11.288553
2,12.670414
3,-5.074266
4,13.300401
5,10.106717
6,3.313102
7,-11.013772
8,-11.465418
9,-5.820264


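LDA after PCA: the same LDA classifier, now fitted on the single principal component instead of the raw features.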

In [40]:
# one feature column per clip is left after PCA
new_x.shape

(26, 1)

In [41]:
# With a single input feature LDA can produce at most one discriminant axis.
LDA = lda(n_components =1)
# scikit-learn expects 1-D labels; ravel() avoids the column-vector conversion warning
LDA.fit(new_x, y_.ravel())
z = LDA.transform(new_x)
print(z)
# predictions are made on the same clips the model was fitted on
y_pred = LDA.predict(new_x)
print(y_pred)

[[ 1.71184467e+00]
 [ 2.13271680e+00]
 [ 2.39378836e+00]
 [-9.58667856e-01]
 [ 2.51281009e+00]
 [ 1.90943575e+00]
 [ 6.25935722e-01]
 [-2.08080319e+00]
 [-2.16613155e+00]
 [-1.09960728e+00]
 [-3.37956326e-01]
 [ 8.58244425e-03]
 [-1.92625804e+00]
 [-2.92781175e+00]
 [-1.03696860e-02]
 [-1.97918399e+00]
 [ 6.46752474e-01]
 [-4.31804248e-01]
 [ 4.87238402e-01]
 [-9.08850999e-02]
 [ 5.70781533e-01]
 [ 1.02537749e+00]
 [-2.01052046e-01]
 [ 1.34777530e-01]
 [-5.72395255e-04]
 [ 5.10622013e-02]]
[0. 0. 0. 1. 0. 0. 2. 1. 1. 1. 2. 2. 1. 1. 2. 1. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2.]


  y = column_or_1d(y, warn=True)


In [42]:
# rows are the true classes, columns the predicted ones
cm = confusion_matrix(y_,y_pred)
cm

array([[ 5,  1,  0],
       [ 0,  6,  4],
       [ 0,  0, 10]])

In [43]:
# accuracy of LDA on the single PCA component, measured on the clips it was fitted on
accuracy_score(y_ ,y_pred)

0.8076923076923077

To test your model with a new audio file, you'll need to:

1.  **Upload the audio file:** Click the folder icon in the left sidebar, then the "Upload to session storage" icon, and select your audio file.
2.  **Specify the path to your uploaded audio file.**
3.  **Preprocess the audio file** using the same steps as the training data.
4.  **Use the trained neural network model to predict the class** of the audio file.

**Replace the placeholder path below with the actual path to your uploaded audio file.**

In [54]:

# path of the uploaded clip to classify
new_audio_file_path = '/content/veena_classical-80561.mp3'

Now, let's preprocess the uploaded audio file using the `AudioProcessing` class you defined earlier.

In [55]:
# Same pipeline as the training clips: load -> resample -> rechannel -> pad/truncate -> mel spectrogram
new_audio = AudioProcessing.open(new_audio_file_path)
new_resampled_audio = AudioProcessing.resample(new_audio, new_sampling_rate)
new_rechanneled_audio = AudioProcessing.rechannel(new_resampled_audio, new_channel)
new_padded_audio = AudioProcessing.pad_trunc(new_rechanneled_audio, duration)
new_spectro_gram = AudioProcessing.spectro_gram(
    new_padded_audio,
    n_mels=64,
    n_fft=1024,
    hop_len=None,
)

# One flattened row for a single sample, scaled by the same 1/255 factor as the training matrix
new_spectrogram_arr = new_spectro_gram.numpy()
new_spectrogram_reshaped = new_spectrogram_arr.reshape(1, 2*64*516).astype(float)/255

Finally, let's use the trained `network_model` to predict the class of the uploaded audio file.

In [56]:
# Run the trained network on the single flattened spectrogram
prediction = network_model.predict(new_spectrogram_reshaped)

# Position of the highest class score
predicted_class_index = np.argmax(prediction)

# Class ids follow the label order used for training: 0, 1, 2
class_mapping = dict(enumerate(('violin', 'mohanveena', 'sitar')))

# Translate the index into an instrument name, falling back to 'unknown'
predicted_instrument = class_mapping.get(predicted_class_index, 'unknown')

print(f"The predicted instrument is: {predicted_instrument}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
The predicted instrument is: mohanveena


In [57]:

# path of the next uploaded clip to classify
new_audio_file_path = '/content/veena_recording-17856.mp3'

In [58]:
# Same pipeline as the training clips: load -> resample -> rechannel -> pad/truncate -> mel spectrogram
new_audio = AudioProcessing.open(new_audio_file_path)
new_resampled_audio = AudioProcessing.resample(new_audio, new_sampling_rate)
new_rechanneled_audio = AudioProcessing.rechannel(new_resampled_audio, new_channel)
new_padded_audio = AudioProcessing.pad_trunc(new_rechanneled_audio, duration)
new_spectro_gram = AudioProcessing.spectro_gram(
    new_padded_audio,
    n_mels=64,
    n_fft=1024,
    hop_len=None,
)

# One flattened row for a single sample, scaled by the same 1/255 factor as the training matrix
new_spectrogram_arr = new_spectro_gram.numpy()
new_spectrogram_reshaped = new_spectrogram_arr.reshape(1, 2*64*516).astype(float)/255

In [59]:
# Run the trained network on the single flattened spectrogram
prediction = network_model.predict(new_spectrogram_reshaped)

# Position of the highest class score
predicted_class_index = np.argmax(prediction)

# Class ids follow the label order used for training: 0, 1, 2
class_mapping = dict(enumerate(('violin', 'mohanveena', 'sitar')))

# Translate the index into an instrument name, falling back to 'unknown'
predicted_instrument = class_mapping.get(predicted_class_index, 'unknown')

print(f"The predicted instrument is: {predicted_instrument}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
The predicted instrument is: mohanveena


In [66]:

# path of the next uploaded clip to classify
new_audio_file_path = '/content/violin-improv-73437.mp3'

In [67]:
# Same pipeline as the training clips: load -> resample -> rechannel -> pad/truncate -> mel spectrogram
new_audio = AudioProcessing.open(new_audio_file_path)
new_resampled_audio = AudioProcessing.resample(new_audio, new_sampling_rate)
new_rechanneled_audio = AudioProcessing.rechannel(new_resampled_audio, new_channel)
new_padded_audio = AudioProcessing.pad_trunc(new_rechanneled_audio, duration)
new_spectro_gram = AudioProcessing.spectro_gram(
    new_padded_audio,
    n_mels=64,
    n_fft=1024,
    hop_len=None,
)

# One flattened row for a single sample, scaled by the same 1/255 factor as the training matrix
new_spectrogram_arr = new_spectro_gram.numpy()
new_spectrogram_reshaped = new_spectrogram_arr.reshape(1, 2*64*516).astype(float)/255

In [68]:
# Run the trained network on the single flattened spectrogram
prediction = network_model.predict(new_spectrogram_reshaped)

# Position of the highest class score
predicted_class_index = np.argmax(prediction)

# Class ids follow the label order used for training: 0, 1, 2
class_mapping = dict(enumerate(('violin', 'mohanveena', 'sitar')))

# Translate the index into an instrument name, falling back to 'unknown'
predicted_instrument = class_mapping.get(predicted_class_index, 'unknown')

print(f"The predicted instrument is: {predicted_instrument}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
The predicted instrument is: violin


In [84]:

# path of the next uploaded clip to classify
new_audio_file_path = '/content/impro-indian-echo-24411.mp3'

In [85]:
# Same pipeline as the training clips: load -> resample -> rechannel -> pad/truncate -> mel spectrogram
new_audio = AudioProcessing.open(new_audio_file_path)
new_resampled_audio = AudioProcessing.resample(new_audio, new_sampling_rate)
new_rechanneled_audio = AudioProcessing.rechannel(new_resampled_audio, new_channel)
new_padded_audio = AudioProcessing.pad_trunc(new_rechanneled_audio, duration)
new_spectro_gram = AudioProcessing.spectro_gram(
    new_padded_audio,
    n_mels=64,
    n_fft=1024,
    hop_len=None,
)

# One flattened row for a single sample, scaled by the same 1/255 factor as the training matrix
new_spectrogram_arr = new_spectro_gram.numpy()
new_spectrogram_reshaped = new_spectrogram_arr.reshape(1, 2*64*516).astype(float)/255

In [86]:
# Run the trained network on the single flattened spectrogram
prediction = network_model.predict(new_spectrogram_reshaped)

# Position of the highest class score
predicted_class_index = np.argmax(prediction)

# Class ids follow the label order used for training: 0, 1, 2
class_mapping = dict(enumerate(('violin', 'mohanveena', 'sitar')))

# Translate the index into an instrument name, falling back to 'unknown'
predicted_instrument = class_mapping.get(predicted_class_index, 'unknown')

print(f"The predicted instrument is: {predicted_instrument}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
The predicted instrument is: sitar
