<a href="https://colab.research.google.com/github/nehakalakonda/Ragam-Classification/blob/main/ragam_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [2]:
data = pd.read_csv('/content/raag.csv', encoding_errors='replace')

input_sequences = data['PHRASES'].values
labels = data['RAGAM'].values

label_encoder = LabelEncoder()
integer_encoded_labels = label_encoder.fit_transform(labels)
one_hot_labels = to_categorical(integer_encoded_labels)

In [3]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(input_sequences)
sequences = tokenizer.texts_to_sequences(input_sequences)
max_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length)

In [4]:
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 50
embeddings = Embedding(vocab_size, embedding_dim)

In [5]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(len(set(labels)), activation='softmax'))



In [6]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(padded_sequences, one_hot_labels, epochs=10, validation_split=0.2)

Epoch 1/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - accuracy: 0.5827 - loss: 1.0623 - val_accuracy: 0.0000e+00 - val_loss: 1.5929
Epoch 2/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6053 - loss: 0.9195 - val_accuracy: 0.0000e+00 - val_loss: 1.6514
Epoch 3/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6670 - loss: 0.7207 - val_accuracy: 0.2222 - val_loss: 1.0677
Epoch 4/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.7491 - loss: 0.6037 - val_accuracy: 0.4921 - val_loss: 0.9055
Epoch 5/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8496 - loss: 0.4617 - val_accuracy: 0.6032 - val_loss: 0.7630
Epoch 6/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.9464 - loss: 0.2572 - val_accuracy: 0.8730 - val_loss: 0.3965
Epoch 7/10
[1m8/8[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7abe5e410590>

In [7]:
loss, accuracy = model.evaluate(padded_sequences,one_hot_labels)
print(f'Test Accuracy: {accuracy}')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9969 - loss: 0.0249     
Test Accuracy: 0.9903537034988403


In [8]:
single_input_sequence = "Ni Ri Pa Ma Ga Ri Ga Da Pa Ma Ga Ri Ga Ma Pa Ma Ga Ri Sa"

sequence = tokenizer.texts_to_sequences([single_input_sequence])
padded_sequence = pad_sequences(sequence, maxlen=max_length)

prediction = model.predict(padded_sequence)

predicted_label = list(set(labels))[prediction.argmax()]

print(f"Input Sequence: {single_input_sequence}")
print(f"Predicted Ragam: {predicted_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
Input Sequence: Ni Ri Pa Ma Ga Ri Ga Da Pa Ma Ga Ri Ga Ma Pa Ma Ga Ri Sa
Predicted Ragam: HAMSDHVANI


In [9]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.29.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [10]:
import gradio as gr
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

data = pd.read_csv('/content/raag.csv', encoding_errors='replace')
input_sequences = data['PHRASES'].values
labels = data['RAGAM'].values

label_encoder = LabelEncoder()
integer_encoded_labels = label_encoder.fit_transform(labels)
one_hot_labels = to_categorical(integer_encoded_labels)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(input_sequences)
sequences = tokenizer.texts_to_sequences(input_sequences)
max_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length)
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 50

model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(len(set(labels)), activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(padded_sequences, one_hot_labels, epochs=10, validation_split=0.2)
loss, accuracy = model.evaluate(padded_sequences, one_hot_labels)
print(f'Test Accuracy: {accuracy}')

def predict_ragam(input_sequence):
    sequence = tokenizer.texts_to_sequences([input_sequence])
    padded_sequence = pad_sequences(sequence, maxlen=max_length)
    prediction = model.predict(padded_sequence)
    predicted_label = label_encoder.inverse_transform([prediction.argmax()])[0]
    return predicted_label

interface = gr.Interface(
    fn=predict_ragam,
    inputs="text",
    outputs="text",
    title="Ragam Prediction",
    description="Enter a musical phrase to predict the corresponding ragam."
)

interface.launch()



Epoch 1/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 51ms/step - accuracy: 0.4331 - loss: 1.0764 - val_accuracy: 0.0000e+00 - val_loss: 1.5000
Epoch 2/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.6590 - loss: 0.8921 - val_accuracy: 0.0000e+00 - val_loss: 1.5138
Epoch 3/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6600 - loss: 0.7188 - val_accuracy: 0.3333 - val_loss: 1.0233
Epoch 4/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7564 - loss: 0.5738 - val_accuracy: 0.6508 - val_loss: 0.7937
Epoch 5/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8997 - loss: 0.4088 - val_accuracy: 0.8413 - val_loss: 0.5901
Epoch 6/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9398 - loss: 0.2551 - val_accuracy: 0.8571 - val_loss: 0.4360
Epoch 7/10
[1m8/8[0m [32m━━━━━━━━━━

