In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Assuming a CSV with columns 'text' and 'label'
data = pd.read_csv(r"C:\Users\Lenovo\Desktop\Bytewise_Project\text_data\train_essays_v1.csv")

# Extract text and labels
texts = data['text'].tolist()
labels = data['label'].tolist()

# Tokenization and padding
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

max_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Convert labels to numerical format (assuming categorical labels)
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
numerical_labels = label_encoder.fit_transform(labels)

# Split data into training and validation sets
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(padded_sequences, numerical_labels, test_size=0.2, random_state=42)

# Reshape target data
y_train = np.expand_dims(y_train, axis=-1)

# Build the model
vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    Embedding(vocab_size, 64),
    SimpleRNN(32),
    Dense(vocab_size, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val))

Epoch 1/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 226ms/step - accuracy: 0.5602 - loss: 8.9216 - val_accuracy: 1.0000 - val_loss: 6.6099
Epoch 2/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 218ms/step - accuracy: 1.0000 - loss: 5.5650 - val_accuracy: 1.0000 - val_loss: 2.7244
Epoch 3/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 211ms/step - accuracy: 1.0000 - loss: 2.0171 - val_accuracy: 1.0000 - val_loss: 0.5731
Epoch 4/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 235ms/step - accuracy: 1.0000 - loss: 0.4205 - val_accuracy: 1.0000 - val_loss: 0.1804
Epoch 5/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 243ms/step - accuracy: 1.0000 - loss: 0.1529 - val_accuracy: 1.0000 - val_loss: 0.0982
Epoch 6/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 245ms/step - accuracy: 1.0000 - loss: 0.0881 - val_accuracy: 1.0000 - val_loss: 0.0652
Epoch 7/10
[1m44/44[0

<keras.src.callbacks.history.History at 0x297a5609e80>

In [12]:
from tensorflow.keras.layers import Bidirectional

model = Sequential([
    Embedding(vocab_size, 64),
    Bidirectional(SimpleRNN(32)),
    Dense(vocab_size, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val))

Epoch 1/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 325ms/step - accuracy: 0.7324 - loss: 8.3443 - val_accuracy: 1.0000 - val_loss: 3.5601
Epoch 2/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 306ms/step - accuracy: 1.0000 - loss: 2.0786 - val_accuracy: 1.0000 - val_loss: 0.1301
Epoch 3/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 324ms/step - accuracy: 1.0000 - loss: 0.0931 - val_accuracy: 1.0000 - val_loss: 0.0453
Epoch 4/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 304ms/step - accuracy: 1.0000 - loss: 0.0398 - val_accuracy: 1.0000 - val_loss: 0.0282
Epoch 5/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 290ms/step - accuracy: 1.0000 - loss: 0.0257 - val_accuracy: 1.0000 - val_loss: 0.0198
Epoch 6/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 365ms/step - accuracy: 1.0000 - loss: 0.0184 - val_accuracy: 1.0000 - val_loss: 0.0150
Epoch 7/10
[1m44/44[

<keras.src.callbacks.history.History at 0x297c145fdd0>

In [13]:
# Example evaluation
loss, accuracy = model.evaluate(X_val, y_val)
print('Accuracy:', accuracy)

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 1.0000 - loss: 0.0068
Accuracy: 1.0


In [23]:
from tensorflow.keras.layers import GlobalAveragePooling1D

# ... rest of your code

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(padded_sequences, numerical_labels, test_size=0.2, random_state=42)

# Reshape target data
y_train = np.expand_dims(y_train, axis=-1)

# ... rest of your model building code

# Model prediction and evaluation
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=-1)

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 117ms/step
