# In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from keras.layers import Dense, SimpleRNN


def preprocess_text(text, stop_words):
    """Normalize a piece of text before tokenization.

    The text is lowercased, every character that is neither a word character
    nor whitespace is dropped, and whitespace-separated tokens contained in
    ``stop_words`` are removed.

    Args:
        text: The string to clean.
        stop_words: Container of tokens to discard (membership-tested).

    Returns:
        The surviving tokens joined by single spaces.
    """
    # Lowercase first, then strip punctuation / special characters
    normalized = re.sub(r"[^\w\s]", "", text.lower())
    # Keep only the tokens that are not stopwords
    kept_tokens = [token for token in normalized.split() if token not in stop_words]
    return " ".join(kept_tokens)


def build_model(vocab_size, num_classes=5):
    """Build the (uncompiled) SimpleRNN classifier.

    Args:
        vocab_size: Number of features in each input row, i.e. the number of
            columns of the 2-D matrix the model is fed.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``Sequential`` model that accepts inputs of shape
        ``(batch, vocab_size)`` and outputs ``num_classes`` probabilities.
    """
    # Take every layer from the same package as ``Sequential`` so the model
    # is not assembled from two different Keras installations.
    from tensorflow.keras.layers import Dense, Input, Reshape, SimpleRNN

    model = Sequential()
    # The input width follows the vocabulary instead of a hard-coded size.
    model.add(Input(shape=(vocab_size,)))
    # Recurrent layers need (batch, timesteps, features); present each row
    # as a sequence with a single timestep.
    model.add(Reshape((1, vocab_size)))
    model.add(SimpleRNN(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    return model


def train_model(model, X_train, y_train, X_val, y_val, epochs=20, batch_size=32):
    """Compile ``model`` and fit it with early stopping on validation loss.

    Args:
        model: Model exposing ``compile`` and ``fit``.
        X_train: Training features.
        y_train: Integer class labels for ``X_train`` (the loss is
            ``sparse_categorical_crossentropy``).
        X_val: Validation features.
        y_val: Integer class labels for ``X_val``.
        epochs: Maximum number of training epochs.
        batch_size: Number of samples per gradient update.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    # Report shapes only; np.shape also works for plain sequences.
    print('X_train shape :', np.shape(X_train))
    print('X_val shape :', np.shape(X_val))
    print('y_train shape :', np.shape(y_train))
    print('y_val shape :', np.shape(y_val))

    early_stop = EarlyStopping(monitor='val_loss', patience=3)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # The callback must be handed to fit(), otherwise it never takes effect.
    return model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=[early_stop],
    )


def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Evaluate ``model`` on the training and test sets and report the metrics.

    Args:
        model: Model whose ``evaluate(X, y)`` returns a ``(loss, accuracy)``
            pair.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        A dict with the keys ``train_loss``, ``train_accuracy``,
        ``test_loss`` and ``test_accuracy``.
    """
    train_loss, train_acc = model.evaluate(X_train, y_train)
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print("Train Loss:", train_loss)
    print("Train Accuracy:", train_acc)
    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_acc)
    # Hand the numbers back so callers can use them, not just read stdout.
    return {
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "test_loss": test_loss,
        "test_accuracy": test_acc,
    }


def text_emotion_detection(dataset_path, stopwords_path, test_per_class=150):
    """Run the whole pipeline: load, clean, split, encode, train, evaluate.

    Args:
        dataset_path: Path of a CSV file with a ``Text`` and a ``Label``
            column.
        stopwords_path: Path of a text file with whitespace-separated
            stopwords.
        test_per_class: Number of trailing rows of every label that are held
            out as the test set.

    Returns:
        The trained model.

    Raises:
        ValueError: If some label has no rows left in the training split.
    """
    # Step 1: Load the dataset (rows without text or label cannot be used)
    data = pd.read_csv(dataset_path)
    data = data.dropna(subset=['Text', 'Label']).reset_index(drop=True)

    # Step 2: Load stopwords (context manager closes the file handle)
    with open(stopwords_path, 'r') as stopwords_file:
        stop_words = set(stopwords_file.read().split())

    # Step 3: Preprocess the text
    data['Text'] = data['Text'].astype(str).apply(lambda x: preprocess_text(x, stop_words))

    # Step 4: Split the dataset into train, validation, and test sets.
    # The last `test_per_class` rows of each class form the test set; the
    # remaining rows are divided into training and validation data.
    test_data = data.groupby('Label', sort=False).tail(test_per_class)
    remaining_data = data.drop(test_data.index)
    train_data, val_data = train_test_split(remaining_data, test_size=0.2, random_state=42)

    # The label encoder is fitted on the training labels, so every label has
    # to occur there; fail early with a clear message otherwise.
    missing_labels = sorted(set(data['Label']) - set(train_data['Label']), key=str)
    if missing_labels:
        raise ValueError(
            f"No training rows left for labels {missing_labels}; "
            f"lower test_per_class (currently {test_per_class})."
        )

    # Step 5: Tokenize the text (vocabulary comes from the training split
    # only, so validation/test words do not leak into it)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(train_data['Text'])
    train_data_encoded = tokenizer.texts_to_matrix(train_data['Text'], mode='binary')
    val_data_encoded = tokenizer.texts_to_matrix(val_data['Text'], mode='binary')
    test_data_encoded = tokenizer.texts_to_matrix(test_data['Text'], mode='binary')

    # Step 6: Encode emotion labels
    label_encoder = LabelEncoder()
    label_encoder.fit(train_data['Label'])
    train_labels_encoded = label_encoder.transform(train_data['Label'])
    val_labels_encoded = label_encoder.transform(val_data['Label'])
    test_labels_encoded = label_encoder.transform(test_data['Label'])

    # Step 7: Build the model
    vocab_size = len(tokenizer.word_index) + 1
    print(vocab_size)
    model = build_model(vocab_size)

    # Step 8: Train the model
    train_model(model, train_data_encoded, train_labels_encoded, val_data_encoded, val_labels_encoded)

    # Step 9: Evaluate the model
    evaluate_model(model, train_data_encoded, train_labels_encoded, test_data_encoded, test_labels_encoded)

    return model


# Example usage: run the whole pipeline on the dataset and stopword files below
dataset_path, stopwords_path = 'Dataset/Text_Emotion_Data.csv', 'Dataset/stopwords.txt'
text_emotion_detection(dataset_path=dataset_path, stopwords_path=stopwords_path)


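# Notebook output of the original run (caused by `np.unique.data['Label']`,
# which should be `np.unique(data['Label'])`):
# AttributeError: 'function' object has no attribute 'data'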