In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, GRU, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# Load the dataset
df = pd.read_csv('Chatgptdataset.csv', encoding='latin1')

unique_categories = df['category'].unique()
print(unique_categories)
print(df.columns)

# 1. Data Preparation:

# Tokenize the text:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Base_Reviews'])
X = tokenizer.texts_to_sequences(df['Base_Reviews'])
maxlen = max(len(x) for x in X)
X = pad_sequences(X, maxlen=maxlen)

# Convert categories to integers:
categories = ['frustration', 'disappointment', 'anger', 'distrust', 'disgust', 'fear', 'confusion', 'sadness']
y = np.array([categories.index(cat) for cat in df['category']])

# One-hot encode the labels:
y = to_categorical(y)

# 2. Model creation, training, and evaluation:

def train_and_evaluate_model(params):
    model = Sequential()
    model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=params['embedding_dim'], input_length=maxlen))
    model.add(GRU(params['gru_units']))  # Replacing LSTM with GRU
    model.add(Dropout(params['dropout']))
    model.add(Dense(len(categories), activation='softmax'))  # Softmax activation for multi-class
    model.compile(optimizer=params['optimizer'], loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X, y, epochs=params['epochs'], batch_size=32, verbose=0, validation_split=0.2)
    _, accuracy = model.evaluate(X, y, verbose=0)
    return accuracy

# 3. Hyperparameter tuning:
param_grid = {
    'embedding_dim': [50, 100],
    'gru_units': [32, 64],  # Replacing lstm_units with gru_units
    'dropout': [0.2, 0.5],
    'optimizer': ['adam', 'rmsprop'],
    'epochs': [5, 10]
}

best_score = 0
best_params = None

for embedding_dim in param_grid['embedding_dim']:
    for gru_units in param_grid['gru_units']:  # Iterating over gru_units
        for dropout in param_grid['dropout']:
            for optimizer in param_grid['optimizer']:
                for epochs in param_grid['epochs']:
                    params = {
                        'embedding_dim': embedding_dim,
                        'gru_units': gru_units,  # Using gru_units instead of lstm_units
                        'dropout': dropout,
                        'optimizer': optimizer,
                        'epochs': epochs
                    }
                    score = train_and_evaluate_model(params)
                    if score > best_score:
                        best_score = score
                        best_params = params

print("Best score:", best_score)
print("Best parameters:", best_params)


['frustration' 'disappointment' 'anger' 'distrust' 'disgust' 'fear'
 'confusion' 'sadness']
Index(['Reviewer_Name', 'Stars', 'Title_of_Review', 'Base_Review1',
       'Base_Reviews', 'Sentiment_Score', 'Sentiment', 'sentiment', 'emotion',
       'category', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12'],
      dtype='object')
Best score: 0.8641414046287537
Best parameters: {'embedding_dim': 100, 'gru_units': 64, 'dropout': 0.2, 'optimizer': 'adam', 'epochs': 10}
