# **USING LSTM AND GENAI TO CREATE A FAKE REVIEW DETECTION AND FEEDBACK SYSTEM**

In [None]:
import pandas as pd

# Read the review dataset from the CSV file in the working directory.
df = pd.read_csv("fake_reviews_dataset.csv")

# Show how many missing values each column contains.
missing_per_column = df.isna().sum()
print(missing_per_column)

# Give the review-text column a clearer name, then preview the first 20 rows.
df = df.rename(columns={"text_": "review"})
df.head(20)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Count the reviews per rating value separately for each label.
or_ratings = df[df['label'] == 'OR']['rating'].value_counts().sort_index()
cg_ratings = df[df['label'] == 'CG']['rating'].value_counts().sort_index()

# Align both series on the union of observed rating values. Without this, a
# rating that occurs under only one label gives the two bar arrays different
# lengths (matplotlib raises a shape error) or pairs bars with the wrong rating.
all_ratings = or_ratings.index.union(cg_ratings.index)
or_ratings = or_ratings.reindex(all_ratings, fill_value=0)
cg_ratings = cg_ratings.reindex(all_ratings, fill_value=0)

# One bar group per rating; the two labels sit side by side within a group.
bar_width = 0.35
index = np.arange(len(all_ratings))

# Plotting the bars
bar1 = plt.bar(index, or_ratings.values, bar_width, color='blue', label='OR')
bar2 = plt.bar(index + bar_width, cg_ratings.values, bar_width, color='green', label='CG')

# Setting the title and labels
plt.title('Distribution of Ratings by Label')
plt.xlabel('Rating')
plt.ylabel('Number of Reviews')
# Centre each tick between the two bars of its group.
plt.xticks(index + bar_width / 2, all_ratings)
plt.legend()

# Displaying the plot
plt.tight_layout()
plt.show()


In [None]:
# List the distinct values that occur in the `category` column.
df["category"].unique()

In [None]:
# List the distinct values that occur in the `label` column.
df["label"].unique()

In [None]:
import matplotlib.pyplot as plt

# Tally how many reviews carry each label.
category_data = df["label"].value_counts()

# Draw the tallies as a bar chart using the object-oriented Matplotlib API.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(category_data.index, category_data.values, color='blue')
plt.xticks(rotation=45, ha="right")  # acts on the current axes, i.e. `ax`
ax.set_title("Distribution of Review Labels")
ax.set_xlabel("Review Labels")
ax.set_ylabel("Number of Reviews")
fig.tight_layout()
plt.show()

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Count how many reviews fall under each value of the `category` column.
category_data = df["category"].value_counts()

# Bar chart of the counts, sized, titled and labelled the same way as the
# label-distribution chart so the two figures read consistently.
plt.figure(figsize=(8, 5))
plt.bar(category_data.index, category_data.values, color='blue')
plt.xticks(rotation=45, ha="right")
plt.title("Distribution of Review Categories")
plt.xlabel("Category")
plt.ylabel("Number of Reviews")
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

# --- Text preparation --------------------------------------------------------
# Coerce every review to a string first: a missing (NaN) or numeric cell would
# otherwise make the Tokenizer fail with an opaque AttributeError.
reviews = df['review'].fillna('').astype(str).tolist()

# Build the word -> integer-id vocabulary from the reviews.
# NOTE(review): the vocabulary is fitted on ALL reviews, including the rows
# that later land in the test split; fit on the training texts only if a
# strictly leak-free evaluation is required.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(reviews)
word_index = tokenizer.word_index
vocab_size = len(word_index)

# Encode each review as a list of word ids, then pad/truncate every list to a
# fixed length so the reviews form one rectangular array.
sequences = tokenizer.texts_to_sequences(reviews)
max_sequence_length = 100
sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# --- Targets -----------------------------------------------------------------
# Encode the labels as 0/1. Series.map() turns any value missing from the
# mapping into NaN, which would silently poison training, so fail loudly.
label_to_target = {'CG': 0, 'OR': 1}
y = df['label'].map(label_to_target)
if y.isnull().any():
    unexpected = sorted(df.loc[y.isnull(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values (expected 'CG' or 'OR'): {unexpected}")
y = y.astype('int64').values
X = sequences

# --- Model -------------------------------------------------------------------
model = Sequential()
# input_dim is vocab_size + 1 because Tokenizer ids start at 1; id 0 is the
# value pad_sequences fills with.
model.add(Embedding(input_dim=vocab_size + 1, output_dim=100, input_length=max_sequence_length))
model.add(LSTM(128))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Train / evaluate --------------------------------------------------------
# Hold out 20% of the rows for the final test; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model; a further 20% of the training rows is used for validation.
lstm = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Save the entire model to a .h5 file.
# NOTE(review): the fitted `tokenizer` is NOT saved with it, so a fresh session
# that only reloads the model cannot encode new text the same way.
model.save('lstm_model.h5')

# Evaluate the model on the held-out test rows.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


In [None]:
from tensorflow.keras.models import load_model

# Load the model previously written by `model.save('lstm_model.h5')` and rebind
# the notebook-level name `model` to it.
model = load_model('lstm_model.h5')


In [None]:
pip install --upgrade setuptools

In [None]:
python -m ensurepip
pip --version

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
def predict_fake_review(review, *, threshold=0.5):
    """Classify one review as ``"Genuine"`` or ``"Fake"`` with the trained LSTM.

    Relies on the notebook-level ``tokenizer``, ``pad_sequences``,
    ``max_sequence_length`` and ``model`` objects, so the cells that create
    them must have been run first.

    Args:
        review: Raw review text.
        threshold: Minimum model score at which the review is reported as
            genuine. The model was trained with ``'OR'`` mapped to 1 and
            ``'CG'`` mapped to 0, so higher scores mean "more likely genuine".
            Defaults to 0.5.

    Returns:
        ``"Genuine"`` when the score is at least ``threshold``, else ``"Fake"``.

    Raises:
        TypeError: If ``review`` is not a string.
        ValueError: If ``threshold`` is outside the range [0, 1].
    """
    # Validate up front so callers get a clear error instead of an
    # AttributeError raised from inside the tokenizer.
    if not isinstance(review, str):
        raise TypeError(f"review must be a str, got {type(review).__name__}")
    if not 0.0 <= threshold <= 1.0:
        raise ValueError(f"threshold must be between 0 and 1, got {threshold!r}")

    # Encode the text with the same vocabulary and fixed length used in training.
    review_sequence = tokenizer.texts_to_sequences([review])
    review_padded = pad_sequences(review_sequence, maxlen=max_sequence_length)

    # The model returns one sigmoid score per input row; there is a single row.
    prediction = model.predict(review_padded)
    score = float(prediction[0][0])

    return "Genuine" if score >= threshold else "Fake"


In [None]:
import os

import openai

# Take the OpenAI credential from the environment so it is never hard-coded in
# the notebook; the value is None when the variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY')

def query_gpt3_5_chat(prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` to an OpenAI chat model and return the reply text.

    Works with both generations of the ``openai`` package: the 1.x SDK, where
    ``openai.ChatCompletion`` was removed, and the legacy 0.x SDK that the
    notebook was originally written against.

    Args:
        prompt: The user message to send.
        model: Chat model name. Defaults to ``"gpt-3.5-turbo"``.

    Returns:
        The content of the first choice in the model's response.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: the module-level client honours `openai.api_key`, and
        # the response message is an object, not a dict.
        response = openai.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content

    # Legacy openai<1.0 interface.
    response = openai.ChatCompletion.create(model=model, messages=messages)
    return response.choices[0].message['content']


In [None]:
def handle_customer_review(review):
    """Screen a review and, if it looks genuine, ask GPT for product suggestions.

    Returns a fixed notice when ``predict_fake_review`` labels the review
    ``"Fake"``; otherwise returns the text produced by ``query_gpt3_5_chat``.
    """
    # Guard clause: nothing more to do for reviews flagged as fake.
    if predict_fake_review(review) == "Fake":
        return "The review appears to be inauthentic."

    # Genuine review: forward it to GPT-3.5 and return its suggestions.
    suggestion_prompt = f"The following review seems genuine: '{review}'. Can you provide suggestions for improvement of the product?"
    return query_gpt3_5_chat(suggestion_prompt)

In [None]:

# Run the full pipeline on one example review and print the outcome.
sample_review_2 = "This is one of the coolest screensavers I have ever seen, the fish move realistically, the environments look real, and the graphics are stunning."
sample_outcome = handle_customer_review(sample_review_2)
print(sample_outcome)
