In [2]:
!pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable
Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/93/21/9b035a4f823d6aee2917c75415be9a95861ff3d73a0a65e48edbf210cec1/tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow-2.15.0-cp311-cp311-win_amd64.whl.metadata (3.6 kB)
Collecting tensorflow-intel==2.15.0 (from tensorflow)
  Obtaining dependency information for tensorflow-intel==2.15.0 from https://files.pythonhosted.org/packages/4c/48/1a5a15517f18eaa4ff8d598b1c000300b20c1bb0e624539d702117a0c369/tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata
  Using cached tensorflow_intel-2.15.0-cp311-cp311-win_amd64.whl.metadata (5.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.15.0->tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/a2/ad/e0d3c824784ff121c03cc031f944bc7e139a8f1870ffd2845cc2dd76f6c4/



In [5]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy binary sentiment classifier: tokenise six hand-written reviews, train a
# small Embedding -> LSTM -> sigmoid model on them, then score two new reviews.

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable after "Restart & Run All" (some ops may still be
# nondeterministic on certain hardware).
tf.keras.utils.set_random_seed(42)

# Sample data for sentiment analysis
positive_reviews = ["I love this product!", "Great experience with the service.", "Fantastic job!"]
negative_reviews = ["Terrible product, waste of money.", "Bad customer service.", "Disappointing experience."]

# Combine positive and negative reviews (positives first)
all_reviews = positive_reviews + negative_reviews

# Labels (1 for positive, 0 for negative) are derived from the list lengths so
# they stay aligned with `all_reviews` when examples are added or removed.
labels = np.array([1] * len(positive_reviews) + [0] * len(negative_reviews))

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_reviews)
# +1 because the tokenizer's word indices start at 1; index 0 is the padding value
total_words = len(tokenizer.word_index) + 1

# Convert text data to integer sequences and pad them to a common length
sequences = tokenizer.texts_to_sequences(all_reviews)
padded_sequences = pad_sequences(sequences)
sequence_length = padded_sequences.shape[1]

# Define the model
model = Sequential()
model.add(Embedding(total_words, 16, input_length=sequence_length))
model.add(LSTM(100))
# Single sigmoid unit: the output is the estimated probability of label 1 (positive)
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(padded_sequences, labels, epochs=10)

# Test the model on unseen reviews, padded to the training sequence length
test_reviews = ["This is good!", "I hate it."]
test_sequences = tokenizer.texts_to_sequences(test_reviews)
padded_test_sequences = pad_sequences(test_sequences, maxlen=sequence_length)

# Predict sentiment for test reviews; one row with one probability per review
predictions = model.predict(padded_test_sequences)

# Display predictions
for review, prediction in zip(test_reviews, predictions):
    # Pull out the scalar explicitly instead of relying on the truth value of
    # a length-1 array.
    positive_probability = float(prediction[0])
    if positive_probability > 0.5:
        sentiment, confidence = "Positive", positive_probability
    else:
        # Confidence is the probability of the *predicted* class, so for a
        # negative prediction it is 1 - P(positive).
        sentiment, confidence = "Negative", 1.0 - positive_probability
    print(f"Review: {review} - Sentiment: {sentiment} (Confidence: {confidence:.2f})")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Review: This is good! - Sentiment: Positive (Confidence: 0.50)
Review: I hate it. - Sentiment: Negative (Confidence: 0.50)


In [6]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import numpy as np

# Template for a binary sentiment model: (text, label) pairs are tokenised,
# padded, and fed to an Embedding -> LSTM -> sigmoid network.

# Seed Python, NumPy and TensorFlow so repeated runs of this cell start from
# the same weights (some ops may still be nondeterministic on certain hardware).
tf.keras.utils.set_random_seed(42)

# Sample sentiment data (you should replace this with your own dataset)
training_data = [
    ("Great product! I love it", 1),
    ("Poor quality. Do not buy.", 0),
    # Add more examples...
]

# Preprocessing the text data
corpus = [text for text, _ in training_data]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
# +1 because the tokenizer's word indices start at 1; index 0 is the padding value
total_words = len(tokenizer.word_index) + 1

# Converting text data to sequences, left-padded to the longest training example
input_sequences = tokenizer.texts_to_sequences(corpus)
max_sequence_len = max(len(sequence) for sequence in input_sequences)
# pad_sequences already returns a NumPy array, so no extra np.array() wrapper is needed
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

X_train = input_sequences
y_train = np.array([label for _, label in training_data])

# Building the RNN model
model = Sequential()
model.add(Embedding(total_words, 64, input_length=max_sequence_len))
model.add(LSTM(100))
# Single sigmoid unit: the output is the estimated probability of label 1
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10)

# Once trained, you can use the model to predict sentiment on new data
# For example:
test_sentence = "This is a fantastic product!"
# NOTE: the tokenizer was created without an `oov_token`, so words it did not
# see in `corpus` are dropped from the sequence rather than encoded.
test_sequence = tokenizer.texts_to_sequences([test_sentence])
padded_test_sequence = pad_sequences(test_sequence, maxlen=max_sequence_len, padding='pre')
prediction = model.predict(padded_test_sequence)
print(prediction)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[0.5063883]]


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical


# Three-class sentiment model on the Reddit comments CSV: load and clean the
# data, tokenise/pad the comments, one-hot encode the labels, then train and
# evaluate an Embedding -> LSTM -> softmax network.
import os

from keras.utils import set_random_seed

# Settings a reader might want to tune.
SEED = 42
MAX_SEQUENCE_LEN = 100  # every comment is padded or truncated to this many tokens
EMBEDDING_DIM = 50
NUM_CLASSES = 3         # width of the one-hot labels and of the softmax layer

# Location of the dataset. Set the REDDIT_DATA_CSV environment variable to run
# on another machine without editing the notebook; the default is the original
# local path.
DATA_PATH = os.environ.get('REDDIT_DATA_CSV', 'C:\\Users\\samso\\Downloads\\Reddit_Data.csv')

# Seed Python, NumPy and the Keras backend so training is repeatable, not just
# the train/test split (some ops may still be nondeterministic on certain hardware).
set_random_seed(SEED)

# Keep the raw frame untouched so the cleaning step can be re-run or inspected
raw_df = pd.read_csv(DATA_PATH)

# Drop rows with NaN values (in any column)
df = raw_df.dropna()

# Split the dataset into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)

# Tokenize the text; the vocabulary is fitted on the training split only
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['clean_comment'])
X_train = tokenizer.texts_to_sequences(train_df['clean_comment'])
X_test = tokenizer.texts_to_sequences(test_df['clean_comment'])

# Pad the sequences to have the same length
X_train = pad_sequences(X_train, maxlen=MAX_SEQUENCE_LEN)
X_test = pad_sequences(X_test, maxlen=MAX_SEQUENCE_LEN)

# Encode the labels as integers (fitted on the training split), then one-hot
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_df['category'])
y_test = label_encoder.transform(test_df['category'])
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

# Build the RNN model
# +1 because the tokenizer's word indices start at 1; index 0 is the padding value
vocab_size = len(tokenizer.word_index) + 1
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=MAX_SEQUENCE_LEN))
model.add(LSTM(100))
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model, holding out 10% of the training data for validation
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.1)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.4120, Test Accuracy: 0.8969


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# Three-class sentiment model on the Reddit comments CSV. Only rows with a
# missing comment are dropped; the rest of the pipeline tokenises/pads the
# comments, one-hot encodes the labels, then trains and evaluates an
# Embedding -> LSTM -> softmax network.
import os

from keras.utils import set_random_seed

# Settings a reader might want to tune.
SEED = 42
MAX_SEQUENCE_LEN = 100  # every comment is padded or truncated to this many tokens
EMBEDDING_DIM = 50
NUM_CLASSES = 3         # width of the one-hot labels and of the softmax layer

# Location of the dataset. Set the REDDIT_DATA_CSV environment variable to run
# on another machine without editing the notebook; the default is the original
# local path.
DATA_PATH = os.environ.get('REDDIT_DATA_CSV', 'C:\\Users\\samso\\Downloads\\Reddit_Data.csv')

# Seed Python, NumPy and the Keras backend so training is repeatable, not just
# the train/test split (some ops may still be nondeterministic on certain hardware).
set_random_seed(SEED)

# Load the dataset; keep the raw frame untouched so cleaning can be re-run
raw_df = pd.read_csv(DATA_PATH)

# Drop rows whose comment text is missing.
# NOTE(review): rows with a missing 'category' are kept by this filter --
# confirm that is intended, since they would reach the label encoder.
df = raw_df.dropna(subset=['clean_comment'])

# Split the dataset into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)

# Tokenize the text; the vocabulary is fitted on the training split only
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['clean_comment'])
X_train = tokenizer.texts_to_sequences(train_df['clean_comment'])
X_test = tokenizer.texts_to_sequences(test_df['clean_comment'])

# Pad the sequences to have the same length
X_train = pad_sequences(X_train, maxlen=MAX_SEQUENCE_LEN)
X_test = pad_sequences(X_test, maxlen=MAX_SEQUENCE_LEN)

# Encode the labels as integers (fitted on the training split), then one-hot
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_df['category'])
y_test = label_encoder.transform(test_df['category'])
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

# Build the RNN model
# +1 because the tokenizer's word indices start at 1; index 0 is the padding value
vocab_size = len(tokenizer.word_index) + 1
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=MAX_SEQUENCE_LEN))
model.add(LSTM(100))
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model, holding out 10% of the training data for validation
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.1)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.4038, Test Accuracy: 0.8945
