In [1]:
import csv

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer


In [2]:
# Location of the Sentiment140 training set.
# Download the dataset from: http://help.sentiment140.com/for-students/
# then extract the training file and save it as
# 'training.1600000.processed.noemoticon.csv'.

# Absolute path to the extracted CSV file.
# NOTE(review): '/content' looks like a Colab working directory -- adjust the
# path when running the notebook elsewhere.
dataset_path = '/content/training.1600000.processed.noemoticon.csv'

In [7]:
# Parse the Sentiment140 CSV into two parallel lists: tweet text and 0/1 labels.
tweets = []
labels = []
# newline='' lets the csv module handle line endings (and quoted newlines) itself.
with open(dataset_path, 'r', encoding='ISO-8859-1', newline='') as file:
    # Every field in the file is double-quoted. csv.reader strips those quotes and
    # keeps commas that belong to the tweet text, which a plain split(',') does not.
    for row in csv.reader(file):
        if len(row) < 6:
            # Blank or truncated line: skip it instead of crashing on int('').
            continue
        polarity = int(row[0])
        # Columns: polarity, id, date, query, user, text. Re-join any overflow
        # columns in case a malformed quote split the text field.
        tweet = ','.join(row[5:])
        tweets.append(tweet)
        # The training file marks positive tweets with 4 (negative with 0), but the
        # sigmoid / binary_crossentropy model needs targets in {0, 1}.
        labels.append(1 if polarity == 4 else 0)

In [8]:
# Tokenize the tweets
# Build the word -> integer-index vocabulary from the whole tweet corpus
# (no vocabulary size limit is set, so every distinct token gets an index).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(tweets)
# Replace each tweet by the list of integer indices of its words.
sequences = tokenizer.texts_to_sequences(tweets)

In [9]:
# Bring every sequence to the length of the longest tweet so that they can be
# stacked into a single 2-D array for the model.
max_sequence_length = len(max(sequences, key=len))
sequences = pad_sequences(sequences, maxlen=max_sequence_length)

In [10]:
# Convert labels to numpy array
# (the Python list filled while reading the dataset becomes a 1-D array that can
# be passed to model.fit alongside the padded sequences)
labels = np.array(labels)

In [11]:
# Sentiment classifier: word embedding -> LSTM encoder -> single sigmoid unit.
# The vocabulary size is one larger than the number of known words because the
# tokenizer's word indices start at 1, leaving index 0 for the padding value.
vocabulary_size = len(tokenizer.word_index) + 1
model = Sequential([
    Embedding(input_dim=vocabulary_size, output_dim=100, input_length=max_sequence_length),
    LSTM(128),
    Dense(1, activation='sigmoid'),
])

In [12]:
# Compile the model
# binary_crossentropy matches the single sigmoid output unit; the Adam optimizer
# is used with its default settings and accuracy is reported during training.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [13]:
# Train the model
# Keras carves the validation_split fraction off the END of the arrays, before
# any shuffling. The samples are still in file order, and the Sentiment140
# training file is grouped by label, so without a shuffle the validation set
# would contain a single class and the training set would be heavily skewed.
# A fixed seed keeps the split reproducible across Restart & Run All.
# Note: fancy indexing creates shuffled copies of both arrays.
shuffle_order = np.random.default_rng(42).permutation(len(sequences))
model.fit(
    np.asarray(sequences)[shuffle_order],
    np.asarray(labels)[shuffle_order],
    batch_size=128,
    epochs=10,
    validation_split=0.2,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f91aae49b40>

In [14]:
# Generate text based on sentiment
def generate_text(sentiment, seed_text='I feel ', num_words=20):
    """Build a string of 'good'/'bad' words from the classifier's own predictions.

    Starting from ``seed_text``, the running text is tokenized, padded and scored
    by the global ``model``. 'bad' is appended when the score is below 0.5,
    otherwise 'good'. The extended text is then scored again, ``num_words`` times
    in total.

    Args:
        sentiment: Requested sentiment (the notebook passes 1 for positive and 0
            for negative). Kept for interface compatibility but NOT used: the
            model is a classifier, not a conditional language model, so the
            output depends only on ``seed_text`` and the model's predictions.
            This is why both requested sentiments yield the same text.
        seed_text: Text that primes the first prediction. It is extended
            internally but is not part of the returned string.
        num_words: Number of words to generate.

    Returns:
        The generated words, each followed by one space (a non-empty result
        therefore ends with a trailing space). Empty string if ``num_words`` is 0.
    """
    generated_words = []
    running_text = seed_text
    for _ in range(num_words):
        encoded_text = tokenizer.texts_to_sequences([running_text])[0]
        padded_text = pad_sequences([encoded_text], maxlen=max_sequence_length)
        # predict() returns one row per input sample with one sigmoid score each.
        predicted_sentiment = model.predict(padded_text)[0][0]

        next_word = 'bad' if predicted_sentiment < 0.5 else 'good'
        generated_words.append(next_word)
        # Feed the chosen word back so the next prediction sees the longer text.
        running_text += next_word + ' '

    # Same format as before: every word is followed by a single space.
    return ''.join(word + ' ' for word in generated_words)


In [15]:
# Generate text for positive sentiment
# NOTE(review): generate_text() never reads its argument, so this call produces
# the same words as generate_text(0); the 1 only documents the intent.
positive_text = generate_text(1)
print('Positive Text:', positive_text)

Positive Text: bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad 


In [16]:
# Generate text for negative sentiment
# NOTE(review): generate_text() never reads its argument, so this call produces
# the same words as generate_text(1); the 0 only documents the intent.
negative_text = generate_text(0)
print('Negative Text:', negative_text)

Negative Text: bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad bad 


In [17]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu

In [19]:
# Hand-written reference sentences, one per sentiment, used as BLEU targets.
reference_positive = "I feel good today. The weather is beautiful."
reference_negative = "I feel bad today. Everything is going wrong."

# Positive sample: whitespace tokens of the generated text scored against the
# single positive reference (sentence_bleu expects a list of references).
positive_reference_tokens = reference_positive.split()
positive_candidate_tokens = positive_text.split()
positive_bleu = sentence_bleu([positive_reference_tokens], positive_candidate_tokens)
print('BLEU score (positive sentiment):', positive_bleu)

# Negative sample: same procedure against the negative reference.
negative_reference_tokens = reference_negative.split()
negative_candidate_tokens = negative_text.split()
negative_bleu = sentence_bleu([negative_reference_tokens], negative_candidate_tokens)
print('BLEU score (negative sentiment):', negative_bleu)



BLEU score (positive sentiment): 0
BLEU score (negative sentiment): 8.614911585158347e-232
