In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

In [3]:
# Load the data
train_data = pd.read_csv('tweets-ext.csv')
valid_data = pd.read_csv('tweets-valid.csv')
test_data = pd.read_csv('tweets-test.csv')

In [4]:
# Extract the text and labels
train_tweets = train_data['tweet'].tolist()
train_labels = train_data['label'].tolist()
valid_tweets = valid_data['tweet'].tolist()
valid_labels = valid_data['label'].tolist()
test_tweets = test_data['tweet'].tolist()
test_labels = test_data['label'].tolist()

In [5]:
# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_tweets + valid_tweets + test_tweets)

# Convert text sequences to numerical sequences
train_sequences = tokenizer.texts_to_sequences(train_tweets)
valid_sequences = tokenizer.texts_to_sequences(valid_tweets)
test_sequences = tokenizer.texts_to_sequences(test_tweets)

In [7]:
# Pad the sequences to a fixed length
max_seq_length = 280
train_X = pad_sequences(train_sequences, maxlen=max_seq_length)
valid_X = pad_sequences(valid_sequences, maxlen=max_seq_length)
test_X = pad_sequences(test_sequences, maxlen=max_seq_length)

In [8]:
# Convert labels to numpy arrays
train_y = np.array(train_labels)
valid_y = np.array(valid_labels)
test_y = np.array(test_labels)

In [9]:
# Define the model architecture
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 280

In [11]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_seq_length))
model.add(LSTM(units=128, return_sequences=True))  # Add an additional LSTM layer
model.add(LSTM(units=64))  # Add another LSTM layer
model.add(Dense(units=32, activation='relu'))  # Add a dense hidden layer
model.add(Dense(units=1, activation='sigmoid'))

In [12]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [13]:
# Train the model
model.fit(train_X, train_y, validation_data=(valid_X, valid_y), epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x22c021afd88>

In [14]:
# Evaluate the model
loss, accuracy = model.evaluate(test_X, test_y)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

Test Loss: -5378.43798828125
Test Accuracy: 0.4577777683734894
