# **Sentiment Analysis on Tweets**
### Work in progress — this notebook will be redone.

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import random

Define the model.

In [None]:
def create_model(lstm_units=64, dropout_rate=0.5, dense_units=32, embedding_dim=64,
                 vocab_size=10000):
    """Build and compile an LSTM binary classifier.

    The network is Embedding -> LSTM -> Dropout -> Dense(relu) -> Dense(sigmoid),
    compiled with binary cross-entropy loss, the Adam optimizer and accuracy
    as the reported metric.

    Args:
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Dropout rate applied to the LSTM output.
        dense_units: Width of the hidden dense layer.
        embedding_dim: Size of each embedding vector.
        vocab_size: ``input_dim`` of the embedding layer. Defaults to 10000,
            the value that used to be hard-coded. Presumably this must match
            the tokenizer's vocabulary size (largest token id + 1) -- confirm
            against the preprocessing code.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output.
    """
    model = Sequential([
        Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        # Only the final LSTM state is passed on to the classifier head.
        LSTM(lstm_units, return_sequences=False),
        Dropout(dropout_rate),
        Dense(dense_units, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

Cross-validation and hyperparameter-tuning function. Note that this is an inefficient approach: every sampled configuration trains a fresh model from scratch on every fold.

In [None]:
def _cross_validate_params(x, y, params, cv_folds, random_state):
    """Return the best validation accuracy reached on each stratified fold.

    ``x`` and ``y`` must be NumPy arrays so they can be indexed with the
    integer index arrays produced by ``StratifiedKFold.split``.
    """
    # Local import: only this helper needs the Keras backend handle.
    from tensorflow.keras import backend as keras_backend

    splitter = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=random_state)
    fold_scores = []

    for train_idx, val_idx in splitter.split(x, y):
        # A new model is built for every fold; release the state left behind
        # by previous models so memory does not grow over the whole search.
        keras_backend.clear_session()
        model = create_model(**params)

        # A fresh callback per fit keeps early-stopping state from leaking
        # between folds.
        early_stopping = EarlyStopping(
            monitor='val_accuracy',
            patience=3,
            restore_best_weights=True,
            verbose=0
        )

        history = model.fit(
            x[train_idx], y[train_idx],
            validation_data=(x[val_idx], y[val_idx]),
            epochs=12,
            batch_size=32,
            verbose=0,
            callbacks=[early_stopping]
        )

        # NOTE: the same fold drives early stopping and provides the score,
        # so this per-fold value is an optimistic estimate.
        fold_scores.append(max(history.history['val_accuracy']))

    return fold_scores


def random_search_cv(x, y, n_iterations=8, cv_folds=3, param_ranges=None, seed=None):
    """Random hyperparameter search scored by stratified k-fold cross-validation.

    Each iteration samples one value per hyperparameter, trains a new model
    from ``create_model`` on every fold and records the mean of the per-fold
    best validation accuracies. Progress is printed as the search runs.

    Args:
        x: Feature array-like; converted with ``np.asarray`` so it can be
            indexed positionally.
        y: Label array-like (NumPy array, pandas Series, list, ...);
            converted with ``np.asarray``.
        n_iterations: Number of configurations to sample. Configurations are
            drawn independently, so duplicates are possible.
        cv_folds: Number of stratified folds per configuration.
        param_ranges: Optional mapping of ``create_model`` keyword name to a
            sequence of candidate values. Defaults to the built-in grid.
        seed: Optional seed for the hyperparameter sampling. When ``None``
            the global ``random`` module state is used, as before.

    Returns:
        A ``(best_params, best_score, all_results)`` tuple. ``all_results``
        holds one dict per iteration with the keys ``params``,
        ``mean_accuracy``, ``std_accuracy`` and ``fold_scores``. With
        ``n_iterations=0`` this is ``(None, 0, [])``.
    """
    if param_ranges is None:
        param_ranges = {
            'lstm_units': [32, 64, 128, 256],
            'dropout_rate': [0.2, 0.3, 0.4, 0.5, 0.6],
            'dense_units': [16, 32, 64, 128],
            'embedding_dim': [32, 64, 128]
        }

    # Plain arrays give positional indexing for any array-like input, so the
    # labels no longer have to be a pandas object exposing ``.iloc``.
    x = np.asarray(x)
    y = np.asarray(y)

    # A private generator makes the sampling reproducible without touching
    # the global random state.
    rng = random if seed is None else random.Random(seed)

    best_score = 0
    best_params = None
    all_results = []

    print(f"Random Search: {n_iterations} iterations with {cv_folds}-fold CV")
    print("=" * 60)

    for i in range(n_iterations):
        params = {
            key: rng.choice(values)
            for key, values in param_ranges.items()
        }

        print(f"Iteration {i+1:2d}/{n_iterations}: {params}")

        # The fold split is reshuffled with a different seed on each iteration.
        fold_scores = _cross_validate_params(x, y, params, cv_folds, random_state=42 + i)

        mean_score = np.mean(fold_scores)
        std_score = np.std(fold_scores)

        all_results.append({
            'params': params,
            'mean_accuracy': mean_score,
            'std_accuracy': std_score,
            'fold_scores': fold_scores
        })

        print(f"              CV Accuracy: {mean_score:.4f} (±{std_score:.4f})")

        # The first configuration is always accepted, so a completed search
        # never returns ``best_params=None`` even if every score is 0.
        if best_params is None or mean_score > best_score:
            best_score = mean_score
            best_params = params
            print("NEW BEST!")

    return best_params, best_score, all_results

Run the hyperparameter search, then train the final model with the best parameters and evaluate it on the test set.

In [None]:
# --- 1. Hyperparameter search ---
print("Hyperparameter Tuning for LSTM", "=" * 50, sep="\n")

# The search builds its own stratified folds, so the existing train and
# validation splits are pooled into one dataset first.
x_combined = np.concatenate((x_train, x_val))
y_combined = pd.concat([y_train, y_val])

best_params, best_score, all_results = random_search_cv(
    x_combined, y_combined, n_iterations=8, cv_folds=3
)

print(
    "\n" + "=" * 50,
    "BEST RESULTS",
    "=" * 50,
    f"Best Parameters: {best_params}",
    f"Best CV Score: {best_score:.4f}",
    sep="\n",
)

# --- 2. Final model ---
# Train on the original training split with the winning configuration; the
# original validation split drives early stopping.
print("\nTraining Final Model...")
final_model = create_model(**best_params)

early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

final_history = final_model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=12,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)

# --- 3. Test-set evaluation ---
test_loss, test_accuracy = final_model.evaluate(x_test, y_test, verbose=0)

Final results.

In [None]:
# Summary banner: CV score of the best configuration, accuracy of the final
# model on the test set, and the winning hyperparameters.
print(
    "\n" + "=" * 50,
    "FINAL RESULTS",
    "=" * 50,
    f"Cross-Validation Accuracy: {best_score:.4f}",
    f"Final Test Accuracy: {test_accuracy:.4f}",
    f"Best Parameters: {best_params}",
    sep="\n",
)

Plot the final model's training curves (accuracy and loss) and the distribution of CV accuracies from tuning.

In [None]:
# One row of three panels: accuracy curves, loss curves, CV score histogram.
fig, (acc_ax, loss_ax, cv_ax) = plt.subplots(1, 3, figsize=(15, 5))

# Panels 1 and 2: training vs. validation curves of the final model.
curve_panels = (
    (acc_ax, 'accuracy', 'Model Accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'Model Loss', 'Loss'),
)
for ax, metric, title, y_label in curve_panels:
    ax.plot(final_history.history[metric], label='Training')
    ax.plot(final_history.history['val_' + metric], label='Validation')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

# Panel 3: distribution of mean CV accuracy over all sampled configurations,
# with the best score marked by a dashed line.
cv_scores = [result['mean_accuracy'] for result in all_results]
cv_ax.hist(cv_scores, bins=8, alpha=0.7, color='skyblue', edgecolor='black')
cv_ax.axvline(best_score, color='red', linestyle='--', linewidth=2, label=f'Best: {best_score:.4f}')
cv_ax.set_title('CV Accuracy Distribution')
cv_ax.set_xlabel('Accuracy')
cv_ax.set_ylabel('Frequency')
cv_ax.legend()

plt.tight_layout()
plt.show()

Print the top configurations from the tuning.

In [None]:
# Rank every sampled configuration by mean CV accuracy (highest first) and
# report the three best together with their spread across folds.
print("\nTOP 3 CONFIGURATIONS")
print("=" * 40)
sorted_results = sorted(all_results, key=lambda entry: entry['mean_accuracy'], reverse=True)
for rank, entry in enumerate(sorted_results[:3], start=1):
    print(f"{rank}. Accuracy: {entry['mean_accuracy']:.4f} (±{entry['std_accuracy']:.4f})")
    print(f"   {entry['params']}")
    print()

### **RESULTS**

#### 🥇 TOP 3 CONFIGURATIONS
---

1. **Accuracy:** `0.8098 ± 0.0006`  
   **Parameters:**  
   `{'lstm_units': 256, 'dropout_rate': 0.6, 'dense_units': 128, 'embedding_dim': 128}`

2. **Accuracy:** `0.8098 ± 0.0002`  
   **Parameters:**  
   `{'lstm_units': 256, 'dropout_rate': 0.6, 'dense_units': 128, 'embedding_dim': 32}`

3. **Accuracy:** `0.8097 ± 0.0009`  
   **Parameters:**  
   `{'lstm_units': 256, 'dropout_rate': 0.3, 'dense_units': 128, 'embedding_dim': 32}`
