<a href="https://www.kaggle.com/code/rubinr12/overfittedlstm-ipynb?scriptVersionId=192164559" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, GRU, Bidirectional, Input
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from torch.nn.utils import clip_grad_norm_
from torch.optim import RMSprop

In [None]:
# Read the raw tables (the training split is an Excel file, the test split a CSV)
train_data = pd.read_excel('/kaggle/input/dataset/train.xlsx')
test_data = pd.read_csv('/kaggle/input/dataset/test.csv')

# Remove the columns that are not used as model inputs
train_data = train_data.drop(columns=['job_id'])
test_data = test_data.drop(columns=['job_id', 'Latitude', 'Longitude'])

# Split each table into its target vector and its feature matrix
train_labels = train_data['label'].values
test_labels = test_data['label'].values
train_features = train_data.drop(columns=['label']).values
test_features = test_data.drop(columns=['label']).values

# Standardize the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)

# Create sliding windows
def create_sliding_window(data, labels, window_size=20):
    """Cut a sequence into overlapping fixed-length windows.

    Window ``i`` covers ``data[i:i + window_size]`` and is labelled with the
    label of its last row, ``labels[i + window_size - 1]``. Every window that
    fits is produced, including the one ending on the final row, giving
    ``len(data) - window_size + 1`` windows.

    Args:
        data: Array-like whose first axis is the time/row axis.
        labels: Array-like of per-row labels, indexed like ``data``.
        window_size: Number of consecutive rows per window (must be >= 1).

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, window_size, *data.shape[1:])`` and ``y`` has shape
        ``(n_windows,)``. If ``data`` is shorter than ``window_size`` both
        arrays are empty but ``X`` keeps its trailing dimensions.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
        IndexError: If ``labels`` is too short for the windows produced.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    data = np.asarray(data)
    labels = np.asarray(labels)

    # +1 so that the window ending on the very last row is included.
    n_windows = len(data) - window_size + 1
    if n_windows <= 0:
        # Keep the trailing dimensions so callers can still read X.shape[1:].
        empty_X = np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=labels.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + window_size] for start in range(n_windows)])
    y = np.array([labels[start + window_size - 1] for start in range(n_windows)])
    return X, y

# Window both splits with the same length so train and test samples share one layout
X_train, y_train = create_sliding_window(data=train_features, labels=train_labels, window_size=20)
X_test, y_test = create_sliding_window(data=test_features, labels=test_labels, window_size=20)

In [None]:
# # Build LSTM model with Dropout and Regularization
# model = Sequential()
# model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False, kernel_regularizer='l2'))
# model.add(Dropout(0.4))
# model.add(Dense(1, activation='sigmoid'))

# # Compile the model
# model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# # Early stopping callback
# early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# # Train the model with early stopping and class weights
# model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], class_weight=class_weight_dict)

In [None]:
# # Build LSTM model
# model = Sequential()
# model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False))
# model.add(Dense(1, activation='sigmoid'))
# model.compile(optimizer=Adam(learning_rate=0.0005), loss='binary_crossentropy', metrics=['accuracy'])

# # Train the model
# model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# # Compute class weights to handle class imbalance
# class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
# class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}


# model = Sequential()
# model.add(LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False, kernel_regularizer='l2'))
# model.add(BatchNormalization())
# model.add(Dropout(0.5))  
# model.add(Dense(1, activation='sigmoid'))

# model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stopping], class_weight=class_weight_dict)

In [None]:


# Declare the regularized bidirectional-LSTM classifier as a single layer list
model = Sequential([
    Bidirectional(
        LSTM(64, return_sequences=False, kernel_regularizer='l2', recurrent_dropout=0.3),
        input_shape=(X_train.shape[1], X_train.shape[2]),
    ),
    BatchNormalization(),
    Dropout(0.5),
    # This dense layer has no activation, i.e. it is a linear projection
    Dense(32, kernel_regularizer='l2'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(32, activation='relu', kernel_regularizer='l2'),
    BatchNormalization(),
    Dropout(0.5),
    # Single sigmoid unit: the output is a probability for the positive class
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy objective optimized with Adam
model.compile(
    optimizer=Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop after 3 epochs without val_loss improvement and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Halve the learning rate after 2 stagnant epochs, never going below 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.00001, verbose=1)

# Fit, holding out 20% of the training windows for validation
model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Score the test windows, then binarize the model outputs at the 0.5 cut-off
test_scores = model.predict(X_test)
y_pred = (test_scores > 0.5).astype("int32")

# Precision and recall on the test split
precision, recall = precision_score(y_test, y_pred), recall_score(y_test, y_pred)

print(f'Precision: {precision}')
print(f'Recall: {recall}')

In [None]:
# Sweep decision thresholds and keep the one that best balances precision and recall.
#
# The original rule required precision AND recall to both strictly improve. Recall
# can only stay equal or drop as the threshold rises, so that rule could never move
# past the first threshold that scored. F1 (the harmonic mean of the two) is used
# instead as the single balancing criterion.
#
# NOTE(review): the threshold is tuned on the test split here, so the reported
# numbers are likely optimistic - consider tuning on a held-out validation split.
thresholds = np.arange(0.1, 0.9, 0.05)
best_precision = 0
best_recall = 0
best_threshold = 0.5
best_f1 = 0.0

# The predicted scores do not depend on the threshold, so run inference only once.
test_probabilities = model.predict(X_test)

for threshold in thresholds:
    y_pred = (test_probabilities > threshold).astype("int32")
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    # Harmonic mean of precision and recall; defined as 0 when both are 0.
    pr_sum = precision + recall
    f1 = 2 * precision * recall / pr_sum if pr_sum > 0 else 0.0

    if f1 > best_f1:
        best_f1 = f1
        best_precision = precision
        best_recall = recall
        best_threshold = threshold

print(f'Best Precision: {best_precision}')
print(f'Best Recall: {best_recall}')
print(f'Best Threshold: {best_threshold}')

In [None]:
# Previous architecture: three stacked LSTM layers with light dropout.
# Precision and recall are tracked during training instead of accuracy.
metrics_new = [
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

window_size = 20
learning_rate = 0.0001
optimizer = Adam(learning_rate=learning_rate)

model1 = Sequential([
    Input(shape=(window_size, X_train.shape[2])),
    # The first two LSTMs emit the full sequence so the next LSTM can consume it
    LSTM(units=128, activation='tanh', return_sequences=True),
    Dropout(0.2),
    LSTM(units=64, activation='tanh', return_sequences=True),
    Dropout(0.2),
    # The last LSTM returns only its final state, which feeds the classifier head
    LSTM(units=64, activation='tanh'),
    Dense(1, activation='sigmoid'),
])
model1.compile(optimizer=optimizer, loss='BinaryCrossentropy', metrics=metrics_new)

# Stop after 3 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Learning-rate schedule callback; it is created here but not passed to fit() below
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.00001, verbose=1)

# Train with 30% of the training windows held out for validation
history = model1.fit(
    X_train,
    y_train,
    epochs=32,
    batch_size=64,
    validation_split=0.3,
    callbacks=[early_stopping],
    verbose=1,
)
# model1.summary()
# model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stopping, reduce_lr])

In [None]:
# Evaluate the network trained in the preceding cell (model1). The original line
# predicted with `model`, the earlier bidirectional network, so it re-reported that
# model's scores instead of the stacked-LSTM scores.
y_pred = (model1.predict(X_test) > 0.5).astype("int32")

# Precision and recall on the test split
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f'Precision: {precision}')
print(f'Recall: {recall}')

In [None]:
# Draw training and validation loss per epoch from the most recent fit() history
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:

# Regularized variant of the stacked LSTM: each recurrent layer carries an L2
# kernel penalty and is followed by batch normalization and dropout.
metrics_new = [
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

window_size = 20
learning_rate = 0.0001
optimizer = Adam(learning_rate=learning_rate)

model1 = Sequential()
model1.add(Input(shape=(window_size, X_train.shape[2])))

# (units, return_sequences) per recurrent layer; only the last layer collapses the
# sequence to a single vector for the classifier head.
for lstm_units, keep_sequence in ((128, True), (64, True), (64, False)):
    model1.add(
        LSTM(
            units=lstm_units,
            activation='tanh',
            return_sequences=keep_sequence,
            kernel_regularizer=l2(0.001),
        )
    )
    model1.add(BatchNormalization())
    model1.add(Dropout(0.3))

model1.add(Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001)))
model1.compile(optimizer=optimizer, loss='BinaryCrossentropy', metrics=metrics_new)

# Stop after 3 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Halve the learning rate after 2 stagnant epochs, never going below 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.00001, verbose=1)

# Train with 30% of the training windows held out for validation
history = model1.fit(
    X_train,
    y_train,
    epochs=32,
    batch_size=64,
    validation_split=0.3,
    callbacks=[early_stopping, reduce_lr],
    verbose=1,
)


## FastAI

In [None]:
!pip install -q fastai --upgrade
from fastai.tabular.all import *
from fastai.callback.all import *

In [None]:
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Copy the windowed NumPy arrays into float32 torch tensors (features and labels alike)
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_test, y_test)
)

In [None]:
# Pair every feature tensor with its label tensor.
# The validation dataset is built from the test tensors.
train_ds, valid_ds = (
    TensorDataset(features, targets)
    for features, targets in (
        (X_train_tensor, y_train_tensor),
        (X_test_tensor, y_test_tensor),
    )
)

In [None]:
# Batch both datasets; only the training loader shuffles its samples
train_dl = DataLoader(dataset=train_ds, batch_size=64, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=64, shuffle=False)

# Bundle the two loaders into the container handed to the Learner
dls = DataLoaders(train_dl, valid_dl)

In [None]:
# Define the LSTM model and train it through fastai's Learner
class LSTMModel(Module):
    """Two stacked LSTMs followed by dropout and a single-output linear head.

    ``forward`` returns raw logits (no sigmoid). The model is trained with
    ``BCEWithLogitsLossFlat``, which applies the sigmoid itself, so applying
    ``torch.sigmoid`` here as well would squash the output twice.
    """

    def __init__(self):
        # NOTE(review): `Module` presumably resolves to fastai's Module from the
        # star import, which does not require calling super().__init__() - confirm.
        self.lstm1 = nn.LSTM(input_size=X_train.shape[2], hidden_size=128, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=64, batch_first=True)
        self.drop = nn.Dropout(0.3)
        self.fc = nn.Linear(64, 1)

    def forward(self, x):
        """Return one logit per sample for a batch-first input sequence ``x``."""
        # The LSTM state tuples are not needed, only the per-step outputs.
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x = self.drop(x[:, -1, :])  # Keep only the last time step
        return self.fc(x)


# Instantiate the model (this rebinds `model`, previously the Keras network)
model = LSTMModel()

# Define loss function and optimizer; the loss expects logits from the model
loss_func = BCEWithLogitsLossFlat()
opt_func = Adam

# Define metrics
# NOTE(review): fastai's Precision()/Recall() are documented for single-label
# classification; with a single-logit output they may not reflect thresholded
# predictions - confirm, or switch to a threshold-based metric.
metrics = [Precision(), Recall()]

# Create a Learner
learn = Learner(dls, model, loss_func=loss_func, opt_func=opt_func, metrics=metrics, cbs=[EarlyStoppingCallback(monitor='valid_loss', patience=3), ReduceLROnPlateau()])

# Train the model
learn.fit_one_cycle(32, lr_max=1e-3)

In [None]:
# Score the test windows with the Keras model and binarize at a custom cut-off.
# The same cut-off is applied to the reference labels.
check_value = 0.2
predictions_new = model1.predict(X_test)
predictions_01 = np.greater(predictions_new, check_value).astype(int)
y_new_binary = np.greater(y_test, check_value).astype(int)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Macro-averaged scores plus the confusion matrix for the thresholded predictions
conf_matrix = confusion_matrix(y_new_binary, predictions_01)
precision = precision_score(y_true=y_new_binary, y_pred=predictions_01, average='macro')
recall = recall_score(y_true=y_new_binary, y_pred=predictions_01, average='macro')
f1 = f1_score(y_true=y_new_binary, y_pred=predictions_01, average='macro')

# One report line per metric, printed in a single call
print(
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1 Score: {f1}',
    f'Confusion Matrix:\n{conf_matrix}',
    sep='\n',
)