In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Dense, LSTM, Dropout # type: ignore
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import log_loss


In [3]:
train_data = pd.read_csv("Train.csv")  # Contains event_id, precipitation, and labels
test_data = pd.read_csv("Test.csv")   # Contains event_id and precipitation
composite_images = np.load("composite_images.npz")  # Composite images

In [4]:
scaler = MinMaxScaler()
train_data["precipitation"] = scaler.fit_transform(train_data[["precipitation"]])
test_data["precipitation"] = scaler.transform(test_data[["precipitation"]])

In [5]:
# Prepare training data
X = train_data[["precipitation"]].values
y = train_data["label"].values

In [6]:
# Prepare test data
X_test = test_data[["precipitation"]].values

In [7]:
# Reshape data for LSTM input
sequence_length = 1  # Since only precipitation is used as sequential input
X = X.reshape(X.shape[0], sequence_length, -1)
X_test = X_test.reshape(X_test.shape[0], sequence_length, -1)

In [8]:
# Train-test split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Build LSTM model
model = Sequential([
    LSTM(64, activation='selu', return_sequences=True, input_shape=(sequence_length, X.shape[2])),
    Dropout(0.2),
    LSTM(32, activation='selu', return_sequences=False),
    Dropout(0.2),
    Dense(16, activation='selu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(**kwargs)


In [10]:
# Train model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32
)

Epoch 1/20
[1m12301/12301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 5ms/step - accuracy: 0.9993 - loss: 0.0191 - val_accuracy: 0.9993 - val_loss: 0.0051
Epoch 2/20
[1m12301/12301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 5ms/step - accuracy: 0.9994 - loss: 0.0051 - val_accuracy: 0.9993 - val_loss: 0.0055
Epoch 3/20
[1m12301/12301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 5ms/step - accuracy: 0.9993 - loss: 0.0052 - val_accuracy: 0.9993 - val_loss: 0.0049
Epoch 4/20
[1m12301/12301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 6ms/step - accuracy: 0.9993 - loss: 0.0054 - val_accuracy: 0.9993 - val_loss: 0.0050
Epoch 5/20
[1m12301/12301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 5ms/step - accuracy: 0.9993 - loss: 0.0052 - val_accuracy: 0.9993 - val_loss: 0.0049
Epoch 6/20
[1m12301/12301[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 6ms/step - accuracy: 0.9994 - loss: 0.0045 - val_accuracy: 0.9993 - val_loss: 0.005

In [14]:
# Evaluate model
y_val_pred = model.predict(X_val)
print(f"Log Loss: {log_loss(y_val, y_val_pred)}")

[1m3076/3076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step
Log Loss: 0.004999877517975564


In [15]:
# Predict on test data
y_test_pred = model.predict(X_test)

[1m5110/5110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step


In [16]:
# Save predictions for submission
submission = pd.DataFrame({
    "ID": test_data["event_id"],
    "Target": y_test_pred.flatten()
})

In [18]:
submission.head()

Unnamed: 0,ID,Target
0,id_j7b6sokflo4k_X_0,0.000156
1,id_j7b6sokflo4k_X_1,0.000328
2,id_j7b6sokflo4k_X_2,0.000156
3,id_j7b6sokflo4k_X_3,0.003241
4,id_j7b6sokflo4k_X_4,0.000295


In [17]:
submission.to_csv("submission_one.csv", index=False)
