# Imports

In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from pickle import dump

from tensorflow.keras.utils import set_random_seed
from tensorflow.data import Dataset
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.activations import relu, sigmoid
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy

2024-11-22 18:11:25.124258: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-22 18:11:25.127719: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-22 18:11:25.137423: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732291885.153977   98325 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732291885.158681   98325 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-22 18:11:25.175191: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

# Hyperparameters

In [2]:
# --- Filesystem layout -------------------------------------------------------
# All paths are derived from the directory the notebook is executed from.
BASE_DIR = os.path.abspath("")
DATA_DIR = os.path.join(BASE_DIR, "..", "data")
OUT_DIR = os.path.join(BASE_DIR, "..", "out")

# Input datasets.
TRAIN_DATA_PATH = os.path.join(DATA_DIR, "landmines-train.csv")
TEST_DATA_PATH = os.path.join(DATA_DIR, "landmines-test.csv")

# Output artefacts.
METRICS_PATH = os.path.join(OUT_DIR, "metrics.csv")
LOGISTIC_REGRESSION_PATH = os.path.join(OUT_DIR, "logistic-regression.pkl")
MODEL_PATH = os.path.join(OUT_DIR, "landmines.keras")

# --- Data / training settings ------------------------------------------------
TARGET_COLUMN = "is_mine"    # name of the label column in both CSV files
SEED = 0                     # shared random seed
BATCH_SIZE = 32              # mini-batch size of the training dataset
EARLY_STOP_PATIENCE = 25     # patience handed to the EarlyStopping callback
LEARNING_RATE = 0.001        # default learning rate of the Adam optimizer

In [3]:
def get_optimizer(learning_rate=LEARNING_RATE):
    """Create a new Adam optimizer.

    Args:
        learning_rate: Learning rate handed to Adam; defaults to the
            notebook-wide ``LEARNING_RATE`` constant.

    Returns:
        A freshly constructed ``Adam`` instance.
    """
    optimizer = Adam(learning_rate=learning_rate)
    return optimizer

def get_model(input_shape, batch_size=None, hidden_units=64, n_hidden_layers=3, dropout_rate=0.2):
    """Build the (uncompiled) feed-forward binary classifier.

    The network is an input layer, followed by ``n_hidden_layers`` identical
    Dense -> ReLU -> Dropout blocks, followed by a single Dense unit with a
    sigmoid activation. With the default arguments the architecture is
    3 x (Dense(64) -> ReLU -> Dropout(0.2)) -> Dense(1) -> sigmoid.

    Args:
        input_shape: Size of the feature dimension of one sample (this
            notebook passes the number of feature columns, an int).
        batch_size: Batch dimension fixed in the input layer. ``None``
            (the default) leaves the batch dimension unspecified.
        hidden_units: Number of units in every hidden Dense layer.
        n_hidden_layers: How many Dense -> ReLU -> Dropout blocks to stack.
        dropout_rate: Dropout rate applied after every hidden activation.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    layers = [Input(batch_shape=(batch_size, input_shape))]

    # Hidden stack: one Dense/ReLU/Dropout trio per requested hidden layer.
    for _ in range(n_hidden_layers):
        layers.extend([
            Dense(units=hidden_units),
            Activation(relu),
            Dropout(rate=dropout_rate),
        ])

    # Output head: one unit squashed to (0, 1) by the sigmoid.
    layers.extend([
        Dense(units=1),
        Activation(sigmoid),
    ])

    return Sequential(layers)

def compile_model(model):
    """Compile ``model`` for binary classification and return it.

    The model is compiled in place with the optimizer from
    ``get_optimizer()``, a binary cross-entropy loss and a binary accuracy
    metric.

    Args:
        model: The Keras model to compile.

    Returns:
        The same ``model`` object, now compiled.
    """
    optimizer = get_optimizer()
    loss_fn = BinaryCrossentropy()
    tracked_metrics = [BinaryAccuracy()]

    model.compile(optimizer=optimizer, loss=loss_fn, metrics=tracked_metrics)
    return model

In [4]:
# Pin the random seed up front so repeated runs of the notebook are comparable.
set_random_seed(seed=SEED)

# Data Loading

In [5]:
# Read the train and test splits from their CSV files.
df_train, df_test = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_DATA_PATH, TEST_DATA_PATH)
)

In [6]:
# Split each frame into a feature matrix (every column except the target)
# and a label matrix. Selecting with a list keeps the labels two-dimensional,
# i.e. one column per row rather than a flat vector.
x_train = df_train.drop(TARGET_COLUMN, axis=1).to_numpy()
y_train = df_train.loc[:, [TARGET_COLUMN]].to_numpy()

x_test = df_test.drop(TARGET_COLUMN, axis=1).to_numpy()
y_test = df_test.loc[:, [TARGET_COLUMN]].to_numpy()

In [7]:
# Preview the first five rows of the training feature matrix.
x_train[:5]

array([[0.33534705, 0.27272727, 0.6       ],
       [0.30211447, 0.90909091, 1.        ],
       [0.35368536, 0.72727273, 0.8       ],
       [0.78851865, 0.45454545, 0.        ],
       [0.36253735, 0.72727273, 0.2       ]])

In [8]:
# Preview the first five training labels (kept as a single-column 2-D array).
y_train[:5]

array([[0],
       [0],
       [1],
       [1],
       [1]])

In [9]:
# NOTE: despite its name, `n_batches` is used as the *batch size* of the test
# dataset. It equals the number of test samples, so the whole test set is
# delivered as one single batch.
n_batches = len(x_test)

# Shuffle the training samples (with a full-size buffer, reshuffled on every
# pass) before batching. Keras ignores `fit(shuffle=...)` for tf.data inputs,
# so without this every epoch would see the exact same batches in the same
# order. The seed keeps the shuffling reproducible.
train_ds = (
    Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train), seed=SEED)
    .batch(BATCH_SIZE)
)

# Evaluation data is left in file order; ordering does not matter there.
test_ds = Dataset.from_tensor_slices((x_test, y_test)).batch(n_batches)

2024-11-22 18:11:37.114143: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


# Logistic Regression

## Definition

In [10]:
# Logistic-regression classifier with default settings; only the random state
# is pinned, to the notebook-wide SEED.
lr = LogisticRegression(random_state=SEED)

## Fitting

In [11]:
# The labels are stored as a single-column 2-D array; collapse them to 1-D
# before handing them to the estimator.
lr.fit(X=x_train, y=y_train.ravel())

## Saving

In [12]:
# Make sure the output directory exists: open(..., "wb") does not create
# missing parent folders and would fail on a fresh checkout otherwise.
os.makedirs(os.path.dirname(LOGISTIC_REGRESSION_PATH), exist_ok=True)

# Persist the fitted estimator with pickle.
with open(LOGISTIC_REGRESSION_PATH, "wb") as file:
    dump(lr, file)

# ANN

## Prep

In [13]:
# Number of feature columns, i.e. the size of the last axis of the 2-D
# training matrix. Despite its name this is a plain int, not a shape tuple.
input_shape = x_train.shape[-1]
input_shape

3

In [14]:
# Callback that watches the validation loss (lower is better) with a
# patience of EARLY_STOP_PATIENCE.
early_stop = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=EARLY_STOP_PATIENCE,
)

## Architecture

In [15]:
# Build the network for the training batch size and compile it in one go
# (compile_model returns the model it was given).
model = compile_model(
    get_model(input_shape=input_shape, batch_size=BATCH_SIZE)
)

model.summary()

## Fitting

In [16]:
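# NOTE: training is currently disabled. While this call stays commented out,
# `model` keeps its initial (untrained) weights; uncomment it to fit the network.
# model.fit(
#     x=train_ds,
#     validation_data=test_ds,
#     epochs=1000,
#     callbacks=[early_stop],
# )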

## Saving

In [17]:
# Rebuild the same architecture with a batch dimension of 1 and copy the
# weights over from `model`.
# NOTE(review): while the model.fit(...) cell is commented out, `model` still
# holds its initial weights, so the copy is untrained as well.
model_single = get_model(input_shape=input_shape, batch_size=1)
model_single.set_weights(model.get_weights())

# Backward-compatible alias for the original, misspelled variable name.
model_sigle = model_single

model_single.summary()

In [18]:
# model.save(MODEL_PATH)

In [19]:
# metrics = pd.DataFrame(model.history.history)

# metrics.to_csv(METRICS_PATH, index=False)