In [62]:
import pandas as pd

In [63]:
import kagglehub
import os
import csv
import numpy as np

# Fetch the dataset through kagglehub; the returned value is the local
# directory that holds the dataset files and is joined with the CSV file name
# further down.
DATASET_HANDLE = "zeeshier/weather-forecast-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'weather-forecast-dataset' dataset.
Path to dataset files: /kaggle/input/weather-forecast-dataset


In [79]:
import os
import csv
import numpy as np

# --- File path ---
train_file = os.path.join(path, "weather_forecast_data.csv")

# --- 1. Load CSV into NumPy arrays (skip header) ---
# Columns used as model inputs, in the order the network sees them.
feature_columns = ["Temperature", "Humidity", "Cloud_Cover"]

data_list = []
label_list = []

# newline="" lets the csv module do its own newline handling.
with open(train_file, "r", newline="") as f:
    reader = csv.reader(f)
    header = next(reader)
    # Map column name -> column position so rows can be indexed by name.
    headers = {name: i for i, name in enumerate(header)}

    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to read.
            continue
        data_list.append([row[headers[name]] for name in feature_columns])
        label_list.append(row[headers["Rain"]])

# Keep the raw strings for now: missing values show up as empty fields, and
# converting to float first would raise ValueError on the first empty field.
# The reshape keeps the array 2-D even when no rows were read.
data_raw = np.array(data_list, dtype=str).reshape(-1, len(feature_columns))
labels = np.array(label_list, dtype=str)

# --- 2. Remove rows with missing values ---
# A row is kept only if every feature field and the label are non-blank.
mask = np.all(np.char.strip(data_raw) != "", axis=1) & (np.char.strip(labels) != "")

# Convert to float only after filtering, so every remaining field is populated.
data = data_raw[mask].astype(float)
labels = labels[mask]

# --- 3. Convert labels to binary (0/1) ---
# Only the exact label "rain" maps to 1; every other label value maps to 0.
labels_bin = np.array([1 if l == "rain" else 0 for l in labels], dtype=int)

# --- 4. Split into train/test (balanced test set) ---

TEST_FRACTION = 0.1  # share of the rain samples held out for testing
SPLIT_SEED = 70      # fixed seed so the split is reproducible

# indices for each class
rain_idx = np.where(labels_bin == 1)[0]
non_rain_idx = np.where(labels_bin == 0)[0]

print(f'rain instances in data: {len(rain_idx)}')
print(f'non-rain instances in data: {len(non_rain_idx)}')

# number of rain samples for test (10%), capped by the size of the non-rain
# class so that sampling without replacement cannot fail and the test set
# stays balanced
n_test_per_class = min(int(TEST_FRACTION * len(rain_idx)), len(non_rain_idx))
n_test_rain = n_test_per_class
n_test_non_rain = n_test_per_class  # balanced test set

# random choice
rng = np.random.default_rng(seed=SPLIT_SEED)

test_rain_idx = rng.choice(rain_idx, size=n_test_rain, replace=False)
test_non_rain_idx = rng.choice(non_rain_idx, size=n_test_non_rain, replace=False)

# Everything not drawn for the test set becomes training data (sorted order).
test_idx = np.concatenate([test_rain_idx, test_non_rain_idx])
train_idx = np.setdiff1d(np.arange(len(labels_bin)), test_idx)

# build datasets
X_train = data[train_idx]
y_train = labels_bin[train_idx]

X_test = data[test_idx]
y_test = labels_bin[test_idx]

# --- 5. Print shapes and samples ---
print("Train samples:", X_train.shape[0], "Train features:", X_train.shape[1])
print("Test samples:", X_test.shape[0], "Test features:", X_test.shape[1])

print("\nExample X_train:", X_train[:3])
print("Example y_train:", y_train[:3])

print("\nExample X_test:", X_test[:3])
print("Example y_test:", y_test[:3])


rain instances in data: 314
non-rain instances in data: 2186
Train samples: 2438 Train features: 3
Test samples: 62 Test features: 3

Example X_train: [[27.87973416 46.48970404  4.99005293]
 [25.0690844  83.07284289 14.85578394]
 [23.62207957 74.36775792 67.25528206]]
Example y_train: [0 0 1]

Example X_test: [[16.37659463 90.29102494 75.22174497]
 [12.81068293 98.57650733 81.87243586]
 [10.47807244 74.38637566 72.60556873]]
Example y_test: [1 1 1]


In [80]:
import tensorflow as tf
from tensorflow import keras

# Binary classification model
model = keras.Sequential()

# Input: 3 features per sample. Declared with an explicit Input object instead
# of passing input_shape= to the first Dense layer, which Keras warns about.
model.add(keras.Input(shape=(3,)))

# First hidden layer
model.add(keras.layers.Dense(16, activation='relu'))

# Second hidden layer
model.add(keras.layers.Dense(16, activation='relu'))

# Output layer: 1 neuron with sigmoid for binary classification
model.add(keras.layers.Dense(1, activation='sigmoid'))

# Compile model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',  # binary classification
    metrics=['accuracy']
)

# Summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [81]:
from sklearn.utils import class_weight

# compute class weights (balanced)
classes = np.unique(y_train)
weights = class_weight.compute_class_weight(class_weight='balanced', classes=classes, y=y_train)

# Key each weight by its actual class label rather than by its position, so the
# mapping stays correct even if the labels are not exactly 0..n-1.
# NOTE: this rebinds `class_weight` from the sklearn module to a plain dict;
# the import at the top of this cell restores the module whenever the cell is re-run.
class_weight = {int(label): float(weight) for label, weight in zip(classes, weights)}

In [82]:
# Train the network. The held-out test split is passed as validation data and
# the per-class weights are applied to the training loss.
fit_options = dict(
    epochs=50,
    batch_size=64,
    validation_data=(X_test, y_test),
    class_weight=class_weight,
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.7633 - loss: 2.5417 - val_accuracy: 0.7903 - val_loss: 0.3984
Epoch 2/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7169 - loss: 0.5271 - val_accuracy: 0.9032 - val_loss: 0.2500
Epoch 3/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8062 - loss: 0.3574 - val_accuracy: 0.9516 - val_loss: 0.1561
Epoch 4/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8402 - loss: 0.2565 - val_accuracy: 0.9516 - val_loss: 0.1272
Epoch 5/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8776 - loss: 0.2193 - val_accuracy: 0.9516 - val_loss: 0.1059
Epoch 6/50
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8819 - loss: 0.2147 - val_accuracy: 0.9355 - val_loss: 0.1312
Epoch 7/50
[1m39/39[0m [32m━━━━━━━━━

# Export the weights

The cells below print the trained weights and biases as C array definitions. Copy and paste the printed output into `model_data.cpp`.

In [83]:
# Pull the (kernel, bias) pair out of each of the three Dense layers.
(W1, b1), (W2, b2), (W3, b3) = (model.layers[i].get_weights() for i in range(3))

# Show the shape of every kernel/bias pair, one layer per line.
for kernel, bias in ((W1, b1), (W2, b2), (W3, b3)):
    print(kernel.shape, bias.shape)

(3, 16) (16,)
(16, 16) (16,)
(16, 1) (1,)


In [84]:
# C identifiers for the exported arrays, paired by position with `arrays`.
names = ["W1_data", "b1_data", "W2_data", "b2_data", "W3_data", "b3_data"]
# NOTE(review): only W2 is transposed before export; W1 and W3 are written in
# their original layout. Confirm this matches what the C++ side expects.
arrays = [W1, b1, W2.T, b2, W3, b3]

# Emit one `const float NAME[] PROGMEM = {...};` definition per array, with the
# values flattened in row-major order and suffixed with "f".
for index in range(min(len(names), len(arrays))):
    name, array = names[index], arrays[index]
    literals = [str(x) + "f" for x in array.flatten()]
    print("const float %s[] PROGMEM = {" % name)
    print("   ", ", ".join(literals))
    print("};\n")

const float W1_data[] PROGMEM = {
    0.13737203f, -0.015746525f, 0.16990644f, -0.3432942f, -0.1353307f, 0.29496348f, -0.36915487f, 0.1856215f, -0.48480105f, -0.4748979f, 0.2546103f, 0.39210027f, -0.027806543f, -0.18415129f, -0.078740925f, 0.09551273f, -0.4610023f, 0.0035052528f, -0.40004897f, -0.0053494284f, -0.5613176f, -0.102639444f, -0.28466782f, 0.25783333f, 0.062266413f, -0.39352146f, -0.23633572f, 0.37198618f, 0.2566907f, -0.408399f, -0.11535248f, -0.07182007f, 0.48000348f, -0.18738335f, -0.5088426f, 0.55306387f, 0.06286299f, 0.04605391f, -0.3012226f, -0.20991723f, 0.4955593f, -0.20383614f, -0.20038226f, -0.27930343f, -0.3067977f, -0.2227262f, -0.5132788f, 0.081022345f
};

const float b1_data[] PROGMEM = {
    0.22581924f, -0.048457243f, 0.0f, -0.20411307f, 0.0f, 0.26640385f, 0.0f, 0.14735292f, -0.13289669f, 0.0f, 0.0f, -0.08101153f, -0.017031457f, 0.0f, 0.0f, 0.24474455f
};

const float W2_data[] PROGMEM = {
    -0.032378614f, 0.3319297f, -0.36044827f, 0.21596125f, -0.001333415