In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import tensorflow as tf

In [None]:
# Read the weather CSV from the Kaggle input directory and preview the first rows.
DATASET_PATH = "/kaggle/input/weather-dataset-rattle-package/weatherAUS.csv"

data = pd.read_csv(DATASET_PATH)
data.head(n=5)

In [None]:
# Discard the 'Date' column. Rebinding a new frame (instead of mutating in place)
# together with errors='ignore' keeps this cell safe to re-run: a second execution
# no longer raises KeyError because 'Date' is already gone.
data = data.drop(columns='Date', errors='ignore')

In [None]:
# Count of missing values in each column.
data.isna().sum()

In [None]:
# RainToday is an input feature: a missing value is treated as 'No'.
data['RainToday'] = data['RainToday'].fillna('No')

# RainTomorrow is the prediction target. Filling a missing target with 'No' would
# invent labels and bias both training and evaluation, so rows without a recorded
# target are dropped instead. The index is reset so positions stay contiguous.
data = data.dropna(subset=['RainTomorrow']).reset_index(drop=True)

encoder = LabelEncoder()

label_encoder_columns = ['RainToday', 'RainTomorrow']

# LabelEncoder assigns integer codes in sorted label order, so 'No' -> 0, 'Yes' -> 1.
# The encoder is re-fitted per column; after the loop it holds the last column's mapping.
for column in label_encoder_columns:
    data[column] = encoder.fit_transform(data[column])

In [None]:
def add_column_prefixes(data, column, prefix):
    """Return ``data[column]`` with every value stringified and prefixed.

    Each element is converted with ``str`` before ``prefix`` is prepended, so a
    missing value (NaN) becomes the literal ``prefix + "nan"`` rather than
    staying missing. ``data`` itself is not modified.

    Parameters:
        data: DataFrame holding the column.
        column: Name of the column to transform.
        prefix: String placed in front of every value.

    Returns:
        A new Series with the same index as ``data[column]``.
    """
    def _with_prefix(value):
        return prefix + str(value)

    return data[column].map(_with_prefix)

# Tag the two wind-direction columns with distinct prefixes so their values
# remain distinguishable from each other once they are one-hot encoded.
for column, prefix in (('WindDir9am', "9_"), ('WindDir3pm', "3_")):
    data[column] = add_column_prefixes(data, column, prefix)

In [None]:
def onehot_encoder(data, columns):
    """Replace each listed column with its one-hot indicator columns.

    For every name in ``columns`` (processed in order) the indicator columns
    produced by ``pd.get_dummies`` are appended on the right of the frame and
    the source column is removed. Indicator columns are named after the raw
    category values (no prefix is added here). The caller's frame is not
    modified; the encoded frame is returned.

    Parameters:
        data: Input DataFrame.
        columns: Iterable of column names to encode.

    Returns:
        DataFrame with the listed columns replaced by indicator columns.
    """
    encoded = data
    for name in columns:
        indicators = pd.get_dummies(encoded[name])
        # Append the indicators, then discard the column they were derived from.
        encoded = pd.concat([encoded, indicators], axis=1).drop(name, axis=1)
    return encoded

# Columns holding category labels that are expanded into indicator columns.
categorical_features = [
    'Location',
    'WindGustDir',
    'WindDir9am',
    'WindDir3pm',
]

data = onehot_encoder(data=data, columns=categorical_features)

In [None]:
# Re-check the per-column missing-value counts after one-hot encoding.
data.isna().sum()

In [None]:
def impute_means(data, columns):
    """Fill missing values in the given columns with each column's own mean.

    The frame is updated in place (each listed column is reassigned on
    ``data``) and nothing is returned.

    Parameters:
        data: DataFrame to update.
        columns: Iterable of column names to impute.
    """
    for name in columns:
        column_mean = data[name].mean()
        data[name] = data[name].fillna(column_mean)
        
        
# Columns whose missing entries are replaced by the column mean.
na_columns = [
    'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
    'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm',
    'Humidity9am', 'Humidity3pm',
    'Pressure9am', 'Pressure3pm',
    'Cloud9am', 'Cloud3pm',
    'Temp9am', 'Temp3pm',
]

impute_means(data=data, columns=na_columns)

In [None]:
# Preview the first rows of the fully prepared frame.
data.head(n=5)

In [None]:
y = data['RainTomorrow']
X = data.drop('RainTomorrow', axis=1)

scaler = StandardScaler()

X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
# Derive the input width from the training matrix instead of hard-coding it,
# so the network keeps matching the data if preprocessing yields a different
# number of feature columns.
inputs = tf.keras.Input(shape=(X_train.shape[1],))

# Two hidden layers of 16 ReLU units each.
x = tf.keras.layers.Dense(16, activation='relu')(inputs)
x = tf.keras.layers.Dense(16, activation='relu')(x)

# Two-way softmax: one probability per class (0 and 1).
outputs = tf.keras.layers.Dense(2, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Integer class labels + softmax output -> sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
epochs = 6

# The last 10% of the training rows are held out for per-epoch validation.
# verbose=0 is the documented "silent" setting; the previous value (-1) is not
# one of the values Keras documents ('auto', 0, 1, 2). The per-epoch metrics
# are still recorded in `history` for the plots that follow.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=32,
    verbose=0,
)

In [None]:
# Training vs. validation loss per epoch.
plt.figure(figsize=(14, 10))

epoch_axis = range(epochs)
for series_name, line_color in (('loss', 'b'), ('val_loss', 'r')):
    plt.plot(epoch_axis, history.history[series_name], color=line_color, label=series_name)

plt.title("Loss")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.figure(figsize=(14, 10))

plt.plot(range(epochs), history.history['accuracy'], color='b',label='accuracy')
plt.plot(range(epochs), history.history['val_accuracy'], color='r',label='val_accuracy')
plt.title("Accuracy")
plt.legend()
plt.xlabel('Epoch')
# The y-axis shows accuracy; it was previously mislabelled as 'Loss'.
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Evaluate once on the held-out set and unpack [loss, accuracy]; calling
# model.evaluate separately for each number ran the full test pass twice.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f'Model Accuracy: {test_accuracy}')
print(f'Model Loss: {test_loss}')

In [None]:
# Per-class softmax outputs for every held-out row (one row per sample).
y_pred = model.predict(x=X_test)
y_pred

In [None]:
# Collapse the per-class scores to a predicted label (index of the largest score).
# The ndim guard makes the cell safe to re-run: once y_pred already holds labels
# it is left alone, whereas applying argmax to scalar labels would silently turn
# every prediction into 0.
class_scores = np.asarray(y_pred)
if class_scores.ndim == 2:
    y_pred = list(np.argmax(class_scores, axis=1))

# Side-by-side view of true and predicted labels, indexed like y_test.
result = pd.DataFrame({'actual': y_test, 'pred': y_pred})

result

In [None]:
# F1 score of the predicted labels against the held-out targets.
model_f1 = f1_score(y_test, y_pred)
print(f"Model F1 Score: {model_f1}")