<div>
    <img src="https://storage.googleapis.com/kaggle-datasets-images/6012/1733506/0a5af086168a9b87a38b29bac75aa8ce/dataset-cover.jpg" />
</div>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model
from tensorflow.keras import Sequential

from sklearn.utils import shuffle
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings('ignore')

<h1 id="dataset" style="color:black; background:#fc850c; border:0.5px dotted;"> 
    <center>Dataset
        <a class="anchor-link" href="#dataset" target="_self">¶</a>
    </center>
</h1>

### Load

In [None]:
# Location of the CSV inside the Kaggle input mount.
path = '/kaggle/input/weather-dataset-rattle-package/weatherAUS.csv'
df = pd.read_csv(path)

# Shuffle the rows with a fixed seed so that a "Restart & Run All" reproduces
# the same row order (the Keras validation split later takes the tail rows).
df = shuffle(df, random_state=42)

# Impute missing numeric values with the column mean.
# numeric_only=True is required: the frame still contains string columns at
# this point, and pandas >= 2.0 raises TypeError when asked to average them.
# Non-numeric columns are left untouched by this step.
df = df.fillna(df.mean(numeric_only=True))

df.head()

### Remove non-important columns

In [None]:
# Columns that are not used as model inputs in this notebook.
unused_columns = ['Date', 'Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm']

# Rebind `df` to the frame without those columns.
df = df.drop(columns=unused_columns)
df.head()

### Binary encoding (Yes/No to 1/0)

In [None]:
# Values that count as "rain". The integer 1 is included so that re-running
# this cell on already-encoded columns is a no-op instead of zeroing them out.
RAIN_VALUES = ['Yes', 1]

# Rows without a target label cannot be used for training or evaluation;
# drop them rather than silently labelling them as "no rain".
# A missing RainToday (a feature, not the target) is still encoded as 0.
df = df.dropna(subset=['RainTomorrow']).assign(
    RainToday=lambda d: d['RainToday'].isin(RAIN_VALUES).astype(int),
    RainTomorrow=lambda d: d['RainTomorrow'].isin(RAIN_VALUES).astype(int),
)
df.sample(3)

### Features

In [None]:
# Name of the label column that the classifier will predict.
target_column = 'RainTomorrow'

# Labels as a NumPy array; every remaining column becomes a feature.
y = df[target_column].to_numpy()
x_data = df.drop(columns=target_column)
x_data.head()

In [None]:
# Per-column min-max scaling to [0, 1].
# Use the DataFrame reductions directly: np.min(df) / np.max(df) forward
# axis=None, which pandas >= 2.0 interprets as "reduce over both axes" and
# would scale every column by one global min/max instead of its own.
col_min = x_data.min()
col_range = x_data.max() - col_min

# A constant column has range 0; divide by 1 instead so it maps to 0.0
# rather than NaN (NaNs would propagate into the autoencoder loss).
col_range = col_range.replace(0, 1)

X = (x_data - col_min) / col_range
X.head(5)

<h1 id="autoencoder" style="color:black; background:#fc850c; border:0.5px dotted;"> 
    <center>Autoencoder
        <a class="anchor-link" href="#autoencoder" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Width of the input (and of the reconstruction): one unit per feature column.
n_features = X.shape[1]

# --- Encoder: n_features -> 128 -> 64 ---
input_layer = Input(shape=(n_features,))
enc_hidden = Dense(
    128,
    activation='tanh',
    activity_regularizer=regularizers.l1(10e-5),  # L1 penalty on this layer's activations
)(input_layer)
encoded = Dense(64, activation='relu')(enc_hidden)

# --- Decoder: 64 -> 64 -> 128 -> n_features ---
dec_hidden = Dense(64, activation='tanh')(encoded)
decoded = Dense(128, activation='tanh')(dec_hidden)
output_layer = Dense(n_features, activation='relu')(decoded)

In [None]:
# Wrap the layer graph into a trainable model that maps inputs to their
# reconstruction, and train it on mean squared reconstruction error.
# NOTE(review): the string 'adadelta' uses the optimizer's default learning
# rate; confirm that default is large enough for the training to converge.
autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(loss='mse', optimizer='adadelta')

In [None]:
# Partition the scaled features by label: 1 = rain tomorrow, 0 = dry.
X_rain = X[y == 1]
X_dry = X[y == 0]
print('Training - Rain data:{:}, Dry data:{:}'.format(len(X_rain), len(X_dry)))

# The autoencoder only ever sees dry-day rows and learns to reconstruct them
# (input and target are the same frame). 20% of those rows are held out by
# Keras for validation.
history = autoencoder.fit(
    x=X_dry,
    y=X_dry,
    validation_split=0.2,
    epochs=60,
    shuffle=True,
    verbose=0,
)

<h1 id="analyze" style="color:black; background:#fc850c; border:0.5px dotted;"> 
    <center>Analyze
        <a class="anchor-link" href="#analyze" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Training curve of the autoencoder, one point per epoch.
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(history.history['loss'], label='train')

# The fit call holds out a validation split, so show that curve as well when
# it was recorded; comparing the two is what reveals over- or under-fitting.
val_loss = history.history.get('val_loss')
if val_loss is not None:
    ax.plot(val_loss, label='validation')

ax.set(title="Loss", xlabel='Epoch', ylabel='Loss')
ax.legend()
plt.show()

<h1 id="semi" style="color:black; background:#fc850c; border:0.5px dotted;"> 
    <center>Semi-Supervised Learning
        <a class="anchor-link" href="#semi" target="_self">¶</a>
    </center>
</h1>

### Model layers from encoders

In [None]:
# Reuse the first three layers of the trained autoencoder (its input layer
# followed by the two encoder Dense layers) as a stand-alone model that maps
# a feature row to its hidden representation. The layers are shared, not
# copied, so they keep the trained weights.
hidden_rep = Sequential(autoencoder.layers[:3])

### Semi-Supervised Predictions

In [None]:
# Encode both label groups with the shared encoder (dry first, then rain).
dry_hid_rep, rain_hid_rep = (
    hidden_rep.predict(subset) for subset in (X_dry, X_rain)
)

### Features-Labels

In [None]:
# Prepare features, labels and combine them
rep_x = np.append(dry_hid_rep, rain_hid_rep, axis=0)
y_n = np.zeros(dry_hid_rep.shape[0])
y_f = np.ones(rain_hid_rep.shape[0])
rep_y = np.append(y_n,  y_f)

# Split features, labels
X_train, X_test, y_train, y_test = train_test_split(rep_x, rep_y, test_size=0.25)

<h1 id="logistic" style="color:black; background:#fc850c; border:0.5px dotted;"> 
    <center>Logistic Regression
        <a class="anchor-link" href="#logistic" target="_self">¶</a>
    </center>
</h1>

### Train

In [None]:
# Fit a default-configured logistic regression on the encoded training rows,
# then predict labels for the held-out rows.
clf = LogisticRegression()
clf.fit(X_train, y_train)
y_hat = clf.predict(X_test)

### Analyze

In [None]:
# Score the held-out predictions: overall accuracy plus the per-class
# precision / recall / F1 text report.
acc = accuracy_score(y_true=y_test, y_pred=y_hat)
clf_report = classification_report(y_true=y_test, y_pred=y_hat)

In [None]:
# classification_report returns a pre-formatted multi-line string; print() is
# used so the line breaks render instead of showing the raw repr.
print(clf_report)

In [None]:
# Overall fraction of held-out rows whose predicted label matches y_test.
print("Accuracy:", acc)