# GRU (Gated Recurrent Unit)

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import root_mean_squared_error, r2_score

In [11]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

In [12]:
class GRU:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.weights_z = self.__init_weights((input_size + hidden_size, hidden_size))
        self.biases_z = np.zeros((hidden_size,))
        
        self.weights_r = self.__init_weights((input_size + hidden_size, hidden_size))
        self.biases_r = np.zeros((hidden_size,))
        
        self.weights_h = self.__init_weights((input_size + hidden_size, hidden_size))
        self.biases_h = np.zeros((hidden_size,))
        
        self.weights_output = self.__init_weights((hidden_size, output_size))
        self.biases_output = np.zeros((output_size,))
        
        self.h_prev = np.zeros((hidden_size,))

    def __forward_propagation(self, x):
        x_and_h_prev = np.hstack([x, self.h_prev])
        
        zt = sigmoid(np.dot(x_and_h_prev, self.weights_z) + self.biases_z)
        rt = sigmoid(np.dot(x_and_h_prev, self.weights_r) + self.biases_r)
        self.h_prev = zt * self.h_prev + (1 - zt) * tanh(np.dot(np.hstack([x, rt * self.h_prev]), self.weights_h) + self.biases_h)
        
        y = np.dot(self.h_prev, self.weights_output) + self.biases_output        
        return y

    def __backward_propagation(self, x, dy, learning_rate):
        x_and_h_prev = np.hstack([x, self.h_prev])
        
        zt = sigmoid(np.dot(x_and_h_prev, self.weights_z) + self.biases_z)
        rt = sigmoid(np.dot(x_and_h_prev, self.weights_r) + self.biases_r)
        combined_r = np.hstack([x, rt * self.h_prev])
        
        dh = np.dot(dy, self.weights_output.T)
        dht_candidate = dh * (1 - zt) * (1 - tanh(np.dot(combined_r, self.weights_h) + self.biases_h)**2)
        
        dwh = np.outer(combined_r, dht_candidate)
        dbh = dht_candidate
        
        drt = np.dot(dht_candidate, self.weights_h.T[:, :self.hidden_size]) * rt * (1 - rt) * self.h_prev
        dwr = np.outer(x_and_h_prev, drt)
        dbr = drt
        
        dzt = dh * (self.h_prev - tanh(np.dot(combined_r, self.weights_h) + self.biases_h)) * zt * (1 - zt)
        dwz = np.outer(x_and_h_prev, dzt)
        dbz = dzt
        
        self.weights_h -= learning_rate * dwh
        self.weights_r -= learning_rate * dwr
        self.weights_z -= learning_rate * dwz
        self.weights_output -= learning_rate * np.outer(self.h_prev, dy)

        self.biases_h -= learning_rate * dbh
        self.biases_r -= learning_rate * dbr
        self.biases_z -= learning_rate * dbz
        self.biases_output -= learning_rate * dy

        self.h_prev = zt * self.h_prev + (1 - zt) * tanh(np.dot(combined_r, self.weights_h) + self.biases_h)
        
    def fit(self, x_train, y_train, epochs=100, learning_rate=0.001, batch_size=16):
        for epoch in range(epochs):
            perm = np.random.permutation(len(x_train))
            for i in range(0, len(x_train), batch_size):
                batch_indices = perm[i:i + batch_size]
                batch_x = x_train[batch_indices]
                batch_y = y_train[batch_indices]
                outputs = []
                for x in batch_x:
                    out = self.__forward_propagation(x)
                    outputs.append(out)
                gradients = 2 * (outputs - batch_y) / batch_size
                for j in range(len(gradients) - 1, 0, -1):
                    self.__backward_propagation(batch_x[j], gradients[j], learning_rate)
                    
    def predict(self, x):
        return self.__forward_propagation(x)
        
    def __init_weights(self, size):
        return np.random.uniform(-1, 1, size)

In [13]:
df = pd.read_csv('resources/Clear_steel_industry_data.csv')

In [14]:
x = df.drop('Usage_kWh', axis=1).values
y = df['Usage_kWh'].values.reshape(-1, 1)

In [15]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()
x_scaled = scaler_x.fit_transform(x)
y_scaled = scaler_y.fit_transform(y)

In [16]:
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_scaled, test_size=0.2, shuffle=False, random_state=42)

In [17]:
gru = GRU(x_train.shape[1], 10, 1)
gru.fit(x_train, y_train, epochs=100, learning_rate=0.001, batch_size=16)

In [18]:
def test_model(gru, x_test, y_test):
    predictions = [gru.predict(x.reshape(1, -1)) for x in x_test]
    predictions = np.array(predictions).reshape(-1, 1)

    y_test_inverse = scaler_y.inverse_transform(y_test)
    predictions_inverse = scaler_y.inverse_transform(predictions)
    print("R2 Score:\t", r2_score(y_test_inverse, predictions_inverse))
    print("RMSE:\t\t", root_mean_squared_error(y_test_inverse, predictions_inverse))

In [19]:
test_model(gru, x_test, y_test)

R2 Score:	 0.9082095402467093
RMSE:		 9.504735175985514
