In [58]:
import numpy as np
from sklearn.neural_network import MLPRegressor
import pandas as pd
from sklearn.preprocessing import StandardScaler
import copy
from sklearn.model_selection import train_test_split

In [88]:
class CustomMLPClassifier:
    def __init__(self, learning_rate=0.01, num_epochs=100, hidden_layer_sizes=(100,)):
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.hidden_layer_sizes = hidden_layer_sizes
        
    def fit(self, X, y):
        X = np.array(X)
        y = np.array(y)
        m = X.shape[0]
        self._w = []
        self._b = []
        
        for layer_idx in range(len(self.hidden_layer_sizes)):
            if layer_idx == 0:
                n_in = X.shape[1]
            else:
                n_in = self.hidden_layer_sizes[layer_idx-1]
            n_out = self.hidden_layer_sizes[layer_idx]
            limit = np.sqrt(6 / (n_in + n_out))
            self._w.append(np.random.uniform(-limit, limit, size=(n_in, n_out)))
            self._b.append(0.0)
        
        for epoch in range(self.num_epochs):
            # forward propagation
            cache = []
            A = X
            Z = []
            for l in range(len(self._w)):
                A_prev = A
                
                Z.append(np.dot(A_prev, self._w[l]) + self._b[l])
                linear_cache = (A_prev, self._w[l], self._b[l])
                
                if l != len(self._w) - 1:
                    A = np.maximum(0, Z[l])
                else:
                    A = 1 / (1 + np.exp(-Z[l]))
                activation_cache = Z[l]
                cache.append((linear_cache, activation_cache))
            
            cost = (-1 / m) * np.sum(y * np.log(A).T + (1 - y) * np.log(1 - A).T)

            # backward
            y = y.reshape(A.shape)
            m = A.shape[0]
            parameter_w = []
            parameter_b = []
            
            dA = - (np.divide(y, A) - np.divide(1 - y, 1 - A))
            
            current_cache = cache[-1]
            linear_cache, activation_cache = current_cache
            Z = activation_cache
            A_prev, W, b = linear_cache
            
            dZ = dA * (np.exp(-Z) / (1 + np.exp(-Z)) ** 2)
            dA_prev = np.dot(W, dZ.T).T
            dW = (1 / m) * np.dot(dZ.T, A_prev)
            db = (1 / m) * np.sum(dZ, axis=0)
            parameter_w.append(dW)
            parameter_b.append(db)
            
            for l in reversed(range(len(self._w) - 1)):
                current_cache = cache[l]
                linear_cache, activation_cache = current_cache
                Z = activation_cache
                A_prev, W, b = linear_cache
                
                dZ = dA_prev * np.int64(Z > 0)
                dA_prev = np.dot(W, dZ.T).T
                dW = (1 / m) * np.dot(dZ.T, A_prev)
                db = (1 / m) * np.sum(dZ, axis=0)
                parameter_w.append(dW)
                parameter_b.append(db)
            
            # update parameters
            parameter_w = copy.deepcopy(parameter_w)
            parameter_b = copy.deepcopy(parameter_b)
            for l in range(len(self._w)):
                self._w[l] = self._w[l] - self.learning_rate * parameter_w[-(l+1)].T
                self._b[l] = self._b[l] - self.learning_rate * parameter_b[-(l+1)].T
                
            
    def predict(self, X):
        X = np.array(X)
        m = X.shape[0]
        y_prediction = np.zeros((m, 1))
        A = X
        A_prev = A
        
        for l in range(len(self._w)):
            if l != len(self._w) - 1:
                A = np.maximum(0, np.dot(A_prev, self._w[l]) + self._b[l])
            else:
                A = 1 / (1 + np.exp(-(np.dot(A_prev, self._w[l]) + self._b[l])))
            A_prev = A
        
        for i in range(A.shape[0]):
            if A[i, 0] > 0.5 :
                y_prediction[i, 0] = 1
            else:
                y_prediction[i, 0] = 0
                
        return y_prediction
model = CustomMLPClassifier(hidden_layer_sizes=(80, 100, 60, 1), num_epochs=10000)
model.fit(X_train, y_train)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
y_pred = model.predict(X_test)

def accuracy(y_pred, y_true):
    correct_predictions = sum(1 for pred, true in zip(y_pred, y_true) if pred == true)
    total_values = len(y_true)
    accuracy = correct_predictions / total_values
    
    return accuracy

print(f"Accuracy on test data: {accuracy(y_pred, y_test)}")

ValueError: shapes (4074,50) and (2,80) not aligned: 50 (dim 1) != 2 (dim 0)

In [86]:
df = pd.read_csv('./data/merged_data.csv')
df

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,...,PC42,PC43,PC44,PC45,PC46,PC47,PC48,PC49,PC50,Price
0,3419.910843,-483.031634,-3.398608,-8.042770,0.889019,-0.042800,0.069389,-1.040899,-0.446999,0.349008,...,-0.102600,-0.131898,-0.328820,-0.041014,-0.039966,-0.318119,0.080555,0.020365,0.163390,1480000
1,3418.057577,-529.667754,-74.964367,-8.016888,1.210881,-1.016772,0.293009,-1.000364,-0.090151,0.677045,...,0.160192,-0.274086,-0.438961,0.651436,-0.072127,-0.152689,0.343013,0.009713,0.152070,1035000
2,3417.398766,-550.991589,-3.760897,-7.938391,-0.310455,-1.540416,-1.013147,0.052738,1.006280,0.322419,...,-0.324849,-0.028676,-0.128211,-0.238444,-0.037352,-0.294030,0.017966,0.009102,0.184914,1465000
3,3415.924478,-590.953266,-2.385884,-7.890959,-0.673569,-0.564996,-1.109222,0.037534,0.575173,0.043186,...,-0.315747,0.032302,-0.186581,-0.295302,-0.010397,-0.252918,0.106299,-0.033317,0.140873,850000
4,3416.864768,-565.055814,-11.628935,-7.854109,-1.311513,0.115066,0.119118,-1.080552,0.535839,0.053610,...,-0.076446,-0.176867,-0.236986,0.026757,-0.023510,-0.039351,-0.102667,-0.015860,-0.059328,1600000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,65.828417,91.206501,-1.484890,6.472355,-1.149423,-0.359331,-0.486444,0.078073,-0.100180,0.320790,...,0.209279,0.568324,0.000890,0.087701,0.043116,-0.020926,-0.267891,-0.096550,0.442323,1245000
13576,1065.322187,-265.102730,-18.340277,-3.642960,-0.807924,0.345274,-0.822266,0.163939,0.953571,-0.572552,...,0.169483,0.033905,-0.012523,-0.136206,0.066143,-0.165047,-0.199355,0.015365,0.088398,1031000
13577,1069.162884,-162.010063,-1.306025,-3.571541,-1.523567,2.160139,-0.885439,-0.126838,0.013415,0.099936,...,0.148792,0.022132,0.006052,-0.175148,0.089852,-0.151903,-0.165522,0.068968,0.047430,1170000
13578,1085.047682,267.733818,0.664296,-3.473803,-2.785716,2.664701,-0.975002,-0.466211,0.375389,-1.037217,...,0.125811,0.000796,0.073436,-0.117593,0.121307,-0.171482,-0.240113,0.055094,0.063966,2500000


In [87]:
for column in df.columns:
    if column != "Price":
        scaler = StandardScaler()
        normalized_data = scaler.fit_transform(np.array(df[column]).reshape(-1, 1))
        df[column] = normalized_data
df

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,...,PC42,PC43,PC44,PC45,PC46,PC47,PC48,PC49,PC50,Price
0,0.780992,-0.121056,-0.008711,-1.367526,0.615374,-0.051982,0.096447,-1.563192,-0.793242,0.666369,...,-0.507336,-0.673986,-1.689029,-0.213247,-0.210067,-1.697611,0.436558,0.112969,0.976944,1480000
1,0.780568,-0.132744,-0.192151,-1.363125,0.838165,-1.234922,0.407271,-1.502318,-0.159982,1.292698,...,0.792113,-1.400555,-2.254784,3.387014,-0.379108,-0.814810,1.858914,0.053881,0.909259,1035000
2,0.780418,-0.138088,-0.009640,-1.349778,-0.214895,-1.870913,-1.408233,0.079200,1.785740,0.615602,...,-1.606309,-0.146534,-0.658576,-1.239743,-0.196325,-1.569064,0.097363,0.050490,1.105640,1465000
3,0.780081,-0.148103,-0.006116,-1.341713,-0.466241,-0.686217,-1.541773,0.056368,1.020700,0.082456,...,-1.561300,0.165062,-0.958397,-1.535367,-0.054645,-1.349672,0.576074,-0.184822,0.842305,850000
4,0.780296,-0.141613,-0.029808,-1.335448,-0.907822,0.139753,0.165570,-1.622742,0.950899,0.102359,...,-0.378010,-0.903773,-1.217312,0.139119,-0.123572,-0.209995,-0.556389,-0.087981,-0.354735,1600000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,0.015033,0.022858,-0.003806,1.100506,-0.795625,-0.436426,-0.676137,0.117248,-0.177780,0.612492,...,1.034840,2.904084,0.004573,0.455986,0.226624,-0.111671,-1.451797,-0.535589,2.644735,1245000
13576,0.243283,-0.066439,-0.047010,-0.619419,-0.559241,0.419353,-1.142916,0.246199,1.692204,-1.093186,...,0.838054,0.173253,-0.064324,-0.708176,0.347654,-0.880756,-1.080377,0.085236,0.528549,1031000
13577,0.244161,-0.040603,-0.003348,-0.607275,-1.054605,2.623598,-1.230724,-0.190482,0.023806,0.190809,...,0.735742,0.113091,0.031088,-0.910648,0.472275,-0.810616,-0.897022,0.382586,0.283591,1170000
13578,0.247788,0.067099,0.001703,-0.590657,-1.928259,3.236414,-1.355213,-0.700142,0.666164,-1.980381,...,0.622110,0.004066,0.377217,-0.611401,0.637603,-0.915094,-1.301259,0.305620,0.382465,2500000


In [81]:
file_path = '../part_a/data/data09.csv'
df = pd.read_csv(file_path, header=None, delimiter=';')
df.head()

Unnamed: 0,0,1,2
0,0.774,0.288,1
1,0.989,0.48,1
2,0.135,0.419,0
3,0.802,0.395,1
4,0.924,0.172,1
