In [1]:
from copy import deepcopy
import numpy as np
import pandas as pd

def train_test_split_self(data, labels, split_ratio=0.2):
    split_ratio = 1 - split_ratio
    data.reset_index(drop=True, inplace=True)
    labels.reset_index(drop=True, inplace=True)
    indices = np.random.permutation(data.shape[0])
    split_index = int(split_ratio * data.shape[0])
    X_train_split = data.iloc[indices[:split_index]]
    y_train_split = labels.iloc[indices[:split_index]]
    X_val_split = data.iloc[indices[split_index:]]
    y_val_split = labels.iloc[indices[split_index:]]

    return X_train_split, X_val_split, y_train_split, y_val_split

def read_csv_files_pandas(train_data_path, train_labels_path, test_data_path):

    data = pd.read_csv(train_data_path)
    labels = pd.read_csv(train_labels_path)
    data['BEDS'] = labels['BEDS']

    data = data[data['PRICE'] < 16000000.0]
    data = data[data['PROPERTYSQFT'] < 35000]
    data = data[data['BATH'] < 30]

    temp_df = pd.get_dummies(data['SUBLOCALITY'], drop_first=True, dtype=int)
    data = pd.concat([data,temp_df],axis=1)
    temp_df = pd.get_dummies(data['TYPE'], drop_first=True, dtype=int)
    data = pd.concat([data,temp_df],axis=1)

    drop_columns = ['BROKERTITLE', 'TYPE', 'ADDRESS',
       'STATE', 'MAIN_ADDRESS', 'ADMINISTRATIVE_AREA_LEVEL_2', 'LOCALITY',
       'SUBLOCALITY', 'STREET_NAME', 'LONG_NAME', 'FORMATTED_ADDRESS',
       'LATITUDE', 'LONGITUDE', 'BEDS']

    X = data.drop(columns=drop_columns)
    X_org = X.copy()
    y = data['BEDS']
    X = (X - X.min())/(X.max() - X.min())

    train_data, val_data, train_labels, val_labels = train_test_split_self(X, y, 0.2)

    train_data.reset_index(drop=True, inplace=True)
    train_labels.reset_index(drop=True, inplace=True)
    val_data.reset_index(drop=True, inplace=True)
    val_labels.reset_index(drop=True, inplace=True)

    train_data, val_data, train_labels, val_labels = np.array(train_data), np.array(val_data), np.array(train_labels), np.array(val_labels)

    test_data = pd.read_csv(test_data_path)

    temp_df = pd.get_dummies(test_data['SUBLOCALITY'], drop_first=True, dtype=int)
    test_data = pd.concat([test_data,temp_df],axis=1)
    temp_df = pd.get_dummies(test_data['TYPE'], drop_first=True, dtype=int)
    test_data = pd.concat([test_data,temp_df],axis=1)

    test_data = test_data.reindex(columns = data.columns, fill_value=0)

    test_data = test_data.drop(columns=drop_columns)

    test_data = (test_data - X_org.min())/(X_org.max() - X_org.min())
    test_data = np.array(test_data)
    test_labels = np.zeros((test_data.shape[0],))

    print('Train labels:', train_labels.shape, 'val:', val_labels.shape, 'test:', test_labels.shape)
    return train_data, train_labels, val_data, val_labels, test_data, test_labels

class softmax_cross_entropy:
    def __init__(self):
        self.expand_Y = None
        self.calib_logit = None
        self.sum_exp_calib_logit = None
        self.prob = None

    def forward(self, X, Y):
        self.expand_Y = np.zeros(X.shape).reshape(-1)
        self.expand_Y[Y.astype(int).reshape(-1) + np.arange(X.shape[0]) * X.shape[1]] = 1.0
        self.expand_Y = self.expand_Y.reshape(X.shape)
        self.calib_logit = X - np.amax(X, axis = 1, keepdims = True)
        self.sum_exp_calib_logit = np.sum(np.exp(self.calib_logit), axis = 1, keepdims = True)
        self.prob = np.exp(self.calib_logit) / self.sum_exp_calib_logit
        forward_output = - np.sum(np.multiply(self.expand_Y, self.calib_logit - np.log(self.sum_exp_calib_logit))) / X.shape[0]
        return forward_output

    def backward(self, X, Y):
        backward_output = - (self.expand_Y - self.prob) / X.shape[0]
        return backward_output

def predict_label(f):
    if f.shape[1] == 1:
        return (f > 0).astype(float)
    else:
        return np.argmax(f, axis=1).astype(float).reshape((f.shape[0], -1))

class DataSplit:
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.N, self.d = self.X.shape

    def get_entire_dataset(self):
        return self.X, self.Y

    def get_example(self, idx):
        batchX = np.zeros((len(idx), self.d))
        batchY = np.zeros((len(idx), 1))
        for i in range(len(idx)):
            batchX[i] = self.X[idx[i]]
            batchY[i, :] = self.Y[idx[i]]

        return batchX, batchY

class linear_layer:
    
    def __init__(self, input_D, output_D):
        self.params = dict()
        self.gradient = dict()       
        self.params['W'] = np.random.normal(0, 0.1, (input_D, output_D))
        self.params['b'] = np.random.normal(0, 0.1, (1, output_D))

        self.gradient['W'] = np.zeros((input_D, output_D))
        self.gradient['b'] = np.zeros((1, output_D))

    def forward(self, X):
        forward_output = X@self.params['W'] + self.params['b']
        return forward_output

    def backward(self, X, grad):
        self.gradient['W'] = np.dot(X.T, grad)
        self.gradient['b'] = np.sum(grad, axis=0, keepdims=False)
        backward_output = np.dot(grad, self.params['W'].T)
        return backward_output

class relu:
    def __init__(self):
        self.mask = None

    def forward(self, X):
        zerosss = np.zeros(X.shape)
        forward_output = np.maximum(zerosss, X)
        return forward_output

    def backward(self, X, grad):
        backward_output = grad * (X > 0)
        return backward_output

def miniBatchGradientDescent(model, _learning_rate):
    for module_name, module in model.items():
        if hasattr(module, 'params'):
            for key, _ in module.params.items():
                g = module.gradient[key]
                module.params[key] -= _learning_rate * g

    return model

def forward_pass(model, x, y):
    a1 = model['L1'].forward(x)
    h1 = model['nonlinear1'].forward(a1)
    a2 = model['L2'].forward(h1)
    h2 = model['nonlinear2'].forward(a2)
    a3 = model['L3'].forward(h2)
    loss = model['loss'].forward(a3, y)

    return a1, h1, a2, h2, a3, loss

def backward_pass(model, x, a1, h1, a2, h2, a3, y):
    grad_a3 = model['loss'].backward(a3, y)
    grad_h2 = model['L3'].backward(h2, grad_a3)
    grad_a2 = model['nonlinear2'].backward(a2, grad_h2)
    grad_h1 = model['L2'].backward(h1, grad_a2)
    grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
    grad_x = model['L1'].backward(x, grad_a1)

def compute_accuracy_loss(N_data, DataSet, model, minibatch_size=100):
    acc = 0.0
    loss = 0.0
    count = 0

    actual_values = []
    predicted_values = []

    for i in range(int(np.floor(N_data / minibatch_size))):
        x, y = DataSet.get_example(np.arange(i * minibatch_size, (i + 1) * minibatch_size))
        _, _, _, _, a2, batch_loss = forward_pass(model, x, y)
        loss += batch_loss
        acc += np.sum(predict_label(a2) == y)
        predicted_values.append(predict_label(a2))
        actual_values.append(y)
        count += len(y)

    return acc / count, loss, actual_values, predicted_values

def compute_accuracy_loss_test(N_data, DataSet, model, minibatch_size=1):
    acc = 0.0
    loss = 0.0
    count = 0
    actual_values = []
    predicted_values = []
    x, y = DataSet.get_entire_dataset()

    _, _, _, _, a2, batch_loss = forward_pass(model, x, y)
    predicted_values.append(predict_label(a2))
    return 0, loss, actual_values, predicted_values


train_data_path = '/Users/sanmitpatil/Library/CloudStorage/GoogleDrive-sanmitpa@usc.edu/My Drive/USC_CS_AI/CS561_AI/HW/HW3_NeuralNetworks/data/train_data4.csv'
train_labels_path = '/Users/sanmitpatil/Library/CloudStorage/GoogleDrive-sanmitpa@usc.edu/My Drive/USC_CS_AI/CS561_AI/HW/HW3_NeuralNetworks/data/train_label4.csv'
test_data_path = '/Users/sanmitpatil/Library/CloudStorage/GoogleDrive-sanmitpa@usc.edu/My Drive/USC_CS_AI/CS561_AI/HW/HW3_NeuralNetworks/data/test_data4.csv'

# train_data_path = 'train_data.csv'
# train_labels_path = 'train_label.csv'
# test_data_path = 'test_data.csv'

Xtrain, Ytrain, Xval, Yval, Xtest, Ytest = read_csv_files_pandas(train_data_path, train_labels_path, test_data_path)
N_train, d = Xtrain.shape
N_val, N_test = Xval.shape[0], Xtest.shape[0]

trainSet, valSet, testSet = DataSplit(Xtrain, Ytrain), DataSplit(Xval, Yval), DataSplit(Xtest, Ytest)

model = dict()
num_L1, num_L2, out_L = 48, 48, 50

num_epoch, minibatch_size, _learning_rate = 300, 16, 0.015

train_acc_record, train_loss_record, val_acc_record, val_loss_record = [], [], [], []
best_epoch, best_model = 0, None

model['L1'] = linear_layer(input_D=d, output_D=num_L1)
model['nonlinear1'] = relu()
model['L2'] = linear_layer(input_D=num_L1, output_D=num_L2)
model['nonlinear2'] = relu()
model['L3'] = linear_layer(input_D=num_L2, output_D=out_L)
model['loss'] = softmax_cross_entropy()

final_test_accuracies, last_test_accuracies = [], []

for t in range(num_epoch):
    idx_order = np.random.permutation(N_train)
    for i in range(int(np.floor(N_train / minibatch_size))):
        x, y = trainSet.get_example(idx_order[i * minibatch_size: (i + 1) * minibatch_size])
        a1, h1, a2, h2, a3, _ = forward_pass(model, x, y)
        backward_pass(model, x, a1, h1, a2, h2, a3, y)
        model = miniBatchGradientDescent(model, _learning_rate)

    train_acc, train_loss, _, __ = compute_accuracy_loss(N_train, trainSet, model)
    train_acc_record.append(train_acc)
    train_loss_record.append(train_loss)

    val_acc, val_loss, _, __ = compute_accuracy_loss(N_val, valSet, model)
    val_acc_record.append(val_acc)
    val_loss_record.append(val_loss)

    print(train_acc, val_acc)

    latest_model = deepcopy(model)
    if val_acc == max(val_acc_record):
        best_model = deepcopy(model)
        best_epoch = t + 1

test_acc, test_loss, actual_values, predicted_values = compute_accuracy_loss_test(N_test, testSet, best_model)

final_output = []
for batchh in predicted_values:
    for val in batchh:
        final_output.append(int(val[0]))

output_path = 'output.csv'
out_df = pd.DataFrame()
out_df['BEDS'] = final_output
out_df.to_csv(output_path, index=False)

# USE_DATASET_SPLIT 4

Train labels: (2648,) val: (662,) test: (1435,)
0.3007692307692308 0.315
0.3007692307692308 0.315
0.3007692307692308 0.315
0.3007692307692308 0.315
0.3007692307692308 0.315
0.3007692307692308 0.315
0.3007692307692308 0.315
0.34423076923076923 0.33166666666666667
0.35115384615384615 0.3383333333333333
0.3465384615384615 0.35
0.34115384615384614 0.34
0.35346153846153844 0.3433333333333333
0.35346153846153844 0.345
0.35807692307692307 0.33166666666666667
0.36153846153846153 0.3333333333333333
0.37538461538461537 0.33666666666666667
0.36423076923076925 0.33666666666666667
0.3861538461538462 0.3466666666666667
0.3696153846153846 0.32
0.3934615384615385 0.3333333333333333
0.3773076923076923 0.3283333333333333
0.3696153846153846 0.31666666666666665
0.37615384615384617 0.3283333333333333
0.38269230769230766 0.3333333333333333
0.38884615384615384 0.33
0.4034615384615385 0.3416666666666667
0.37884615384615383 0.3383333333333333
0.3869230769230769 0.345
0.4103846153846154 0.35833333333333334
0.41