## Submission code

This notebook computes the predictions for the `test.csv` file so that they can be
submitted to AIcrowd to measure our accuracy.

In [2]:
# Notebook setup: render figures inline and load the numerical / plotting libraries.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

# Reload imported modules automatically before each cell executes, so edits to
# the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from py_files.data_loader import *
from implementations import *

In [3]:
%store -r
# Restores every variable previously persisted with %store.
# Presumably this is where the w_opt_* weight vectors used below come from — confirm
# that the training notebook has been run and has stored them first.

In [4]:
ids, predictions, data = load_data("test.csv")
# `predictions` holds no usable labels for the test set (presumably empty or
# placeholder values — confirm); computing them is the purpose of this notebook.

In [5]:
from py_files.data_processing import *

In [6]:
# Plain assignment: processed_data is a second name for the same object as data, not a copy.
processed_data = data

In [7]:
# Build the test feature matrix from the raw `data` every time this cell runs.
# Starting from `data` (rather than from `processed_data`) keeps the cell
# idempotent: re-executing it no longer feeds already-processed features back
# through the pipeline a second time.
# The steps are applied in the order listed below.
# NOTE(review): standardize() only receives the test matrix here; if it derives
# its mean/std from its argument, the test features are scaled with test-set
# statistics rather than the training ones — confirm against the training notebook.
processed_data = one_hot_encoding(data)
processed_data = handle_outliers(processed_data)
processed_data = handle_undefined_values(processed_data)
processed_data = polynomial_2(processed_data)
processed_data = standardize(processed_data)
processed_data = ones_concatenate(processed_data)

## 1. Predictions with least squares: gradient descent

In [8]:
# Linear scores from the least-squares gradient-descent weights, cut at 1/2:
# scores below the cut are labelled -1, all others +1.
# NOTE(review): a 1/2 cut-off presumes the weights were fit on {0, 1} targets — confirm.
pred_1 = np.where(np.dot(processed_data, w_opt_gd) < 1 / 2, -1, 1)

# Share of samples labelled +1, which this notebook reports as Boson events.
num = int(np.sum(pred_1 == 1))
print(
    "The percentage of Boson events predicted is %.3f %%"
    % (num / pred_1.shape[0] * 100)
)

The percentage of Boson events predicted is 22.981 %


## 2. Predictions with least squares: stochastic gradient descent

In [9]:
# Linear scores from the least-squares SGD weights, cut at 1/2:
# scores below the cut are labelled -1, all others +1.
# NOTE(review): a 1/2 cut-off presumes the weights were fit on {0, 1} targets — confirm.
pred_2 = np.where(np.dot(processed_data, w_opt_sgd) < 1 / 2, -1, 1)

# Share of samples labelled +1, which this notebook reports as Boson events.
num = int(np.sum(pred_2 == 1))
print(
    "The percentage of Boson events predicted is %.3f %%"
    % (num / pred_2.shape[0] * 100)
)

The percentage of Boson events predicted is 8.945 %


## 3. Predictions with least squares: normal equations

In [10]:
# Linear scores from the least-squares normal-equations weights, cut at 1/2:
# scores below the cut are labelled -1, all others +1.
# NOTE(review): a 1/2 cut-off presumes the weights were fit on {0, 1} targets — confirm.
pred_3 = np.where(np.dot(processed_data, w_opt_ne) < 1 / 2, -1, 1)

# Share of samples labelled +1, which this notebook reports as Boson events.
num = int(np.sum(pred_3 == 1))
print(
    "The percentage of Boson events predicted is %.3f %%"
    % (num / pred_3.shape[0] * 100)
)

The percentage of Boson events predicted is 29.654 %


## 4. Predictions with ridge regression: normal equations

In [11]:
# Linear scores from the ridge-regression weights, cut at 1/2:
# scores below the cut are labelled -1, all others +1.
# These are the predictions written to the submission file at the end of the notebook.
# NOTE(review): a 1/2 cut-off presumes the weights were fit on {0, 1} targets — confirm.
pred_4 = np.where(np.dot(processed_data, w_opt_ridge) < 1 / 2, -1, 1)

# Share of samples labelled +1, which this notebook reports as Boson events.
num = int(np.sum(pred_4 == 1))
print(
    "The percentage of Boson events predicted is %.3f %%"
    % (num / pred_4.shape[0] * 100)
)

The percentage of Boson events predicted is 29.654 %


## 5. Predictions with logistic regression

In [12]:
# Pass the linear scores from the logistic-regression weights through sigmoid()
# and cut at 1/2: values below the cut are labelled -1, all others +1.
pred_5 = np.where(sigmoid(np.dot(processed_data, w_opt_lr)) < 1 / 2, -1, 1)

# Share of samples labelled +1, which this notebook reports as Boson events.
num = int(np.sum(pred_5 == 1))
print(
    "The percentage of Boson events predicted is %.3f %%"
    % (num / pred_5.shape[0] * 100)
)

The percentage of Boson events predicted is 27.417 %


## 6. Predictions with regularized logistic regression with gradient descent

In [13]:
# Pass the linear scores from the regularized logistic-regression weights through
# sigmoid() and cut at 1/2: values below the cut are labelled -1, all others +1.
pred_6 = np.where(sigmoid(np.dot(processed_data, w_opt_rlr)) < 1 / 2, -1, 1)

# Share of samples labelled +1, which this notebook reports as Boson events.
num = int(np.sum(pred_6 == 1))
print(
    "The percentage of Boson events predicted is %.3f %%"
    % (num / pred_6.shape[0] * 100)
)

The percentage of Boson events predicted is 23.747 %


## Creation of submission file

In [14]:
from py_files.helpers import *

In [15]:
# Submit the ridge-regression predictions (pred_4), paired with the ids loaded from test.csv.
# To submit a different model, pass the corresponding pred_N array instead.
create_csv_submission(ids, pred_4, "submission.csv")