In [1]:
import numpy as np
import matplotlib.pyplot as pyplot
from functools import reduce

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import time
import DataPreprocessing

In [2]:
## Obtain & process data (NPY)
# Load the training and test samples; each call is unpacked as (me, mom).
# NOTE(review): the meaning of the empty-list argument and of `frac=1` is defined
# in DataPreprocessing.npy, which is not visible here -- confirm before changing.
me_train, mom_train = DataPreprocessing.npy('NLO/uuxg/me_1000000.npy', 'NLO/uuxg/mom_1000000.npy', [], frac=1)
me_test, mom_test = DataPreprocessing.npy('NLO/uuxg/me_500000.npy', 'NLO/uuxg/mom_500000.npy', [], frac=1)

# Index pairs handed to DataPreprocessing.mandel_creation.
# presumably particle-index pairs for Mandelstam-like invariants -- TODO confirm.
combs = ['1,3','2,3']


def _scale_by_mandel_product(label, me, mom):
    """Multiply `me` by the product of the terms from `mandel_creation`, and time it.

    Parameters
    ----------
    label : str
        Sample name used as the prefix of the printed timing message
        (e.g. "Train" or "Test"), so the two log lines can be told apart.
    me : array-like
        Values to rescale; broadcast against the factor along a new trailing axis.
    mom : array-like
        Momenta forwarded unchanged to ``DataPreprocessing.mandel_creation``.

    Returns
    -------
    tuple
        ``(div, scaled)`` where ``div`` is the element-wise product of everything
        ``mandel_creation(combs, mom)`` returns and ``scaled`` is
        ``div[:, np.newaxis] * me``.
    """
    tic = time.perf_counter()
    # Fold all returned arrays into a single multiplicative factor.
    div = reduce(np.multiply, DataPreprocessing.mandel_creation(combs, mom))
    # The added trailing axis lets the factor broadcast across the columns of `me`.
    scaled = np.multiply(div[:, np.newaxis], me)
    toc = time.perf_counter()
    print(f"{label} multiplication ran in {toc - tic:0.4f} seconds")
    return div, scaled


# The training message previously also said "Test"; each sample now carries its own label.
div_train, temp_train = _scale_by_mandel_product("Train", me_train, mom_train)
div_test, temp_test = _scale_by_mandel_product("Test", me_test, mom_test)

# Keep only column 1 of the rescaled values as the regression target.
# NOTE(review): what column 1 represents is not visible here -- confirm.
me_test = temp_test[:,1]
me_train = temp_train[:,1]

Test multiplication ran in 0.3784 seconds
Test multiplication ran in 0.1581 seconds


In [3]:
# Collapse each event's momenta into a single 1-D feature row.
# NOTE: this cell overwrites mom_train / mom_test, so re-running it without first
# re-running the loading cell feeds already-transformed data back in.
mom_train = np.array([np.asarray(event).ravel() for event in mom_train])
mom_test = np.array([np.asarray(event).ravel() for event in mom_test])

# Two preprocessing stages applied in order: rescale, then discretise.
preprocessing_steps = [
    ('scaler', StandardScaler()),
    # n_bins is the quantity being varied to check convergence.
    ('kbins', KBinsDiscretizer(n_bins=20000)),
]
pipeline = Pipeline(preprocessing_steps)

# Fit the preprocessing on the training set only, then reuse the fitted
# transform on the test set.
mom_train = pipeline.fit_transform(mom_train)
mom_test = pipeline.transform(mom_test)

# Time only the regression fit; the preprocessing above is excluded.
tic = time.perf_counter()
linreg = LinearRegression()
linreg.fit(mom_train, me_train)
toc = time.perf_counter()

print(f"Ran in {toc - tic:0.4f} seconds")

Ran in 13.6484 seconds


In [4]:
# Time the prediction on the transformed test momenta; saving to disk happens
# after the timer stops and is not included in the reported figure.
tic = time.perf_counter()
me_predict_lin = linreg.predict(mom_test)
toc = time.perf_counter()
print(f"Ran in {toc - tic:0.4f} seconds")

# Persist the predictions as a .npy file.
prediction_path = 'NLO/uuxg/pred_kbins_fin.npy'
np.save(prediction_path, me_predict_lin)

Ran in 0.0344 seconds


In [5]:
# Root-mean-square error on the test sample, using the predictions computed earlier.
lin_mse = mean_squared_error(me_test, me_predict_lin)
rmse_test = np.sqrt(lin_mse)
print('RMSE Test: {}'.format(rmse_test))

# Same metric on the training sample; `lin_mse` is reused and ends up holding
# the training MSE.
me_fit_lin = linreg.predict(mom_train)
lin_mse = mean_squared_error(me_train, me_fit_lin)
rmse_train = np.sqrt(lin_mse)
print('RMSE Train: {}'.format(rmse_train))

# Mean absolute relative deviation on the test sample, expressed in percent.
# Any entry of me_test equal to zero would produce inf/nan in this ratio.
relative_residual = (me_test - me_predict_lin) / me_test
lin_perc = 100*np.mean(np.abs(relative_residual))
print('Percentage Error: {}'.format(lin_perc))

RMSE Test: 4287.239849031828
RMSE Train: 3290.2500159419633
Percentage Error: 157.85534621623296
