In [6]:
import numpy as np
import matplotlib.pyplot as pyplot
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, learning_curve, StratifiedShuffleSplit
import pandas as pd
from functools import reduce

import DataPreprocessing

In [7]:
##Obtain & process data NPY
me_train, mom_train = DataPreprocessing.npy('NLO/MG_uuxg/me_1000000.npy', 'NLO/MG_uuxg/mom_1000000.npy', [], 1000, frac=0.1)
me_test, mom_test = DataPreprocessing.npy('NLO/MG_uuxg/me_300000.npy', 'NLO/MG_uuxg/mom_300000.npy', [], 1000, frac=0.1)

In [8]:
from sklearn.preprocessing import PolynomialFeatures


def calc_RMSE(mom_train, mom_test, me_train, me_test, var_name):
    ##Data transformation
    pipeline = Pipeline([
        ('poly', PolynomialFeatures(degree=2)),
        ('scaler', StandardScaler()), #Rescale Data.
    ])
 
    mom_train = pipeline.fit_transform(mom_train) #Rescale on training set
    mom_test = pipeline.transform(mom_test) #Rescale on test set

    linreg = LinearRegression().fit(mom_train, me_train) #Linear fit
    pred = linreg.predict(mom_test) #Prediction on test set
    
    mse = mean_squared_error(me_test, pred) #Mean squared error on test set
    print('{} RMSE: {}'.format(var_name, np.sqrt(mse)))

    perc = np.mean(100*np.divide(np.abs(me_test - pred), me_test))
    print('{} Percentage Error: {}'.format(var_name, perc))
    
for i, name in enumerate(['Born', 'Real', 'Sing', 'Doub']):
    div_train = reduce(np.multiply, DataPreprocessing.mandel_creation(['1,3','2,3'], mom_train))
    div_test = reduce(np.multiply, DataPreprocessing.mandel_creation(['1,3','2,3'], mom_test))

    temp_train = np.multiply(div_train, me_train[:,i])
    temp_test = np.multiply(div_test, me_test[:,i])
        
    calc_RMSE(mom_train, mom_test, temp_train, temp_test, name)

Born RMSE: 1.0113494747286401e-07
Born Percentage Error: 3.262965813336881e-12
Real RMSE: 19520.858383874478
Real Percentage Error: 31.90201858937738
Sing RMSE: 3238.530905981832
Sing Percentage Error: -9.920705289063822
Doub RMSE: 1.6051563407153593e-06
Doub Percentage Error: -9.90084971764896e-11
