In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, BatchNormalization, Dropout
from sklearn.externals import joblib
from sklearn.metrics import r2_score, mean_squared_error
from math import sqrt
from numpy.random import seed
import matplotlib.pyplot as plt
import random
import time
from sklearn.utils import shuffle
import copy
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
%matplotlib inline



In [None]:
# Locations of the raw count files, the cached arrays / results, and the Geant4 macro.
data_dir = 'drive/My Drive/rpt_project/rpt_data/4walls_materials_RawData/'
result_dir = 'drive/My Drive/rpt_project/rpt_data/results/'

calibration_points = 'drive/My Drive/rpt_project/rpt_data/old4walls_materials/r_10527_5M.mac'

# One raw-data file per task. Every name shares the same prefix/suffix and differs
# only in the middle tag (presumably <wall material>_<medium> -- TODO confirm).
data_files = [
    'r_16_10527_5M_{}_counts_RawData.txt'.format(tag)
    for tag in (
        'Air_Air',
        'AC_Air', 'AC_Water', 'AC_Glass', 'AC_Steel',
        'AL_Air', 'AL_Water', 'AL_Glass', 'AL_Steel',
        'SS_Air', 'SS_Water', 'SS_Glass',
    )
]

# Number of tasks == number of raw-data files listed above.
num_tasks = len(data_files)

In [None]:
# Display the installed Keras version so the runs below can be tied to it.
keras.__version__

'2.4.3'

In [None]:
# Performance
def evaluate_performance(real, pred):
    """Score predicted values against the true values.

    Args:
        real (array-like): True values, one row per sample (2-D).
        pred (array-like): Predictions with the same shape as ``real``.

    Returns:
        tuple: ``(mede, r2, rmse)`` where

            * ``mede`` is the mean over rows of the Euclidean norm of the
              per-row error, multiplied by 1000;
            * ``r2`` is ``r2_score(real, pred)``, returned unscaled;
            * ``rmse`` is the square root of ``mean_squared_error(real, pred)``,
              multiplied by 1000.
    """
    # Accept plain lists as well as arrays; ndarray inputs pass through unchanged.
    real = np.asarray(real)
    pred = np.asarray(pred)

    # Euclidean error of every sample, then averaged over the samples.
    distances = np.sqrt(np.sum((real - pred) ** 2, axis=1))
    mede = np.sum(distances) / pred.shape[0]

    r2 = r2_score(real, pred)
    rmse = sqrt(mean_squared_error(real, pred))

    # The factor 1000 presumably converts metres to millimetres -- TODO confirm units.
    return mede*1000, r2, rmse*1000

In [None]:
def read_position_from_mac(calibration_file):
    """Read the positions of tracers from .mac file

    Every line containing ``/gun/position`` is split on whitespace and the
    three tokens after the first are parsed as floats.

    Args:
        calibration_file (str): Name of the file containing Geant4 macro.

    Returns:
        numpy.ndarray: Float array of shape ``(n_positions, 3)``; the shape is
        ``(0, 3)`` when the file holds no ``/gun/position`` line.

    Raises:
        IndexError: If a matching line has fewer than four tokens.
        ValueError: If one of the three coordinate tokens is not a number.
    """
    simulated_positions = []
    # The context manager closes the file even if parsing fails midway.
    with open(calibration_file) as mac_file:
        for line in mac_file:
            if "/gun/position" not in line:
                continue
            fields = line.split()
            simulated_positions.append(
                [float(fields[1]), float(fields[2]), float(fields[3])])
    # reshape keeps a consistent 2-D result even when nothing matched.
    return np.array(simulated_positions, dtype=float).reshape(-1, 3)

In [None]:
def create_ann_model(input_dim, lr, layers=None):
    """Build and compile a fully connected regression network.

    Every layer except the last is a ``Dense`` layer followed by a ReLU
    activation; the last layer is a ``Dense`` layer with a linear activation.
    The model is compiled with the Adam optimizer and a mean-squared-error loss.

    Args:
        input_dim (int): Number of input features.
        lr (float): Learning rate handed to ``keras.optimizers.Adam``.
        layers (sequence of int, optional): Units of each ``Dense`` layer in
            order, the output layer last. Defaults to ``[256, 128, 128, 16, 3]``.

    Returns:
        keras.models.Sequential: The compiled model.

    Raises:
        ValueError: If ``layers`` has fewer than two entries.
    """
    layers = [256, 128, 128, 16, 3] if layers is None else list(layers)
    if len(layers) < 2:
        raise ValueError('layers needs at least one hidden and one output size')

    model = Sequential()

    # First layer: the only one that declares the input dimension.
    model.add(Dense(layers[0], kernel_initializer='normal', input_dim=input_dim))
    model.add(Activation('relu'))

    # Remaining hidden layers.
    for units in layers[1:-1]:
        model.add(Dense(units, kernel_initializer='normal'))
        model.add(Activation('relu'))

    # Output layer: linear activation for regression targets.
    model.add(Dense(layers[-1], kernel_initializer='normal', activation='linear'))

    opt = keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mean_squared_error'])

    return model

In [None]:
# Training rows drawn per task, and number of input features per row.
sample, num_features = 1000, 16

# Seed NumPy's global random generator.
# NOTE(review): no Keras/TensorFlow seed is set in the visible cells, so model
# initialisation may still differ between runs -- confirm if exact
# reproducibility is required.
np.random.seed(2020)

In [None]:
def _load_result_array(filename):
    """Load one cached ``.npy`` array stored under ``result_dir``."""
    with open(result_dir + filename, 'rb') as handle:
        return np.load(handle)


# Features and labels for the train and test splits. Later cells slice these
# arrays in consecutive per-task blocks (8527 train rows, 2000 test rows).
X_train_all = _load_result_array('train_data_8527.npy')
X_test_all = _load_result_array('test_data_2000.npy')
y_train_all = _load_result_array('train_label_8527.npy')
y_test_all = _load_result_array('test_label_2000.npy')

In [None]:
# ANN BL and PT experiments
# One network is trained per task on `sample` rows of that task and then scored
# on the test split of every task. Entry [i][j] of each result list is
# "trained on task i, tested on task j"; i == j is the same-task case.
train_per_task, test_per_task = 8527, 2000  # rows per task in the cached arrays
if sample >= train_per_task:
    raise ValueError('sample must be smaller than the per-task training size '
                     'so that validation rows remain')

mede_arr, r2_arr, rmse_arr = [], [], []

for i in range(num_tasks):

    train_rows = slice(train_per_task * i, train_per_task * (i + 1))
    X_cand, y_cand = shuffle(X_train_all[train_rows], y_train_all[train_rows])

    # The first `sample` shuffled rows train the model; the remaining rows are
    # held out for validation so that early stopping (which watches the
    # validation loss by default) is not driven by rows the model was fit on.
    X_train, y_train = X_cand[:sample], y_cand[:sample]
    X_val, y_val = X_cand[sample:], y_cand[sample:]

    # scaling: statistics come from the training rows only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    NN_model = create_ann_model(X_train.shape[1], 0.0001)

    NN_model.fit(X_train, y_train,
                 validation_data=(X_val, y_val),
                 epochs=100,
                 batch_size=32,
                 callbacks=[keras.callbacks.EarlyStopping(patience=5)],
                 verbose=0)

    m, r, s = [], [], []

    for j in range(num_tasks):

        test_rows = slice(test_per_task * j, test_per_task * (j + 1))
        X_test = scaler.transform(X_test_all[test_rows])
        y_test = y_test_all[test_rows]

        recons = NN_model.predict(X_test)

        mede, r2, rmse = evaluate_performance(y_test, recons)
        m.append(mede)
        r.append(r2)
        s.append(rmse)

    mede_arr.append(m)
    r2_arr.append(r)
    rmse_arr.append(s)

In [None]:
# LO experiments: ANN
# For every task, train on `sample` rows from each of the other tasks and
# evaluate on the test split of the task that was left out.
train_per_task, test_per_task = 8527, 2000  # rows per task in the cached arrays
total_train = sample * (num_tasks - 1)
if sample >= train_per_task:
    raise ValueError('sample must be smaller than the per-task training size '
                     'so that validation rows remain')

mede_arr2, r2_arr2, rmse_arr2 = [], [], []

for i in range(num_tasks):
    test_task = i
    train_tasks = [task for task in range(num_tasks) if task != test_task]
    train_data, train_label = [], []
    val_data, val_label = [], []

    for task in train_tasks:
        rows = slice(train_per_task * task, train_per_task * (task + 1))
        X_cand, y_cand = shuffle(X_train_all[rows], y_train_all[rows])

        train_data.append(X_cand[:sample])
        train_label.append(y_cand[:sample])
        # Rows not used for training are held out for validation, for every
        # training task rather than only the last one visited.
        val_data.append(X_cand[sample:])
        val_label.append(y_cand[sample:])

    X_train = np.concatenate(train_data)
    y_train = np.concatenate(train_label)
    X_val = np.concatenate(val_data)
    y_val = np.concatenate(val_label)
    assert X_train.shape == (total_train, num_features)
    assert y_train.shape == (total_train, 3)

    test_rows = slice(test_per_task * test_task, test_per_task * (test_task + 1))
    X_test = X_test_all[test_rows]
    y_test = y_test_all[test_rows]

    # scaling: fit on the pooled training rows (not on the last task's
    # candidate pool), then apply the same transform everywhere
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    NN_model = create_ann_model(X_train.shape[1], 0.0001)

    NN_model.fit(X_train, y_train,
                 validation_data=(X_val, y_val),
                 epochs=100,
                 batch_size=32,
                 callbacks=[keras.callbacks.EarlyStopping(patience=5)],
                 verbose=0)

    recons = NN_model.predict(X_test)

    print('Test task: ', i)
    mede, r2, rmse = evaluate_performance(y_test, recons)
    print(mede, r2, rmse)
    mede_arr2.append(mede)
    r2_arr2.append(r2)
    rmse_arr2.append(rmse)

In [None]:
# AI experiments: ANN
# Train a single network on `sample` rows from every task and evaluate it on
# the test split of each task in turn.
train_per_task, test_per_task = 8527, 2000  # rows per task in the cached arrays
total_train = num_tasks * sample
if sample >= train_per_task:
    raise ValueError('sample must be smaller than the per-task training size '
                     'so that validation rows remain')

train_data, train_label = [], []
all_data, all_label = [], []

mede_arr3, r2_arr3, rmse_arr3 = [], [], []

for task in range(num_tasks):
    rows = slice(train_per_task * task, train_per_task * (task + 1))
    data, label = shuffle(X_train_all[rows], y_train_all[rows])
    all_data.append(data)
    all_label.append(label)
    # The first `sample` shuffled rows of each task form the training set.
    train_data.append(data[:sample])
    train_label.append(label[:sample])

X_train = np.concatenate(train_data)
y_train = np.concatenate(train_label)
assert X_train.shape == (total_train, num_features)
assert y_train.shape == (total_train, 3)

X_all = np.concatenate(all_data)
y_all = np.concatenate(all_label)
assert X_all.shape == (train_per_task * num_tasks, num_features)

# scaling: statistics come from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_all = scaler.transform(X_all)

# Validate only on rows that were not used for training: inside each task's
# block of `train_per_task` rows, the first `sample` rows are the training rows.
held_out = (np.arange(X_all.shape[0]) % train_per_task) >= sample
X_val, y_val = X_all[held_out], y_all[held_out]

NN_model = create_ann_model(X_train.shape[1], 0.0001)

NN_model.fit(X_train, y_train,
             validation_data=(X_val, y_val),
             epochs=100,
             batch_size=32,
             callbacks=[keras.callbacks.EarlyStopping(patience=5)],
             verbose=0)

for i in range(num_tasks):

    test_rows = slice(test_per_task * i, test_per_task * (i + 1))
    X_test = scaler.transform(X_test_all[test_rows])
    y_test = y_test_all[test_rows]

    recons = NN_model.predict(X_test)

    print('Task: ', i)
    mede, r2, rmse = evaluate_performance(y_test, recons)
    print(mede, r2, rmse)
    mede_arr3.append(mede)
    r2_arr3.append(r2)
    rmse_arr3.append(rmse)

In [None]:
# MEDE table: one row per training task from the BL/PT runs (columns = test
# task), followed by one row of LO results and one row of AI results.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_mede = pd.concat(
    [pd.DataFrame(mede_arr), pd.DataFrame([mede_arr2]), pd.DataFrame([mede_arr3])],
    ignore_index=True,
)

In [None]:
# Show the MEDE table: the leading rows are the BL/PT runs (row = training task,
# column = test task), followed by the LO row and the AI row.
df_mede

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,2.127241,13.830023,2.100473,11.672489,36.133464,11.711724,3.353867,13.509918,37.820145,10.953228,4.680215,15.540843
1,17.297397,2.326249,17.278767,29.743345,46.769502,5.150664,20.06116,31.701745,48.327431,7.494701,18.524931,29.196384
2,2.000626,13.993752,2.087553,11.180057,37.651918,12.096838,3.110461,12.457629,39.320158,11.270442,4.32047,15.503132
3,7.674452,16.470638,7.761561,2.42147,28.826411,16.056621,8.41053,2.667291,29.995726,16.141728,7.740545,7.1819
4,35.28723,35.950491,35.072216,29.342843,5.263281,38.129644,35.67149,29.106613,5.452945,38.54222,33.443876,25.443002
5,13.527149,4.277581,13.447808,25.094036,36.91405,2.607354,15.755773,26.846435,38.502434,3.399567,14.51956,25.825759
6,3.281095,15.044278,3.321434,9.502772,31.137264,13.411335,1.966374,10.598453,32.757594,12.51394,4.59998,13.757847
7,9.682424,17.629639,9.783668,3.418188,25.072479,17.010145,9.103103,2.564214,25.490334,17.197082,9.117865,7.022931
8,34.538629,35.943891,34.271627,27.728433,5.854403,37.757607,34.885942,27.251581,5.286312,37.783059,32.164473,23.299179
9,12.54019,6.360439,12.41785,22.464499,45.71931,3.689826,13.941966,23.100149,46.625736,2.468164,12.24831,23.262243


In [None]:
# R^2 table: one row per training task from the BL/PT runs (columns = test
# task), followed by one row of LO results and one row of AI results.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_r2 = pd.concat(
    [pd.DataFrame(r2_arr), pd.DataFrame([r2_arr2]), pd.DataFrame([r2_arr3])],
    ignore_index=True,
)

In [None]:
# RMSE table: one row per training task from the BL/PT runs (columns = test
# task), followed by one row of LO results and one row of AI results.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_rmse = pd.concat(
    [pd.DataFrame(rmse_arr), pd.DataFrame([rmse_arr2]), pd.DataFrame([rmse_arr3])],
    ignore_index=True,
)

In [None]:
# Show the R^2 table: the leading rows are the BL/PT runs (row = training task,
# column = test task), followed by the LO row and the AI row.
df_r2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.999664,0.968911,0.999688,0.959635,0.288304,0.971267,0.998625,0.953805,0.246128,0.968397,0.997558,0.945159
1,0.942723,0.999693,0.943717,0.789507,-0.468621,0.994187,0.933576,0.768517,-0.559396,0.986901,0.942297,0.778669
2,0.999327,0.969667,0.99936,0.961951,0.295376,0.969425,0.998985,0.957879,0.25408,0.964186,0.997586,0.947172
3,0.984107,0.942256,0.984331,0.999424,0.64364,0.937166,0.977754,0.999208,0.627047,0.92688,0.97751,0.993442
4,0.692598,0.693277,0.694433,0.756471,0.996103,0.663294,0.679936,0.749528,0.996805,0.64113,0.688006,0.787139
5,0.952487,0.993963,0.952854,0.795227,-0.424146,0.999592,0.946855,0.776329,-0.504152,0.997132,0.950599,0.773485
6,0.99857,0.960167,0.998546,0.955643,0.336242,0.963784,0.99963,0.949726,0.29719,0.961614,0.997062,0.942421
7,0.984401,0.944987,0.984388,0.99903,0.670113,0.942522,0.979892,0.999507,0.652007,0.935869,0.980922,0.993807
8,0.704255,0.69457,0.708684,0.773602,0.995996,0.663693,0.691335,0.770091,0.99702,0.645194,0.70519,0.807121
9,0.948587,0.983275,0.949022,0.790202,-0.659453,0.996713,0.947092,0.776075,-0.73961,0.999472,0.949635,0.76337


In [None]:
# Show the RMSE table: the leading rows are the BL/PT runs (row = training task,
# column = test task), followed by the LO row and the AI row.
df_rmse

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.697936,8.188834,0.681151,7.085695,25.682297,7.021651,1.797638,7.903713,26.53229,6.684521,2.603207,9.171233
1,10.238106,0.708613,10.075354,14.52063,36.736016,4.294264,11.603915,15.183127,37.630456,6.196346,9.723492,14.216464
2,0.801686,7.633358,0.80522,6.234317,25.006939,6.738983,1.321425,6.877422,25.72738,6.64567,2.480513,8.829466
3,4.000985,9.778407,4.019099,0.896836,19.867783,9.392923,4.846295,1.168918,20.477119,9.663607,4.631115,4.484242
4,29.249928,26.864469,29.012986,26.841926,2.504826,29.06044,30.505599,27.826121,2.311999,29.927793,29.12874,24.663009
5,8.19702,4.319163,8.102763,13.58434,37.13195,0.869539,9.269578,14.034405,38.016511,2.82641,7.957624,14.522043
6,1.787513,9.550908,1.797728,6.742214,25.104632,8.462901,0.725562,7.280041,25.880132,7.964132,2.977253,9.181135
7,4.199301,9.426911,4.241148,1.146121,19.629625,9.206033,4.790018,0.814254,20.234548,9.317188,4.453464,4.329052
8,27.568509,25.663219,27.218424,25.011252,2.444998,27.807919,28.635927,25.698882,2.101012,28.555299,27.185704,22.796423
9,7.846004,6.93621,7.827036,13.565642,38.384866,2.929382,8.510624,13.790665,39.21955,0.843295,7.48516,14.853351


In [None]:
# Write each metric table to a CSV in the working directory; the file name
# carries the per-task training size.
suffix = str(sample) + '.csv'
for prefix, frame in (('ann_mede_', df_mede),
                      ('ann_r2_', df_r2),
                      ('ann_rmse_', df_rmse)):
    frame.to_csv(prefix + suffix)

In [None]:
# Same-task summary: mean of the diagonal (train task == test task) of the
# BL/PT block, i.e. the first num_tasks rows, for MEDE, R^2 and RMSE in turn.
for frame in (df_mede, df_r2, df_rmse):
    print(np.mean(frame[:num_tasks].values.diagonal()))

2.3914153568995826
0.9983370239693629
1.5892337010951085


In [None]:
# LO summary: mean over the LO row, which sits directly after the num_tasks
# BL/PT rows, for MEDE, R^2 and RMSE in turn.
for frame in (df_mede, df_r2, df_rmse):
    print(np.mean(frame[num_tasks:num_tasks + 1].values))

2.8631690118256685
0.9980620900577876
1.8988922437687015


In [None]:
# AI summary: mean over the AI row, which is the row after the LO row, for
# MEDE, R^2 and RMSE in turn.
for frame in (df_mede, df_r2, df_rmse):
    print(np.mean(frame[num_tasks + 1:num_tasks + 2].values))

3.0176137301204062
0.9983207827540541
1.9895585108800267


In [None]:
# Cross-task summary: mean of the off-diagonal entries (train task != test task)
# of the BL/PT block. There are num_tasks * (num_tasks - 1) such entries.
off_diagonal_count = num_tasks * (num_tasks - 1)
for frame in (df_mede, df_r2, df_rmse):
    block = frame[:num_tasks].values
    print((np.sum(block) - np.sum(block.diagonal())) / off_diagonal_count)

23.08258931485394
0.7820101593198319
13.964378997290929
