In [1]:
#Train model on simulated data

import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interpn
import os
import glob
import random

In [2]:

import config
import utils


In [3]:
# Read all simulated profiles for a regular grid of primary beam parameters, for fields 3x3, 10x10, and 30x30.
# Every grid point is a tuple of strings: (energy, energy dispersion, source size, angular divergence).
dataPoints = [
    tuple(str(value) for value in (energy, dispersion, sourceSize, divergence))
    for energy in config.simulatedEnergies
    for dispersion in config.simulatedEnergyDispersions
    for sourceSize in config.simulatedSourceSizes
    for divergence in config.simulatedAngularDivergences
]

# Shuffle with a fixed seed so the ordering of the grid points is repeatable.
random.seed(config.SEED)
random.shuffle(dataPoints)

# The profiles are read in the shuffled order of dataPoints.
profiles = utils.readProfiles(config.profileDIR, dataPoints)


In [4]:

# Create means and goals for the selected part (config.FRACTION) of all simulated data

# For each of the three fields keep the leading config.FRACTION share of the profiles.
trainProfiles = []
for field in range(3):
    nTrain = int(profiles[field].shape[0] * config.FRACTION)
    trainProfiles.append(profiles[field][:nTrain])

# Regression targets: the beam parameters of the selected data points, converted
# from strings to floats. The builtin `float` is used because the `np.float`
# alias was deprecated in NumPy 1.20 and removed in NumPy 1.24; the resulting
# dtype (float64) is the same.
goals = np.asarray(dataPoints[:int(len(dataPoints) * config.FRACTION)], dtype=float)

# Mean profile of every field, averaged over the training samples (axis 0).
means = [np.mean(trainProfiles[field], 0) for field in range(3)]

# Mean-subtracted training profiles; broadcasting subtracts the per-field mean
# from every sample, so no explicit stacking of the mean is required.
diffTrain = [trainProfiles[field] - means[field] for field in range(3)]

np.save(config.modelDIR + config.meansFileName, np.asarray(means))
np.save(config.modelDIR + config.goalsFileName, goals)

print(goals.shape,type(goals[0,0]))
print(trainProfiles[0].shape)

(300, 4) <class 'numpy.float64'>
(300, 6, 495)


In [5]:

# Run PCA to reduce dimensionality of the data; write PCA models to files
# PCA - https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

from sklearn.decomposition import PCA
import pickle


# Fit one PCA model per analysed (field, profile) pair on the mean-subtracted
# training profiles, pickle every fitted model, and collect the projections.
trainFeatures = []

for nfield, (field, Ranges) in enumerate(zip(config.analyzedProfiles, config.analyzedRanges)):
    # Entries equal to None are skipped (identity check instead of `!= None`).
    if field is None:
        continue
    for profile, Range in zip(field, Ranges):
        pca = PCA(n_components=config.numbefOfPCAFeatures)
        # Restrict the selected profile to the [Range[0], Range[1]) slice of its last axis.
        X = diffTrain[nfield][:, profile, Range[0]:Range[1]]
        pca.fit(X)
        X_projected = pca.transform(X)
        trainFeatures.append(X_projected)

        pcaName = config.modelDIR + 'PCA_' + str(nfield) + '_' + str(profile) + '_.pkl'
        # Context manager guarantees the file is flushed and closed after dumping.
        with open(pcaName, "wb") as pcaFile:
            pickle.dump(pca, pcaFile)

        print(nfield,profile,Range[0],Range[1],np.sum(pca.explained_variance_ratio_),pca.explained_variance_ratio_)

# Join the per-profile feature blocks side by side: one row per training sample,
# columns ordered by PCA model and then by component. This yields the same array
# as stacking, swapping the first two axes and flattening the last two.
X_train = np.concatenate(trainFeatures, axis=1)
print(X_train.shape)

np.save(config.modelDIR + config.trainFeaturesFileName,X_train)


1 0 0 298 0.9884003797799471 [0.97495119 0.00993918 0.00351001]
1 1 166 329 0.9894553033022359 [0.85234076 0.12605318 0.01106136]
1 3 159 336 0.986504395353545 [0.8176412  0.12863054 0.04023265]
2 0 0 297 0.981171087343056 [0.87828978 0.09371993 0.00916138]
2 1 65 430 0.9931304311772537 [0.94832709 0.03923426 0.00556908]
2 3 50 445 0.9928726172870717 [0.92280423 0.05462276 0.01544562]
(300, 18)


In [6]:

# Run regression - the best model is selected by 5-fold cross-validation on the training data
# The best models are saved to files and used later for testing
# SVR - https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
# Model selection - https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Reload the training features and goals from the files saved in config.modelDIR.
featuresFileName = config.modelDIR + config.trainFeaturesFileName
goalsFileName = config.modelDIR + config.goalsFileName

X_train = np.load(featuresFileName)
y_train = np.load(goalsFileName)
print(X_train.shape,y_train.shape)

# sample_weights can be added to indicate the weight of individual profiles

# Hyper-parameter grid searched for every regression target.
parameters = {'C': np.arange(0.5, 20.5, 2.5),
              'epsilon': [0.01, 0.1, 0.5, 1]}

# Train one grid-searched SVR per goal column listed in config.trainingGoals.
for param in config.trainingGoals:
    svr = SVR()
    clf = GridSearchCV(svr, parameters)
    clf.fit(X_train, y_train[:, param])
    # Uncomment to inspect the selected hyper-parameters:
    # print(clf.best_params_)
    # NOTE(review): this is the score on the training data itself, not the
    # cross-validated score (that one is available as clf.best_score_).
    # The recorded run shows 0.0 for the second goal - TODO investigate.
    print(clf.score(X_train,y_train[:,param]))
    modelName = config.modelDIR + 'SVR_' + str(param) + '_.pkl'
    # Context manager guarantees the file is flushed and closed after dumping.
    with open(modelName, "wb") as modelFile:
        pickle.dump(clf, modelFile)


(300, 18) (300, 4)
0.9890769898754829
0.0
0.99753602689454
0.9807872079755784
