## Environment setup

In [1]:
import glob
import importlib

## Network definitions and adaptations

In [2]:
from architecture_definitions import *

# ---- Run configuration flags ------------------------------------------
oneOverPt = False
lut_nn = True
output_type = 0
last_input_is_bias = True

# ---- Output head ------------------------------------------------------
# output_type == 1 selects a 3-neuron last layer trained with custom_loss3;
# every other value selects a single neuron trained with the 'mae' loss.
# NOTE(review): custom_loss3 is not defined in this cell - presumably it comes
# from the architecture_definitions star import; confirm.
if output_type != 1:
    output_cnt = 1
    layer3_neurons = 1
    loss_fn = 'mae'
else:
    layer3_neurons = 3
    loss_fn = custom_loss3

# ---- Input layout -----------------------------------------------------
# Only applied when the last input is NOT used as a bias.
# NOTE(review): these rebind names in the notebook namespace only. If
# print_LUT_NN() reads the values stored in its own module, the overrides
# will not show up in its printout - verify against architecture_definitions.
if not last_input_is_bias:
    networkInputSize, layer2_lutRangesCnt, layer2_input_offset = nLayers, 1, None

# Dump both architecture definitions for the record.
print_Classic_NN()
print_LUT_NN()

Classic NN definitions:
dense_layer1_size: 128
dense_layer2_size: 64
dense_layer3_size: 48
dense_layer4_size: 1
------------------------
LUT NN definitions:
layer1_lut_size 1024
layer2_lut_size 256
layer3_lut_size 32
layer2_lutRangesCnt 16
layer2_input_offset 8.0
------------------------


### Classic NN data read test

In [5]:
%%time
import io_functions as io
importlib.reload(io)

trainDataDir = "/scratch_ssd/akalinow/ProgrammingProjects/MachineLearning/OMTF/data/18_12_2020/"   
testDataDir = "/scratch_ssd/akalinow/ProgrammingProjects/MachineLearning/OMTF/data/18_12_2020/" 

testFileNames = glob.glob(trainDataDir+'OMTFHits_pats0x0003_newerSample_files_1_100_chunk_0.tfrecord.gzip')
trainFileNames = glob.glob(testDataDir+'OMTFHits_pats0x0003_oldSample_files_*_chunk_0.tfrecord.gzip')

trainDataDir = "/scratch_cmsse/alibordi/data/"
testDataDir = "/scratch_cmsse/alibordi/data/"
testFileNames = glob.glob(trainDataDir+'*15Feb*tfrecord.gzip')
trainFileNames = glob.glob(testDataDir+'*22Feb*.tfrecord.gzip')

batchSize = 4096
nEpochs = 1

dataset = io.get_Classic_NN_dataset(batchSize, nEpochs, trainFileNames, isTrain=True)
io.reading_benchmark(dataset.take(100))
io.dumpOneEvent(dataset)

dataset = io.get_Classic_NN_dataset(batchSize, nEpochs, testFileNames, isTrain=False)
io.reading_benchmark(dataset.take(100))
io.dumpOneEvent(dataset)

Reading data from files:
/scratch_cmsse/alibordi/data/omtfAnalysis_22Feb.tfrecord.gzip
Execution time: 4.190115006000269
features.shape: (4096, 37)
len(labels) 1
labels[0].shape: (4096,)
weights.shape: (4096,)
Hits in OMTF Layers:
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16.
  0.]
ptLabels:
 20.62
weights:
 1.0
Reading data from files:
/scratch_cmsse/alibordi/data/omtfAnalysis_15Feb.tfrecord.gzip
Execution time: 3.0157685580052203
features.shape: (4096, 37)
len(labels) 6
labels[0].shape: (4096,)
weights.shape: (4096,)
Hits in OMTF Layers:
 [ 0.  0.  0. -6. -2. -7.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 16. 16.  0.  0.  0.  0. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16. 16.
  2.]
ptLabels:
 48.75
weights:
 1.0
CPU times: user 24 s, sys: 5.72 s, total: 29.7 s
Wall time: 7.64 s


### LUT NN data read test

In [None]:
%%time
import io_functions as io
importlib.reload(io)

trainDataDir = "/scratch_ssd/akalinow/ProgrammingProjects/MachineLearning/OMTF/data/18_12_2020/"   
testDataDir = "/scratch_ssd/akalinow/ProgrammingProjects/MachineLearning/OMTF/data/18_12_2020/" 

testFileNames = glob.glob(trainDataDir+'OMTFHits_pats0x0003_newerSample_files_1_100_chunk_0.tfrecord.gzip')
trainFileNames = glob.glob(trainDataDir+'OMTFHits_pats0x0003_oldSample_files_*_chunk_0.tfrecord.gzip')

batchSize = 4096
nEpochs = 1

dataset = io.get_LUT_NN_dataset(batchSize, nEpochs, trainFileNames, 
                                nRefLayers=nRefLayers,
                                layer1_lut_size=layer1_lut_size,
                                layer2_lut_size=layer2_lut_size,
                                layer2_lutRangesCnt=layer2_lutRangesCnt,
                                last_input_is_bias=last_input_is_bias,
                                rangeFactor=rangeFactor,
                                isTrain=True)
io.reading_benchmark(dataset.take(100))
io.dumpOneEvent(dataset)

dataset = io.get_LUT_NN_dataset(batchSize, nEpochs, testFileNames, 
                                nRefLayers=nRefLayers,
                                layer1_lut_size=layer1_lut_size,
                                layer2_lut_size=layer2_lut_size,
                                layer2_lutRangesCnt=layer2_lutRangesCnt,
                                last_input_is_bias=last_input_is_bias,
                                rangeFactor=rangeFactor,
                                isTrain=False)
io.reading_benchmark(dataset.take(100))
io.dumpOneEvent(dataset)

In [None]:
import pandas as pd

# Parquet file to inspect. Alternative sample kept for reference:
#   /scratch_ssd/akalinow/ProgrammingProjects/MachineLearning/OMTF/data/18_12_2020/df.parquet_OMTFHits_pats0x0003_newerSample_files_1_100_chunk_0.gzip
fileName = "/scratch_cmsse/alibordi/data/df.parquet_omtfAnalysis_22Feb.gzip"

# Load the whole file into a DataFrame.
df = pd.read_parquet(path=fileName)

# List the column index, then leave the per-column summary statistics as the
# cell's displayed result.
print(df.columns)
df.describe()

In [None]:
# Keep only the rows whose omtfPt is strictly positive ...
df1 = df.loc[df["omtfPt"].gt(0)]
# ... and histogram their muonEta values (the plot is the cell's output).
df1["muonEta"].hist()