In [None]:
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import keras
from tensorflow.keras import datasets, layers, models
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D, TimeDistributed, LSTM, Conv3D
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
from keras.callbacks import CSVLogger
import os
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
from pandas import read_csv
import math
import seaborn as sns
from collections import deque
import copy
import glob
import fileinput

# Optional GPU setup, left disabled: let TensorFlow grow GPU memory on demand.
# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#     tf.config.experimental.set_memory_growth(gpu, True)

# ---- Run configuration (this run: 768k training samples, 192k test samples) ----
BATCH_SIZE = 512          # default number of samples per generated batch
EPOCHS = 20               # number of epochs requested from model.fit
NUMBER_TEST_SET = 192000  # test-set size; the train/test split is made beforehand and the ready-made sets are passed in
TEMPORAL_LENGTH = 8       # use the first 8 frames (each video holds 16 frames)

In [None]:
# Uncomment to install pandas into this kernel's environment:
# %pip install pandas

In [None]:
# Load every training shard and stack the shards into one array.
# glob.glob() returns paths in arbitrary, filesystem-dependent order, so the
# paths are sorted to make the sample order (and the seeded shuffle that is
# applied later) reproducible across machines and runs.
train_files = sorted(glob.glob('train/Data*.npy'))
if not train_files:
    # Fail with a message that names the pattern instead of np.concatenate's
    # generic "need at least one array" error.
    raise FileNotFoundError("no training shards matched 'train/Data*.npy'")

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load
# shards that come from a trusted source.
train_data = np.concatenate(
    [np.load(shard_path, allow_pickle=True) for shard_path in train_files]
)

In [None]:
# Load every test shard and stack the shards into one array.
# glob.glob() returns paths in arbitrary, filesystem-dependent order, so the
# paths are sorted to keep the order of test samples (and therefore of the
# exported predictions) reproducible across machines and runs.
test_files = sorted(glob.glob('test/Data*.npy'))
if not test_files:
    # Fail with a message that names the pattern instead of np.concatenate's
    # generic "need at least one array" error.
    raise FileNotFoundError("no test shards matched 'test/Data*.npy'")

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load
# shards that come from a trusted source.
test_data = np.concatenate(
    [np.load(shard_path, allow_pickle=True) for shard_path in test_files]
)

In [None]:
# Display the first training record as a sanity check on the loaded data.
# (data_generator reads each record as record[0] = frames, record[1] = label.)
train_data[0]

In [None]:
# Report how many test records were loaded.
num_test_samples = len(test_data)
print('Total number of test samples:', num_test_samples)
# Training counterpart, currently disabled:
# print('Total number of train samples:', len(train_data))

In [None]:
# Display the first test record as a sanity check on the loaded data.
# (data_generator reads each record as record[0] = frames, record[1] = label.)
test_data[0]

In [None]:
def shuffle_data(samples):
    """Return `samples` reordered by `shuffle` with the fixed seed `random_state=2`.

    The seed is constant, so the same input always yields the same ordering.
    """
    return shuffle(samples, random_state=2)

In [None]:
def data_generator(data,batch_size=BATCH_SIZE,temporal_padding='same',shuffle=True):
    """Endlessly yield standardized ``(X, y)`` batches built from ``data``.

    Parameters
    ----------
    data : sequence
        Records indexed as ``record[0]`` (an iterable of frame file paths, each
        read with ``np.load``) and ``record[1]`` (the label).
    batch_size : int
        Maximum number of records per batch; the last batch of a pass may be
        smaller.
    temporal_padding : str
        Unused; retained so existing callers keep working.
    shuffle : bool
        When true, ``data`` is shuffled once through ``shuffle_data`` before
        the first batch. The same order is then reused on every pass.

    Yields
    ------
    tuple
        ``(X, y)`` NumPy arrays. ``X`` stacks the loaded frames per sample and
        is standardized with a ``StandardScaler`` that is re-fit on every
        batch, column-wise over the last axis. ``y`` holds the labels.
        Assumes all frames share one shape so they stack into a regular
        array — TODO confirm against the data files.

    Raises
    ------
    ValueError
        If ``data`` is empty or ``batch_size`` is not positive (either would
        otherwise make the generator loop forever without yielding).
    RuntimeError
        If a complete pass over ``data`` produced no loadable sample.
    """
    num_samples = len(data)
    if num_samples == 0:
        raise ValueError('data_generator received no samples')
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    if shuffle:
        data = shuffle_data(data)
    while True:
        yielded_in_pass = False
        for offset in range(0, num_samples, batch_size):
            batch_samples = data[offset:offset+batch_size]

            X_train = []
            y_train = []

            for batch_sample in batch_samples:
                frame_paths = batch_sample[0]
                label = batch_sample[1]
                frames = []
                for frame_path in frame_paths:
                    try:
                        frames.append(np.load(frame_path))
                    except Exception as e:
                        print(e)
                        print('error reading in frame: ', frame_path)
                        # Drop the whole sample: keeping it with a missing
                        # frame would give a ragged batch that cannot be
                        # stacked or reshaped below.
                        break
                else:
                    # Reached only when every frame of the sample loaded.
                    X_train.append(frames)
                    y_train.append(label)

            if not X_train:
                # Nothing usable in this slice; the scaler cannot fit on an
                # empty array, so move on to the next slice.
                continue

            X_train = np.array(X_train)
            y_train = np.array(y_train)

            # Flatten to 2-D (rows x last axis) for the scaler, then restore
            # the original shape.
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)

            yielded_in_pass = True
            yield X_train, y_train

        if not yielded_in_pass:
            # Without this guard an unreadable dataset would spin forever.
            raise RuntimeError('no sample could be loaded in a full pass over the data')

In [None]:
# Report the dataset sizes: training first, then test (one value per line).
print(len(train_data), len(test_data), sep='\n')

# Training batches come from data shuffled once up front; test batches keep
# the loaded order.
train_generator = data_generator(train_data, shuffle=True, batch_size=BATCH_SIZE)
test_generator = data_generator(test_data, shuffle=False, batch_size=BATCH_SIZE)

# Manual smoke test, disabled (each call would advance the training generator):
# x, y = next(train_generator)
# xx, yy = next(train_generator)

In [None]:
def get_model(input_shape=None):
    """Build, compile and summarize the TimeDistributed-CNN + LSTM regressor.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of one input sample, passed to the first ``TimeDistributed``
        layer. Defaults to ``(TEMPORAL_LENGTH, 13, 21, 1)``, i.e. the
        previously hard-coded ``(8, 13, 21, 1)``. Presumably
        (time steps, rows, cols, channels) under Keras' default
        channels-last layout — confirm against the frame files.

    Returns
    -------
    The compiled ``Sequential`` model. ``model.summary()`` is printed as a
    side effect.
    """
    if input_shape is None:
        # Resolved at call time so the default follows the config constant.
        input_shape = (TEMPORAL_LENGTH, 13, 21, 1)

    model = Sequential()

    # Per-frame convolutional feature extractor.
    model.add(TimeDistributed(Conv2D(32, (3,3), activation='relu'), input_shape=input_shape))
    model.add(TimeDistributed(Conv2D(64, (3,3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D()))
    model.add(TimeDistributed(Flatten()))

    # Only the LSTM's final output is kept (return_sequences=False) and fed to
    # the dense regression head.
    model.add(LSTM(64, activation='relu', return_sequences=False, recurrent_dropout=0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='linear'))

    model.compile(loss='Huber', optimizer=Adam(), metrics=['mean_squared_error'])
    model.summary()
    return model
model = get_model()

In [None]:
class CustomCallback(keras.callbacks.Callback):
    """Debugging callback that prints the log keys received by each Keras hook.

    Every hook accepts ``logs=None``; a missing ``logs`` is reported as an
    empty key list instead of raising ``AttributeError``.
    """

    @staticmethod
    def _log_keys(logs):
        """Return the keys of ``logs`` as a list, treating ``None`` as empty."""
        return list((logs or {}).keys())

    def on_train_begin(self, logs=None):
        keys = self._log_keys(logs)
        print("Starting training; got log keys: {}".format(keys))

    def on_train_end(self, logs=None):
        keys = self._log_keys(logs)
        print("Stop training; got log keys: {}".format(keys))

    def on_epoch_begin(self, epoch, logs=None):
        keys = self._log_keys(logs)
        print("Start epoch {} of training; got log keys: {}".format(epoch, keys))

    def on_epoch_end(self, epoch, logs=None):
        keys = self._log_keys(logs)
        print("End epoch {} of training; got log keys: {}".format(epoch, keys))

    def on_test_begin(self, logs=None):
        keys = self._log_keys(logs)
        print("Start testing; got log keys: {}".format(keys))

    def on_test_end(self, logs=None):
        keys = self._log_keys(logs)
        print("Stop testing; got log keys: {}".format(keys))

    def on_predict_begin(self, logs=None):
        keys = self._log_keys(logs)
        print("Start predicting; got log keys: {}".format(keys))

    def on_predict_end(self, logs=None):
        keys = self._log_keys(logs)
        print("Stop predicting; got log keys: {}".format(keys))

    def on_train_batch_begin(self, batch, logs=None):
        keys = self._log_keys(logs)
        print("...Training: start of batch {}; got log keys: {}".format(batch, keys))

    def on_train_batch_end(self, batch, logs=None):
        keys = self._log_keys(logs)
        print("...Training: end of batch {}; got log keys: {}".format(batch, keys))

    def on_test_batch_begin(self, batch, logs=None):
        keys = self._log_keys(logs)
        print("...Evaluating: start of batch {}; got log keys: {}".format(batch, keys))

    def on_test_batch_end(self, batch, logs=None):
        keys = self._log_keys(logs)
        print("...Evaluating: end of batch {}; got log keys: {}".format(batch, keys))

In [None]:
# Callbacks handed to model.fit in the training cell.
checkpoint_path = "cp.ckpt"

# Early stopping watches the validation loss with a patience of 8.
earlyStop_callback = tf.keras.callbacks.EarlyStopping(
    patience=8,
    monitor='val_loss',
)

# Checkpoints store the full model (save_weights_only=False) at checkpoint_path.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=False,
)

# Training metrics are appended to log.csv using ';' as the field separator.
csv_logger = CSVLogger('log.csv', separator=';', append=True)

In [None]:
from keras.models import load_model

# Resume from a previous checkpoint when one exists; otherwise keep the model
# built by get_model() so the notebook also runs from a clean directory
# (an unconditional load fails when cp.ckpt has not been written yet).
if os.path.exists(checkpoint_path):
    model = tf.keras.models.load_model(checkpoint_path)

# Whole number of batches per pass; ceil() counts the trailing partial batch
# that data_generator yields when the set size is not a multiple of BATCH_SIZE.
train_steps = math.ceil(len(train_data) / BATCH_SIZE)
val_steps = math.ceil(len(test_data) / BATCH_SIZE)

# The plain Python generators are consumed in the main process on purpose:
# with use_multiprocessing=True and several workers, every worker gets its own
# copy of the generator and the same batches are served repeatedly. To load
# data in parallel again, wrap the data in a keras.utils.Sequence first.
hist = model.fit(train_generator,
                 steps_per_epoch=train_steps,
                 epochs=EPOCHS,
                 validation_data=test_generator,
                 validation_steps=val_steps,
                 callbacks=[cp_callback, csv_logger, earlyStop_callback])

In [None]:
#from keras.models import load_model
#model = tf.keras.models.load_model('./cp.ckpt/')

In [None]:
truthB = []
predB = []

# Never evaluate more samples than were loaded: the generator loops forever,
# so asking for more would silently wrap around and duplicate samples.
num_eval_samples = min(NUMBER_TEST_SET, len(test_data))
if num_eval_samples < 1:
    raise ValueError('no test samples available for prediction')

# Whole number of batches, counting a trailing partial batch.
limit = math.ceil(num_eval_samples / BATCH_SIZE)

# Use a fresh, unshuffled generator: test_generator may already have been
# advanced by validation during fit, so its current position is unknown.
eval_generator = data_generator(test_data, batch_size=BATCH_SIZE, shuffle=False)

batches = 0
while batches < limit:
    x_batch, y_batch = next(eval_generator)
    predB.append(model.predict(x_batch))
    truthB.append(y_batch)
    batches += 1

# Trim any overshoot from the last batch so both arrays hold exactly
# num_eval_samples rows, in matching order.
predBATCHED = np.concatenate(predB)[:num_eval_samples]
truthBATCHED = np.concatenate(truthB)[:num_eval_samples]

df_predict2 = pd.DataFrame(predBATCHED, columns=['cotBeta'])
df_true = pd.DataFrame(truthBATCHED, columns=['cotBeta'])

In [None]:
# Residual = true label minus prediction. NaNs are dropped explicitly because
# the histogram cannot bin them.
cot_beta_residual = (df_true['cotBeta'] - df_predict2['cotBeta']).dropna()

# Plain matplotlib histogram on an explicit figure; this replaces the
# deprecated seaborn distplot(kde=False) call and keeps the 50 bins.
fig, ax = plt.subplots()
ax.hist(cot_beta_residual, bins=50, alpha=0.4)
ax.set_xlabel('cotBeta residual')
ax.set_ylabel('frequency')
ax.set_xlim([-.05, .05])
ax.set_title('Cot Beta Residual')
fig.savefig('cotBeta-resolution.png')

In [None]:
# Write the predictions and the matching true labels to CSV, one file each.
for frame, csv_name in ((df_predict2, 'predictions.csv'),
                        (df_true, 'trueLabels.csv')):
    frame.to_csv(csv_name)