In [None]:
!pip -qq install focal-loss

In [None]:
import pandas as pd
import numpy as np
import skopt
import gc
import time
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
from skopt import gp_minimize
import matplotlib.pyplot as plt
from skopt.plots import plot_convergence,plot_objective,plot_evaluations
import warnings
import random
import tensorflow as tf
from tensorflow.keras import Model
from focal_loss import SparseCategoricalFocalLoss
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv1D, Input, Dropout, BatchNormalization, MaxPooling1D, Flatten, Dense, GlobalMaxPooling1D, GlobalAveragePooling1D
from skopt import dump, load
import tensorflow.keras.backend as K
warnings.filterwarnings("ignore")

In [None]:
# tf.data auto-tuning sentinel (not referenced by the cells shown in this notebook).
AUTO = tf.data.experimental.AUTOTUNE

# Detect the hardware and pick the matching distribution strategy.
try:
    # TPU detection. No parameters are needed when the TPU_NAME environment
    # variable is set, which is always the case on Kaggle.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver raises ValueError when no TPU can be found.
    tpu = None

if not tpu:
    # Default TensorFlow strategy; works on CPU and on a single GPU.
    strategy = tf.distribute.get_strategy()
else:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Read the trigger data; 'Unnamed: 0' is dropped (presumably a saved row index - verify).
df = pd.read_csv('../input/cmsdata/CMS_trigger.csv')
df = df.drop(columns='Unnamed: 0')
# Magnitude of q/pT, i.e. the charge sign is discarded.
df['1/pT'] = df['q/pt'].abs()
def label(a):
    """Map a pT value onto one of three classes.

    Returns 0 for ``a <= 10``, 1 for ``10 < a <= 30`` and 2 for ``a > 30``.
    A value that satisfies none of the comparisons (e.g. NaN) yields ``None``.
    """
    if a <= 10:
        return 0
    # Reaching this point with a comparable value already implies a > 10.
    if a <= 30:
        return 1
    # Kept as an explicit test so that NaN falls through to None.
    if a > 30:
        return 2
    return None

# pT is the reciprocal of the '1/pT' column.
df['pT'] = 1/df['1/pT']

# Coarse pT class (0, 1 or 2) derived with label().
df['pT_classes'] = df['pT'].apply(label)

# Model inputs: Phi, Theta and Front for indices 0, 2, 3 and 4 (12 columns, in this order).
features = ['Phi_'+str(i) for i in [0,2,3,4]] + ['Theta_'+str(i) for i in [0,2,3,4]] + ['Front_'+str(i) for i in [0,2,3,4]]
labels_1 = ['pT']
labels_2 = ['pT_classes']
labels_3 = ['PatternStraightness']

# Fit the scalers on the training rows only (the first 80%, the same boundary the
# train/test split uses) and then transform every row. Fitting on the full frame
# would let test-set statistics leak into the preprocessing.
n_train_rows = int(len(df)*0.8)

scaler_1 = StandardScaler()
scaler_1.fit(df[features].iloc[:n_train_rows])
df[features] = scaler_1.transform(df[features])

# NOTE: test rows may now fall slightly outside [0, 1] if their range exceeds the training range.
scaler_3 = MinMaxScaler()
scaler_3.fit(df[labels_3].iloc[:n_train_rows])
df[labels_3] = scaler_3.transform(df[labels_3])

In [None]:
# Ordered (unshuffled) 80/20 split: the first 80% of the rows train, the rest test.
split_at = int(len(df)*0.8)
train_rows = slice(None, split_at)
test_rows = slice(split_at, None)

# The 12 feature columns are reshaped row-major into (samples, 4, 3).
# NOTE(review): with the column order Phi_*, Theta_*, Front_* this makes position 0
# (Phi_0, Phi_2, Phi_3), position 1 (Phi_4, Theta_0, Theta_2), and so on - confirm
# this is intended rather than one (Phi, Theta, Front) triple per position.
X_train = df[features].iloc[train_rows].to_numpy().reshape((-1,4,3))
Y1_train = df[labels_1].iloc[train_rows]
Y2_train = df[labels_2].astype('float32').iloc[train_rows]
Y3_train = df[labels_3].iloc[train_rows]

X_test = df[features].iloc[test_rows].to_numpy().reshape((-1,4,3))
Y1_test = df[labels_1].iloc[test_rows]
Y2_test = df[labels_2].astype('float32').iloc[test_rows]
Y3_test = df[labels_3].iloc[test_rows]

# Rebind the name so the full DataFrame is no longer referenced from here.
df = 0

In [None]:
# Sanity check: display the array shapes and row counts produced by the split.
X_train.shape, len(Y1_train), len(Y2_train), len(Y3_train), X_test.shape, len(Y1_test)

In [None]:
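# Hyperparameters by type (the dimensions actually searched are listed in `space` below):
# Real        - Dropout, SkipFraction (Focal loss weight is not part of the current search)
# Categorical - BatchNorm, SkipType, PoolingType, Activation
# Integer     - number of layers, number of filters, number of dense neurons (focal loss gamma is not part of the current search)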

In [None]:
def loss(y_true,y_pred):
    """Weighted mean squared relative error used to train the regressor.

    Each sample's squared relative error ((y_pred - y_true) / y_true)**2 is
    multiplied by a weight that depends on y_true:
      * y_true < 80         -> y_true
      * 80 <= y_true < 160  -> 2.4 * y_true
      * y_true >= 160       -> 10
    The mean of the weighted errors is divided by 250.
    y_true must be non-zero because it is used as a divisor.
    """
    dtype = K.dtype(y_true)

    # 0/1 indicator tensors for the three y_true ranges.
    below_80 = K.cast(y_true<80, dtype)
    from_80_to_160 = K.cast(y_true>=80, dtype)*K.cast(y_true<160, dtype)
    at_least_160 = K.cast(y_true>=160, dtype)

    weights = below_80*y_true + from_80_to_160*y_true*2.4 + at_least_160*10
    relative_error = (y_pred-y_true)/y_true
    return K.mean(weights*K.pow(relative_error,2))/250

In [None]:
# Search space handed to gp_minimize. The order is load-bearing: cnn() reads the
# sampled point positionally (values[0] ... values[8]) in exactly this order.
space  = [Real(0.0, 0.7, name='Dropout'),  # dropout rate applied before each conv layer after the first
          Real(0.0, 1.0, name='SkipFraction'),  # skips are used once Layers*(1-SkipFraction) <= layer index
          Categorical(['YES','No'], name = 'BatchNorm'),  # cnn() only tests == "YES"; any other value disables batch norm
          Categorical(['ADD','CONCAT'], name = 'SkipType'),  # how a skip connection is merged
          Integer(2, 10, name='# Layers'),  # total number of Conv1D layers
          Integer(16, 512, name='# Filters'),  # filters per Conv1D layer
          Integer(128, 512, name='DenseNeurons'),  # width of the dense layer before the output
          Categorical(['MAX','AVG'], name = 'PoolingType'),  # global max or global average pooling
          Categorical(['relu','swish','tanh','sigmoid'], name = 'Activation')  # activation of the conv and dense layers
         ]

In [None]:
def cnn(values = [0,1,'NO','ADD',5,32,128,'AVG','relu'], X_train = X_train, Y1_train = Y1_train, Y2_train = Y2_train, Y3_train = Y3_train):
    """Build and train the Conv1D pT regressor for one hyperparameter point.

    Parameters
    ----------
    values : sequence
        Hyperparameters in search-space order: dropout rate, skip fraction,
        batch-norm flag ("YES" enables it), skip type ('ADD' or 'CONCAT'),
        number of conv layers, filters per conv layer, dense width,
        pooling type ('AVG' or 'MAX') and activation name.
    X_train : array of shape (samples, 4, 3)
        Training inputs.
    Y1_train : regression target passed to ``model.fit``.
    Y2_train, Y3_train : unused by the single-output model; kept so existing
        callers (and the disabled multi-task variant) keep working.

    Returns
    -------
    The trained Keras model, restored to the weights of its best ``val_loss`` epoch.

    Raises
    ------
    RuntimeError
        If training never wrote a checkpoint (e.g. ``val_loss`` was NaN in
        every epoch), so there are no best weights to restore.
    """
    import os  # local import: only needed for checkpoint housekeeping

    # Extra trailing entries are ignored, as with the original positional indexing.
    dropout, SkipFraction, batchNorm, SkipType, Layers, Filters, DN, PoolingType, Activation = values[:9]

    batch_size=1024* strategy.num_replicas_in_sync
    path = "model.h5"

    # ModelCheckpoint(save_best_only=True) only writes when val_loss improves. If this
    # run never produces a usable val_loss, a file left by an earlier call would be
    # loaded instead, so remove any stale checkpoint first.
    if os.path.exists(path):
        os.remove(path)

    with strategy.scope():
        I = Input(shape=(4,3))
        x = Conv1D(filters=Filters, kernel_size=3, activation=Activation, padding='same')(I)
        x_ = x  # output of the previous conv layer, used as the skip source
        for i in range(Layers-1):
            x = Dropout(dropout)(x)
            if batchNorm=="YES":
                x = BatchNormalization()(x)
            x__ = Conv1D(filters=Filters, kernel_size=3, activation=Activation, padding='same')(x)

            # Skip connections are only used from layer index Layers*(1-SkipFraction) onwards.
            use_skip = Layers*(1-SkipFraction)<=i
            # Keras merge layers are used instead of raw tf ops on symbolic tensors so the
            # graph is made of proper layers.
            if use_skip and SkipType=='CONCAT':
                x = tf.keras.layers.Concatenate(axis=-1)([x_, x__])
            elif use_skip and SkipType=='ADD':
                x = tf.keras.layers.Add()([x_, x__])
            else:
                x = x__
            x_ = x__

        if PoolingType == 'AVG':
            x = GlobalAveragePooling1D()(x)
        elif PoolingType == 'MAX':
            x = GlobalMaxPooling1D()(x)
        x1 = Dense(DN, activation=Activation)(x)
        O1 = Dense(1, activation='linear')(x1)

        # Disabled multi-task variant, kept for reference: two extra heads on the pooled features,
        #   O2 = Dense(3, activation='softmax')(Dense(DN, activation=Activation)(x))
        #   O3 = Dense(1, activation='sigmoid')(Dense(DN, activation=Activation)(x))
        # with Model(inputs=I, outputs=[O1,O2,O3]), compiled with
        #   loss=[loss,SparseCategoricalFocalLoss(gamma=2),'mse'], loss_weights=[1,0.05,1]
        # and fitted on y=[Y1_train, Y2_train, Y3_train].
        model = Model(inputs=I, outputs=O1)

        checkpoint = ModelCheckpoint(path, monitor='val_loss', verbose=0, save_best_only=True, mode='min')
        early_stop = EarlyStopping(monitor='val_loss',patience=3,verbose=0)
        # patience=0: the learning rate is halved after any epoch without improvement.
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=0,verbose=0)

        model.compile(optimizer = 'adam', loss=loss)

    model.fit(x = X_train, y = Y1_train, batch_size=batch_size, epochs=55, verbose=0, validation_split=0.1, callbacks=[checkpoint,early_stop,reduce_lr])

    if not os.path.exists(path):
        raise RuntimeError(
            "cnn: training never wrote a checkpoint to %r (val_loss did not improve on its "
            "initial value, e.g. it was NaN); no best weights to restore for values=%r"
            % (path, list(values))
        )
    model.load_weights(path)

    return model

In [None]:
def score(true, preds):
    """Sum of weighted, bin-normalised mean absolute errors over 1-unit bins of ``true``.

    For every integer ``i`` in ``0..119`` the samples with ``i < true <= i + 1``
    form a bin. Each non-empty bin contributes ``w * MAE(bin) / (i + 0.5)``
    with ``w = 2`` for ``i < 25``, ``w = 1.5`` for ``25 <= i < 50`` and
    ``w = 1`` otherwise. The number of empty bins is printed.

    Parameters
    ----------
    true, preds : array-like
        Targets and predictions with the same number of elements; both are
        flattened, so (N,) and (N, 1) inputs are accepted.

    Returns
    -------
    The summed score (``0`` when every bin is empty). Non-finite predictions
    propagate into the result instead of being silently dropped.

    Raises
    ------
    ValueError
        If ``true`` and ``preds`` do not contain the same number of elements.
    """
    true = np.asarray(true).reshape(-1)
    preds = np.asarray(preds).reshape(-1)
    if true.shape != preds.shape:
        # Fail loudly: a silent fallback here would report a perfect score of 0.
        raise ValueError(
            "score: true and preds must have the same number of elements, got %d and %d"
            % (true.size, preds.size)
        )

    S = []
    count = 0  # number of empty bins
    for i in range(120):
        in_bin = (true > i) & (true <= i + 1)
        if not in_bin.any():
            count += 1
            continue

        # Exactly one weight per bin (the low bins used to be counted twice).
        if i < 25:
            weight = 2
        elif i < 50:
            weight = 1.5
        else:
            weight = 1

        bin_mae = np.mean(np.abs(true[in_bin] - preds[in_bin]))
        S.append(weight * bin_mae / (i + 0.5))
    print(count)
    return sum(S)

In [None]:
# def score(true, preds):
#     true = 1/true
#     preds = 1/(preds+pow(10,-6))
#     return mae(true,preds)

In [None]:
# Number of objective evaluations so far; used only for progress logging.
iteration__ = 0
def objective(values = [0,1,'NO','ADD',5,32,128,'AVG','relu'], X_test = X_test, Y1_test = Y1_test, Y2_test = Y2_test, Y3_test = Y3_test):
    """Objective for gp_minimize: train a CNN for ``values`` and score it on the test set.

    Parameters
    ----------
    values : sequence
        Hyperparameter point in search-space order; forwarded to ``cnn``.
    X_test : test inputs passed to ``model.predict``.
    Y1_test : DataFrame holding the regression target used for scoring.
    Y2_test, Y3_test : unused here; kept for interface compatibility.

    Returns
    -------
    The value of ``score`` for the test predictions (lower is better).
    """
    global iteration__
    start = time.time()
    iteration__ += 1

    # Drop Keras global state left by the previous trial's model so memory does not
    # grow across the optimisation loop, then collect the released objects.
    K.clear_session()
    gc.collect()

    model = cnn(values)
    test_preds = model.predict(X_test)
    # A constant prediction means training collapsed; retrain once with a fresh initialisation.
    if np.unique(test_preds).size == 1:
        model = cnn(values)
        # The model has a single output, so the prediction array is used as is
        # (indexing with [0] would keep only the first row).
        test_preds = model.predict(X_test)

    trial_loss = score(Y1_test.to_numpy(), test_preds)
    print(iteration__, "iteration loss = ", trial_loss)
    print('Time-taken = ', time.time()-start)
    print()
    return trial_loss

In [None]:
# Warm-start data: points (x0) and objective values (y0) from earlier optimisation runs.
# NOTE: skopt.load unpickles the file, so only load result files from a trusted source.
# NOTE(review): the stored func_vals are only comparable with new evaluations if they
# were produced by the same objective definition - confirm before reusing them.
x0, y0 = [], np.array([])
for i in range(1):
    res = load('../input/newlossbayesianopt2/result.pkl')
    x0.extend(res.x_iters)
    y0 = np.concatenate([y0, res.func_vals])
    
# # for i in range(2):
# #     res = load('../input/bayesian3/result'+str(i)+'.pkl')
# #     x0 = x0 + res.x_iters
# #     y0 = np.concatenate([y0,res.func_vals])

In [None]:
# Bayesian optimisation over `space`, warm-started with the previously evaluated points.
res_gp = gp_minimize(
    objective,
    space,
    n_calls=40,
    x0=x0,
    y0=y0,
    n_random_starts=10,
)

"Best score={:.4f}".format(res_gp.fun)

In [None]:
# res_gp = gp_minimize(objective, space, n_calls=40, n_random_starts=10)

# "Best score=%.4f" % res_gp.fun

In [None]:
# Keep all evaluated points and their objective values (these also serve as
# warm-start data if the optimisation cell is run again).
x0 = res_gp.x_iters
y0 = res_gp.func_vals

# Print every evaluation from best to worst. Ranking by index keeps each score paired
# with its own configuration even when several evaluations share the same score.
for idx in np.argsort(y0, kind='stable'):
    print(y0[idx], x0[idx])

In [None]:
# Persist the optimisation result so a later run can reload it.
# NOTE(review): skopt.dump stores the objective function by default, so loading may
# require `objective` to be defined first - consider store_objective=False; verify.
dump(res_gp, 'result.pkl')

In [None]:
# Report the best point found, one hyperparameter per line, in search-space order.
print("Best parameters:")
best_param_names = (
    "Dropout",
    "SkipFraction",
    "BatchNorm",
    "SkipType",
    "# Layers",
    "# Filters",
    "DenseNeurons",
    "PoolingType",
    "Activation",
)
for position, param_name in enumerate(best_param_names):
    print("- " + param_name + "=", res_gp.x[position])

In [None]:
# Convergence trace: best objective value found so far versus number of evaluations.
plot_convergence(res_gp)
plt.show()

In [None]:
# Partial-dependence plots of the surrogate model for each search dimension and pair.
plot_objective(res_gp)
plt.show()

In [None]:
# Where in the search space the optimiser actually sampled points.
plot_evaluations(res_gp)
plt.show()

In [None]:
# Round-trip check: reload the result file written by dump() above.
res_loaded = load('result.pkl')

In [None]:
# Best objective value stored in the reloaded result.
res_loaded.fun