In [4]:
import pandas as pd
import json
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import numpy as np
from sklearn.linear_model import LinearRegression
import json
from sklearn.preprocessing import StandardScaler
import tensorflow 
from sklearn import metrics
import tensorflow.keras
from tensorflow.keras import layers
from tensorflow import keras

# Read the raw JSON export and wrap it in a DataFrame.
# `df` is the frame consumed by the module-level train/test split further down;
# `data` keeps the parsed JSON object until a later cell rebinds the name.
with open("../input/regdata/ipt_12.json") as f:
    data = json.load(f)
df = pd.DataFrame(data)

In [5]:
def convert_to_numpy(series):
    """Return a new NumPy array built from ``series``.

    Args:
        series: Any array-like accepted by ``np.array`` (list, pandas Series, ...).

    Returns:
        numpy.ndarray: The input values as an array (``np.array`` copies by default).
    """
    as_array = np.array(series)
    return as_array

In [6]:
def load_data(data_path):
    """
    Read the JSON dataset at ``data_path`` into a DataFrame.

    The row whose index label is '94' is removed before returning;
    ``DataFrame.drop`` raises ``KeyError`` when that label is absent.

    Args:
        data_path: Path of the JSON file to read.

    Returns:
        pandas.DataFrame: The loaded records without the row labelled '94'.
    """
    with open(data_path, "r") as handle:
        frame = pd.DataFrame(json.load(handle))
    return frame.drop('94')

In [7]:
import numpy as np
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows of `df` as `test`; the fixed random_state makes the split repeatable.
train,test = train_test_split(df, test_size=0.2, random_state=42)

In [8]:
def shorten_series(data_series, window=10):
    """
    Downsample a series by averaging consecutive, non-overlapping windows.

    Output element ``k`` is the mean of ``series[k*window:(k+1)*window]``.
    The previous implementation emitted the raw first sample followed by the
    mean of the *preceding* window, so every value lagged by one window and
    the final window was never used. The output length is unchanged:
    ``ceil(len(series) / window)``.

    Args:
        data_series: Array-like of numeric samples.
        window: Number of consecutive samples averaged into one output value.
            Defaults to 10. A trailing partial window is averaged over the
            samples it contains.

    Returns:
        numpy.ndarray: The window means; an empty array for empty input.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")
    series = np.array(data_series)
    window_means = [
        np.mean(series[start:start + window])
        for start in range(0, len(series), window)
    ]
    return np.array(window_means)


In [9]:
def preprocess_data(data):
    """
    Build model inputs and targets from the sensor DataFrame.

    For every row the six sensor signals ('smcAC', 'smcDC', 'vib_table',
    'vib_spindle', 'AE_table', 'AE_spindle') are stacked column-wise into a
    ``(series_length, 6)`` array. The series length is taken from the data
    instead of being fixed at 900, so any common length works.

    Missing values in the target column 'VB' are replaced by the column mean.
    NaNs inside the sensor signals are not handled here.

    Args:
        data: DataFrame with the six sensor columns (each cell an array-like
            of equal length across columns and rows) and a numeric 'VB' column.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a NumPy array of shape
        ``(n_rows, series_length, 6)`` and ``y`` is the 'VB' pandas Series
        with NaNs filled by its mean.
    """
    channels = ('smcAC', 'smcDC', 'vib_table', 'vib_spindle', 'AE_table', 'AE_spindle')
    # Zipping the channel columns yields one tuple of six signals per row,
    # avoiding repeated positional row lookups.
    X = np.array([
        np.stack(signals, axis=1)
        for signals in zip(*(data[name] for name in channels))
    ])
    target = data['VB']
    y = target.fillna(target.mean())
    return X, y

In [10]:
def split_train_test(X, y):
    """
    Split features and targets into train, validation and test subsets.

    Two successive 80/20 splits with ``random_state=42``: the first sets
    aside the validation part, the second takes the test part out of the
    remaining 80%.

    Args:
        X: Feature array.
        y: Targets aligned with ``X``.

    Returns:
        tuple: ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    # Stage 1: 20% of everything becomes the validation set.
    X_rest, X_val, y_rest, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    # Stage 2: 20% of what is left becomes the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        X_rest, y_rest, test_size=0.2, random_state=42
    )
    return X_train, X_val, X_test, y_train, y_val, y_test

In [11]:
def normalization(X_train, X_val, X_test):
    """
    Standardise the three splits with statistics fitted on the training split.

    Each input is reshaped to rows of length ``X_train.shape[1]``, scaled by a
    ``StandardScaler`` fitted on the training rows only, and returned with a
    trailing axis of size 1.

    NOTE(review): for inputs with more than two dimensions the reshape
    flattens the extra axes into additional rows, so the number of rows
    returned differs from the number of samples passed in. Confirm the
    intended input is 2-D.

    Args:
        X_train: Training features; ``shape[1]`` defines the row length.
        X_val: Validation features.
        X_test: Test features.

    Returns:
        tuple: ``(X_train, X_val, X_test)`` scaled, each of shape
        ``(-1, X_train.shape[1], 1)``.
    """
    scaler = StandardScaler()
    row_len = X_train.shape[1]

    flat_train, flat_val, flat_test = (
        np.reshape(split, (-1, row_len)) for split in (X_train, X_val, X_test)
    )

    scaled_train = scaler.fit_transform(flat_train)
    scaled_val = scaler.transform(flat_val)
    scaled_test = scaler.transform(flat_test)

    out_shape = (-1, row_len, 1)
    return (
        np.reshape(scaled_train, out_shape),
        np.reshape(scaled_val, out_shape),
        np.reshape(scaled_test, out_shape),
    )

In [12]:
def timeseries_transform(data, head_size, num_heads, ff_dim, dropout=0):
    """
    Apply one transformer-style block to a Keras tensor.

    Self-attention on ``data`` is followed by layer normalisation. The
    residual branch adds the block input to that normalised output. The
    feed-forward branch (Conv1D -> pooling -> LayerNorm -> Conv1D) runs on the
    normalised attention output and projects back to the input's channel
    count, so the two branches can be summed.

    Args:
        data: Input Keras tensor; its last dimension sets the output channels.
        head_size: ``key_dim`` of the multi-head attention layer.
        num_heads: Number of attention heads.
        ff_dim: Number of filters of the first feed-forward Conv1D.
        dropout: Dropout rate passed to the attention layer only.

    Returns:
        Keras tensor: feed-forward output plus the residual branch.
    """
    # Attention followed by normalisation
    attended = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(data, data)
    normed = layers.LayerNormalization(epsilon=1e-6)(attended)

    # Residual branch; the pooling layer uses pool size 1
    skip_sum = normed + data
    residual = layers.MaxPooling1D(1)(skip_sum)

    # Feed-forward branch, fed from the normalised attention output
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(normed)
    hidden = layers.MaxPooling1D(pool_size=1)(hidden)
    hidden = layers.LayerNormalization(epsilon=1e-6)(hidden)
    projected = layers.Conv1D(filters=data.shape[-1], kernel_size=1)(hidden)
    return projected + residual

In [13]:
def build_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0, num_stacks=3):
    """
    Assemble the transformer regression model.

    The input passes through ``num_stacks * num_transformer_blocks`` calls of
    ``timeseries_transform``, is pooled, then goes through the MLP head and a
    single linear output unit.

    ``mlp_dropout`` used to be accepted but silently ignored; it is now
    applied after every dense layer listed in ``mlp_units``. With the default
    of 0 the added ``Dropout`` layers leave activations unchanged.

    Args:
        input_shape: Shape of one sample (without the batch axis).
        head_size: Attention ``key_dim`` for every transformer block.
        num_heads: Number of attention heads per block.
        ff_dim: Filter count of the feed-forward Conv1D in each block.
        num_transformer_blocks: Blocks per stack.
        mlp_units: Iterable of hidden sizes for the MLP head.
        dropout: Dropout rate forwarded to the transformer blocks.
        mlp_dropout: Dropout rate applied after each ``mlp_units`` layer.
        num_stacks: How many times the group of ``num_transformer_blocks``
            blocks is repeated. Defaults to 3, the value that was previously
            hard-coded.

    Returns:
        keras.Model: Uncompiled model mapping ``input_shape`` to one output.
    """
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_stacks * num_transformer_blocks):
        x = timeseries_transform(x, head_size, num_heads, ff_dim, dropout)

    # channels_first pooling averages over the last axis of the block output.
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    x = layers.Dense(128, activation="relu")(x)
    outputs = layers.Dense(1)(x)
    return keras.Model(inputs, outputs)

In [14]:
def model_training(X_train, y_train, X_val, y_val):
    """
    Build, compile and fit the transformer model on the training split.

    The hyper-parameters are fixed inside this function. Training uses MSE
    loss, Adam with an exponentially decaying learning rate, and early
    stopping (patience 10, best weights restored) evaluated against the
    validation split. The model summary is printed before fitting.

    Args:
        X_train: Training features; ``X_train.shape[1:]`` is the model input shape.
        y_train: Training targets.
        X_val: Validation features.
        y_val: Validation targets.

    Returns:
        keras.Model: The fitted model.
    """
    model = build_model(
        X_train.shape[1:],
        head_size=256,
        num_heads=4,
        ff_dim=4,
        num_transformer_blocks=4,
        mlp_units=[128],
        mlp_dropout=0.4,
        dropout=0.0,
    )

    decay = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-4, decay_steps=10000, decay_rate=0.9
    )
    optimizer = keras.optimizers.Adam(learning_rate=decay)
    tracked = [tensorflow.keras.metrics.MeanSquaredError()]
    model.compile(loss="mse", optimizer=optimizer, metrics=tracked)

    model.summary()

    early_stop = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=16,
        callbacks=[early_stop],
    )
    return model

In [15]:
def metric(y_act, y_pred):
    """
    Compute classification metrics for a set of predictions.

    Returns the confusion matrix and the balanced accuracy score from
    ``sklearn.metrics``. Per-class accuracy is not computed here.

    Args:
        y_act: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        tuple: ``(confusion_matrix, balanced_accuracy)``.
    """
    confusion = metrics.confusion_matrix(y_act, y_pred)
    return confusion, metrics.balanced_accuracy_score(y_act, y_pred)

In [16]:
def validation(X_val, y_val, metric, model=None):
    """
    Evaluate a trained model on the validation split.

    The model can now be passed explicitly, in the same position as in
    ``evaluate``. Previously the function always read a global named
    ``model``, which fails on a fresh kernel when no such global exists.
    For backward compatibility the global is still used when ``model`` is
    omitted.

    Args:
        X_val: Validation features.
        y_val: Validation targets.
        metric: Unused; kept so existing calls keep working.
        model: Object exposing ``evaluate(X, y, verbose=...)``. When ``None``,
            the module-level name ``model`` is looked up instead.

    Returns:
        Whatever ``model.evaluate`` returns for the validation data.

    Raises:
        NameError: If ``model`` is omitted and no global ``model`` exists.
    """
    if model is None:
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError(
                "validation() needs a model: pass model=... or define a global 'model'"
            ) from None

    score = model.evaluate(X_val, y_val, verbose=1)

    return score

In [17]:
def evaluate(X_test, y_act, metric, model):
    """
    Predict on ``X_test`` and score the predictions with ``metric``.

    Args:
        X_test: Features to predict on.
        y_act: Ground-truth values matching ``X_test``.
        metric: Callable ``metric(y_act, y_pred)`` returning a 2-tuple.
        model: Object exposing ``predict(X, verbose=...)``.

    Returns:
        tuple: ``(y_pred, first_metric_value, second_metric_value)``.
    """
    predictions = model.predict(X_test, verbose=1)
    confusion, balanced_acc = metric(y_act, predictions)
    return predictions, confusion, balanced_acc

In [18]:
path = "../input/regdata/ipt_12.json"
# Work on the first 55 rows only. The explicit .copy() makes the column
# assignments below write to an independent frame instead of a slice of the
# loaded one, which avoids pandas' chained-assignment warning.
data = load_data(path).head(55).copy()

# Downsample every sensor signal; one loop replaces six copy-pasted lines.
for column in ('smcAC', 'smcDC', 'vib_table', 'vib_spindle', 'AE_table', 'AE_spindle'):
    data[column] = data[column].apply(shorten_series)
X, y = preprocess_data(data)

X_train, X_val, X_test, y_train, y_val, y_test = split_train_test(X, y)
model_self = model_training(X_train, y_train, X_val, y_val)


In [19]:
# Predict on the held-out test split with the trained model.
y_pred=model_self.predict(X_test)

In [21]:
# Show the raw model predictions for the test split.
print(y_pred)

In [22]:
# Show the true 'VB' targets of the test split for comparison with the predictions.
print(y_test)