# Installing, Importing, and Initializing Libraries

In [None]:
!pip install pandarallel

In [None]:
# --- Standard library ---
import os
import warnings

# --- Third-party ---
import numpy as np                   # linear algebra
import pandas as pd                  # data processing, CSV / parquet file I/O
import plotly.express as px
import matplotlib.pyplot as plt
from tqdm import tqdm
# from pandarallel import pandarallel   # optional parallel apply (disabled)

# --- Notebook-wide configuration ---
warnings.filterwarnings("ignore")
pd.set_option('display.max_row', 500)
pd.set_option('display.max_colwidth', None)
tqdm.pandas()                        # registers DataFrame.progress_apply & friends
# pandarallel.initialize(progress_bar=True)

# Input data files are available in the read-only "../input/" directory.
# List every file below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# Reading the Datasets.

In [None]:
# Load the training time series; engine='auto' lets pandas choose the parquet backend.
df_series = pd.read_parquet(
    "/kaggle/input/child-mind-institute-detect-sleep-states/train_series.parquet",
    engine='auto',
)

In [None]:
# Load the annotated events for the training series.
df_events = pd.read_csv(
    "/kaggle/input/child-mind-institute-detect-sleep-states/train_events.csv"
)
# Keep only the two join keys and the event label used for the merge.
events = df_events.loc[:, ['series_id', 'step', 'event']]

# Merging the Datasets to train the model

In [None]:
# Attach the event label (if any) to every reading; readings without an event get NaN.
series_df = pd.merge(df_series, events, on=["step", "series_id"], how='left')

# Seed the binary target at the event rows: onset -> asleep (1), wakeup -> awake (0).
series_df['sleep'] = series_df['event'].map({"onset": 1.0, "wakeup": 0.0})

# Carry the state forward until the next event, but only *within* a series so the
# state at the end of one recording never leaks into the start of the next one.
# Readings before the first event of a series are treated as awake (0).
# Assigning the result back (instead of chained `fillna(..., inplace=True)`) keeps
# this working under pandas copy-on-write.
series_df['sleep'] = (
    series_df.groupby('series_id', sort=False)['sleep'].ffill().fillna(0.0)
)

# Drop the reference to the raw frame so its memory can be reclaimed.
df_series = []

In [None]:
# Preview the merged frame (series readings plus the `event` and `sleep` columns).
series_df

# Windowing the training data
### The goal is to detect sleep events (onset / wakeup),
### so we extract the time steps within 30 minutes before and after each event.

In [None]:
def window(df, win_size):
    """Keep only the rows that lie within ``win_size`` index labels of an event.

    Every row whose ``event`` is not null is treated as an event. For the k-th
    event (0-based) at index label ``i``:

    * labels ``i - win_size .. i``  get window id ``2k``   ("before" half)
    * labels ``i .. i + win_size``  get window id ``2k+1`` ("after" half, which
      also owns the event row itself)

    Later events overwrite earlier ones where windows overlap.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``event`` column. A ``window`` column is added to it
        (NaN for rows outside every window).
    win_size : int
        Half-width of the window, in index labels.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the rows that belong to some window, including
        the ``window`` column.
    """
    event_labels = df.index[df['event'].notna()]

    # Build the ids in a stand-alone Series and write with a single .loc call;
    # the chained form `df['window'].loc[a:i] = ...` assigns to a temporary and
    # is silently lost when pandas copy-on-write is active.
    window_ids = pd.Series(np.nan, index=df.index)
    next_id = 0
    for i in tqdm(event_labels):
        window_ids.loc[i - win_size:i] = next_id
        next_id += 1
        window_ids.loc[i:i + win_size] = next_id
        next_id += 1

    df['window'] = window_ids
    # Return a copy so callers can add columns without SettingWithCopy issues.
    return df[window_ids.notna()].copy()
# Half-width of the extraction window around each event, in time steps
# (the section heading describes this as 30 minutes on either side).
win_size = 360

# Keep only the readings that fall inside a window around some event.
df_series = window(df=series_df, win_size=win_size)

In [None]:
# Preview the windowed frame returned by window().
df_series

# Clustering the Enmo and Anglez

In [None]:
def clustering(df, n_clusters=4, random_state=42):
    """Cluster the rows of ``df`` on standardized ``anglez`` and ``enmo``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the numeric columns ``anglez`` and ``enmo``.
    n_clusters : int, default 4
        Number of K-Means clusters.
    random_state : int or None, default 42
        Seed for K-Means initialisation, so cluster labels are reproducible
        across Restart & Run All.

    Returns
    -------
    numpy.ndarray
        One integer cluster label (``0 .. n_clusters-1``) per row of ``df``.
    """
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import StandardScaler

    features = df[['anglez', 'enmo']]
    # Standardize so both signals contribute equally to the Euclidean distance.
    features_scaled = StandardScaler().fit_transform(features)
    # n_init is pinned because its default differs between scikit-learn versions.
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
    model.fit(features_scaled)
    return model.labels_

# Map labels 0..3 to 0.25..1.0 so the feature lies in (0, 1].
df_series['cluster'] = (clustering(df_series) + 1) / 4

# Adding Rolling Standard Deviations

- As observed, enmo and anglez vary more often and more strongly than usual for a few time steps before and after an event.
- Rolling standard deviations of both signals are added as features so that the model can take these variations into account.

In [None]:
def rollingstd(df, win=12):
    """Add rolling standard deviations of ``anglez`` and ``enmo`` to ``df``.

    Two float columns are written into ``df`` itself:

    * ``sd_anglez_1`` - rolling std of ``anglez`` over ``win`` rows
    * ``sd_enmo_1``   - rolling std of ``enmo`` over ``win`` rows

    The first ``win - 1`` rows have no complete window and are filled with the
    number 0.0 (not the string '0.0', which would turn the column into object
    dtype).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain numeric ``anglez`` and ``enmo`` columns.
    win : int, default 12
        Rolling window length in rows.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the two columns added.
    """
    for source_col, target_col in (('anglez', 'sd_anglez_1'), ('enmo', 'sd_enmo_1')):
        # Assign the filled result back; chained `fillna(inplace=True)` on a
        # column selection is not guaranteed to modify the frame.
        df[target_col] = df[source_col].rolling(window=win).std().fillna(0.0)
    return df
# Add the rolling-std features to the windowed frame.
series_df = rollingstd(df_series)

# Make sure both feature columns are numeric.
series_df['sd_anglez_1'] = pd.to_numeric(series_df['sd_anglez_1'])
# Convert sd_enmo_1 from its own values; the previous code copied sd_anglez_1
# into sd_enmo_1, discarding the enmo feature entirely.
series_df['sd_enmo_1'] = pd.to_numeric(series_df['sd_enmo_1'])

# The target is a 0/1 label; store it as an integer.
series_df['sleep'] = series_df['sleep'].astype(int)

In [None]:
# Check the column dtypes after the rolling-std features and the integer cast of `sleep`.
series_df.dtypes

In [None]:
from sklearn.model_selection import train_test_split, GroupShuffleSplit
from sklearn import preprocessing

# Feature matrix and target (kept as a one-column DataFrame).
X = series_df[['sd_anglez_1', 'sd_enmo_1', 'anglez', 'enmo', 'cluster']]
y = series_df[['sleep']]

# Feature scaling is intentionally disabled; X_scaled is just an alias of X.
X_scaled = X

# Split by series_id rather than by row. Neighbouring readings of one recording
# are nearly identical (and share rolling-window features), so a row-wise random
# split would put near-duplicates of the training rows in the test set and
# inflate every test metric.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_pos, test_pos = next(
    splitter.split(X_scaled, y, groups=series_df['series_id'])
)
X_train, X_test = X_scaled.iloc[train_pos], X_scaled.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

In [None]:
# Confirm the dtypes of the training feature columns.
X_train.dtypes

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fixed seed so the forest (and its scores) are reproducible between runs.
rf = RandomForestClassifier(n_jobs=-1, verbose=1, random_state=42)

print('Training the model')
# scikit-learn expects a 1-D target; y_train is a one-column DataFrame.
rf.fit(X_train, y_train.values.ravel())

# Score on the held-out split. No `evaluate` helper is defined in this notebook,
# so the scikit-learn metrics are used directly.
y_pred = rf.predict(X_test)
print('Test accuracy', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

In [None]:
# Width of the first dense layer: number of feature columns times the window half-width.
input_size = win_size * X_train.shape[1]


In [None]:
import tensorflow as tf

# Fully connected binary classifier on the per-row feature vector.
model_nn = tf.keras.Sequential([
    tf.keras.layers.Dense(input_size, input_shape=[X_train.shape[1]]),
    tf.keras.layers.Dense(win_size * 2, activation=tf.nn.relu, use_bias=True),
    tf.keras.layers.Dense(win_size, activation=tf.nn.relu, use_bias=False),
    # Layer sizes must be integers, hence floor division.
    tf.keras.layers.Dense(win_size // 2, activation=tf.nn.relu, use_bias=False),
    # Sigmoid gives P(sleep) for a single output unit. Softmax over one unit is
    # identically 1.0, so the network could never predict the negative class.
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
])

model_nn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    # The output layer already emits probabilities, so the loss must not apply
    # another sigmoid (from_logits=False).
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)



In [None]:
epochs = 50
batch_size = win_size

# Patience values must be whole epochs. Early stopping waits twice as long as
# the learning-rate scheduler; with equal patience both would fire on the same
# epoch and training would stop before a reduced learning rate was ever used.
lr_patience = max(1, epochs // 10)
stop_patience = 2 * lr_patience

callbacks = [
    # Keep the weights of the epoch with the lowest validation loss.
    tf.keras.callbacks.ModelCheckpoint(
        "best_model.keras", save_best_only=True, monitor="val_loss"
    ),
    # Halve the learning rate when validation loss stalls.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=lr_patience, min_lr=0.0001
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=stop_patience, verbose=1
    ),
]

model_nn.compile(
    optimizer="adam",
    # The model's output layer applies an activation, so it emits probabilities
    # rather than raw logits; the loss must therefore use from_logits=False.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)
model_nn.summary()

In [None]:
# Confirm the dtype of the training target before fitting the network.
y_train.dtypes

In [None]:
# Train the network. The last 20% of the training rows are held out for
# validation, which drives the checkpoint / LR-schedule / early-stopping callbacks.
history = model_nn.fit(
    X_train,
    y_train,
    batch_size=batch_size,   # defined with the training configuration
    epochs=epochs,
    # Use the configured callback list. The previous `[early_stop]` referenced an
    # undefined name and would also have skipped the ModelCheckpoint that writes
    # "best_model.keras".
    callbacks=callbacks,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Reload the checkpoint with the lowest validation loss. Keras is only available
# in this notebook through the `tf` import, so it is accessed as `tf.keras`.
model = tf.keras.models.load_model("best_model.keras")

# evaluate() returns the loss followed by the compiled metrics (accuracy).
test_loss, test_acc = model.evaluate(X_test, y_test)

print("Test accuracy", test_acc)
print("Test loss", test_loss)

In [None]:
# The model is compiled with metrics=['accuracy'], so the training history is
# keyed by "accuracy" / "val_accuracy" (there is no sparse-categorical metric).
metric = "accuracy"
plt.figure()
plt.plot(history.history[metric])
plt.plot(history.history["val_" + metric])
plt.title("model " + metric)
plt.ylabel(metric, fontsize="large")
plt.xlabel("epoch", fontsize="large")
plt.legend(["train", "val"], loc="best")
plt.show()
plt.close()