In [2]:
from keras.layers import *
from keras import Model
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from keras import Sequential
import keras as kr
import keras.losses as losses
 
 
# Setup the network parameters:
intermediate_dim = 300
latent_dim = 10
beta = 5
n_sequences = 200
n_features = 8

boat_csv = pd.read_csv("Data/Boat_nominal_data/Boat_sequences.csv")
boat_csv = boat_csv.drop(columns=["Unnamed: 0"])
scaler = StandardScaler()
normal_data = scaler.fit_transform(boat_csv)
print(normal_data.shape)

boat_val = pd.read_csv("Data/Boat_nominal_data/Boat_nom_validation.csv")
boat_val = boat_val.drop(columns=["Unnamed: 0"])
scaler = StandardScaler()
val_nom_data = scaler.fit_transform(boat_val)


boat_anom_csv = pd.read_csv("Data/Boat_anomalous_big.csv")
boat_anom_csv= boat_anom_csv.drop(columns=["Unnamed: 0"])    
scaler = StandardScaler()
anomalous_data = scaler.fit_transform(boat_anom_csv)
print(anomalous_data.shape)

boat_anom_val_csv = pd.read_csv("Data/Boat_anomalous_validation.csv")
boat_anom_val_csv= boat_anom_val_csv.drop(columns=["Unnamed: 0"])    
scaler = StandardScaler()
anomalous_val_data = scaler.fit_transform(boat_anom_val_csv)
print(anomalous_val_data.shape)

def prepare_sequences(data, batch_size, interval):
    samples = []
    for i in range(0,data.shape[0]- batch_size, interval):
        sample = data[i:i+batch_size]	
        samples.append(sample)

    sequences = np.array(samples)

    # Batch size (Number of samples time steps and number of features
    trainX = np.reshape(sequences, (len(sequences), batch_size, n_features))

    return trainX


def prepare_data():    
    trainX_nominal = prepare_sequences(normal_data,656,656) 
    print(trainX_nominal.shape)
    
    valX_nominal = prepare_sequences(val_nom_data, 656,656)
    print(valX_nominal.shape)
    
    trainX_anomalous = prepare_sequences(anomalous_data,6600,6600)
    print(trainX_anomalous.shape)  
    
    valX_anom = prepare_sequences(anomalous_val_data,6600,6600)
    print(valX_anom.shape)

    return trainX_nominal, valX_nominal, trainX_anomalous, valX_anom



trainX_nominal, valX_nominal, trainX_anomalous, valX_anom = prepare_data()


Using TensorFlow backend.


(66256, 8)


(72600, 8)
(13200, 8)
(100, 656, 8)
(17, 656, 8)
(10, 6600, 8)
(1, 6600, 8)


In [1]:

def create_model(pool_size,up_size, windows, seq_length, trainX, epochs, val_data):
    encoder = Sequential(name="encoder")
    encoder.add(Conv1D(filters=300, kernel_size=n_features,
               padding='same', name='Conv1'))
    encoder.add(MaxPooling1D(pool_size=pool_size[0], name='Maxpool1'))
    encoder.add(Conv1D(filters=200, kernel_size=n_features,
               padding='same', name='Conv2'))
    encoder.add(MaxPooling1D(pool_size=pool_size[1],  name="Maxpool2"))
    encoder.add(Conv1D(filters=150, kernel_size=n_features,
               padding='same',name='Conv3'))
    encoder.add(MaxPool1D(pool_size=pool_size[2], name="Maxpool3"))
    encoder.add(Conv1D(filters=100, kernel_size=n_features,
               padding="same", name='Conv4'))
    encoder.add(MaxPool1D(pool_size=pool_size[3], name="Maxpool4"))
    encoder.add(Conv1D(filters=50, kernel_size=n_features,
               padding="same", name='Conv5'))
    encoder.add(MaxPool1D(pool_size=pool_size[4], name="Maxpool5"))
    encoder.add(Conv1D(filters=latent_dim, kernel_size=n_features,          
               padding="same", name='Conv6'))
    encoder.add(MaxPool1D(pool_size=pool_size[5], name="Maxpool6"))
    encoder.add(Flatten())
    encoder.add(Dense(latent_dim, name='embedding'))
    
    decoder = Sequential(name="decoder")
    decoder.add(Dense(seq_length))
    decoder.add(Reshape((windows, np.prod(up_size))))
    decoder.add(Conv1D(filters=n_features, kernel_size=n_features,
                padding='same', name='conv-decode1'))
    decoder.add(UpSampling1D(size=up_size[0], name='upsampling1'))
    decoder.add(Conv1D(filters=50, kernel_size=n_features,
                padding='same', name='conv-decode2'))
    decoder.add(UpSampling1D(size=up_size[1], name='upsampling2'))
    decoder.add(Conv1D(filters=100, kernel_size=n_features,
                padding='same', name='conv-decode3'))
    decoder.add(UpSampling1D(size=up_size[2], name='upsampling3'))
    decoder.add(Conv1D(filters=n_features, kernel_size=n_features,
                padding='same', name='conv-decode4'))
    decoder.add(UpSampling1D(size=up_size[3], name='upsampling4'))       
    
    model = Sequential()
    model.add(encoder)
    model.add(decoder)
    
    encoder.build(input_shape=(None,seq_length,n_features))
    decoder.build(input_shape=(None,latent_dim))
    model.build(input_shape=(None,seq_length, n_features))
    model.compile(optimizer="adam", loss="mse", metrics=['accuracy'])
    encoder.summary()
    decoder.summary()
    model.fit(x=trainX, y=trainX, epochs=epochs, validation_data=(val_data, val_data))
    model.save("Models/Anomalous_conv_compressor.h5")
    
    return model
    
    
# model = create_model(pool_size=[2, 2, 2, 2, 2], windows=41,
#                      up_size=[2, 2, 2, 2], seq_length=656, 
#                      trainX=trainX_nominal, epochs=15,
#                       val_data=valX_nominal)
model = create_model(pool_size=[2, 2, 2, 5, 5, 3], windows=33, 
                     up_size=[5, 5, 4, 2], seq_length=6600, 
                     trainX=trainX_anomalous, epochs=50,
                     val_data=valX_anom)


NameError: name 'trainX_anomalous' is not defined

In [5]:
def create_conv_model(epochs, seq_length, trainX, valX_data):
    model = Sequential()
    
    encoder = Sequential()
    encoder.add(Conv1D(filters=200, input_shape=(seq_length, 8), padding='same', 
                       kernel_size=n_features))
    encoder.add(Conv1D(filters=150, padding='same', 
                       kernel_size=n_features))
    encoder.add(Conv1D(filters=100,padding='same', 
                       kernel_size=n_features))
    encoder.add(Conv1D(filters=50, padding='same',
                       kernel_size=n_features))
    encoder.add(Conv1D(filters=latent_dim, padding='same',
                       kernel_size=n_features)) 
    
    decoder = Sequential(name="Decoder")
    decoder.add(Dense(64, name="Decoder_first"))
    decoder.add(Dense(units=n_features))
    
    model.add(encoder)
    model.add(decoder)
    
    model.compile(optimizer="adam", loss="mse", metrics=['accuracy'])
    
    model.fit(x=trainX, y=trainX, epochs=epochs, validation_data=(valX_data,valX_data))
    model.save("Models/Nominal_conv.h5")

    return model


# nom_model = create_conv_model(30, 656, trainX=trainX_nominal, 
#                             valX_data=valX_nominal)

# anom_model = create_conv_model(50, 6600, trainX=trainX_anomalous, 
#                                valX_data=valX_anom)


In [9]:
nom_model = kr.models.load_model("Models/Nominal_conv.h5")
anom_model = kr.models.load_model("Models/Anomalous_conv.h5") 

print("Done")


Done


In [10]:

nom_encoded = nom_model.get_layer(index=0).predict(trainX_nominal)
print(nom_encoded.shape)
# 
# plt.plot(nom_encoded[0])
# plt.show()

anom_encoded = anom_model.get_layer(index=0).predict(trainX_anomalous) 
print(anom_encoded.shape)
# 
# plt.plot(anom_encoded)
# plt.show()


(100, 656, 10)


(10, 6600, 10)


In [12]:
plt.plot(nom_encoded[0])
plt.title("nom_encoding")
plt.show()
plt.plot(anom_encoded[0])
plt.title("anom_encoded")
plt.show()


In [13]:
mod_enc_anom = np.zeros(shape=(10,10))

mod_enc_anom[:anom_encoded.shape[0],:anom_encoded.shape[1]] = anom_encoded
mod_enc_anom[0] = [-0.4481652 ,0.660993,3.6124344, 1.1903877,0.03680914,4.85686,
                    -0.5965184, 3.8528519, -2.424729, 0.23854657]


ValueError: could not broadcast input array from shape (10,6600,10) into shape (10,10)

In [14]:
mod_anom_dec = anom_model.get_layer(index=1).predict(mod_enc_anom)
print(mod_anom_dec.shape)
plt.plot(anom_dec[0])
plt.title("normal")
plt.show()
plt.plot(mod_anom_dec[0])
plt.title("modified")
plt.show()


ValueError: Error when checking input: expected Decoder_input to have 3 dimensions, but got array with shape (10, 10)

In [77]:
mod_df = pd.DataFrame(mod_anom_dec[8], columns=boat_anom_csv.columns)
plt.plot(mod_df["lon"], mod_df['lat'])
plt.title("reconstruction")
plt.show()


In [16]:

nom_dec = nom_model.get_layer(index=1).predict(nom_encoded)
print(nom_dec.shape)

anom_dec = anom_model.get_layer(index=1).predict(anom_encoded)
print(anom_dec.shape)


(100, 656, 8)
(10, 6600, 8)


In [17]:
df = pd.DataFrame(nom_dec[0], columns=boat_csv.columns)
plt.plot(df["Lon"], df['Lat'])
plt.title("reconstruction_nom")
plt.show()


df_anom = pd.DataFrame(anom_dec[0], columns=boat_anom_csv.columns)
plt.plot(df_anom["lon"], df_anom['lat'])
plt.title("reconstruction_anom")
plt.show()



In [18]:

from sklearn.manifold import TSNE

def tsne(data, title):
    tsne = TSNE(n_components=2, random_state=0)
    
    tsne_obj= tsne.fit_transform(data)
    tsne_df = pd.DataFrame({'X':tsne_obj[:,0],
                            'Y':tsne_obj[:,1],
                            })
    
    plt.scatter(x=tsne_df["X"],y=tsne_df["Y"], alpha=0.5)
    plt.title(title)
    plt.show()
    
    return tsne_df


In [19]:
weight_modifiers = np.ones((latent_dim,))

tsne_dec_nom_df = tsne(nom_dec[0], "Decoded Nominal")
print(len(tsne_dec_nom_df))

tsne_dec_anom_df = tsne(anom_dec[0], "Decoded Anomalous")
print(len(tsne_dec_anom_df))


656


6600


In [20]:
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering

plt.figure(figsize=(10, 7))
plt.title("Customer Dendograms")
dend_nom = shc.dendrogram(shc.linkage(tsne_dec_nom_df, method='ward'))
dend_anom = shc.dendrogram(shc.linkage(tsne_dec_anom_df, method='ward'))



In [21]:

n_clusters = 4
cluster = AgglomerativeClustering(n_clusters=n_clusters, 
                                  affinity='euclidean',
                                  linkage='ward')
cl_nom = cluster.fit_predict(tsne_dec_nom_df)
cl_anom = cluster.fit_predict(tsne_dec_anom_df)
plt.plot(cl_nom)
plt.title("NOMINAL CLUSTERS")
plt.show()

plt.plot(cl_anom)
plt.title("Anomalous_clusters")
plt.show()

In [22]:

df_nom = pd.DataFrame(normal_data, columns=boat_csv.columns)
df_anom = pd.DataFrame(anomalous_data, columns=boat_anom_csv.columns)
def plot_clusters(cl, df, nominal):
    for i in range(n_clusters):
        cluster = np.squeeze(np.argwhere(cl==i))    
        if nominal:
            plt.scatter(x=df['Lon'][cluster],y=df["Lat"][cluster],s=5)
        else:
            plt.scatter(x=df['lon'][cluster],y=df["lat"][cluster],s=5)
    plt.show()


plot_clusters(cl_nom, df_nom, True)
plot_clusters(cl_anom, df_anom, False)


In [32]:
df_nominal = pd.DataFrame(normal_data, columns=boat_csv.columns)
df = pd.DataFrame(nom_enc[0])

plt.figure(1)
axis_list = []

for i in range(df.shape[1]):
    plt.plot(df[i])

plt.show()    

In [23]:
from sklearn.cluster import KMeans
cluster_comp = [0,1,2,3]# print(tsne_enc_df)
# print(tsne_dec_df)

kmeans_normal = KMeans(n_clusters=2, random_state=0).fit(normal_data)
clusters_normal = kmeans_normal.predict(np.average(trainX_nominal,axis=0))

kmeans_enc = KMeans(n_clusters=2, random_state=0).fit(tsne_enc_df)
clusters_enc = kmeans_enc.predict(tsne_enc_df)
plt.plot(clusters_enc)
plt.show()

kmeans_dec = KMeans(n_clusters=2, random_state=0).fit(tsne_dec_df)
clusters_dec = kmeans_dec.predict(tsne_dec_df)

NameError: name 'tsne_enc_df' is not defined

In [25]:
def transform_to_mask(cl_label, clust_obj):
    mask = []
    part = []
    for i, elem in enumerate(clust_obj):
        if elem == cl_label:
            part.append(i)
        else:
            if part:
                mask.append(part)
                part = []
            else:
                pass
    return mask


def get_mask_list(clust_obj):
    mask_list = []
    for i in cluster_comp:
        mask_list.append(transform_to_mask(i, clust_obj))
    return mask_list


masks_normal = np.array(get_mask_list(clusters_normal))

masks_enc = np.array(get_mask_list(clusters_enc))

masks_dec = np.array(get_mask_list(clusters_dec))

print(masks_normal)
masks = (masks_normal, masks_enc,masks_dec)
print(masks_normal.shape, masks_enc.shape, masks_enc.shape)


NameError: name 'clusters_enc' is not defined

In [41]:
def plot_cl(cl, color):
    plt.plot(cl["lon"], cl["lat"], color=color)
        
    
def plot_clusters_on_map():
    titles = ['Nominal Normal','Nominal Encoded','Nominal Decoded']
    color_list = ['blue','green','red','black']
    map = anomalous_data[:6620]
    print(map.shape)
    for k,mask in enumerate(masks):
        for i, elem in enumerate(mask):
            for j in elem:
                cl = pd.DataFrame(map[j], columns=boat_anom_csv.columns)
                plot_cl(cl, color_list[i]) 
        plt.title(titles[k])
        plt.show()
      
               
plot_clusters_on_map()


(6620, 7)
