# Main libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.express as px

# Loading data

In [2]:
df_raw = pd.read_csv('LSTM_output.csv')
df_raw = df_raw.tail(int(0.25*len(df_raw))) # last 25% of initial data
df_raw

Unnamed: 0,roll,pitch,heading,rollRate,pitchRate,yawRate,groundSpeed,airSpeed,climbRate,altitudeRelative,altitudeAMSL,flightDistance,flightTime,distanceToHome,headingToNextWP,headingToHome,throttlePct,gps.lat,gps.lon,anomaly27
27000,-0.1,-0.4,0,-0.1,-0.1,0.0,0.0,0.0,-0.0,0.0,847.2,0.0,00:00:01,0.0,--.--,--.--,29,-19.467932,-49.143791,0
27001,-0.1,-0.4,0,0.1,0.0,0.0,0.0,0.0,-0.0,-0.0,847.2,0.0,00:00:02,0.0,--.--,--.--,55,-19.467932,-49.143791,0
27002,-0.1,0.0,0,0.2,-0.0,0.2,0.0,0.0,0.1,-0.0,847.2,0.0,00:00:03,0.0,--.--,--.--,73,-19.467932,-49.143791,0
27003,-0.0,-0.6,0,-0.3,-0.5,0.0,0.0,0.0,0.9,0.5,847.8,0.0,00:00:04,0.0,--.--,--.--,74,-19.467932,-49.143791,0
27004,-0.3,-0.8,1,1.7,0.4,-4.7,0.0,0.0,1.3,1.6,848.9,0.0,00:00:05,0.0,--.--,--.--,72,-19.467932,-49.143790,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35994,-0.1,0.4,0,-0.0,0.2,-0.1,0.0,0.0,-0.2,-0.9,846.3,549.6,00:02:54,0.2,224,--.--,12,-2.700000,-0.000000,0
35995,-0.1,0.4,0,-0.1,0.2,0.1,0.0,0.0,-0.1,-0.8,846.4,549.6,00:02:55,0.2,224,--.--,0,-2.800000,-0.000000,0
35996,-0.1,0.4,0,-0.0,-0.0,0.0,0.0,0.0,-0.1,-0.8,846.5,549.6,00:02:56,0.2,224,--.--,0,-2.900000,-0.000000,0
35997,-0.1,0.4,0,-0.0,-0.1,0.1,0.0,0.0,-0.0,-0.6,846.6,549.6,00:02:57,0.2,9,--.--,0,-2.700000,-0.000000,0


In [3]:
df = df_raw.drop(['airSpeed','altitudeAMSL','flightDistance','flightTime','distanceToHome','headingToNextWP','headingToHome','gps.lat','gps.lon'],axis=1)
df

Unnamed: 0,roll,pitch,heading,rollRate,pitchRate,yawRate,groundSpeed,climbRate,altitudeRelative,throttlePct,anomaly27
27000,-0.1,-0.4,0,-0.1,-0.1,0.0,0.0,-0.0,0.0,29,0
27001,-0.1,-0.4,0,0.1,0.0,0.0,0.0,-0.0,-0.0,55,0
27002,-0.1,0.0,0,0.2,-0.0,0.2,0.0,0.1,-0.0,73,0
27003,-0.0,-0.6,0,-0.3,-0.5,0.0,0.0,0.9,0.5,74,0
27004,-0.3,-0.8,1,1.7,0.4,-4.7,0.0,1.3,1.6,72,0
...,...,...,...,...,...,...,...,...,...,...,...
35994,-0.1,0.4,0,-0.0,0.2,-0.1,0.0,-0.2,-0.9,12,0
35995,-0.1,0.4,0,-0.1,0.2,0.1,0.0,-0.1,-0.8,0,0
35996,-0.1,0.4,0,-0.0,-0.0,0.0,0.0,-0.1,-0.8,0,0
35997,-0.1,0.4,0,-0.0,-0.1,0.1,0.0,-0.0,-0.6,0,0


# Understanding patterns

In [4]:
df.anomaly27.value_counts()

0    6799
1    2200
Name: anomaly27, dtype: int64

In [9]:
fig1 = px.scatter(df, x="rollRate", y="pitchRate", color="yawRate",size="anomaly27")
fig1.show()

In [6]:
fig2 = px.scatter_3d(df, x="rollRate", y="pitchRate",z="yawRate",color="climbRate",size="anomaly27")
fig2.show()

# Finding our clusters

In [10]:
df_anomaly = df.loc[df['anomaly27'] == 1].drop(['anomaly27'],axis=1)
df_anomaly

Unnamed: 0,roll,pitch,heading,rollRate,pitchRate,yawRate,groundSpeed,climbRate,altitudeRelative,throttlePct
27247,-0.3,10.3,0,-2.3,2.6,0.2,3.7,-0.0,15.0,73
27248,-1.9,15.8,359,-0.9,6.3,0.2,0.7,0.1,14.9,72
27257,11.7,-0.8,0,22.2,19.1,-0.9,2.1,0.0,15.0,72
27261,1.3,0.4,359,1.2,-2.5,0.2,4.2,-0.0,15.0,70
27263,4.4,2.6,0,-3.7,-4.8,0.1,2.0,0.0,15.0,70
...,...,...,...,...,...,...,...,...,...,...
35947,-0.4,17.2,0,2.4,-22.5,-0.9,4.5,-0.0,15.0,72
35949,-0.6,11.3,359,-0.3,0.2,0.0,4.9,-0.0,15.0,71
35952,-15.4,2.6,0,-15.2,-4.1,-0.4,2.7,-0.0,15.0,72
35959,-7.4,8.4,359,-0.1,-0.2,-0.0,4.9,-0.0,15.0,71


In [11]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
X=df_anomaly
scaler = MinMaxScaler()
scaler.fit(X)
X=scaler.transform(X)
inertia = []
for i in range(1,11):
    kmeans = KMeans(
        n_clusters=i, init="k-means++",
        n_init=10,
        tol=1e-04, random_state=42
    )
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)
    print(f'inertia: {inertia}')
fig = go.Figure(data=go.Scatter(x=np.arange(1,11),y=inertia))
fig.update_layout(title="Inertia vs Cluster Number",xaxis=dict(range=[0,11],title="Cluster Number"),
                  yaxis={'title':'Inertia'},
                 annotations=[
        dict(
            x=3,
            y=inertia[2],
            xref="x",
            yref="y",
            text="Elbow!",
            showarrow=True,
            arrowhead=7,
            ax=20,
            ay=-40
        )
    ])

inertia: [965.5912855492394]
inertia: [965.5912855492394, 518.1353133861228]
inertia: [965.5912855492394, 518.1353133861228, 456.4297893156302]
inertia: [965.5912855492394, 518.1353133861228, 456.4297893156302, 399.33598317720157]
inertia: [965.5912855492394, 518.1353133861228, 456.4297893156302, 399.33598317720157, 371.7657168789036]
inertia: [965.5912855492394, 518.1353133861228, 456.4297893156302, 399.33598317720157, 371.7657168789036, 350.57081793962743]
inertia: [965.5912855492394, 518.1353133861228, 456.4297893156302, 399.33598317720157, 371.7657168789036, 350.57081793962743, 328.3147459673539]
inertia: [965.5912855492394, 518.1353133861228, 456.4297893156302, 399.33598317720157, 371.7657168789036, 350.57081793962743, 328.3147459673539, 313.59550675383554]
inertia: [965.5912855492394, 518.1353133861228, 456.4297893156302, 399.33598317720157, 371.7657168789036, 350.57081793962743, 328.3147459673539, 313.59550675383554, 292.84768548967713]
inertia: [965.5912855492394, 518.135313386

In [12]:
kmeans = KMeans(
        n_clusters=3, init="k-means++",
        n_init=10,
        tol=1e-04, random_state=42
    )
kmeans.fit(X)
clusters=pd.DataFrame(X,columns=df_anomaly.columns)
clusters['label']=kmeans.labels_
polar=clusters.groupby("label").mean().reset_index()
polar=pd.melt(polar,id_vars=["label"])
fig4 = px.line_polar(polar, r="value", theta="variable", color="label", line_close=True,height=800,width=1400)
fig4.show()

In [13]:
pie=clusters.groupby('label').size().reset_index()
pie.columns=['label','value']
px.pie(pie,values='value',names='label',color=['blue','red','green'])

In [16]:
kmeans.cluster_centers_

array([[0.46582153, 0.55825226, 0.05961385, 0.58616792, 0.58017946,
        0.48984402, 0.38744425, 0.36766064, 0.94507786, 0.88402659],
       [0.44714325, 0.57987057, 0.95144844, 0.55267901, 0.57440254,
        0.50454352, 0.66936556, 0.34735456, 0.97020977, 0.88909869],
       [0.51121087, 0.58671251, 0.03749732, 0.57095661, 0.54964287,
        0.51142912, 0.83253205, 0.34387327, 0.97796213, 0.88870192]])

In [17]:
kmeans.labels_

array([2, 1, 0, ..., 0, 1, 0], dtype=int32)

In [11]:
clusters

Unnamed: 0,roll,pitch,heading,rollRate,pitchRate,yawRate,groundSpeed,climbRate,altitudeRelative,throttlePct,label
0,0.479029,0.731557,0.0,0.538462,0.591398,0.504392,0.74,0.346154,0.980392,0.9125,2
1,0.443709,0.844262,1.0,0.556410,0.641129,0.504392,0.14,0.384615,0.973856,0.9000,1
2,0.743929,0.504098,0.0,0.852564,0.813172,0.496340,0.42,0.346154,0.980392,0.9000,0
3,0.514349,0.528689,1.0,0.583333,0.522849,0.504392,0.84,0.346154,0.980392,0.8750,1
4,0.582781,0.573770,0.0,0.520513,0.491935,0.503660,0.40,0.346154,0.980392,0.8750,0
...,...,...,...,...,...,...,...,...,...,...,...
2195,0.476821,0.872951,0.0,0.598718,0.254032,0.496340,0.90,0.346154,0.980392,0.9000,2
2196,0.472406,0.752049,1.0,0.564103,0.559140,0.502928,0.98,0.346154,0.980392,0.8875,1
2197,0.145695,0.573770,0.0,0.373077,0.501344,0.500000,0.54,0.346154,0.980392,0.9000,0
2198,0.322296,0.692623,1.0,0.566667,0.553763,0.502928,0.98,0.346154,0.980392,0.8875,1


# Exporting the model

In [12]:
# saving the model (joblib)
import joblib
filename = 'clusters.joblib'
joblib.dump(kmeans, filename)

['clusters.joblib']

In [13]:
# loading the model (joblib)
filename = 'clusters.joblib'
loaded_model = joblib.load(filename)
loaded_model

KMeans(n_clusters=3, random_state=42)

In [14]:
# Example of data to take as input to the model
df_anomaly.iloc[5]

roll               -11.7
pitch                8.5
heading              0.0
rollRate            18.8
pitchRate            3.0
yawRate              0.4
groundSpeed          1.7
climbRate            0.0
altitudeRelative    15.0
throttlePct         71.0
Name: 27264, dtype: float64

In [15]:
df_anomaly.iloc[[5]].values #input values

array([[-11.7,   8.5,   0. ,  18.8,   3. ,   0.4,   1.7,   0. ,  15. ,
         71. ]])

In [16]:
model_input = df_anomaly.iloc[[5]].values
model_output = loaded_model.predict(model_input)
model_output

array([2])