In [1]:

!pip install -q scikit-learn pandas matplotlib plotly tensorflow joblib pyngrok fastapi uvicorn nest_asyncio


In [2]:
# Cell 2: Imports + helper functions + folders
import os, time, threading
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import precision_score, recall_score, f1_score
import joblib
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, LSTM, RepeatVector, TimeDistributed
from tensorflow.keras.optimizers import Adam

# For optional API
from fastapi import FastAPI
from pydantic import BaseModel
import nest_asyncio
from pyngrok import ngrok
import uvicorn

# Seed both NumPy's global generator and TensorFlow with the same value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Working folders for data, saved models and result files
# (exist_ok=True makes re-running this cell harmless).
for _folder in ("/content/data", "/content/models", "/content/results"):
    os.makedirs(_folder, exist_ok=True)

def create_simulated_data(n=600, inject=True):
    """Generate a synthetic telemetry table.

    The frame has the columns time, altitude, velocity, yaw, pitch, battery
    and gps_drift. All noise is drawn from NumPy's global random generator,
    so seed it beforehand for repeatable output.

    Args:
        n: Number of samples (rows) to generate.
        inject: When True, add four 5-sample disturbances at fixed offsets:
            altitude +30 from index 120, velocity +8 from 300,
            gps_drift +0.5 from 400 and battery -35 from 500. Offsets past
            the end of a shorter series simply have no effect.

    Returns:
        pandas.DataFrame with one row per time step.
    """
    # The order of the random draws below is part of the behaviour: keeping it
    # fixed means a given seed always yields the same table.
    signals = {"time": np.arange(n)}
    signals["altitude"] = 100 + np.random.normal(0, 0.4, n).cumsum()   # random walk around 100
    signals["velocity"] = 10 + np.random.normal(0, 0.3, n)
    signals["yaw"] = np.random.normal(0, 0.2, n).cumsum()              # random walk around 0
    signals["pitch"] = np.random.normal(0, 0.1, n)
    signals["battery"] = np.linspace(100, 20, n) + np.random.normal(0, 1, n)  # noisy linear drain
    signals["gps_drift"] = np.random.normal(0, 0.05, n)

    if inject:
        # (column, first affected index, offset added to 5 consecutive samples)
        disturbances = (
            ("altitude", 120, 30),      # sudden altitude spike
            ("velocity", 300, 8),       # speed burst
            ("battery", 500, -35),      # fast battery drain
            ("gps_drift", 400, 0.5),    # GPS drift spike
        )
        for column, start, offset in disturbances:
            signals[column][start:start + 5] += offset

    return pd.DataFrame(signals)

# Status message for the reader: reaching this line means the imports, the
# os.makedirs calls and the create_simulated_data definition in this cell all ran.
print("Helpers ready. Directories created: /content/data, /content/models, /content/results")


Helpers ready. Directories created: /content/data, /content/models, /content/results


In [3]:
# Cell 3: Load dataset from remote or fallback to simulated
# Outcome of this cell: `df` holds the telemetry table, `use_sim` tells whether it is
# simulated, and `local_path` contains the CSV that `df` was built from.
import requests

remote_csv = "https://raw.githubusercontent.com/plotly/datasets/master/drone-energy/drone_energy_train.csv"
local_path = "/content/data/drone_telemetry.csv"
# Telemetry columns the rest of the notebook needs
needed = {"altitude","velocity","yaw","pitch","battery","gps_drift"}

# `df` stays None until a usable remote dataset has been loaded. Every failure path
# below only reports its reason; the simulated fallback is then applied once, at the end.
df = None
try:
    print("Attempting to download example dataset from GitHub...")
    r = requests.get(remote_csv, timeout=20)
    if r.status_code == 200:
        # Save the payload, then inspect it; the context manager closes the file
        # handle before pandas re-opens the path.
        with open(local_path, "wb") as fh:
            fh.write(r.content)
        tmp = pd.read_csv(local_path)
        if needed.issubset(tmp.columns):
            print("Remote dataset has required telemetry columns. Using downloaded file.")
            df = tmp.copy()
            # Later cells plot against a "time" column, which the column check above
            # does not require; fall back to the row number when it is absent.
            if "time" not in df.columns:
                df.insert(0, "time", np.arange(len(df)))
        else:
            print("Remote dataset doesn't contain required telemetry columns. Using simulated telemetry.")
    else:
        print("Could not download (status != 200). Using simulated data.")
except Exception as e:
    # Deliberately broad: the download is best-effort, and any network or parsing
    # problem should lead to the simulated data rather than stop the notebook.
    print("Download failed:", e)
    df = None

use_sim = df is None
if use_sim:
    df = create_simulated_data()
    # Overwrite whatever was downloaded so local_path always matches `df`
    df.to_csv(local_path, index=False)

print("Data shape:", df.shape)
df.head(6)


Attempting to download example dataset from GitHub...
Could not download (status != 200). Using simulated data.
Data shape: (600, 7)


Unnamed: 0,time,altitude,velocity,yaw,pitch,battery,gps_drift
0,0,100.198686,10.227097,0.025045,0.190119,99.278262,-0.09539
1,1,100.14338,9.72335,-0.060836,-0.006066,100.043265,-0.043019
2,2,100.402455,10.260882,-0.036377,-0.070841,99.186208,-0.02068
3,3,101.011667,10.406691,0.072283,-0.151371,99.327677,0.094384
4,4,100.918006,10.12403,0.082055,-0.180314,101.139228,0.027828
5,5,100.824351,10.563039,0.090173,-0.158414,100.672681,-0.066774


In [5]:
# Cell 4: EDA - describe and interactive plots
print("Columns:", list(df.columns))

# Summary statistics, transposed so each feature is one row
summary_stats = df.describe().transpose()
display(summary_stats)

# Interactive line charts of altitude and GPS drift against time
fig1 = px.line(df, x="time", y="altitude", title="Altitude over time")
fig2 = px.line(df, x="time", y="gps_drift", title="GPS drift over time")
for _chart in (fig1, fig2):
    _chart.show()


Columns: ['time', 'altitude', 'velocity', 'yaw', 'pitch', 'battery', 'gps_drift']


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
time,600.0,299.5,173.349358,0.0,149.75,299.5,449.25,599.0
altitude,600.0,99.4425,3.67939,94.589222,96.737907,99.172989,101.583091,126.713661
velocity,600.0,10.093752,0.792721,9.131123,9.831139,10.033543,10.224681,18.383536
yaw,600.0,3.679424,3.025882,-1.335865,0.777939,3.778428,6.011503,9.88801
pitch,600.0,-0.001797,0.097565,-0.301951,-0.064953,-0.001069,0.064652,0.313775
battery,600.0,59.74227,23.767383,-3.829361,39.578663,60.438147,79.852623,101.139228
gps_drift,600.0,0.004146,0.070584,-0.146472,-0.037664,0.001221,0.037206,0.528488


In [6]:
# Cell 5: Preprocessing & scaling
features = ["altitude","velocity","yaw","pitch","battery","gps_drift"]

# Feature matrix in the column order given by `features`
X = df.loc[:, features].to_numpy()

# Fit the standardiser on the full matrix, then apply it to the same data
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Persist the fitted scaler so new data can be transformed the same way later
scaler_path = "/content/models/scaler.joblib"
joblib.dump(scaler, scaler_path)
print(f"Scaler saved to {scaler_path}")


Scaler saved to /content/models/scaler.joblib


In [7]:
# Cell 6: Rule-based quick checks (fast detection)
rule_flag_cols = ["flag_battery", "flag_gps", "flag_alt_jump"]

# Build the rule columns in one chained assign on a fresh copy of `df`; each lambda
# sees the columns created by the ones before it.
df_rules = df.assign(
    # battery below 25 (threshold adjustable)
    flag_battery=lambda d: d["battery"].lt(25).astype(int),
    # absolute GPS drift above 0.3 (threshold adjustable)
    flag_gps=lambda d: d["gps_drift"].abs().gt(0.3).astype(int),
    # absolute sample-to-sample altitude change; the first row has no predecessor -> 0
    alt_diff=lambda d: d["altitude"].diff().abs().fillna(0),
    # altitude change of more than 5.0 between consecutive samples
    flag_alt_jump=lambda d: d["alt_diff"].gt(5.0).astype(int),
    # a row is a rule anomaly as soon as any single rule fires
    rule_anomaly=lambda d: d[rule_flag_cols].any(axis=1).astype(int),
)

print("Rule-based anomalies count:", df_rules["rule_anomaly"].sum())

preview_cols = ["time","altitude","battery","gps_drift","alt_diff","flag_battery","flag_gps","flag_alt_jump","rule_anomaly"]
df_rules[preview_cols].head(10)


Rule-based anomalies count: 52


Unnamed: 0,time,altitude,battery,gps_drift,alt_diff,flag_battery,flag_gps,flag_alt_jump,rule_anomaly
0,0,100.198686,99.278262,-0.09539,0.0,0,0,0,0
1,1,100.14338,100.043265,-0.043019,0.055306,0,0,0,0
2,2,100.402455,99.186208,-0.02068,0.259075,0,0,0,0
3,3,101.011667,99.327677,0.094384,0.609212,0,0,0,0
4,4,100.918006,101.139228,0.027828,0.093661,0,0,0,0
5,5,100.824351,100.672681,-0.066774,0.093655,0,0,0,0
6,6,101.456036,97.899083,0.024302,0.631685,0,0,0,0
7,7,101.76301,99.894841,-0.077365,0.306974,0,0,0,0
8,8,101.57522,99.742949,0.054135,0.18779,0,0,0,0
9,9,101.792244,97.649733,-0.023556,0.217024,0,0,0,0


In [8]:
# Cell 7: Isolation Forest baseline
iso_model = IsolationForest(
    n_estimators=200,
    contamination=0.05,
    random_state=SEED,
).fit(X_scaled)

# predict() labels each row -1 (anomaly) or 1 (normal); store it as a 1/0 flag
iso_pred = iso_model.predict(X_scaled)
df_rules["anomaly_iso"] = np.where(iso_pred == -1, 1, 0)

# Persist the fitted model
iso_path = "/content/models/isolation_forest.joblib"
joblib.dump(iso_model, iso_path)
print(f"IsolationForest saved to {iso_path}")
print("Iso anomalies count:", df_rules["anomaly_iso"].sum())
df_rules.head(6)


IsolationForest saved to /content/models/isolation_forest.joblib
Iso anomalies count: 30


Unnamed: 0,time,altitude,velocity,yaw,pitch,battery,gps_drift,flag_battery,flag_gps,alt_diff,flag_alt_jump,rule_anomaly,anomaly_iso
0,0,100.198686,10.227097,0.025045,0.190119,99.278262,-0.09539,0,0,0.0,0,0,1
1,1,100.14338,9.72335,-0.060836,-0.006066,100.043265,-0.043019,0,0,0.055306,0,0,0
2,2,100.402455,10.260882,-0.036377,-0.070841,99.186208,-0.02068,0,0,0.259075,0,0,0
3,3,101.011667,10.406691,0.072283,-0.151371,99.327677,0.094384,0,0,0.609212,0,0,0
4,4,100.918006,10.12403,0.082055,-0.180314,101.139228,0.027828,0,0,0.093661,0,0,0
5,5,100.824351,10.563039,0.090173,-0.158414,100.672681,-0.066774,0,0,0.093655,0,0,1


In [9]:
# Cell 8: Dense Autoencoder (pointwise features)
input_dim = X_scaled.shape[1]

# Layer widths: input -> 32 -> 16 -> 4 (bottleneck) -> 16 -> 32 -> input.
# Layers are created in exactly that order.
inputs = Input(shape=(input_dim,))
x = inputs
for units in (32, 16):
    x = Dense(units, activation="relu")(x)
encoded = Dense(4, activation="relu")(x)
x = encoded
for units in (16, 32):
    x = Dense(units, activation="relu")(x)
decoded = Dense(input_dim, activation="linear")(x)

autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss="mse")
autoencoder.summary()

# Train the network to reproduce its own input; 15% of the rows are held out for validation
fit_options = dict(epochs=60, batch_size=32, validation_split=0.15, verbose=1)
history = autoencoder.fit(X_scaled, X_scaled, **fit_options)

# Per-row reconstruction error (mean squared error across the features)
reconstructions = autoencoder.predict(X_scaled)
mse = ((X_scaled - reconstructions) ** 2).mean(axis=1)

# Rows whose error exceeds the 95th percentile are flagged (tunable)
threshold = np.percentile(mse, 95)
df_rules["mse_auto"] = mse
df_rules["anomaly_auto"] = np.where(mse > threshold, 1, 0)
print("Autoencoder anomalies count:", df_rules["anomaly_auto"].sum())

# Persist the trained model
autoencoder_path = "/content/models/autoencoder.h5"
autoencoder.save(autoencoder_path)
print(f"Autoencoder saved to {autoencoder_path}")


Epoch 1/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 1.0273 - val_loss: 1.1452
Epoch 2/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.9874 - val_loss: 1.1736
Epoch 3/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.9320 - val_loss: 1.1964
Epoch 4/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.8663 - val_loss: 1.1963
Epoch 5/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.7993 - val_loss: 1.1716
Epoch 6/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.7351 - val_loss: 1.1214
Epoch 7/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.6761 - val_loss: 1.0692
Epoch 8/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.6289 - val_loss: 1.0289
Epoch 9/60
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━



Autoencoder anomalies count: 30
Autoencoder saved to /content/models/autoencoder.h5


In [10]:
# Cell 9: Combine detections and produce final flag & severity score
detector_cols = ["rule_anomaly", "anomaly_iso", "anomaly_auto"]
evidence_cols = ["flag_battery", "flag_gps", "flag_alt_jump", "anomaly_iso", "anomaly_auto"]

# A row is a final anomaly when at least one of the three detectors flagged it
df_rules["anomaly_final"] = df_rules[detector_cols].eq(1).any(axis=1).astype(int)

# Severity = how many individual signals fired for the row (higher -> more evidence)
df_rules["severity"] = df_rules[evidence_cols].sum(axis=1)

flagged_rows = df_rules[df_rules["anomaly_final"] == 1]
print("Final anomalies count (combined):", df_rules["anomaly_final"].sum())
display(flagged_rows.head(20))


Final anomalies count (combined): 76


Unnamed: 0,time,altitude,velocity,yaw,pitch,battery,gps_drift,flag_battery,flag_gps,alt_diff,flag_alt_jump,rule_anomaly,anomaly_iso,mse_auto,anomaly_auto,anomaly_final,severity
0,0,100.198686,10.227097,0.025045,0.190119,99.278262,-0.09539,0,0,0.0,0,0,1,0.248104,0,1,1
5,5,100.824351,10.563039,0.090173,-0.158414,100.672681,-0.066774,0,0,0.093655,0,0,1,0.379097,0,1,1
8,8,101.57522,9.466384,-0.463326,-0.158119,99.742949,0.054135,0,0,0.18779,0,0,1,0.398115,0,1,1
12,12,101.51737,10.083991,-0.644446,0.161222,97.274784,-0.064358,0,0,0.096785,0,0,1,0.405759,0,1,1
16,16,99.432044,10.032818,-0.409601,-0.215182,98.414581,-0.075967,0,0,0.405132,0,0,1,0.162529,0,1,1
120,120,126.516298,10.092341,0.901129,-0.19793,84.1485,-0.024178,0,0,30.316413,1,1,1,2.715814,1,1,3
121,121,126.152543,9.486949,1.007067,0.074791,86.824992,-0.030608,0,0,0.363755,0,0,1,0.569001,0,1,1
122,122,126.713661,9.595544,0.992967,-0.107274,84.073659,0.02456,0,0,0.561118,0,0,1,1.645757,1,1,2
123,123,126.15292,10.222979,1.090268,0.023925,83.259091,-0.017901,0,0,0.56074,0,0,1,0.597627,0,1,1
124,124,126.387663,10.05126,1.103163,0.207408,84.360867,-0.00697,0,0,0.234743,0,0,1,0.247061,0,1,1


In [16]:
# Run this cell once before saving plotly figures as images
# NOTE(review): in the recorded run, the visualization cell still reported that the
# kaleido package was required after this install. Presumably plotly (imported earlier
# in the session) did not pick up the newly installed package, or the installed kaleido
# version is not the one this plotly release expects - confirm, then restart the
# runtime and/or pin the version as needed.
!pip install -q kaleido
print("kaleido installed. Now run the visualization cell.")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/66.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hkaleido installed. Now run the visualization cell.


In [17]:
# Updated Cell 10: Interactive visualization with robust image saving (kaleido + fallbacks)
# Uses `os`, `px` and `plt` from the notebook's import cell and `df_rules` from the
# detection cells. Shows two interactive charts (altitude, GPS drift) with the anomalies
# of each detector overlaid, then writes them to /content/results/ as PNG, or, when PNG
# export fails, as interactive HTML plus static matplotlib PNGs for BOTH signals.

os.makedirs("/content/results", exist_ok=True)

iso_df = df_rules[df_rules["anomaly_iso"]==1]
auto_df = df_rules[df_rules["anomaly_auto"]==1]
rule_df = df_rules[df_rules["rule_anomaly"]==1]
final_df = df_rules[df_rules["anomaly_final"]==1]

# One entry per detector:
# (flagged rows, legend label, plotly marker style, matplotlib scatter keyword arguments)
_anomaly_layers = [
    (iso_df, "Iso Anomaly",
     dict(color='red', size=8), dict(color='red', s=30)),
    (auto_df, "Auto Anomaly",
     dict(color='green', symbol='x', size=10), dict(color='green', marker='x', s=40)),
    (rule_df, "Rule Anomaly",
     dict(color='orange', symbol='diamond', size=9), dict(color='orange', marker='D', s=35)),
]


def _plot_with_anomalies(y_col, title, height):
    """Return a plotly line chart of df_rules[y_col] over time with one marker trace per detector."""
    figure = px.line(df_rules, x="time", y=y_col, title=title)
    for rows, label, marker, _ in _anomaly_layers:
        figure.add_scatter(x=rows["time"], y=rows[y_col], mode='markers',
                           marker=marker, name=label)
    figure.update_layout(height=height)
    return figure


def _save_matplotlib_fallback(y_col, line_label, path):
    """Save a static matplotlib version of the anomaly chart for y_col to path."""
    mpl_fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(df_rules['time'], df_rules[y_col], label=line_label, color='#2563EB')
    for rows, label, _, scatter_kwargs in _anomaly_layers:
        ax.scatter(rows['time'], rows[y_col], label=label, **scatter_kwargs)
    ax.set_xlabel('time')
    ax.set_ylabel(y_col)
    ax.set_title(f'{line_label} with anomalies (matplotlib fallback)')
    ax.legend()
    ax.grid(True)
    mpl_fig.savefig(path, dpi=150, bbox_inches='tight')
    # Close explicitly so the static figure is not also rendered inline
    plt.close(mpl_fig)
    print(f"✅ Saved matplotlib fallback PNG to {path}")


# Altitude with markers
fig = _plot_with_anomalies("altitude", "Altitude with Anomalies (Iso=red, Auto=green, Rule=orange)", 500)
fig.show()

# GPS drift plot
fig2 = _plot_with_anomalies("gps_drift", "GPS drift with anomalies", 350)
fig2.show()

# Try to save as PNG using kaleido, otherwise fallback to HTML + matplotlib PNG
try:
    fig.write_image("/content/results/altitude_anomalies.png")
    fig2.write_image("/content/results/gps_drift_anomalies.png")
    print("✅ Saved PNGs to /content/results/: altitude_anomalies.png , gps_drift_anomalies.png")
except Exception as e:
    # Deliberately broad: any export problem should lead to the fallbacks below
    # instead of aborting the cell.
    print("⚠️ PNG export failed (kaleido issue):", e)
    print("Saving interactive HTML and matplotlib fallback PNGs instead...")

    # Save interactive HTML (good for judges to open)
    fig.write_html("/content/results/altitude_anomalies.html")
    fig2.write_html("/content/results/gps_drift_anomalies.html")
    print("✅ Saved interactive HTML to /content/results/")

    # Static fallbacks for both charts, so the GPS drift figure is not lost
    _save_matplotlib_fallback("altitude", "Altitude",
                              "/content/results/altitude_anomalies_matplotlib.png")
    _save_matplotlib_fallback("gps_drift", "GPS drift",
                              "/content/results/gps_drift_anomalies_matplotlib.png")

print("All done — check /content/results/ for outputs.")



⚠️ PNG export failed (kaleido issue): 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido

Saving interactive HTML and matplotlib fallback PNGs instead...
✅ Saved interactive HTML to /content/results/
✅ Saved matplotlib fallback PNG to /content/results/altitude_anomalies_matplotlib.png
All done — check /content/results/ for outputs.


In [12]:
# Cell 11: Save final CSV of detections for submission
out_csv = "/content/results/anomalies_output.csv"
# Curated export schema: raw telemetry first, then the individual detector outputs,
# then the combined flag and the severity score.
cols_out = ["time","altitude","velocity","yaw","pitch","battery","gps_drift","flag_battery","flag_gps","flag_alt_jump","anomaly_iso","mse_auto","anomaly_auto","rule_anomaly","anomaly_final","severity"]
# Select by cols_out so the file contains exactly these columns in this order
# (previously the list was built but never applied, so helper columns leaked into the file).
df_rules[cols_out].to_csv(out_csv, index=False)
print("Saved final anomalies CSV to", out_csv)


Saved final anomalies CSV to /content/results/anomalies_output.csv


In [13]:
# Cell 12: Upload your CSV to test (must contain required columns)
# Scores an uploaded telemetry CSV with the rule checks plus the saved scaler,
# IsolationForest and autoencoder, then writes the annotated table to
# /content/results/user_anomalies_output.csv.
from google.colab import files
uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is scored
    fname = next(iter(uploaded))
    print("Uploaded:", fname)
    user_df = pd.read_csv(fname)
    required = ["altitude","velocity","yaw","pitch","battery","gps_drift"]
    missing = [col for col in required if col not in user_df.columns]
    if missing:
        print("Uploaded CSV missing required columns. Expected:", required)
        # Name the absent columns so the file can be fixed without guessing
        print("Missing:", missing)
    else:
        # Preprocess with the scaler fitted during training. The loaded artifacts get
        # their own names so the in-memory `scaler` / `iso_model` objects of the
        # training cells are not overwritten.
        user_scaler = joblib.load("/content/models/scaler.joblib")
        X_user = user_scaler.transform(user_df[required].values)

        # Rule flags (same thresholds as the rule-based cell)
        user_df["flag_battery"] = (user_df["battery"] < 25).astype(int)
        user_df["flag_gps"] = (user_df["gps_drift"].abs() > 0.3).astype(int)
        user_df["alt_diff"] = user_df["altitude"].diff().abs().fillna(0)
        user_df["flag_alt_jump"] = (user_df["alt_diff"] > 5.0).astype(int)
        user_df["rule_anomaly"] = ((user_df["flag_battery"] + user_df["flag_gps"] + user_df["flag_alt_jump"])>0).astype(int)

        # Iso model
        user_iso = joblib.load("/content/models/isolation_forest.joblib")
        user_df["anomaly_iso"] = (user_iso.predict(X_user) == -1).astype(int)

        # Autoencoder: only predict() is used, so skip restoring the optimizer/loss.
        # NOTE(review): compile=False also sidesteps deserialization failures of the
        # string loss "mse" that some Keras versions report for legacy .h5 files.
        ae = load_model("/content/models/autoencoder.h5", compile=False)
        rec = ae.predict(X_user)
        mse_user = np.mean(np.square(X_user - rec), axis=1)

        # Anomaly threshold: reuse the value from the training cell when it is still in
        # memory; on a fresh kernel rebuild it the same way (95th percentile of the
        # reconstruction error) from the training CSV and the saved model.
        try:
            ae_threshold = threshold
        except NameError:
            train_df = pd.read_csv("/content/data/drone_telemetry.csv")
            X_train = user_scaler.transform(train_df[required].values)
            train_mse = np.mean(np.square(X_train - ae.predict(X_train)), axis=1)
            ae_threshold = np.percentile(train_mse, 95)

        user_df["mse_auto"] = mse_user
        user_df["anomaly_auto"] = (mse_user > ae_threshold).astype(int)
        user_df["anomaly_final"] = ((user_df["rule_anomaly"]==1) | (user_df["anomaly_iso"]==1) | (user_df["anomaly_auto"]==1)).astype(int)
        # Same evidence count as the `severity` column of the training output
        user_df["severity"] = user_df["flag_battery"] + user_df["flag_gps"] + user_df["flag_alt_jump"] + user_df["anomaly_iso"] + user_df["anomaly_auto"]
        display(user_df.head(30))

        # Save
        user_out = "/content/results/user_anomalies_output.csv"
        user_df.to_csv(user_out, index=False)
        print("Saved user detection results to", user_out)


Saving fin.csv to fin.csv
Uploaded: fin.csv
Uploaded CSV missing required columns. Expected: ['altitude', 'velocity', 'yaw', 'pitch', 'battery', 'gps_drift']
