In [None]:
# ============================================================
# COMP7707 A3 - Real-time IoT Analytics Prototype
# Member A: System Design Lead & Implementation
# Name: <Your Name> (auth0004)
# ============================================================

# ------------------------------------------------------------
# 1) Imports & Setup
# ------------------------------------------------------------
import pandas as pd, numpy as np, time, urllib.request, os
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Remote CSV that is downloaded when no local copy exists yet.
DATA_URL = "https://data.gov.au/data/dataset/southern-grampians-weather-sensor-data/resource/82a5e953-00dc-42d6-9c07-3066bf800be3/download/SGSC_Weather_Sensor_Data.csv"
# Local cache file: the download is saved here and pandas reads from here.
LOCAL_PATH = "SGSC_Weather_Sensor_Data.csv"

# ------------------------------------------------------------
# 2) Data Ingestion
# ------------------------------------------------------------
# Download the dataset once and reuse the cached copy on later runs.
# The download is written to a temporary ".part" file and renamed only after
# it completes, so an interrupted transfer can never leave a truncated CSV
# that the existence check would mistake for a valid cache.
if not os.path.exists(LOCAL_PATH):
    tmp_path = LOCAL_PATH + ".part"
    try:
        urllib.request.urlretrieve(DATA_URL, tmp_path)
        os.replace(tmp_path, LOCAL_PATH)
    finally:
        # Only true when the download failed part-way: clean up the leftover.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print("✅ Download complete")
df = pd.read_csv(LOCAL_PATH, low_memory=False)
# Normalise headers so later lookups can use lowercase names.
df.columns = df.columns.str.lower().str.strip()

# ------------------------------------------------------------
# 3) Preprocessing
# ------------------------------------------------------------
# Produce a datetime column named 'time', whichever header the file uses.
if 'time' in df.columns:
    # Values that do not match the compact YYYYMMDDHHMMSS layout become NaT.
    df['time'] = pd.to_datetime(df['time'], format='%Y%m%d%H%M%S', errors='coerce')
else:
    # Fall back to the first column whose name contains 'date'. Only one
    # column is renamed: renaming several would create duplicate 'time'
    # columns and break the dropna/sort below.
    date_cols = [c for c in df.columns if 'date' in c]
    if not date_cols:
        raise KeyError("No 'time' or date-like column found in the dataset")
    time_source = date_cols[0]
    df[time_source] = pd.to_datetime(df[time_source], errors='coerce')
    df.rename(columns={time_source: 'time'}, inplace=True)
# Discard rows whose timestamp could not be parsed, then order chronologically.
df = df.dropna(subset=['time']).sort_values('time')

# Use only the sensor channels that actually exist in this file, and keep
# only the rows where every one of them has a value.
features = [c for c in ['airtemp','relativehumidity','windspeed','solar','vapourpressure'] if c in df.columns]
df = df.dropna(subset=features)
print(f"✅ Loaded {len(df)} rows with features: {features}")

# ------------------------------------------------------------
# 4) Feature Extraction (optional)
# ------------------------------------------------------------
# Create a rolling mean and a first difference as optional derived signals.
# NOTE(review): these columns are not appended to `features`, so the models
# trained on `features` do not see them - confirm whether that is intended.
derived_cols = []
if 'airtemp' in df.columns:
    # 5-sample moving average; the first 4 rows have no full window (NaN).
    df['temp_roll'] = df['airtemp'].rolling(5).mean()
    derived_cols.append('temp_roll')
if 'relativehumidity' in df.columns:
    # Change from the previous reading; the first row has no predecessor (NaN).
    df['humidity_diff'] = df['relativehumidity'].diff()
    derived_cols.append('humidity_diff')
# Drop only the warm-up rows introduced above. A bare dropna() would also
# discard every row that has a gap in any unrelated column of the CSV.
if derived_cols:
    df = df.dropna(subset=derived_cols)

# ------------------------------------------------------------
# 5) Train-Test Split
# ------------------------------------------------------------
# Chronological split: the earliest 70% of rows train the models and the
# remainder is replayed as the simulated stream (re-indexed from 0).
train_ratio = 0.7
train_end = int(train_ratio * len(df))
train_df = df.iloc[:train_end]
stream_df = df.iloc[train_end:]
stream_df = stream_df.reset_index(drop=True)

# ------------------------------------------------------------
# 6) Model Training (from Member B)
# ------------------------------------------------------------
# Feature matrix for the training split. The scaler is fitted on all training
# rows so the streaming stage can apply the same transform.
X_train = train_df[features].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)

iforest = IsolationForest(contamination=0.05, random_state=42)
ocsvm = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale')

# IsolationForest is trained on the raw (unscaled) features; the streaming
# stage scores raw readings with it as well.
iforest.fit(X_train)

# Kernel SVM training cost grows at least quadratically with the number of
# samples, which makes fitting on the full training split impractical.
# Fit the One-Class SVM on a reproducible random subsample instead.
OCSVM_MAX_TRAIN = 20000
if len(X_scaled) > OCSVM_MAX_TRAIN:
    rng = np.random.default_rng(42)
    sample_idx = rng.choice(len(X_scaled), size=OCSVM_MAX_TRAIN, replace=False)
    X_ocsvm = X_scaled[sample_idx]
else:
    X_ocsvm = X_scaled
ocsvm.fit(X_ocsvm)
print("✅ Models trained")

# ------------------------------------------------------------
# 7) Real-time Streaming Simulation
# ------------------------------------------------------------
# Pause between simulated sensor readings, in seconds. Set to 0 to replay the
# stream at full speed.
STREAM_DELAY_SECONDS = 0.1

scores_if, scores_oc, alerts = [], [], []
# Convert the feature columns to one float matrix up front instead of building
# an object-dtype Series per row and re-converting it on every iteration.
stream_X = stream_df[features].values.astype(float)
for ts, reading in zip(stream_df['time'], stream_X):
    # Both models expect a 2-D array, so present each reading as a 1-row batch.
    x = reading.reshape(1, -1)
    score_if = iforest.decision_function(x)[0]
    pred_if = iforest.predict(x)[0]
    # The One-Class SVM was fitted on standardised data: apply the same scaler.
    x_scaled = scaler.transform(x)
    score_oc = ocsvm.decision_function(x_scaled)[0]
    pred_oc = ocsvm.predict(x_scaled)[0]
    scores_if.append(score_if)
    scores_oc.append(score_oc)
    # Raise an alert when at least one model predicts -1 for this reading.
    if pred_if == -1 or pred_oc == -1:
        alerts.append({"time": ts, "IF": pred_if, "OC": pred_oc})
    if STREAM_DELAY_SECONDS > 0:
        time.sleep(STREAM_DELAY_SECONDS)

# ------------------------------------------------------------
# 8) Visualization & Results
# ------------------------------------------------------------
# Plot the decision scores of both detectors against the stream position.
plt.figure(figsize=(10,4))
plt.plot(scores_if, label="IsolationForest")
# Slight transparency keeps the first curve visible where the two overlap.
plt.plot(scores_oc, label="OneClassSVM", alpha=0.7)
plt.legend()
plt.title("Real-time Anomaly Detection Scores")
plt.xlabel("Timestep")
plt.ylabel("Score")
plt.show()

# `alerts` holds one entry per streamed row flagged by at least one model.
print(f"⚠️ Total anomalies detected: {len(alerts)}")

# ------------------------------------------------------------
# 9) Export Alerts Log
# ------------------------------------------------------------
# Write the alert log to CSV. The columns are named explicitly so the file
# still gets its header row when no alerts were raised (an empty list would
# otherwise produce a DataFrame with no columns at all).
alerts_df = pd.DataFrame(alerts, columns=["time", "IF", "OC"])
alerts_df.to_csv("anomaly_alert_log.csv", index=False)
print("📂 Alerts saved to anomaly_alert_log.csv")


✅ Download complete
✅ Loaded 742141 rows with features: ['airtemp', 'relativehumidity', 'windspeed', 'solar', 'vapourpressure']
