### **TUGAS BESAR IOT TEMP**

---

KELOMPOK        : 03

Kelas       : SI4703

---

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import silhouette_score, confusion_matrix, classification_report, roc_auc_score
import pandas as pd
import streamlit as st
import plotly.express as px
import joblib
from fastapi import FastAPI
import uvicorn
from datetime import datetime
import threading
import os
from pyngrok import ngrok

In [2]:
!ngrok authtoken add-authtoken 2ww55iuAVykS1Gi7CYFWYKsXAFS_5i4MAWjQdcr9sGXSghAvN

ERROR:  accepts 1 arg(s), received 2


In [3]:
app = FastAPI()

In [4]:
data = pd.read_csv('IOT-temp.csv', parse_dates=['noted_date'])
print(data.describe())
print("Distribusi Status:")
print(data['out/in'].value_counts())

               temp
count  97606.000000
mean      35.053931
std        5.699825
min       21.000000
25%       30.000000
50%       35.000000
75%       40.000000
max       51.000000
Distribusi Status:
out/in
Out    77261
In     20345
Name: count, dtype: int64


In [5]:
# Preprocessing
data = data.drop_duplicates()
data['noted_date'] = pd.to_datetime(data['noted_date'], format='%d-%m-%Y %H:%M')
data['hour'] = data['noted_date'].dt.hour
data = data.drop(columns=['id', 'room_id/id'])
data['temp'] = data['temp'].fillna(data['temp'].mean())
data['out/in'] = data['out/in'].map({'In': 0, 'Out': 1}).fillna(0)
data['hour'] = data['hour'].fillna(data['hour'].mode()[0])
scaler = StandardScaler()
data['temp_scaled'] = scaler.fit_transform(data[['temp']])
data['temp_deviation'] = data['temp'] - data['temp'].mean()

In [6]:
# Fitur untuk clustering dan supervised learning
features = data[['temp_scaled', 'temp_deviation', 'hour']]

In [7]:
# DBSCAN Clustering
dbscan = DBSCAN(eps=0.5, min_samples=5)
data['cluster'] = dbscan.fit_predict(features)
print("Distribusi Cluster:")
print(data['cluster'].value_counts().sort_index())

Distribusi Cluster:
cluster
-1       99
 0      220
 1      124
 2      731
 3      360
       ... 
 568    260
 569     62
 570     13
 571     21
 572     13
Name: count, Length: 574, dtype: int64


In [None]:
# Evaluasi Silhouette Score untuk DBSCAN
sil_score = silhouette_score(features[data['cluster'] != -1], data['cluster'][data['cluster'] != -1])
print(f"Silhouette Score: {sil_score:.2f}")

In [None]:
# Isolation Forest Detector
iso_forest = IsolationForest(contamination=0.1, random_state=42)
data['anomaly'] = iso_forest.fit_predict(features)
data['anomaly_score'] = iso_forest.decision_function(features)

In [None]:
# Regresi Logistik
X = data[['temp_scaled', 'hour']]
y = data['out/in']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)
print("Regresi Logistik - Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_log))
print("Classification Report:")
print(classification_report(y_test, y_pred_log))
print(f"ROC-AUC: {roc_auc_score(y_test, log_reg.predict_proba(X_test)[:, 1]):.2f}")
print(f"Koefisien: temp_scaled={log_reg.coef_[0][0]:.4f}, hour={log_reg.coef_[0][1]:.4f}")

In [None]:
# Naïve Bayes
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)
print("Naïve Bayes - Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_nb))
print("Classification Report:")
print(classification_report(y_test, y_pred_nb))
print(f"ROC-AUC: {roc_auc_score(y_test, nb.predict_proba(X_test)[:, 1]):.2f}")
print(f"Feature Importance (Variance): {nb.theta_.var(axis=0)}")

In [None]:
# Simpan model
joblib.dump(dbscan, 'dbscan_model.pkl')
joblib.dump(iso_forest, 'iso_forest_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

In [None]:
# API Endpoint
@app.post("/anomalies")
async def get_anomalies(data_input: dict):
    input_df = pd.DataFrame([data_input])
    input_df['noted_date'] = pd.to_datetime(data_input.get('noted_date', datetime.now()), format='%d-%m-%Y %H:%M')
    input_df['hour'] = input_df['noted_date'].dt.hour
    input_df['temp'] = input_df['temp'].fillna(data['temp'].mean())
    input_df['out/in'] = input_df['out/in'].map({'In': 0, 'Out': 1}).fillna(0)
    input_df['temp_deviation'] = input_df['temp'] - data['temp'].mean()
    input_features = scaler.transform(input_df[['temp', 'temp_deviation', 'hour']])
    prediction = iso_forest.predict(input_features)[0]
    score = iso_forest.decision_function(input_features)[0]
    status = 'anomaly' if prediction == -1 else 'normal'
    return {"status": status, "score": score, "data": input_df.to_dict('records')[0]}

In [None]:
# Sel 8: Streamlit Dashboard
st.set_page_config(page_title="IoT Temperature Anomaly Dashboard", page_icon="🌡️", layout="wide")

st.title("IoT Temperature Anomaly Detection Dashboard")

st.markdown("""
## Selamat Datang di Dashboard Deteksi Anomali Suhu IoT

Dashboard ini menggunakan DBSCAN, Isolation Forest, Regresi Logistik, dan Naïve Bayes untuk analisis data suhu IoT.
""")

# Statistik
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Jumlah Data", data.shape[0])
with col2:
    st.metric("Data Normal (In)", data[data['out/in'] == 0].shape[0])
with col3:
    st.metric("Data Anomali (Out)", data[data['out/in'] == 1].shape[0])

# Visualisasi Cluster
st.subheader("Distribusi Cluster")
{
    "type": "pie",
    "data": {
        "labels": data['cluster'].value_counts().index.astype(str).tolist(),
        "datasets": [{
            "data": data['cluster'].value_counts().values.tolist(),
            "backgroundColor": ['#3498db', '#e74c3c', '#2ecc71', '#f1c40f', '#9b59b6'],
            "borderColor": ['#2980b9', '#c0392b', '#27ae60', '#f39c12', '#8e44ad'],
            "borderWidth": 1
        }]
    },
    "options": {
        "responsive": True,
        "plugins": {
            "legend": {
                "position": "top"
            },
            "title": {
                "display": True,
                "text": "Distribusi Cluster DBSCAN"
            }
        }
    }
}

st.subheader("Hasil Klasterisasi Suhu")
{
    "type": "scatter",
    "data": {
        "datasets": [{
            "label": "Klaster Suhu",
            "data": [{"x": row['noted_date'].strftime('%Y-%m-%d %H:%M'), "y": row['temp'], "cluster": row['cluster']} for _, row in data.iterrows()],
            "backgroundColor": data['cluster'].apply(lambda x: '#3498db' if x == 0 else '#e74c3c' if x == 1 else '#2ecc71' if x == 2 else '#f1c40f').tolist(),
            "borderColor": data['cluster'].apply(lambda x: '#2980b9' if x == 0 else '#c0392b' if x == 1 else '#27ae60' if x == 2 else '#f39c12').tolist(),
            "borderWidth": 1
        }]
    },
    "options": {
        "responsive": True,
        "plugins": {
            "legend": {
                "position": "top"
            },
            "title": {
                "display": True,
                "text": "Temperature Clustering (DBSCAN)"
            }
        },
        "scales": {
            "x": {
                "title": {
                    "display": True,
                    "text": "Tanggal"
                }
            },
            "y": {
                "title": {
                    "display": True,
                    "text": "Suhu (°C)"
                }
            }
        }
    }
}

# Visualisasi Anomali
st.subheader("Deteksi Anomali")
{
    "type": "scatter",
    "data": {
        "datasets": [{
            "label": "Deteksi Anomali",
            "data": [{"x": row['noted_date'].strftime('%Y-%m-%d %H:%M'), "y": row['temp'], "status": 'Anomali' if row['anomaly'] == -1 else 'Normal'} for _, row in data.iterrows()],
            "backgroundColor": data['anomaly'].apply(lambda x: '#e74c3c' if x == -1 else '#2ecc71').tolist(),
            "borderColor": data['anomaly'].apply(lambda x: '#c0392b' if x == -1 else '#27ae60').tolist(),
            "borderWidth": 1
        }]
    },
    "options": {
        "responsive": True,
        "plugins": {
            "legend": {
                "position": "top"
            },
            "title": {
                "display": True,
                "text": "Anomaly Detection (Isolation Forest)"
            }
        },
        "scales": {
            "x": {
                "title": {
                    "display": True,
                    "text": "Tanggal"
                }
            },
            "y": {
                "title": {
                    "display": True,
                    "text": "Suhu (°C)"
                }
            }
        }
    }
}

# Alert
if (data['anomaly'] == -1).any():
    st.warning('Anomali Terdeteksi pada Sensor!')

# Contoh Data
st.subheader("Contoh Data per Cluster")
for cluster in data['cluster'].unique():
    st.write(data[data['cluster'] == cluster][['noted_date', 'temp', 'out/in']].head(3))

st.markdown(f"---\nDiperbarui pada: {datetime.now().strftime('%I:%M %p WIB, %d %B %Y')}")

# Sel 9: Deployment dengan ngrok
def run_streamlit():
    os.system("streamlit run app.py --server.port 8501")

try:
    thread = threading.Thread(target=run_streamlit, daemon=True)
    thread.start()
    time.sleep(5)  # Beri waktu agar Streamlit stabil
    public_url = ngrok.connect(addr='8501')
    st.write(f"URL Publik Dashboard: {public_url}")
except Exception as e:
    st.error(f"Error saat menghubungkan ngrok: {e}")