# Phase 1

## Load the Dataset

In [1]:
import pandas as pd

# Relative path to the synthetic IoT dataset; change it here if the file lives elsewhere
DATA_PATH = "training-data/iot_main_synthetic_dataset.csv"
df = pd.read_csv(DATA_PATH)

# Display the first rows to sanity-check the load
df.head()


Unnamed: 0,device_id,sensor_name,sensor_type,device_type,location,timestamp,value,unit,status,sensitivity_label
0,35c3dee3-0b58-4ddb-b71a-cb627ed163f3,RFID_patient_tracker,location_tracking,location_tracking_device,Ward,2025-06-22T22:24:55.435760,73.66,%,idle,Sensitive
1,953120f5-d9e7-43cf-8cfa-e041c8ac2bd9,RFID_patient_tracker,location_tracking,location_tracking_device,Ward,2025-06-25T14:54:55.435890,ON,°C,idle,Sensitive
2,6d6f46f1-f524-4a2a-9f22-e0c474d26a0e,temperature_sensor,temperature,temperature_device,Server Room,2025-06-20T13:30:55.436043,18.15,%,active,Non-sensitive
3,b542fadc-8b2a-4452-8ce4-c6768934fb5a,humidity_sensor,environmental,environmental_device,Data Center,2025-06-22T07:48:55.436438,ON,bpm,offline,Non-sensitive
4,b6fa9848-fbc6-4f12-b638-343c04cfc280,temperature_sensor,temperature,temperature_device,Server Room,2025-06-23T13:29:55.436903,OFF,lux,idle,Non-sensitive


## Prepare Features and Labels

In [3]:
# Categorical device descriptors used as model inputs
features = ["sensor_name", "sensor_type", "device_type", "location"]
# Column holding the class to predict
target = "sensitivity_label"

# Split the frame into the input columns and the label column
X, y = df[features], df[target]

## Build the Preprocessing and Model Pipeline

In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline

# One-hot encode every categorical feature; categories not seen during fit
# are ignored at transform time instead of raising (handle_unknown="ignore").
categorical_encoder = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer([("cat", categorical_encoder, features)])

# Chain the encoder and a seeded decision tree so both are fitted and applied together
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", DecisionTreeClassifier(random_state=42)),
])


## Train and Evaluate the Model

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Hold out 20% of the rows for evaluation. stratify=y keeps the class
# proportions of the label the same in the train and test splits, which
# matters on a small dataset where a purely random split can skew the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the encoder and the classifier on the training rows only
pipeline.fit(X_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1.
# NOTE(review): a perfect score on a held-out set this small may mean the label
# is fully determined by the categorical features in this synthetic data --
# confirm before treating the result as evidence of generalisation.
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))


               precision    recall  f1-score   support

Non-sensitive       1.00      1.00      1.00        20
    Sensitive       1.00      1.00      1.00        20

     accuracy                           1.00        40
    macro avg       1.00      1.00      1.00        40
 weighted avg       1.00      1.00      1.00        40



## Save the Trained Model

In [6]:
from pathlib import Path

import joblib

# Destination of the serialized pipeline (preprocessing + classifier)
MODEL_PATH = "models/iot_sensitivity_model.joblib"

# joblib.dump does not create missing directories; make sure the target
# folder exists so the notebook also runs on a fresh checkout.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

# Save the trained pipeline
joblib.dump(pipeline, MODEL_PATH)


['models/iot_sensitivity_model.joblib']

#