# Perimeter Security with IoT-AI<br>
## Project Overview<br>
Author: Fatih E. NAR (He is such a genius with a great soul) <br>
This project aims to deliver an anomaly detection approach for the power grid.<br>
Dataset: https://huggingface.co/datasets/fenar/iot-security <br>

In [None]:
# Run once only
#%pip install -r requirements.txt

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
import joblib
from scipy.stats import zscore
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile

# The dataset ships as a zip archive; unpack all of its members into data/
with zipfile.ZipFile('data/iot-data.csv.zip', mode='r') as archive:
    archive.extractall(path='data')

# Read the extracted CSV into the working DataFrame used by the later cells
data_path = "data/iot-data.csv"
data = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Feature engineering on the module-level `data` frame: adds calendar columns,
# imputes gaps in the sensor columns, adds z-score columns with a simple
# |z| > 3 screening, and defines the `features` list used to train the model.

# Convert DateTime to a pandas datetime object.  The format is
# month/day/2-digit-year followed by a 12-hour clock time with AM/PM.
data['DateTime'] = pd.to_datetime(data['DateTime'], format='%m/%d/%y %I:%M %p')

# Extract time segments (vectorised through the .dt accessor rather than a
# Python-level apply per row)
data['DayOfWeek'] = data['DateTime'].dt.dayofweek  # Monday=0 ... Sunday=6
data['WeekOfMonth'] = (data['DateTime'].dt.day - 1) // 7 + 1  # days 1-7 -> 1, 8-14 -> 2, ...
data['Month'] = data['DateTime'].dt.month
# Season code: Spring=1 (Mar-May), Summer=2 (Jun-Aug), Autumn=3 (Sep-Nov),
# Winter=4 (Dec-Feb).  Shifting the month by 3 before the modulo makes March
# the first month of bucket 1; without the shift Dec-Feb would land in bucket 1
# and every label would be off by one season.
# NOTE(review): this is the Northern-Hemisphere convention - confirm it suits the data.
data['Season'] = ((data['DateTime'].dt.month - 3) % 12) // 3 + 1

# Handle missing values: mean for the averaged readings, median for the
# min/max readings.  The filled column is assigned back instead of calling
# fillna(inplace=True) on data[col]; that chained in-place form is deprecated
# and does not update `data` when pandas Copy-on-Write is enabled.
for column in ('avg(temp)', 'avg(humidity)'):
    data[column] = data[column].fillna(data[column].mean())
for column in ('max(pir)', 'min(als)', 'max(als)'):
    data[column] = data[column].fillna(data[column].median())

# Calculate z-scores for avg(temp), avg(humidity), and max(pir)
data['zscore_avg_temp'] = zscore(data['avg(temp)'])
data['zscore_avg_humidity'] = zscore(data['avg(humidity)'])
data['zscore_max_pir'] = zscore(data['max(pir)'])

# Determine anomalies where the z-score is above 3 or below -3, i.e. |z| > 3
anomalies_temp = data[data['zscore_avg_temp'].abs() > 3]
anomalies_humidity = data[data['zscore_avg_humidity'].abs() > 3]
anomalies_pir = data[data['zscore_max_pir'].abs() > 3]

# Combine all anomalies; a row flagged by more than one rule is kept once
anomalies_combined = pd.concat([anomalies_temp, anomalies_humidity, anomalies_pir]).drop_duplicates()

# Selecting relevant features for the Isolation Forest model
features = ['avg(temp)', 'avg(humidity)', 'max(pir)', 'min(als)', 'max(als)',
            'min(tp0)', 'max(tp0)', 'min(tp1)', 'max(tp1)', 'min(tp2)', 'max(tp2)',
            'min(tp3)', 'max(tp3)']


In [None]:
# Build the training matrix from the columns listed in `features`
X = data.loc[:, features]

# Fit an Isolation Forest on it.  contamination=0.01 tells the model to treat
# about 1% of the rows as outliers; the fixed random_state keeps runs repeatable.
model = IsolationForest(random_state=42, contamination=0.01)
model.fit(X)

In [None]:
import os

# Predict anomalies with the fitted model and store the label on each row
data['anomaly'] = model.predict(X)

# Save the trained model.  The target directory is created first so that the
# dump does not fail with FileNotFoundError when models/ does not exist yet.
model_filename = 'models/iot-anomaly-detection.pkl'
os.makedirs(os.path.dirname(model_filename), exist_ok=True)
joblib.dump(model, model_filename)

# Anomalies are marked as -1, normal data points are marked as 1
anomalies_ml = data[data['anomaly'] == -1]

# Define security-related anomalies: rows the model flagged that ALSO show an
# unusually HIGH reading (more than 3 standard deviations above the mean) in
# max(pir), avg(temp), avg(humidity) or max(als).  Only upward deviations count.
als_upper_bound = data['max(als)'].mean() + 3 * data['max(als)'].std()
data['security_related'] = (
    (data['anomaly'] == -1)
    & (
        (data['zscore_max_pir'] > 3)
        | (data['zscore_avg_temp'] > 3)
        | (data['zscore_avg_humidity'] > 3)
        | (data['max(als)'] > als_upper_bound)
    )
)

# Filter security-related anomalies
security_anomalies = data[data['security_related']]

# Analyze anomaly patterns: number of model-flagged rows per calendar bucket
anomaly_patterns = (
    anomalies_ml
    .groupby(['DayOfWeek', 'WeekOfMonth', 'Month', 'Season'])
    .size()
    .reset_index(name='Count')
)

In [None]:
# Function to determine color based on value ranking using cold to warm colors
def get_bar_colors(values):
    """Return one hex color per entry of *values*, chosen by the entry's rank.

    The largest value always receives the warmest color (``'#d73027'``); each
    smaller value moves one step toward the cold end of an 8-color palette.
    With more than eight values, the lowest-ranked ones all share the coldest
    color (``'#313695'``).

    Args:
        values: 1-D sequence of numbers (list, tuple, NumPy array, pandas
            Series, ...).

    Returns:
        A list of hex color strings, in the same order as *values*.
    """
    # Local import: numpy is not among the notebook's visible top-level imports.
    import numpy as np

    # Ordered from warm (red) to cold (dark blue)
    palette = ['#d73027', '#f46d43', '#fdae61', '#fee08b', '#abd9e9', '#74add1', '#4575b4', '#313695']

    # np.asarray lets plain lists/tuples through as well as arrays
    order = np.asarray(values).argsort()
    count = len(order)

    # Invert the argsort permutation in O(n): ranks[i] is the position of
    # values[i] in ascending order (0 = smallest).
    ranks = np.empty(count, dtype=int)
    ranks[order] = np.arange(count)

    coldest = len(palette) - 1
    # Rank count-1 (largest) -> palette[0]; lower ranks step toward the cold
    # end and are clamped at the last palette entry.
    return [palette[min(count - 1 - int(rank), coldest)] for rank in ranks]

# Plot anomaly patterns: a 2x2 grid summarising the model-flagged rows
# (`anomalies_ml`) by weekday, week of month, month and season.
# NOTE(review): the titles say "Security Anomalies" but the charts are built
# from every model-flagged row, not from `security_anomalies` - confirm intent.
day_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
plt.figure(figsize=(18, 12))

# Side Bar Plot for Day of the Week
plt.subplot(2, 2, 1)
# Reindex to all seven weekdays (0=Mon ... 6=Sun) so the fixed tick labels
# below stay aligned even when some weekday has no anomalies.
day_of_week_counts = anomalies_ml['DayOfWeek'].value_counts().reindex(range(7), fill_value=0)
colors = get_bar_colors(day_of_week_counts.values)
sns.barplot(y=day_of_week_counts.index, x=day_of_week_counts.values, palette=colors, orient='h')
plt.ylabel('Day of the Week')
plt.xlabel('Number of Anomalies')
plt.title('Predicted Security Anomalies by Day of the Week')
plt.yticks(range(7), day_labels)

# Heatmap for Week of the Month
plt.subplot(2, 2, 2)
# Force one column per weekday for the same reason: the seven x tick labels
# must line up with seven heatmap columns.
week_of_month_counts = (
    anomalies_ml.groupby(['WeekOfMonth', 'DayOfWeek'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=range(7), fill_value=0)
)
sns.heatmap(week_of_month_counts, cmap='YlGnBu', annot=True, fmt='d', linewidths=.5)
plt.xlabel('Day of the Week')
plt.ylabel('Week of the Month')
plt.title('Predicted Heatmap of Security Anomalies by Week of the Month')
# Heatmap cells are centred on half-integers, hence the +0.5 offset
plt.xticks(ticks=[day + 0.5 for day in range(7)], labels=day_labels, rotation=0)

# Scatter Plot for Month of the Year with Dot Connections
plt.subplot(2, 2, 3)
month_counts = anomalies_ml['Month'].value_counts().sort_index()
colors = get_bar_colors(month_counts.values)
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
plt.scatter(month_counts.index, month_counts.values, c=colors, s=100, alpha=0.7)
plt.plot(month_counts.index, month_counts.values, linestyle='-', color='gray', alpha=0.5)
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
# Annotate every dot with its count, slightly above the marker
for month, count in month_counts.items():
    plt.text(month, count + 5, str(count), ha='center', va='bottom')
plt.xlabel('Month')
plt.ylabel('Number of Anomalies')
plt.title('Predicted Security Anomalies by Month')
plt.xticks(ticks=range(1, 13), labels=months)

# Pie Chart for Season
plt.subplot(2, 2, 4)
season_counts = anomalies_ml['Season'].value_counts().sort_index()
colors = get_bar_colors(season_counts.values)
# Derive the labels from the season codes actually present, so the label list
# always has the same length as the data (plt.pie raises otherwise).
# NOTE(review): assumes the encoding documented where 'Season' is computed
# (Spring=1, Summer=2, Autumn=3, Winter=4) - confirm the formula yields it.
season_names = {1: 'Spring', 2: 'Summer', 3: 'Autumn', 4: 'Winter'}
season_labels = [season_names.get(code, str(code)) for code in season_counts.index]
plt.pie(season_counts.values, labels=season_labels, autopct='%1.1f%%', colors=colors)
plt.title('Predicted Security Anomalies by Season')

plt.tight_layout(pad=3.0)
plt.show()

# Display security-related anomalies
print(security_anomalies)