In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

# Build a reproducible synthetic dataset of per-app behaviour counters.
# The draws below consume NumPy's global (legacy) random stream, so the order
# of the calls determines the generated values -- do not reorder them.
np.random.seed(42)
n_samples = 1000

data = {}
# Integer counters drawn uniformly from [0, high).
data['microphone_requests'] = np.random.randint(low=0, high=20, size=n_samples)
data['location_requests'] = np.random.randint(low=0, high=30, size=n_samples)
data['ads_related_to_conversations'] = np.random.randint(low=0, high=10, size=n_samples)
# Binary flag with both values equally likely.
data['app_requested_permissions'] = np.random.choice([0, 1], size=n_samples)
# Binary label drawn independently of the columns above; P(label == 1) = 0.1.
data['privacy_violation'] = np.random.choice([0, 1], size=n_samples, p=[0.9, 0.1])

df = pd.DataFrame(data)

# Hand-weighted score per row: microphone requests count double, location
# requests count once, conversation-related ads count triple, and a flat 10
# points is added when the app did NOT request permissions.
missing_permission_penalty = 10 * (1 - df['app_requested_permissions'])
weighted_usage = (
    2 * df['microphone_requests']
    + df['location_requests']
    + 3 * df['ads_related_to_conversations']
)
df['anomaly_score'] = weighted_usage + missing_permission_penalty

# Fit an Isolation Forest on the single hand-built score column.
# contamination=0.1 sets the expected share of outliers; random_state pins
# the model's randomness so repeated runs agree.
score_frame = df[['anomaly_score']]
clf = IsolationForest(contamination=0.1, random_state=42)
clf.fit(score_frame)

# predict() returns 1 for normal rows and -1 for anomalies; recode those
# labels to 0 (normal) and 1 (anomaly) so they line up with the 0/1 label column.
raw_labels = pd.Series(clf.predict(score_frame), index=df.index)
df['predicted_anomaly'] = raw_labels.map({1: 0, -1: 1})

# Calculate detection metrics by comparing the model's flags
# ('predicted_anomaly') against the labels ('privacy_violation').
true_positives = ((df['predicted_anomaly'] == 1) & (df['privacy_violation'] == 1)).sum()
false_positives = ((df['predicted_anomaly'] == 1) & (df['privacy_violation'] == 0)).sum()
true_negatives = ((df['predicted_anomaly'] == 0) & (df['privacy_violation'] == 0)).sum()
false_negatives = ((df['predicted_anomaly'] == 0) & (df['privacy_violation'] == 1)).sum()


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    The confusion-matrix counts are NumPy integers, for which 0 / 0 yields
    nan plus a RuntimeWarning instead of raising; falling back to 0.0 keeps
    the printed report meaningful when nothing was flagged or nothing was
    labelled positive.
    """
    return numerator / denominator if denominator else 0.0


# precision: share of flagged rows that are labelled violations.
precision = _safe_ratio(true_positives, true_positives + false_positives)
# recall: share of labelled violations that were flagged.
recall = _safe_ratio(true_positives, true_positives + false_negatives)
# F1: harmonic mean of the two; defined as 0.0 when both are zero.
f1_score = _safe_ratio(2 * (precision * recall), precision + recall)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1_score:.2f}")

# Show the first few rows that the model flagged as anomalous.
print("\nExample Anomalies:")
is_flagged = df['predicted_anomaly'].eq(1)
anomalies = df.loc[is_flagged].head()
print(anomalies)

Precision: 0.13
Recall: 0.12
F1 Score: 0.12

Example Anomalies:
    microphone_requests  location_requests  ads_related_to_conversations  \
14                    5                  6                             1   
44                   14                 29                             9   
58                    1                  7                             0   
75                    0                  0                             4   
85                    2                  1                             1   

    app_requested_permissions  privacy_violation  anomaly_score  \
14                          1                  0             19   
44                          0                  0             94   
58                          1                  0              9   
75                          0                  0             22   
85                          1                  0              8   

    predicted_anomaly  
14                  1  
44                  1  
58  