In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Read the raw barcode-scan records from disk
scans_path = '../data/barcode_scans.csv'
df = pd.read_csv(scans_path)

# Parse the 'date' column into datetime values so date-based operations work later
df = df.assign(date=pd.to_datetime(df['date']))

# Preview the top rows as a quick sanity check
df.head()


In [None]:
# Flag unusually high or low scan counts using Z-scores, i.e. how many
# standard deviations each scan_count lies from the column mean.
Z_THRESHOLD = 3  # rows with |z| above this are treated as anomalies

# nan_policy='omit' ignores missing scan counts when computing the mean and
# standard deviation. With the default ('propagate') a single NaN would turn
# every Z-score into NaN and no anomaly would ever be reported.
df['z_score'] = stats.zscore(df['scan_count'], nan_policy='omit')

# Vectorized comparison instead of a row-wise lambda; rows whose Z-score is
# NaN compare as False and are therefore not flagged.
df['anomaly'] = df['z_score'].abs() > Z_THRESHOLD

# Display detected anomalies
anomalies = df[df['anomaly']]
print(f"✅ Detected {len(anomalies)} anomalies!")
print(anomalies.head())

In [None]:
# Line chart of scan_count by row index, with the flagged rows overlaid as red markers
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df.index, df['scan_count'], color='blue', label='Scan Count')
ax.scatter(
    anomalies.index,
    anomalies['scan_count'],
    s=100,
    marker='o',
    color='red',
    label='Anomalies',
)
ax.set_xlabel('Row Index')
ax.set_ylabel('Scan Count')
ax.set_title('Barcode Scan Counts with Detected Anomalies')
ax.legend()
plt.show()

In [None]:
# Summarise the flagged rows per (location, device_type) pair:
#   avg_scan_count - mean scan_count of the pair's anomalies
#   max_scan_count - largest scan_count among them
#   anomaly_count  - number of non-null scan_count values in the group
# Pairs are ordered from most to fewest anomalies.
anomaly_summary = (
    anomalies
    .groupby(['location', 'device_type'])
    .agg(
        avg_scan_count=('scan_count', 'mean'),
        max_scan_count=('scan_count', 'max'),
        anomaly_count=('scan_count', 'count'),
    )
    .reset_index()
    .sort_values(by='anomaly_count', ascending=False)
)

# Show the five pairs at the top of the ranking
print("📌 Locations & Devices with Most Anomalies:")
print(anomaly_summary.head(5))

In [None]:
# Write the flagged rows to CSV (row index omitted) for the AI storyteller step
anomalies_path = "../data/detected_anomalies.csv"
anomalies.to_csv(anomalies_path, index=False)

print("✅ Anomaly detection complete! Data saved for storytelling.")