# 🏭 EDA: Anomaly Detection in Warehouse Operations

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's white-grid theme to every figure drawn below.
# `set_theme` is the preferred entry point; `sns.set` is only a legacy alias
# for it that seaborn documents as possibly being removed in the future.
sns.set_theme(style='whitegrid')

# Load the raw warehouse log, parsing the "timestamp" column into datetimes.
df = pd.read_csv("../data/raw/warehouse_logs.csv", parse_dates=["timestamp"])
# Trailing expression: the notebook renders the first rows as a quick preview.
df.head()

## 📊 Class Balance: Anomalies vs Normal

In [None]:
# Bar chart of how many rows fall under each value of `is_anomaly`.
# countplot returns the Axes it drew on, so the title is attached directly.
ax = sns.countplot(x='is_anomaly', data=df)
ax.set_title("Anomaly Class Balance")
plt.show()

## 🕒 Duration vs Device Lag (Anomalies Highlighted)

In [None]:
# Scatter `duration_sec` against `device_lag_ms`, colouring points by the
# `is_anomaly` column; partial transparency keeps overlapping points visible.
ax = sns.scatterplot(
    x='duration_sec',
    y='device_lag_ms',
    hue='is_anomaly',
    data=df,
    alpha=0.7,
)
ax.set_title("Task Duration vs Device Lag")
plt.show()

## 🔍 Error Counts & Scanner Failures

In [None]:
# Side-by-side histograms of the `errors` and `scanner_failures` columns.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

# One entry per panel: (column, panel title, extra histplot keyword arguments).
# NOTE(review): only the errors panel pins `bins=3`; the other uses seaborn's
# automatic binning. Confirm the fixed bin count is intentional for this column.
panels = (
    ('errors', "Errors", {'bins': 3}),
    ('scanner_failures', "Scanner Failures", {}),
)
for ax, (column, title, hist_kwargs) in zip(axs, panels):
    sns.histplot(df[column], ax=ax, **hist_kwargs)
    ax.set_title(title)

fig.tight_layout()
plt.show()

## 🧹 Zone and Task Type Distributions

In [None]:
# Side-by-side count plots: rows per `zone_id` (left) and per `task_type` (right).
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

columns = ('zone_id', 'task_type')
titles = ("Zone Distribution", "Task Type Distribution")
for ax, column, title in zip(axs, columns, titles):
    sns.countplot(x=column, data=df, ax=ax)
    ax.set_title(title)

fig.tight_layout()
plt.show()