# SARIMA Anomaly Detection
This notebook fits a SARIMA model to hourly API volume data (the `login_count` column) and flags as anomalies the observations that fall outside the confidence interval of the model's in-sample predictions.

In [None]:
!pip install pandas numpy matplotlib statsmodels

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX

## Step 1: Load the Dataset

In [None]:
# Location of the input CSV (replace the path with your file location)
file_path = '/path/to/api_volume_data_with_anomalies_rounded.csv'

# Read the CSV, convert the 'time' column to datetimes, and use it as the index.
# Built as a single chain so the frame is produced without in-place mutation.
data = (
    pd.read_csv(file_path)
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .set_index('time')
)

# Show the first few rows as the cell output
data.head()

## Step 2: Initialize and Fit the SARIMA Model

In [None]:
# SARIMA hyperparameters.
# Non-seasonal part: (p, d, q)
p, d, q = 1, 1, 1
# Seasonal part: (P, D, Q) with period s; s = 24 assumes daily seasonality
P, D, Q, s = 1, 1, 1, 24

# Collect the model options in one place so they are easy to review and tweak.
# Stationarity and invertibility enforcement are both switched off.
sarima_options = dict(
    order=(p, d, q),
    seasonal_order=(P, D, Q, s),
    enforce_stationarity=False,
    enforce_invertibility=False,
)

# Build the model on the 'login_count' series
model = SARIMAX(data['login_count'], **sarima_options)

# Estimate the parameters; disp=False silences the optimizer's progress output
sarima_result = model.fit(disp=False)

# Display the fit summary as the cell output
sarima_result.summary()

## Step 3: Make Predictions and Detect Anomalies

In [None]:
# Significance level of the prediction interval (0.05 -> 95% interval).
# This is the value conf_int() uses when no alpha is given; it is spelled out
# here so the anomaly threshold is visible and tunable.
ALPHA = 0.05

# In-sample, non-dynamic predictions over the whole sample. A single
# get_prediction() call provides both the point forecast and the interval,
# so the prediction is not computed twice.
prediction = sarima_result.get_prediction(start=0, end=len(data) - 1, dynamic=False)
pred_conf_int = prediction.conf_int(alpha=ALPHA)

data['forecast'] = prediction.predicted_mean
data['lower_bound'] = pred_conf_int['lower login_count']
data['upper_bound'] = pred_conf_int['upper login_count']

# NOTE(review): with differencing (d=1, D=1, s=24) the earliest predictions are
# presumably dominated by the model's initialization and may have very wide
# intervals -- confirm on the plot and consider excluding a burn-in period.

# Detect anomalies: the actual value lies outside the prediction interval
below_interval = data['login_count'] < data['lower_bound']
above_interval = data['login_count'] > data['upper_bound']
data['anomaly'] = below_interval | above_interval

## Step 4: Plot the Results with Anomalies Highlighted

In [None]:
# Plot actual data with the SARIMA forecast and its confidence interval.
# The explicit fig/ax interface keeps every call bound to this figure.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(data.index, data['login_count'], label='Actual Data', color='blue')
ax.plot(data.index, data['forecast'], label='Forecast', color='green')
# Label the band so it appears in the legend alongside the lines
ax.fill_between(data.index, data['lower_bound'], data['upper_bound'],
                color='lightgreen', alpha=0.5, label='Confidence Interval')

# Highlight anomalies. zorder=3 draws the markers above the lines; with the
# default ordering scatter markers are rendered underneath line plots and can
# be hidden by the actual/forecast curves.
anomalies = data[data['anomaly']]
ax.scatter(anomalies.index, anomalies['login_count'],
           color='red', label='Anomalies', marker='o', zorder=3)

ax.set_xlabel('Date')
ax.set_ylabel('Login Count')
ax.set_title('API Volume Anomaly Detection with SARIMA')
ax.legend()
plt.show()

## Step 5: Save the Results

In [None]:
# Destination of the results CSV (replace the path with your file location)
output_path = '/path/to/sarima_anomaly_detection_output.csv'

# Columns to export: the observed series, the model output, and the anomaly flag
export_columns = ['login_count', 'forecast', 'lower_bound', 'upper_bound', 'anomaly']

# Write the selected columns; index=True keeps the frame's index in the file
results = data[export_columns]
results.to_csv(output_path, index=True)
print(f'Results saved to {output_path}')