# Load the CSV into a pandas DataFrame

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Raw CSV with the sensor readings used to fit the models below.
url = 'https://raw.githubusercontent.com/tal2k4xj/iai-workshop/main/IAI%20data%20lab/0807.csv'

# Download and parse the file straight from the URL.
df = pd.read_csv(filepath_or_buffer=url)

# Bare expression so the frame is displayed when this runs as a notebook cell.
df

# Forward and Backward Filling

In [None]:
# Carry the last seen value forward into each gap first ...
df = df.ffill()
# ... then back-fill whatever is still missing (i.e. gaps at the very start).
df = df.bfill()
df

# Convert Date and Clean the Data

In [None]:
# Parse the compact timestamp column (year, month, day, hour, minute, second
# and fractional seconds with no separators) into datetime values.
df['time'] = pd.to_datetime(df['time'], format='%Y%m%d%H%M%S%f')

# Treat cells that are empty or whitespace-only as missing values.
df = df.replace(r'^\s*$', np.nan, regex=True)

# Keep only columns with at least 200 non-missing values, then only rows
# with at least 20 non-missing values.
df = df.dropna(thresh=200, axis=1)
df = df.dropna(thresh=20, axis=0)

# The pattern is written as a raw string: in a plain literal '\d' is an
# invalid escape sequence and triggers a warning on current Python versions.
# NOTE(review): 'time' was converted to datetime above, so this string-based
# regex replace probably has no effect. If the goal is to drop fractional
# seconds, something like df['time'].dt.floor('s') may be intended - confirm.
df['time'] = df['time'].replace(r'[.]\d{3}', '', regex=True)

# Use the timestamp as the index under the name 'date'.
df.rename(columns={'time': 'date'}, inplace=True)
df.set_index('date', inplace=True)

df

# Create the time series model

In [None]:
# Map every column name to its own Series so each can be modelled separately
ts_data = {col: df[col] for col in df.columns}

# Show an additive seasonal decomposition (period of 4 observations) per series
for col in ts_data:
    data = ts_data[col]
    decomposition = sm.tsa.seasonal_decompose(
        data,
        model='additive',
        period=4,
    )
    fig = decomposition.plot()
    # NOTE(review): plt.title sets the title of the current axes, which may be
    # one subplot rather than the whole figure - confirm this is the intent.
    plt.title(col)
    plt.show()

# Fit one SARIMAX model per series and keep the fitted results by column name
# NOTE(review): the seasonal period here is 12, while the decomposition plots
# use period=4 - confirm which one matches the data.
models = {}
for col in ts_data:
    data = ts_data[col]
    model = sm.tsa.SARIMAX(
        data,
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, 12),
    )
    results = models[col] = model.fit()

# Read the new CSV file whose readings are compared against the fitted models
new_df = pd.read_csv('https://raw.githubusercontent.com/tal2k4xj/iai-workshop/main/IAI%20data%20lab/2007.csv')

new_df.rename(columns={'time': 'date'}, inplace=True)

# Convert the 'date' column to datetime values.
# The training file is parsed with an explicit compact format; without it,
# all-digit timestamps would be interpreted as epoch offsets and yield wrong
# dates. Try that format first and fall back to pandas' automatic inference
# when this file uses a different layout.
try:
    new_df['date'] = pd.to_datetime(new_df['date'], format='%Y%m%d%H%M%S%f')
except ValueError:
    new_df['date'] = pd.to_datetime(new_df['date'])

# Set 'date' column as index
new_df.set_index('date', inplace=True)

# For each modelled series, predict over the new file's time span and report
# the points whose residual exceeds two standard deviations of the residuals.
for col in ts_data:
    new_data = new_df[col]
    first_stamp = new_data.index[0]
    last_stamp = new_data.index[-1]
    predicted = models[col].predict(start=first_stamp, end=last_stamp)
    # Subtraction aligns the two series on their index labels.
    residuals = new_data - predicted
    is_outlier = residuals.abs() > 2 * residuals.std()
    anomalies = residuals[is_outlier]
    print(f"Anomalies in {col}:\n{anomalies}")
