# Insert 0807.csv - Training data

# Insert 2007.csv - Testing data

# Forward and Backward Filling

In [None]:
# Plug gaps in both datasets: carry the last seen value forward first, then
# back-fill whatever is still missing at the start of each column.
train_df, test_df = (frame.ffill().bfill() for frame in (train_df, test_df))

# Show the filled training data.
train_df

# Convert Date

In [None]:
# Apply the same three in-place steps to the training frame and then to the
# testing frame: parse the raw 'time' column into timestamps, rename the
# column to 'date', and make it the index.
for frame in (train_df, test_df):
    frame['time'] = pd.to_datetime(frame['time'], format='%Y%m%d%H%M%S%f')
    frame.rename(columns={'time': 'date'}, inplace=True)
    frame.set_index('date', inplace=True)

# See training data

In [None]:
train_df

# See testing data

In [None]:
test_df

# Create the time series model

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Seasonal period used for the exploratory decomposition plots.
# NOTE(review): this differs from the seasonal period of the SARIMAX model
# below (4 vs. 12) - confirm which one matches the sampling of the data.
DECOMPOSITION_PERIOD = 4

# SARIMAX specification shared by every column: (p, d, q) and (P, D, Q, s).
ORDER = (1, 1, 1)
SEASONAL_ORDER = (1, 1, 1, 12)

# A residual whose magnitude exceeds this many residual standard deviations
# is reported as an anomaly.
ANOMALY_SIGMAS = 2

# The model differences the series d times and seasonally D times with period
# s. The first d + D * s one-step-ahead predictions therefore come from
# diffusely initialised states and say nothing about the data, so they are
# left out of the anomaly scoring.
BURN_IN = ORDER[1] + SEASONAL_ORDER[1] * SEASONAL_ORDER[3]

# Separate the training frame into one univariate series per column
ts_data = {col: train_df[col] for col in train_df.columns}

# Create a seasonal decomposition plot for each time series
for col, data in ts_data.items():
    decomposition = sm.tsa.seasonal_decompose(
        data, model='additive', period=DECOMPOSITION_PERIOD
    )
    fig = decomposition.plot()
    # plt.title() would label the *current* axes, which is the bottom
    # (residual) panel of the figure; put the column name on the top panel.
    fig.axes[0].set_title(col)
    plt.show()

# Create a time series model for each time series
models = {}
for col, data in ts_data.items():
    model = sm.tsa.SARIMAX(data, order=ORDER, seasonal_order=SEASONAL_ORDER)
    results = model.fit()
    models[col] = results

# Iterate over each time series in the new data and compare with the model
for col in ts_data:
    new_data = test_df[col]
    # The test series comes from a different file than the training series,
    # so its timestamps cannot be looked up in the fitted model's own index.
    # apply() runs the model over the new series with the parameters that
    # were estimated on the training data; fittedvalues then holds its
    # predictions for every test observation, on the test index.
    applied = models[col].apply(new_data)
    predicted = applied.fittedvalues
    residuals = new_data - predicted
    # Score only the observations after the burn-in period.
    scored = residuals.iloc[BURN_IN:]
    anomalies = scored[scored.abs() > ANOMALY_SIGMAS * scored.std()]
    print(f"Anomalies in {col}:\n{anomalies}")
