In [1]:
import zipfile
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import os
import statsmodels.tsa.api as tsa
from pmdarima.model_selection import train_test_split
from pmdarima.arima.utils import ndiffs, nsdiffs

In [2]:
# Read every CSV stored in the ZIP archive and stack them into one frame.
# The archive path is relative to the notebook's working directory; ZipFile
# raises FileNotFoundError when the archive is not there.
with zipfile.ZipFile('info/Chicago_Crime_2001-2022.zip') as zf:
    file_list = zf.namelist()

    dfs = []
    for member in file_list:
        if member.endswith('.csv'):
            with zf.open(member) as f:
                dfs.append(pd.read_csv(f))

# pd.concat([]) fails with an opaque "No objects to concatenate"; state the
# actual problem instead.
if not dfs:
    raise ValueError('No .csv members found in info/Chicago_Crime_2001-2022.zip')

combined_df = pd.concat(dfs, ignore_index=True)

# Persist the merged data as a single CSV in the working directory.
combined_df.to_csv('combined_data.csv', index=False)

# Show a short preview rather than printing the whole frame.
combined_df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'info/Chicago_Crime_2001-2022.zip'

In [None]:
# Read the merged CSV from the working directory and display it.
csv_path = 'combined_data.csv'
df = pd.read_csv(csv_path)
df

In [None]:
# Convert the 'Date' column to datetime values, then list the column dtypes.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates
df.info()

In [None]:
# Use the 'Date' column as the row index.
df = df.set_index(keys='Date')

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Fill gaps in the numeric columns by linear interpolation (the
# DataFrame.interpolate default). Only numeric columns are passed in:
# interpolating a frame that still holds object-dtype columns is deprecated
# since pandas 2.1, and such columns cannot be interpolated anyway, so they
# are left untouched.
# NOTE(review): values are interpolated between neighbouring rows in the
# current row order -- confirm that adjacent records are a sensible source
# for the filled values.
numeric_cols = df.select_dtypes(include='number').columns
df = df.copy()
if len(numeric_cols):
    df[numeric_cols] = df[numeric_cols].interpolate()

# Total number of missing values that remain after filling.
df.isna().sum().sum()

In [None]:
# Count the rows for every (Date, District, Arrest, Domestic, Beat) combination.
# NOTE(review): 'Date' is grouped at its stored resolution, not by calendar
# year, despite the variable name -- confirm that this is intended.
group_keys = ['Date', 'District', 'Arrest', 'Domestic', 'Beat']
crime_per_yr = df.groupby(group_keys).size().to_frame('Count').reset_index()
crime_per_yr

In [None]:
# Decompose a monthly series and plot its components.
# seasonal_decompose needs a single, regularly spaced series whose index
# carries a frequency (or an explicit period). The raw multi-column frame
# indexed by record timestamps satisfies neither, so the rows are first
# counted per calendar month.
# NOTE(review): assumes the quantity of interest is the number of records
# per month -- confirm.
monthly_counts = df.resample("M").size()
decomp = tsa.seasonal_decompose(monthly_counts)
fig = decomp.plot()
fig.set_size_inches(12,5)
fig.tight_layout()

In [None]:
df= df.resample("M").mean()

In [None]:
d = ndiffs(df)
print (f'd = {d}')

In [None]:
# Estimate the seasonal differencing order for a season of 12 periods.
D = nsdiffs(ts, m=12)
print(f'D = {D}')

In [None]:
# First difference of the series; the leading NaN produced by diff is dropped.
ts_diff = ts.diff(periods=1).dropna()

In [None]:
# Plot the differenced series via the plot_acf_pacf helper, passing a
# seasonal period of 12.
# NOTE(review): plot_acf_pacf is not defined in the visible cells -- it must
# be defined or imported before this cell runs.
plot_acf_pacf(ts_diff, annotate_seas=True, m = 12);

In [None]:
# Hold out 25% of the series as the test set.
train, test = train_test_split(ts, test_size=0.25)

# Draw both segments on the same axes so the split point is visible.
ax = train.plot(label='Train')
test.plot(ax=ax, label='Test')
ax.legend();

In [None]:
# SARIMA specification and fit.
# The fixed d and D below replace whatever values were assigned to those
# names in earlier cells.

# Non-seasonal part: AR order, differencing order, MA order
p, d, q = 1, 1, 1

# Seasonal part: AR order, differencing order, MA order, season length
P, D, Q, m = 1, 0, 1, 12

order = (p, d, q)
seasonal_order = (P, D, Q, m)

sarima = tsa.ARIMA(train, order=order, seasonal_order=seasonal_order).fit()

In [None]:
# Forecast one step per test observation and collect the summary table.
n_steps = len(test)
forecast_df = sarima.get_forecast(steps=n_steps).summary_frame()

# NOTE(review): plot_forecast and regression_metrics_ts are not defined in the
# visible cells -- they must be defined or imported before this cell runs.
plot_forecast(train, test, forecast_df)

# Compare the held-out values with the forecast's "mean" column.
regression_metrics_ts(test, forecast_df["mean"])

In [None]:
# Display the fitted model's summary.
sarima.summary()

In [None]:
# Diagnostic plots for the fitted model, resized to 10 x 6 inches.
fig = sarima.plot_diagnostics()
fig.set_size_inches((10, 6))
fig.tight_layout()