In [58]:
import pandas as pd
import pmdarima as pm
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [59]:
total_crimes_by_type = pd.read_csv('crime_by_tipe.csv')
total_crimes_by_type

Unnamed: 0.1,Unnamed: 0,Crime Type,Year,Total_Crimes
0,0,ASSAULT,2014,16515.0
1,1,ASSAULT,2015,17858.0
2,2,ASSAULT,2016,18608.0
3,3,ASSAULT,2017,18906.0
4,4,ASSAULT,2018,19565.0
...,...,...,...,...
94,94,THEFTOVER,2020,1209.0
95,95,THEFTOVER,2021,1052.0
96,96,THEFTOVER,2022,1444.0
97,97,THEFTOVER,2023,1719.0


In [60]:
total_crimes_by_type.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    99 non-null     int64  
 1   Crime Type    99 non-null     object 
 2   Year          99 non-null     int64  
 3   Total_Crimes  99 non-null     float64
dtypes: float64(1), int64(2), object(1)
memory usage: 3.2+ KB


In [61]:
total_crimes_by_type['Crime Type'] = total_crimes_by_type['Crime Type'].astype('string')
total_crimes_by_type.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    99 non-null     int64  
 1   Crime Type    99 non-null     string 
 2   Year          99 non-null     int64  
 3   Total_Crimes  99 non-null     float64
dtypes: float64(1), int64(2), string(1)
memory usage: 3.2 KB


In [62]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    non_zero_indices = y_true != 0
    return np.mean(np.abs((y_true[non_zero_indices] - y_pred[non_zero_indices]) / y_true[non_zero_indices])) * 100

all_predictions = pd.DataFrame()
total_maes = []
total_rmses = []
total_mapes = []
crime_types = total_crimes_by_type['Crime Type'].unique()

for crime in crime_types:
    crime_type_data = total_crimes_by_type[total_crimes_by_type['Crime Type'] == crime].copy()
    crime_type_data = crime_type_data.set_index('Year')
    crime_type_data.index = crime_type_data.index.astype(int)
    total_crimes_series = crime_type_data['Total_Crimes']
    
    train_data = total_crimes_series.loc[total_crimes_series.index <= 2023]
    test_data = total_crimes_series.loc[total_crimes_series.index > 2023]

    auto_model = pm.auto_arima(train_data,
                           start_p=1, start_q=1,
                           max_p=10, max_q=10,
                           m=1, 
                           seasonal=False,
                           d=None, 
                           trace=True,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=True)
    best_order = auto_model.order

    model = ARIMA(train_data, order=best_order)
    fitted_model = model.fit()
    start_year = test_data.index[0]
    end_year = test_data.index[-1]

    predictions = fitted_model.predict(start=start_year, end=end_year)
    predictions_df = predictions.to_frame(name='Predicted_Crimes')
    predictions_df['Crime_Type'] = crime

    mae = mean_absolute_error(test_data, predictions)
    rmse = np.sqrt(mean_squared_error(test_data, predictions))
    mape = mean_absolute_percentage_error(test_data, predictions)

    total_maes.append(mae)
    total_rmses.append(rmse)
    total_mapes.append(mape)

    evaluation_df = predictions.to_frame(name='Predicted_Crimes')
    evaluation_df['Crime Type'] = crime
    evaluation_df['Actual_Crimes'] = test_data 
    evaluation_df['MAE'] = mae
    evaluation_df['RMSE'] = rmse
    evaluation_df['MAPE'] = mape

    all_predictions = pd.concat([all_predictions, evaluation_df])
    
    if total_maes:
        total_mae = np.mean(total_maes)
        total_rmse = np.mean(total_rmses)
        total_mape = np.mean(total_mapes)
        print(all_predictions)
        print(f"\nMean Absolute Error (MAE): {total_mae:.2f}")
        print(f"Root Mean Squared Error (RMSE): {total_rmse:.2f}")
        print(f"Mean Absolute Percentage Error (MAPE): {total_mape:.2f}%")
    else:
        print("\nNo models were successfully fitted and evaluated.")

    print(all_predictions)

    

Performing stepwise search to minimize aic




 ARIMA(1,0,1)(0,0,0)[0]             : AIC=187.811, Time=0.12 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=227.975, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=224.862, Time=0.02 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.29 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=189.631, Time=0.09 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=225.320, Time=0.02 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.06 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.16 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=185.250, Time=0.04 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=184.412, Time=0.08 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=184.752, Time=0.02 sec




 ARIMA(0,0,2)(0,0,0)[0] intercept   : AIC=inf, Time=0.12 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=184.202, Time=0.02 sec
 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=185.041, Time=0.03 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=186.928, Time=0.08 sec

Best model:  ARIMA(1,0,0)(0,0,0)[0] intercept
Total fit time: 1.214 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


      Predicted_Crimes Crime Type  Actual_Crimes         MAE        RMSE  \
2024       19425.70054    ASSAULT        25445.0  6019.29946  6019.29946   

           MAPE  
2024  23.656119  

Mean Absolute Error (MAE): 6019.30
Root Mean Squared Error (RMSE): 6019.30
Mean Absolute Percentage Error (MAPE): 23.66%
      Predicted_Crimes Crime Type  Actual_Crimes         MAE        RMSE  \
2024       19425.70054    ASSAULT        25445.0  6019.29946  6019.29946   

           MAPE  
2024  23.656119  
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=178.574, Time=0.06 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=205.852, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=181.403, Time=0.04 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.04 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=178.658, Time=0.17 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.09 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.08 sec




 ARIMA(2,0,0)(0,0,0)[0]             : AIC=176.853, Time=0.07 sec




 ARIMA(3,0,0)(0,0,0)[0]             : AIC=178.647, Time=0.25 sec




 ARIMA(3,0,1)(0,0,0)[0]             : AIC=inf, Time=0.56 sec




 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=175.682, Time=0.62 sec




 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=181.634, Time=0.27 sec




 ARIMA(3,0,0)(0,0,0)[0] intercept   : AIC=inf, Time=0.47 sec




 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.31 sec




 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.54 sec




 ARIMA(3,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.88 sec

Best model:  ARIMA(2,0,0)(0,0,0)[0] intercept
Total fit time: 4.499 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  return get_prediction_index(
  return get_prediction_index(


      Predicted_Crimes Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540    ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601  AUTOTHEFT         9256.0  1348.004399  1348.004399   

           MAPE  
2024  23.656119  
2024  14.563574  

Mean Absolute Error (MAE): 3683.65
Root Mean Squared Error (RMSE): 3683.65
Mean Absolute Percentage Error (MAPE): 19.11%
      Predicted_Crimes Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540    ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601  AUTOTHEFT         9256.0  1348.004399  1348.004399   

           MAPE  
2024  23.656119  
2024  14.563574  
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=155.687, Time=0.10 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=193.521, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.05 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.10 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.17 sec




 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.46 sec




 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=1.73 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.32 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=151.991, Time=0.03 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=150.716, Time=0.17 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=152.048, Time=0.01 sec




 ARIMA(0,0,2)(0,0,0)[0] intercept   : AIC=149.847, Time=0.10 sec




 ARIMA(1,0,2)(0,0,0)[0] intercept   : AIC=inf, Time=0.24 sec
 ARIMA(0,0,3)(0,0,0)[0] intercept   : AIC=inf, Time=0.12 sec




 ARIMA(1,0,3)(0,0,0)[0] intercept   : AIC=inf, Time=0.50 sec

Best model:  ARIMA(0,0,2)(0,0,0)[0] intercept
Total fit time: 4.152 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


      Predicted_Crimes Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540    ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601  AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721  BIKETHEFT         2709.0   747.609721   747.609721   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  27.597258  

Mean Absolute Error (MAE): 2704.97
Root Mean Squared Error (RMSE): 2704.97
Mean Absolute Percentage Error (MAPE): 21.94%
      Predicted_Crimes Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540    ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601  AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721  BIKETHEFT         2709.0   747.609721   747.609721   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  27.597258  
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=174.438, Time=0.06 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=176.433, Time=0.14 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=173.509, Time=0.11 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.09 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.17 sec
 ARIMA(1,0,3)(0,0,0)[0]             : AIC=inf, Time=0.14 sec




 ARIMA(0,0,3)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(2,0,3)(0,0,0)[0]             : AIC=175.864, Time=0.19 sec




 ARIMA(1,0,2)(0,0,0)[0] intercept   : AIC=inf, Time=0.21 sec

Best model:  ARIMA(1,0,2)(0,0,0)[0]          
Total fit time: 1.255 seconds
      Predicted_Crimes  Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540     ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601   AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721   BIKETHEFT         2709.0   747.609721   747.609721   
2024       7000.755621  BREAKENTER         6842.0   158.755621   158.755621   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  27.597258  
2024   2.320310  

Mean Absolute Error (MAE): 2068.42
Root Mean Squared Error (RMSE): 2068.42
Mean Absolute Percentage Error (MAPE): 17.03%
      Predicted_Crimes  Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540     ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601   AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.6

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
  return get_prediction_index(


 ARIMA(1,0,1)(0,0,0)[0]             : AIC=89.330, Time=0.06 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=116.479, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=90.179, Time=0.03 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.05 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=90.966, Time=0.07 sec




 ARIMA(1,0,2)(0,0,0)[0]             : AIC=90.434, Time=0.10 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.03 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=92.579, Time=0.16 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=84.873, Time=0.06 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=82.876, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=80.896, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=82.873, Time=0.07 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.752 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


      Predicted_Crimes  Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540     ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601   AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721   BIKETHEFT         2709.0   747.609721   747.609721   
2024       7000.755621  BREAKENTER         6842.0   158.755621   158.755621   
2024         73.199995    HOMICIDE           85.0    11.800005    11.800005   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  27.597258  
2024   2.320310  
2024  13.882359  

Mean Absolute Error (MAE): 1657.09
Root Mean Squared Error (RMSE): 1657.09
Mean Absolute Percentage Error (MAPE): 16.40%
      Predicted_Crimes  Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540     ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601   AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721   BIKETHEFT         2709.0   747.60



 ARIMA(1,0,2)(0,0,0)[0]             : AIC=159.503, Time=0.17 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.10 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=159.112, Time=0.04 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=179.270, Time=0.17 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=154.001, Time=0.04 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.04 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=153.447, Time=0.02 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=157.105, Time=0.01 sec
 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=153.897, Time=0.03 sec




 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=155.147, Time=0.17 sec

Best model:  ARIMA(1,0,0)(0,0,0)[0] intercept
Total fit time: 0.993 seconds
      Predicted_Crimes  Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540     ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601   AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721   BIKETHEFT         2709.0   747.609721   747.609721   
2024       7000.755621  BREAKENTER         6842.0   158.755621   158.755621   
2024         73.199995    HOMICIDE           85.0    11.800005    11.800005   
2024       3285.874177     ROBBERY         3156.0   129.874177   129.874177   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  27.597258  
2024   2.320310  
2024  13.882359  
2024   4.115151  

Mean Absolute Error (MAE): 1402.56
Root Mean Squared Error (RMSE): 1402.56
Mean Absolute Percentage Error (MAPE): 14.36%
      Predicted_Crimes  Crime Type  Actual_Crimes       

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


 ARIMA(1,0,0)(0,0,0)[0]             : AIC=118.727, Time=0.03 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=118.007, Time=0.04 sec
 ARIMA(3,0,0)(0,0,0)[0]             : AIC=119.969, Time=0.04 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.07 sec




 ARIMA(3,0,1)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=116.739, Time=0.12 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=117.835, Time=0.08 sec




 ARIMA(3,0,0)(0,0,0)[0] intercept   : AIC=118.728, Time=0.13 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.16 sec




 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.09 sec
 ARIMA(3,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.08 sec

Best model:  ARIMA(2,0,0)(0,0,0)[0] intercept
Total fit time: 1.040 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


      Predicted_Crimes  Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540     ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601   AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721   BIKETHEFT         2709.0   747.609721   747.609721   
2024       7000.755621  BREAKENTER         6842.0   158.755621   158.755621   
2024         73.199995    HOMICIDE           85.0    11.800005    11.800005   
2024       3285.874177     ROBBERY         3156.0   129.874177   129.874177   
2024        342.416196    SHOOTING          460.0   117.583804   117.583804   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  27.597258  
2024   2.320310  
2024  13.882359  
2024   4.115151  
2024  25.561696  

Mean Absolute Error (MAE): 1218.99
Root Mean Squared Error (RMSE): 1218.99
Mean Absolute Percentage Error (MAPE): 15.96%
      Predicted_Crimes  Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540



 ARIMA(2,0,1)(0,0,0)[0]             : AIC=176.741, Time=0.13 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.09 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.02 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.20 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=168.916, Time=0.03 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=166.810, Time=0.06 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=165.015, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=166.883, Time=0.02 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.813 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


      Predicted_Crimes   Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540      ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601    AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721    BIKETHEFT         2709.0   747.609721   747.609721   
2024       7000.755621   BREAKENTER         6842.0   158.755621   158.755621   
2024         73.199995     HOMICIDE           85.0    11.800005    11.800005   
2024       3285.874177      ROBBERY         3156.0   129.874177   129.874177   
2024        342.416196     SHOOTING          460.0   117.583804   117.583804   
2024       8925.800000  THEFTFROMMV         7644.0  1281.800000  1281.800000   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  27.597258  
2024   2.320310  
2024  13.882359  
2024   4.115151  
2024  25.561696  
2024  16.768707  

Mean Absolute Error (MAE): 1226.84
Root Mean Squared Error (RMSE): 1226.84
Mean Absolute Percentage Error (MAPE): 16



 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.06 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.10 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.09 sec




 ARIMA(2,0,0)(0,0,0)[0]             : AIC=142.792, Time=0.06 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.15 sec




 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.15 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 0.772 seconds
      Predicted_Crimes   Crime Type  Actual_Crimes          MAE         RMSE  \
2024      19425.700540      ASSAULT        25445.0  6019.299460  6019.299460   
2024       7907.995601    AUTOTHEFT         9256.0  1348.004399  1348.004399   
2024       3456.609721    BIKETHEFT         2709.0   747.609721   747.609721   
2024       7000.755621   BREAKENTER         6842.0   158.755621   158.755621   
2024         73.199995     HOMICIDE           85.0    11.800005    11.800005   
2024       3285.874177      ROBBERY         3156.0   129.874177   129.874177   
2024        342.416196     SHOOTING          460.0   117.583804   117.583804   
2024       8925.800000  THEFTFROMMV         7644.0  1281.800000  1281.800000   
2024       1235.857650    THEFTOVER         1896.0   660.142350   660.142350   

           MAPE  
2024  23.656119  
2024  14.563574  
2024  2

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
  return get_prediction_index(


In [63]:
total_mae, total_rmse, total_mape

(1163.8743929705875, 1163.8743929705875, 18.142534393352317)

In [64]:
# prediction for 2025:

predictions_2025 = pd.DataFrame()
for crime in crime_types:
    crime_type_data = total_crimes_by_type[total_crimes_by_type['Crime Type'] == crime].copy()
    
    if 'Year' not in crime_type_data.columns:
        total_crimes_by_type = total_crimes_by_type.reset_index()
    
    crime_type_data['Year'] = pd.to_datetime(crime_type_data['Year'], format='%Y')
    crime_type_data = crime_type_data.set_index('Year')
    crime_type_data.index = crime_type_data.index.astype(int)
    total_crimes_series = crime_type_data['Total_Crimes']
    train_data = total_crimes_series
    
    auto_model = pm.auto_arima(train_data,
                           start_p=1, start_q=1,
                           max_p=12, max_q=12,
                           m=1, 
                           seasonal=False,
                           d=None, 
                           trace=True,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=True,
                           trend = 't')
    best_order = auto_model.order
    
    model = ARIMA(train_data, order=best_order)
    fitted_model = model.fit()
    predictions = fitted_model.forecast(steps=1)

    predictions_df = pd.DataFrame({
            'Crime Type': [crime],
            'Year': [2025],
            'Predicted_Crimes': [predictions.iloc[0]]
        })
    predictions_2025 = pd.concat([predictions_2025, predictions_df], ignore_index=True)
        
predictions_2025




Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=inf, Time=0.18 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=234.167, Time=0.02 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=198.624, Time=0.10 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.05 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=200.604, Time=0.09 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=202.580, Time=0.20 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=198.624, Time=0.06 sec

Best model:  ARIMA(1,0,0)(0,0,0)[0] intercept
Total fit time: 0.708 seconds
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=inf, Time=0.10 sec


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


 ARIMA(0,0,0)(0,0,0)[0]             : AIC=195.446, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=196.255, Time=0.08 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=195.446, Time=0.02 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0] intercept
Total fit time: 0.278 seconds
Performing stepwise search to minimize aic


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


 ARIMA(1,0,1)(0,0,0)[0]             : AIC=171.899, Time=0.15 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=201.955, Time=0.02 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.05 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.05 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.15 sec




 ARIMA(1,0,2)(0,0,0)[0]             : AIC=173.552, Time=0.68 sec




 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.38 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=0.19 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.55 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=171.899, Time=0.12 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0] intercept
Total fit time: 2.354 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=inf, Time=0.20 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=215.961, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=191.333, Time=0.05 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.04 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=193.333, Time=0.08 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.17 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=191.333, Time=0.03 sec

Best model:  ARIMA(1,0,0)(0,0,0)[0] intercept
Total fit time: 0.612 seconds
Performing stepwise search to minimize aic


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


 ARIMA(1,0,1)(0,0,0)[0]             : AIC=91.548, Time=0.19 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=113.485, Time=0.02 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=99.071, Time=0.02 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=108.416, Time=0.04 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.43 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=99.253, Time=0.16 sec




 ARIMA(0,0,2)(0,0,0)[0]             : AIC=107.260, Time=0.18 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=98.017, Time=0.10 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.71 sec




 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=91.548, Time=0.32 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 2.194 seconds
Performing stepwise search to minimize aic


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


 ARIMA(1,0,1)(0,0,0)[0]             : AIC=174.132, Time=0.27 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=202.204, Time=0.07 sec




 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.25 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.12 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.58 sec




 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.34 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.08 sec




 ARIMA(2,0,0)(0,0,0)[0]             : AIC=174.403, Time=0.15 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.70 sec




 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=174.132, Time=0.32 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 2.897 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=132.169, Time=0.08 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=149.043, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=132.732, Time=0.07 sec




 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.06 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=1.44 sec




 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.50 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=inf, Time=0.16 sec




 ARIMA(2,0,0)(0,0,0)[0]             : AIC=132.308, Time=0.18 sec




 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=0.78 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=132.169, Time=0.14 sec

Best model:  ARIMA(1,0,1)(0,0,0)[0]          
Total fit time: 3.457 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=inf, Time=0.42 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=221.014, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=inf, Time=0.06 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.08 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=221.014, Time=0.04 sec

Best model:  ARIMA(0,0,0)(0,0,0)[0]          
Total fit time: 0.634 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=inf, Time=0.20 sec




 ARIMA(0,0,0)(0,0,0)[0]             : AIC=170.903, Time=0.18 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=149.582, Time=0.25 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=inf, Time=0.11 sec




 ARIMA(2,0,0)(0,0,0)[0]             : AIC=151.580, Time=0.11 sec




 ARIMA(2,0,1)(0,0,0)[0]             : AIC=inf, Time=0.81 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=149.582, Time=0.07 sec

Best model:  ARIMA(1,0,0)(0,0,0)[0] intercept
Total fit time: 1.738 seconds


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Unnamed: 0,Crime Type,Year,Predicted_Crimes
0,ASSAULT,2025,24797.071408
1,AUTOTHEFT,2025,6115.172173
2,BIKETHEFT,2025,2840.847033
3,BREAKENTER,2025,6929.497823
4,HOMICIDE,2025,74.538275
5,ROBBERY,2025,3214.423409
6,SHOOTING,2025,514.678439
7,THEFTFROMMV,2025,8809.27267
8,THEFTOVER,2025,1820.642861


In [65]:
predictions_2025['Predicted_Crimes'] = predictions_2025['Predicted_Crimes'].astype(int)
predictions_2025

Unnamed: 0,Crime Type,Year,Predicted_Crimes
0,ASSAULT,2025,24797
1,AUTOTHEFT,2025,6115
2,BIKETHEFT,2025,2840
3,BREAKENTER,2025,6929
4,HOMICIDE,2025,74
5,ROBBERY,2025,3214
6,SHOOTING,2025,514
7,THEFTFROMMV,2025,8809
8,THEFTOVER,2025,1820
