In [1]:
# install missing packages
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# py-cpuinfo supplies the `cpuinfo` module imported by the system-details cell.
# NOTE(review): plotly and pandas-datareader are not imported directly by any
# visible cell; presumably the local `wrangle` / `nnet_regressor` modules need
# them -- confirm.
# TODO: versions are unpinned, so a fresh environment may resolve different releases.
%pip install plotly
%pip install py-cpuinfo
%pip install pandas-datareader

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# report the RAM, CPU, and disk resources of the machine running this notebook
import os
import psutil
import cpuinfo


def to_gb(num_bytes):
    """Convert a byte count (as reported by psutil) into gigabytes."""
    return num_bytes / (1024 ** 3)


# memory
ram_info = psutil.virtual_memory()
print(f"Total RAM: {to_gb(ram_info.total):.2f} GB")
print(f"Available RAM: {to_gb(ram_info.available):.2f} GB")
print(f"Used RAM: {to_gb(ram_info.used):.2f} GB")
print(f"Percentage Usage Of RAM: {ram_info.percent}%")

# processor
core_count = os.cpu_count()
cpu_speed = cpuinfo.get_cpu_info()['hz_actual_friendly']
print(f"CPU Cores: {core_count}")
print(f"CPU Speed: {cpu_speed}")

# disk that holds the current working directory
disk_info = psutil.disk_usage(os.getcwd())
print(f"Total Disk: {to_gb(disk_info.total):.2f} GB")
print(f"Available Disk: {to_gb(disk_info.free):.2f} GB")
print(f"Used Disk: {to_gb(disk_info.used):.2f} GB")
print(f"Percentage Usage Of Disk: {disk_info.percent}%")

Total RAM: 15.47 GB
Available RAM: 13.46 GB
Used RAM: 1.75 GB
Percentage Usage Of RAM: 13.0%
CPU Cores: 4
CPU Speed: 2.5000 GHz
Total Disk: 24.99 GB
Available Disk: 15.15 GB
Used Disk: 9.84 GB
Percentage Usage Of Disk: 39.4%


In [3]:
# import requirements
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from wrangle import prepare
from nnet_regressor import Regression

2023-10-16 13:43:53.304199: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# read the raw crime records from the CSV file in the working directory
crime_csv = "crime.csv"
crime = pd.read_csv(crime_csv)

In [5]:
# prepare the data for machine learning
# NOTE: `crime` is rebound to the prepared frame, so re-running this cell on its
# own would pass already-prepared data to prepare(); re-run the loading cell first.
prepare_options = {
    "name": "Data Preparation",
    "path": None,
    "plots": True,
}
crime = prepare(df=crime, **prepare_options)

Data Wrangling:
> Converting Timestamps
> Computing Crime Rate
> Finding Days With No Crime
> Extracting Time Features
> Getting Economic Data
> Computing Weekly Crime Rate
> Computing Previous Weeks Of Crime
6.55 Seconds
Plotting:
> Plotting Correlations
> Crimes vs. NASDAQ
> Crimes vs. Unemployment
> Crimes vs. CPI
> Crimes vs. PPI
> Crimes vs. GDP
> Crimes vs. GDI
> Crimes vs. Federal_Funds_Rate
> Crimes vs. Crimes(t-1)
> Crimes vs. Crimes(t-2)
> Crimes vs. Crimes(t-3)
> Crimes vs. Crimes(t-4)
> NASDAQ vs. Unemployment
> NASDAQ vs. CPI
> NASDAQ vs. PPI
> NASDAQ vs. GDP
> NASDAQ vs. GDI
> NASDAQ vs. Federal_Funds_Rate
> NASDAQ vs. Crimes(t-1)
> NASDAQ vs. Crimes(t-2)
> NASDAQ vs. Crimes(t-3)
> NASDAQ vs. Crimes(t-4)
> Unemployment vs. CPI
> Unemployment vs. PPI
> Unemployment vs. GDP
> Unemployment vs. GDI
> Unemployment vs. Federal_Funds_Rate
> Unemployment vs. Crimes(t-1)
> Unemployment vs. Crimes(t-2)
> Unemployment vs. Crimes(t-3)
> Unemployment vs. Crimes(t-4)
> CPI vs. PPI
> CP

In [6]:
# split off the target column and keep the last 20% of rows as the testing set
target_column = "Crimes"
y = crime[[target_column]]  # double brackets keep the target as a one-column DataFrame
X = crime.drop(columns=target_column)

# X and y have the same number of rows, so one hold-out size serves both
holdout_rows = int(0.2 * len(X))
testX = X.tail(holdout_rows).reset_index(drop=True)
testy = y.tail(holdout_rows).reset_index(drop=True)

In [7]:
# build the model
# Configure the regression pipeline, then reuse a previously saved pipeline when
# one can be loaded; otherwise train from scratch and report the validation metrics.
print("\n---- Crime Regression Analysis ----\n")
# NOTE(review): the boolean switches below are passed straight to
# nnet_regressor.Regression; their exact meaning is defined there -- confirm
# against that module before changing them.
model = Regression(
    name="Tensorflow Without Feature Engineering",
    path=None,
    rename=False,
    deep=True,
    time=False,
    binary=True,
    imputation=False,
    variance=True,
    scale=True,
    atwood=False,
    binning=False,
    reciprocal=False,
    interaction=False,
    selection=False,
    plots=True,
)
try:
    model.load()  # load the machine learning pipeline
    predictions = model.predict(testX)
except Exception:
    # Catch Exception rather than using a bare `except:` so that interrupting the
    # kernel (KeyboardInterrupt) or a SystemExit stops the cell instead of
    # silently kicking off the long training run below.
    model.validate(X, y)  # build the machine learning pipeline
    predictions = model.predict(testX)
    print(f"R2: {model.r2}")
    print(f"RMSE: {model.rmse}")
    print(f"In Control: {model.in_control}")


---- Crime Regression Analysis ----

Model Training:
> Transforming The Training Data
> Transforming Categorical Features
> Removing Constant Features
> Scaling Features
> Training Neural Network
16.35 Minutes
Model Performance:
> Transforming The Testing Data
> Scoring The Model
3.0 Seconds
Model Indicators:
> Perturbing Features
1.34 Minutes
Model Prediction:
> Transforming The New Data
> Getting Predictions
0.27 Seconds
Model Monitoring:
> Computing Feature Drift
0.89 Seconds
R2: 0.888439143937185
RMSE: 28.484833354620775
In Control: 97.36%


In [8]:
# model diagnostics: list the first ten indicators and the first ten features
# whose drift p-value is below 0.05
print("Model Indicators:")
leading_indicators = model.indicators["Indicator"][:10]
for rank, indicator in enumerate(leading_indicators, start=1):
    print(f"{rank}. {indicator}")
print(" ")
print("Feature Drift:")
# filter once and reuse the result for both the listing and the empty check
drifted_features = model.drift.loc[model.drift["pvalue"] < 0.05, "Feature"]
if drifted_features.empty:
    print("None")
else:
    for rank, feature in enumerate(drifted_features[:10], start=1):
        print(f"{rank}. {feature}")

Model Indicators:
1. GDI
2. Federal_Funds_Rate
3. GDP
4. PPI
5. Unemployment
6. CPI
7. NASDAQ
8. Week_9
9. Week_8
10. Crimes(t-1)
 
Feature Drift:
1. Month_1
2. Quarter_1
3. Crimes(t-4)
4. Crimes(t-3)
5. Crimes(t-2)
6. Crimes(t-1)
7. Year_2006
8. Year_2007
9. Year_2013
10. Year_2012


In [9]:
# score the model on the held-out rows
# The target is a single column, so take it as a 1-D array once and reuse it.
actual = testy.iloc[:, 0].to_numpy()

# RMSE is taken as the square root of the plain MSE instead of passing
# `squared=False`: that keyword is deprecated in scikit-learn 1.4 and removed in
# 1.6, while this form gives the same value for a single target on any version.
rmse = mean_squared_error(y_true=actual, y_pred=predictions) ** 0.5
r2 = r2_score(y_true=actual, y_pred=predictions)

print(f"RMSE: {rmse}")
print(f"R2: {r2}")

RMSE: 28.463612644240648
R2: 0.8890745742566586


In [10]:
# save the machine learning pipeline
# NOTE(review): presumably this is what lets `model.load()` in the build cell
# succeed on a later run and skip training -- confirm where dump() writes,
# given the model was constructed with path=None.
model.dump()

In [11]:
# refit the model to include the test data
# After this call the held-out rows have been passed to the model, so metrics
# computed on testX/testy are no longer out-of-sample.
# NOTE(review): the pipeline was dumped before this refit and no visible cell
# dumps it again afterwards -- confirm whether the refit model should be saved.
model.refit(testX, testy)

Model Retraining:
> Transforming The Updated Data
> Transforming Categorical Features
> Removing Constant Features
> Scaling Features
> Training Neural Network
25.07 Minutes
Model Indicators:
> Perturbing Features
1.81 Minutes
