In [1]:
import gzip
import shutil
import numpy as np

In [2]:
compressed = '../data/lgb_model.txt.gz'
decompressed = '../data/lgb_model.txt'

with gzip.open(compressed, 'rb') as f_in:
    with open(decompressed, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

In [3]:
import lightgbm as lgb

model = lgb.Booster(model_file='../data/lgb_model.txt')
feature_names = model.feature_name()
feature_importance = model.feature_importance()
print(feature_names)
print(feature_importance)


['time_occured', 'AREA', 'district_number', 'victim_age', 'victim_sex', 'victim_race', 'premis_code', 'weapon_code', 'mo_codes_hashed', 'year', 'month', 'day']
[295660   6131  66195 242948  32242   2782  13295   5301  21009   1325
   5675  30207]


In [4]:
import pandas as pd
exported_dat = pd.read_csv('../data/exported_data.csv.gz')
exported_dat

Unnamed: 0,time_occured,AREA,district_number,victim_age,victim_sex,victim_race,premis_code,weapon_code,mo_codes_hashed,year,month,day,label
0,200,6,666,69,M,White,502.0,0.0,28274708,2020,2,8,13
1,2147,14,1437,24,F,Other,502.0,0.0,67918447,2020,2,2,38
2,1530,15,1506,51,F,Hispanic,501.0,511.0,41932281,2021,10,1,125
3,1807,17,1764,35,X,Unknown,404.0,0.0,31915443,2024,1,27,40
4,930,20,2056,53,F,Korean,210.0,0.0,31677473,2022,3,28,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
982617,2000,8,859,60,M,Other,501.0,400.0,38683879,2023,6,29,110
982618,600,19,1966,48,F,Hispanic,502.0,0.0,25618947,2021,6,16,113
982619,1500,12,1259,44,M,Hispanic,102.0,308.0,53839431,2020,5,29,6
982620,1200,14,1444,33,F,Other,501.0,400.0,10195218,2020,1,1,64


In [5]:
exported_dat['AREA'] = exported_dat['AREA'].astype('category')
exported_dat['district_number'] = exported_dat['district_number'].astype('category')
exported_dat['victim_sex'] = exported_dat['victim_sex'].astype('category')
exported_dat['victim_race'] = exported_dat['victim_race'].astype('category')
exported_dat['premis_code'] = exported_dat['premis_code'].astype('category')
exported_dat['weapon_code'] = exported_dat['weapon_code'].astype('category')
exported_dat['mo_codes_hashed'] = exported_dat['mo_codes_hashed'].astype('category')
exported_dat['year'] = exported_dat['year'].astype('category')
exported_dat['month'] = exported_dat['month'].astype('category')
exported_dat['day'] = exported_dat['day'].astype('category')

In [6]:
exported_dat.dtypes

time_occured          int64
AREA               category
district_number    category
victim_age            int64
victim_sex         category
victim_race        category
premis_code        category
weapon_code        category
mo_codes_hashed    category
year               category
month              category
day                category
label                 int64
dtype: object

# Testing if the Model Works After Loading it in

In [7]:
from sklearn.preprocessing import LabelEncoder

# Encode categorical features
label_encoders = {}
for column in exported_dat.select_dtypes(include=['category']).columns:
	le = LabelEncoder()
	exported_dat[column] = le.fit_transform(exported_dat[column])
	label_encoders[column] = le

# Prepare the test input again
test_input = [exported_dat.get(feature, [0])[1] for feature in feature_names]
test_input_array = np.array(test_input).reshape(1,-1)

# Make prediction
prediction = model.predict(test_input_array)
np.argmax(prediction, axis=1)

array([38])

In [8]:
test_input_array

array([[  2147,     13,    750,     24,      0,     12,    131,      0,
        209969,      0,      1,      1]])

In [9]:
features_df = exported_dat.drop(columns='label')
features_df.head()

Unnamed: 0,time_occured,AREA,district_number,victim_age,victim_sex,victim_race,premis_code,weapon_code,mo_codes_hashed,year,month,day
0,200,5,308,69,2,18,131,0,87587,0,1,7
1,2147,13,750,24,0,12,131,0,209969,0,1,1
2,1530,14,804,51,0,8,130,74,129542,1,9,0
3,1807,16,974,35,3,16,115,0,98646,4,0,26
4,930,19,1138,53,0,10,61,0,97929,2,2,27


In [10]:
# Create a dictionary to store unique values for each column
unique_values = {col: features_df[col].dropna().unique().tolist() for col in features_df.columns}
import json

# Save the unique values dictionary to a JSON file
with open('../data/unique_values.json', 'w') as f:
    json.dump(unique_values, f, indent=4)

print("Unique values saved to 'unique_values.json'")



Unique values saved to 'unique_values.json'


In [12]:
import requests

# Encode categorical features using the same label encoders
for column, le in label_encoders.items():
	# Handle new categories by setting them to -1
	exported_dat[column] = exported_dat[column].apply(lambda x: le.transform([x])[0] if x in le.classes_ else -1)

# Prepare the test input again
test_input = [exported_dat.get(feature, [0])[1] for feature in feature_names]
test_input_array = np.array(test_input).reshape(1,-1)

url = "http://127.0.0.1:5000/predict"
input_array = test_input_array.tolist()  # Convert NumPy array to list

# Ensure the input array is in the correct format (2D array)
input_data = {"data": input_array}

response = requests.post(url, json=input_data)
print(response.json())


KeyboardInterrupt: 