In [None]:
import pandas as pd
import numpy as np
import sys
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from keras import regularizers
from keras.models import load_model
import matplotlib.pyplot as plt
import joblib
import json

In [None]:
# Resolve the repository root (the "roi-prediction" directory) from the current
# working directory so data files can be found from anywhere inside the repo.
# os.sep is the separator os.path.abspath emits on this platform; testing only
# for "linux" picked "\\" on macOS and other POSIX systems, which broke the split.
pathNav = os.sep

_cwd_parts = os.path.abspath("").split(pathNav)
if "roi-prediction" not in _cwd_parts:
    raise ValueError(
        "Cannot locate the 'roi-prediction' directory in the working directory path: "
        + os.path.abspath("")
    )

# Keep every path component up to and including the first "roi-prediction".
idx = _cwd_parts.index("roi-prediction") + 1
base_path = pathNav.join(_cwd_parts[:idx])

In [None]:
def read_dataframe_from_folder(parent_path, file_name):
    """Read a CSV file located under the project root into a DataFrame.

    The path is assembled as ``base_path`` + ``parent_path`` + ``file_name``,
    joined with the notebook-level ``pathNav`` separator.

    Args:
        parent_path: Folder path relative to ``base_path``.
        file_name: Name of the CSV file inside ``parent_path``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    csv_path = pathNav.join((base_path, parent_path, file_name))
    return pd.read_csv(csv_path)

In [None]:
# Load the saved Keras model; the path is relative to the working directory.
model = load_model("v1_model.keras")

# Both input tables are read from the "datasets_filtered" folder under base_path:
# the per-county observations first, then the FIPS lookup table.
df_observed_data, df_FIPS = (
    read_dataframe_from_folder("datasets_filtered", csv_name)
    for csv_name in ("preprocessed_dataset.csv", "FIPS_code.csv")
)

In [None]:
def make_data_from_county(df, county_name, start_year, end_year):
    """Build the model inputs for one county over an inclusive range of years.

    Args:
        df: DataFrame with a ``name`` column, a ``year`` column and the feature
            columns listed in the body.
        county_name: Text to look for in ``name``. It is matched literally
            (not as a regular expression), case-insensitively, as a substring.
        start_year: First year to include (inclusive).
        end_year: Last year to include (inclusive).

    Returns:
        ``(time_series_data, static_series_data)``. ``time_series_data`` is a
        2-D array with one row per matching row of ``df``, ordered by year,
        holding ``[gdp, unemployment_rate, zhvi, gdp_last_year,
        unemployment_rate_last_year]``. ``static_series_data`` is a 1-D array
        ``[tax_rate, 4g_st_pct, median_income, population]`` taken from the
        latest matching row. When nothing matches, ``([], [])`` is returned.
    """
    time_series_columns = [
        "gdp",
        "unemployment_rate",
        "zhvi",
        "gdp_last_year",
        "unemployment_rate_last_year",
    ]
    static_columns = ["tax_rate", "4g_st_pct", "median_income", "population"]

    # regex=False: county names may contain characters such as "." or "(" that
    # would otherwise be interpreted as regular-expression syntax.
    name_matches = df["name"].str.contains(county_name, case=False, na=False, regex=False)
    in_year_range = (df["year"] >= start_year) & (df["year"] <= end_year)
    df_rows = df[name_matches & in_year_range]

    if df_rows.empty:
        return [], []

    # The rows form a time series, so make the order explicit instead of relying
    # on the order of the input frame; a stable sort keeps ties in input order.
    df_rows = df_rows.sort_values("year", kind="stable")

    time_series_data = df_rows[time_series_columns].to_numpy()
    # Static features come from the last (most recent) row.
    static_series_data = df_rows[static_columns].iloc[-1].to_numpy()

    return time_series_data, static_series_data

# geoID: 19103
# time_series_data = np.array([
#     [10098945, 3.5106870229007634, 226114.939065919, 9956797, 3.7609022556390976],
#     [10120081, 5.687313432835821, 232398.62165049397, 10098945, 3.5106870229007634],
#     [10824259, 3.875187969924812, 244657.10304912183, 10120081, 5.687313432835821],
#     [11428475, 3.2616, 267427.1637093905, 10824259, 3.875187969924812],
#     [11947432, 3.415748031496063, 279344.8155526302, 11428475, 3.2616]
# ])

# static_series_data = np.array([
#     0.01565,
#     0.9991,
#     62214,
#     150819
# ])

In [None]:
# Build the model inputs for a single county, given as "<county>, <state abbreviation>",
# over the inclusive year range below.
county_name = "Lee, IA"
start_year, end_year = 2019, 2023
time_series_data, static_series_data = make_data_from_county(
    df=df_observed_data,
    county_name=county_name,
    start_year=start_year,
    end_year=end_year,
)

In [None]:
# print(time_series_data)
# print(static_series_data)

In [None]:
# Load the persisted scalers and standardize the single-county inputs.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code; only load scaler files that come from a trusted source.
time_series_std = joblib.load("time_series_std.pkl")
static_series_std = joblib.load("static_series_std.pkl")

# make_data_from_county returns empty lists when nothing matches, and the
# reshape below needs exactly 5 rows; fail with a clear message instead of an
# AttributeError or an opaque reshape error (same check as the all-county loop).
if len(time_series_data) != 5:
    raise ValueError(
        f"Expected 5 rows for '{county_name}' between {start_year} and {end_year}, "
        f"got {len(time_series_data)}"
    )

time_series_stadardized_data = time_series_std.transform(time_series_data.reshape(-1, 5))
static_series_stadardized_data = static_series_std.transform(static_series_data.reshape(1, -1))

# Add the leading batch dimension: one sample of 5 rows x 5 features.
time_series_stadardized_data = time_series_stadardized_data.reshape(1, 5, 5)


In [None]:
# print("Time-series input shape:", time_series_stadardized_data.shape)
# print("Static input shape:", static_series_stadardized_data.shape) 

In [None]:
# The model takes two inputs, in this order: the standardized time-series block
# and the standardized static feature row.
model_inputs = [time_series_stadardized_data, static_series_stadardized_data]
prediction = model.predict(model_inputs)

# prediction is indexed as [sample][output]; show the single value for this county.
print(f"Predicted ROI Change: {prediction[0][0]:.2f}%")

In [None]:
# Predict next-year ROI for every county listed in df_FIPS.
roi_prediction_list = []
start_year = 2019
end_year = 2023
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code; only load scaler files that come from a trusted source.
time_series_std = joblib.load("time_series_std.pkl")
static_series_std = joblib.load("static_series_std.pkl")

# Collect the standardized inputs first and run the model once on the whole
# batch: calling model.predict per county is slow and prints progress output
# for every call.
county_names = []
time_series_batch = []
static_series_batch = []

for _, county_row in df_FIPS.iterrows():
    # NOTE(review): only the first word of "Name" is kept, so multi-word county
    # names are truncated before matching - confirm this is intended.
    county_name = ", ".join([county_row["Name"].split(" ")[0], county_row["StateName"]])

    time_series_data, static_series_data = make_data_from_county(df_observed_data, county_name, start_year, end_year)

    # The model input is 5 rows x 5 features; skip counties without exactly 5 rows
    # (this also covers the empty result returned when nothing matches).
    if (len(time_series_data) != 5):
        continue

    county_names.append(county_name)
    time_series_batch.append(time_series_std.transform(time_series_data.reshape(-1, 5)))
    static_series_batch.append(static_series_std.transform(static_series_data.reshape(1, -1))[0])

if county_names:
    batch_predictions = model.predict(
        [np.stack(time_series_batch), np.array(static_series_batch)],
        verbose=0,
    )
    for position, county_name in enumerate(county_names):
        # Keep each entry as a one-row slice so later cells can still index it
        # with [0][0], exactly like a single-sample predict() result.
        roi_prediction_list.append({
            "county": county_name,
            "next_year_roi": batch_predictions[position:position + 1],
        })

print(f"Predicted ROI for {len(roi_prediction_list)} of {len(df_FIPS)} counties")

In [None]:
# Find the county with the highest predicted ROI and collect every county whose
# prediction exceeds target_roi.
# max_roi starts as None rather than 0 so that the best county is still reported
# when every prediction is zero or negative.
max_roi = None
max_name = ""
list_of_more_than_target = []
target_roi = 8.0

for county in roi_prediction_list:
    name = county["county"]
    # Convert to a Python float before rounding: rounding the NumPy scalar and
    # converting afterwards yields values such as 8.529999732971191 in the JSON.
    roi = round(float(county["next_year_roi"][0][0]), 2)

    # Strict comparison keeps the first county on ties.
    if max_roi is None or roi > max_roi:
        max_roi = roi
        max_name = name

    if roi > target_roi:
        list_of_more_than_target.append({"county": name, "next_year_roi": roi})

if max_roi is None:
    print("No county predictions available")
else:
    print(max_name, max_roi)

In [None]:
# Pretty-print the counties above the target ROI as indented JSON.
report_json = json.dumps(list_of_more_than_target, indent=4)
print(report_json)