# Code for forecasting page

## Model information gathering

In [2]:
# Import dependency
import pandas as pd

In [5]:
# Read in model summary
model_info = pd.read_csv("../Model_summary.csv")
model_info.head()

Unnamed: 0,State,Model,R2 Score
0,National,LinearRegression,0.603216
1,Alabama,LinearRegression,0.956442
2,Arizona,LinearRegression,0.882848
3,Arkansas,LinearRegression,0.96113
4,California,LinearRegression,0.65197


In [6]:
# Narrow df to state selected
model_used = model_info.loc[model_info['State'] == "Alabama"]
model_used.head()

Unnamed: 0,State,Model,R2 Score
1,Alabama,LinearRegression,0.956442


In [18]:
# Get state
model_used["State"].item()

'Alabama'

In [17]:
# Get model type
model_used["Model"].item()

'LinearRegression'

In [19]:
# Get r2 score rounded to two decimals
round(model_used["R2 Score"].item(), 2)

0.96

## Generating csv for feature ranges by state

In [20]:
# Read in econ features data
econ_features = pd.read_csv('../../Data_Preprocessing/Raw_Data/dataset_na_dropped.csv')

In [21]:
# Drop unwanted columns
features_df = econ_features

In [22]:
# Calculate additional features
features_df['education_per_capita'] = features_df['education_million'] / features_df['population_million']
features_df['welfare_per_capita'] = features_df['welfare_million'] / features_df['population_million']

# Drop the non-beneficial columns
columns_to_drop = ['education_million','welfare_million']
features_df = features_df.drop(columns=columns_to_drop, axis=1)

In [23]:
# Generate data for min and max values for each state and feature
data = []
for state in features_df['state'].unique():
    temp_df = features_df.loc[econ_features['state'] == state]
    for feature in features_df.drop(columns = ['year', 'state', 'population_million']).columns:
        minimum = temp_df[feature].min()
        maximum = temp_df[feature].max()
        record = {'state': state, 'feature':feature, 'min': minimum, 'max': maximum}
        data.append(record)
data

[{'state': 'National', 'feature': 'crime_rate', 'min': 372.0, 'max': 758.1},
 {'state': 'National',
  'feature': 'unemployment_rate',
  'min': 3.71836707699005,
  'max': 9.86085724560746},
 {'state': 'National',
  'feature': 'divorce_rate_per_1000_people',
  'min': 2.7,
  'max': 5.3},
 {'state': 'National',
  'feature': 'homeownership_rate',
  'min': 3.72,
  'max': 9.86},
 {'state': 'National',
  'feature': 'minimum_wage_effective',
  'min': 2.226666666666668,
  'max': 9.068823529411764},
 {'state': 'National',
  'feature': 'CPI_Average',
  'min': 56.900000000000055,
  'max': 255.65699999999975},
 {'state': 'National', 'feature': 'inflation_rate', 'min': -0.4, 'max': 13.5},
 {'state': 'National',
  'feature': 'avg_wage_index',
  'min': 9226.48,
  'max': 54099.99},
 {'state': 'National', 'feature': 'poverty_rate', 'min': 12.3, 'max': 16.79},
 {'state': 'National',
  'feature': 'education_per_capita',
  'min': 494.5493097328816,
  'max': 3602.8157646270524},
 {'state': 'National',
  'fea

In [24]:
# Create dataframe
min_max_df = pd.DataFrame(data)
min_max_df.head()

Unnamed: 0,state,feature,min,max
0,National,crime_rate,372.0,758.1
1,National,unemployment_rate,3.718367,9.860857
2,National,divorce_rate_per_1000_people,2.7,5.3
3,National,homeownership_rate,3.72,9.86
4,National,minimum_wage_effective,2.226667,9.068824


In [28]:
# Check dataframe for a state
ca_feat_values = min_max_df.loc[min_max_df['state'] == 'California']
ca_feat_values

Unnamed: 0,state,feature,min,max
55,California,crime_rate,396.4,1119.7
56,California,unemployment_rate,4.2,12.5
57,California,divorce_rate_per_1000_people,2.7,4.9778
58,California,homeownership_rate,53.6,60.2
59,California,minimum_wage_effective,3.35,12.0
60,California,CPI_Average,103.9,255.657
61,California,inflation_rate,-0.4,5.4
62,California,avg_wage_index,16135.07,54099.99
63,California,poverty_rate,11.8,26.6
64,California,education_per_capita,761.899071,3825.43486


In [30]:
# Getting a max value for a feature
ca_max_crime = ca_feat_values.loc[ca_feat_values['feature'] == 'crime_rate']['max'].item()
ca_max_crime

1119.7

In [31]:
# Getting a min value for a feature
ca_min_crime = ca_feat_values.loc[ca_feat_values['feature'] == 'crime_rate']['min'].item()
ca_min_crime

396.4

In [None]:
# Export csv to be used
min_max_df.to_csv('min_max_values.csv', index = False)