In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf
import numpy as np

# Read the input workbook into a DataFrame and preview its first rows
DATASET_FILE = 'Percent_Total_Homeless_DroppedColumns_AllCoC.xlsx'
df = pd.read_excel(DATASET_FILE)
df.head()

Unnamed: 0,Year,TotalHomeless,Population,Unemployment,Unsheltered_perc_tot,TotalBeds_perc_tot,Emergency_perc_tot,Transitional_perc_tot,SafeHaven_perc_tot,PermanentSupportive_perc_tot,PermanentOther_perc_tot,RapidRehousing_perc_tot
0,2019,1111,288000,4.4,8.730873,92.979298,71.917192,21.062106,0.0,54.09541,6.390639,15.391539
1,2018,1094,290521,5.0,8.592322,93.967093,68.464351,25.502742,0.0,55.210238,6.489945,11.42596
2,2017,1128,294338,5.4,13.741135,90.691489,52.925532,37.765957,0.0,50.620567,6.294326,3.812057
3,2016,1105,297249,5.3,21.719457,91.131222,50.045249,41.085973,0.0,39.276018,8.778281,4.977376
4,2015,1208,297547,4.9,14.817881,84.354305,49.917219,34.437086,0.0,40.728477,5.877483,2.317881


In [2]:
# Show column names, non-null counts and dtypes of the freshly loaded table
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2199 entries, 0 to 2198
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Year                          2199 non-null   int64  
 1   TotalHomeless                 2199 non-null   int64  
 2   Population                    2199 non-null   int64  
 3   Unemployment                  2199 non-null   float64
 4   Unsheltered_perc_tot          2199 non-null   float64
 5   TotalBeds_perc_tot            2199 non-null   float64
 6   Emergency_perc_tot            2199 non-null   float64
 7   Transitional_perc_tot         2199 non-null   float64
 8   SafeHaven_perc_tot            2199 non-null   float64
 9   PermanentSupportive_perc_tot  2199 non-null   float64
 10  PermanentOther_perc_tot       2199 non-null   float64
 11  RapidRehousing_perc_tot       2199 non-null   float64
dtypes: float64(9), int64(3)
memory usage: 206.3 KB


In [3]:
# Cut the unsheltered percentage into three equal-width intervals and count the rows in each
equal_width_bins = pd.cut(df['Unsheltered_perc_tot'], bins=3)
equal_width_bins.value_counts()

(-0.0961, 32.033]    1500
(32.033, 64.065]      484
(64.065, 96.098]      215
Name: Unsheltered_perc_tot, dtype: int64

In [4]:
# Edges handed to pd.cut in the next cell; with pd.cut's default right-closed
# intervals they describe (-1, 33], (33, 66] and (66, 100]
size_bins = [-1, 33, 66, 100]
# One digit-string label per interval, "0" for the lowest through "2" for the highest
group_names = [str(tier) for tier in range(3)]

In [5]:
# Label each row with the interval its unsheltered percentage falls into
unsheltered_pct = df['Unsheltered_perc_tot']
df['Unsheltered_thirds'] = pd.cut(x=unsheltered_pct, bins=size_bins, labels=group_names)

In [6]:
# The interval labels are digit strings; turn the labelled column into plain integers
tier_labels = df['Unsheltered_thirds']
df['Unsheltered_thirds'] = tier_labels.astype(int)

In [7]:
# Re-inspect the table to confirm Unsheltered_thirds was added with an integer dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2199 entries, 0 to 2198
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Year                          2199 non-null   int64  
 1   TotalHomeless                 2199 non-null   int64  
 2   Population                    2199 non-null   int64  
 3   Unemployment                  2199 non-null   float64
 4   Unsheltered_perc_tot          2199 non-null   float64
 5   TotalBeds_perc_tot            2199 non-null   float64
 6   Emergency_perc_tot            2199 non-null   float64
 7   Transitional_perc_tot         2199 non-null   float64
 8   SafeHaven_perc_tot            2199 non-null   float64
 9   PermanentSupportive_perc_tot  2199 non-null   float64
 10  PermanentOther_perc_tot       2199 non-null   float64
 11  RapidRehousing_perc_tot       2199 non-null   float64
 12  Unsheltered_thirds            2199 non-null   int32  
dtypes: 

In [8]:
# Drop Unsheltered_perc_tot (the column Unsheltered_thirds was binned from, so
# keeping it as a feature would leak the target) together with Year
df = df.drop(columns=['Unsheltered_perc_tot', 'Year'])

In [9]:
# Re-inspect the table to confirm the two dropped columns are gone
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2199 entries, 0 to 2198
Data columns (total 11 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   TotalHomeless                 2199 non-null   int64  
 1   Population                    2199 non-null   int64  
 2   Unemployment                  2199 non-null   float64
 3   TotalBeds_perc_tot            2199 non-null   float64
 4   Emergency_perc_tot            2199 non-null   float64
 5   Transitional_perc_tot         2199 non-null   float64
 6   SafeHaven_perc_tot            2199 non-null   float64
 7   PermanentSupportive_perc_tot  2199 non-null   float64
 8   PermanentOther_perc_tot       2199 non-null   float64
 9   RapidRehousing_perc_tot       2199 non-null   float64
 10  Unsheltered_thirds            2199 non-null   int32  
dtypes: float64(8), int32(1), int64(2)
memory usage: 180.5 KB


In [10]:
# Separate the label column from the feature matrix
target_column = "Unsheltered_thirds"
y = df[target_column].values
X = df.drop(columns=target_column).values

# Hold out a test set, stratified on the label, using a fixed random seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [11]:
# Standardise the features before they reach the neural network

# The scaler learns its statistics from the training rows only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply that same fitted transform to both partitions
X_train_scaled, X_test_scaled = [X_scaler.transform(part) for part in (X_train, X_test)]

In [12]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=64, activation="sigmoid", input_dim=10))
nn_model.add(tf.keras.layers.Dense(units=32, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
18/18 - 0s - loss: -3.9248e+02 - accuracy: 0.7745 - 142ms/epoch - 8ms/step
Loss: -392.475830078125, Accuracy: 0.774545431137085


In [13]:
# Import our input dataset
new_data = pd.read_excel('CA502_2019_Simulation_vf.xlsx')
new_data.head()

Unnamed: 0,TotalHomeless,Population,Unemployment,TotalBeds_perc_tot,Emergency_perc_tot,Transitional_perc_tot,SafeHaven_perc_tot,PermanentSupportive_perc_tot,PermanentOther_perc_tot,RapidRehousing_perc_tot
0,8022,1671329,3,21.902269,14.460234,7.043131,0.398903,41.797557,0,7.554226
1,8022,1671329,3,32.442034,25.0,7.043131,0.398903,41.797557,0,7.554226
2,8022,1671329,3,57.442034,50.0,7.043131,0.398903,41.797557,0,7.554226
3,8022,1671329,3,82.442034,75.0,7.043131,0.398903,41.797557,0,7.554226
4,8022,1671329,3,107.442034,100.0,7.043131,0.398903,41.797557,0,7.554226


In [14]:
new_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 10 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   TotalHomeless                 15 non-null     int64  
 1   Population                    15 non-null     int64  
 2   Unemployment                  15 non-null     int64  
 3   TotalBeds_perc_tot            15 non-null     float64
 4   Emergency_perc_tot            15 non-null     float64
 5   Transitional_perc_tot         15 non-null     float64
 6   SafeHaven_perc_tot            15 non-null     float64
 7   PermanentSupportive_perc_tot  15 non-null     float64
 8   PermanentOther_perc_tot       15 non-null     int64  
 9   RapidRehousing_perc_tot       15 non-null     float64
dtypes: float64(6), int64(4)
memory usage: 1.3 KB


In [15]:
# Figure out what type of data we need to transform the dataframe into
type(X_train_scaled)

numpy.ndarray

In [16]:
# convert the dataframe to a numpy array
X_new_data = new_data.to_numpy()

In [17]:
# sanity check - does this look right?
X_new_data

array([[8.02200000e+03, 1.67132900e+06, 3.00000000e+00, 2.19022688e+01,
        1.44602344e+01, 7.04313139e+00, 3.98903017e-01, 4.17975567e+01,
        0.00000000e+00, 7.55422588e+00],
       [8.02200000e+03, 1.67132900e+06, 3.00000000e+00, 3.24420344e+01,
        2.50000000e+01, 7.04313139e+00, 3.98903017e-01, 4.17975567e+01,
        0.00000000e+00, 7.55422588e+00],
       [8.02200000e+03, 1.67132900e+06, 3.00000000e+00, 5.74420344e+01,
        5.00000000e+01, 7.04313139e+00, 3.98903017e-01, 4.17975567e+01,
        0.00000000e+00, 7.55422588e+00],
       [8.02200000e+03, 1.67132900e+06, 3.00000000e+00, 8.24420344e+01,
        7.50000000e+01, 7.04313139e+00, 3.98903017e-01, 4.17975567e+01,
        0.00000000e+00, 7.55422588e+00],
       [8.02200000e+03, 1.67132900e+06, 3.00000000e+00, 1.07442034e+02,
        1.00000000e+02, 7.04313139e+00, 3.98903017e-01, 4.17975567e+01,
        0.00000000e+00, 7.55422588e+00],
       [8.02200000e+03, 1.67132900e+06, 3.00000000e+00, 1.57442034e+02,
   

In [18]:
# Don't forget to scale the data with the existing scaler!
X_new_data_scaled = scaler.transform(X_new_data)

In [19]:
# Another sanity check
X_new_data_scaled

array([[ 1.38899120e+00,  6.37834834e-01, -1.10065966e+00,
        -1.70210783e+00, -1.23489593e+00, -1.22953113e+00,
        -4.95733651e-02, -4.60763706e-01, -2.47185089e-01,
        -4.52787147e-01],
       [ 1.38899120e+00,  6.37834834e-01, -1.10065966e+00,
        -1.40738010e+00, -8.80303877e-01, -1.22953113e+00,
        -4.95733651e-02, -4.60763706e-01, -2.47185089e-01,
        -4.52787147e-01],
       [ 1.38899120e+00,  6.37834834e-01, -1.10065966e+00,
        -7.08294994e-01, -3.92224341e-02, -1.22953113e+00,
        -4.95733651e-02, -4.60763706e-01, -2.47185089e-01,
        -4.52787147e-01],
       [ 1.38899120e+00,  6.37834834e-01, -1.10065966e+00,
        -9.20988467e-03,  8.01859009e-01, -1.22953113e+00,
        -4.95733651e-02, -4.60763706e-01, -2.47185089e-01,
        -4.52787147e-01],
       [ 1.38899120e+00,  6.37834834e-01, -1.10065966e+00,
         6.89875225e-01,  1.64294045e+00, -1.22953113e+00,
        -4.95733651e-02, -4.60763706e-01, -2.47185089e-01,
        -4.

In [20]:
# Here's what the original scaled data looked like
X_train_scaled

array([[ 0.09710797, -0.2457281 ,  0.73271631, ..., -0.60507944,
        -0.24718509, -0.5471636 ],
       [-0.24900405, -0.44477612, -0.29855767, ..., -0.27142641,
         0.2515256 , -0.6631204 ],
       [-0.17147835, -0.50603434, -0.41314367, ..., -0.74890236,
        -0.24718509,  0.62298316],
       ...,
       [-0.24985133, -0.56918321,  0.44625132, ..., -0.79808196,
        -0.24718509, -0.379089  ],
       [ 1.14222223,  0.01448038, -1.44441765, ...,  0.93657367,
        -0.24718509, -0.61572676],
       [-0.14754282, -0.47140753,  4.62864025, ..., -0.92699453,
        -0.24718509, -0.74283379]])

In [21]:
# The original data before scaling
X_train

array([[1.92300000e+03, 5.50660000e+05, 6.20000000e+00, ...,
        3.17212689e+01, 0.00000000e+00, 5.09620385e+00],
       [2.89000000e+02, 2.98197000e+05, 4.40000000e+00, ...,
        5.50173010e+01, 2.00692042e+01, 2.07612457e+00],
       [6.55000000e+02, 2.20500000e+05, 4.20000000e+00, ...,
        2.16793893e+01, 0.00000000e+00, 3.55725191e+01],
       ...,
       [2.85000000e+02, 1.40405000e+05, 5.70000000e+00, ...,
        1.82456140e+01, 0.00000000e+00, 9.47368421e+00],
       [6.85700000e+03, 8.80696000e+05, 2.40000000e+00, ...,
        1.39361237e+02, 0.00000000e+00, 3.31048564e+00],
       [7.68000000e+02, 2.64419000e+05, 1.30000000e+01, ...,
        9.24479167e+00, 0.00000000e+00, 0.00000000e+00]])

In [22]:
# Use predict to apply the model to the new data
y_new_data = nn_model.predict(X_new_data_scaled)

In [23]:
# Here's the result
# Notice this is not 1s and 0s. Rather, it's a probability.
# 0.9 is more likely to be a 1 than 0.6 or 0.2. 
y_new_data

array([[1.00000000e+00],
       [1.00000000e+00],
       [1.00000000e+00],
       [1.13395504e-26],
       [1.49271845e-18],
       [3.49480510e-02],
       [5.51051199e-02],
       [1.00000000e+00],
       [1.00000000e+00],
       [1.00000000e+00],
       [1.00000000e+00],
       [3.78764861e-22],
       [1.00000000e+00],
       [1.00000000e+00],
       [1.00000000e+00]], dtype=float32)

In [24]:
# Check the type of the result
type(y_new_data)

numpy.ndarray

In [25]:
# Add the result to the data frame
new_data['Prediction'] = y_new_data

In [27]:
# Show up to the first 20 rows of the simulation table, including the prediction output
new_data.iloc[:20]

Unnamed: 0,TotalHomeless,Population,Unemployment,TotalBeds_perc_tot,Emergency_perc_tot,Transitional_perc_tot,SafeHaven_perc_tot,PermanentSupportive_perc_tot,PermanentOther_perc_tot,RapidRehousing_perc_tot,Prediction
0,8022,1671329,3,21.902269,14.460234,7.043131,0.398903,41.797557,0,7.554226,1.0
1,8022,1671329,3,32.442034,25.0,7.043131,0.398903,41.797557,0,7.554226,1.0
2,8022,1671329,3,57.442034,50.0,7.043131,0.398903,41.797557,0,7.554226,1.0
3,8022,1671329,3,82.442034,75.0,7.043131,0.398903,41.797557,0,7.554226,1.133955e-26
4,8022,1671329,3,107.442034,100.0,7.043131,0.398903,41.797557,0,7.554226,1.492718e-18
5,8022,1671329,3,157.442034,150.0,7.043131,0.398903,41.797557,0,7.554226,0.03494805
6,8022,1671329,3,207.442034,200.0,7.043131,0.398903,41.797557,0,7.554226,0.05510512
7,8022,1671329,3,39.859137,14.460234,25.0,0.398903,41.797557,0,7.554226,1.0
8,8022,1671329,3,64.859137,14.460234,50.0,0.398903,41.797557,0,7.554226,1.0
9,8022,1671329,3,46.503366,14.460234,7.043131,25.0,41.797557,0,7.554226,1.0


In [28]:
# Write the simulation rows and their predictions to a workbook, leaving out the row index
new_data.to_excel(excel_writer='CA-502_Prediction_output.xlsx', index=False)