In [54]:
# imports
import numpy as np
import pandas as pd
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler

In [55]:
# Load final_df back in from its CSV export.
# NOTE(review): the path points into one user's ~/Downloads folder, so this cell only
# runs on a machine that has the file at that exact location — consider a configurable data dir.
final_df = pd.read_csv('~/Downloads/opportunity mapper/final_df1.csv')

In [56]:
# Count the missing values in every column to check that there are no NaNs.
# Author's note: there aren't any, barring one unimportant ZPOP entry.
final_df.isna().sum()

ZIP                                    0
Area__                                 0
Length__                               0
geometry                               0
CITY                                   0
RENT_STUDIO                            0
RENT_STUDIO_90                         0
RENT_STUDIO_110                        0
RENT_1BD                               0
RENT_1BD_90                            0
RENT_1BD_110                           0
RENT_2BD                               0
RENT_2BD_90                            0
RENT_2BD_110                           0
RENT_3BD                               0
RENT_3BD_90                            0
RENT_3BD_110                           0
RENT_4BD                               0
RENT_4BD_90                            0
RENT_4BD_110                           0
GEO_ID                                 0
MEDIAN_INCOME_HOUSEHOLD_EST            0
TOTAL_HOUSEHOLDS_EST                   0
BART_COUNT                             0
CalTrain_COUNT  

In [69]:
# Min-max normalization of the scoring metrics (each one rescaled to the 0-1 range).
scaler = MinMaxScaler()

# Rent columns that each get their own normalized score column (norm_<lowercased name>).
rent_cols = ['RENT_STUDIO', 'RENT_1BD', 'RENT_2BD', 'RENT_3BD', 'RENT_4BD']
for col in rent_cols:
    # fit_transform refits the scaler on this column alone; subtracting from 1
    # inverts the scale so that Cheap = 1.0 and Expensive = 0.0.
    final_df[f'norm_{col.lower()}'] = 1 - scaler.fit_transform(final_df[[col]])

# Income is scaled the same way but NOT inverted: the highest median income scores 1.0.
final_df['norm_income'] = scaler.fit_transform(final_df[['MEDIAN_INCOME_HOUSEHOLD_EST']])

# Score transit with hand-picked tiers instead of min-max normalizing it. Yes, the tiers are
# technically arbitrary, but that is better than a purely mathematical normalization for a
# variable with a max value of 4.
def score_transit(count):
    """Map a transit count to a hand-tuned score between 0.0 and 1.0.

    Tiers: 3 or more -> 1.0, 2 -> 0.8, 1 -> 0.4, anything lower -> 0.0.

    Thresholds are checked with ``>=`` from the top tier down, so a
    non-integer count (e.g. 2.5) lands in the tier just below it instead of
    silently falling through to 0.0. Integer counts score exactly as before.
    NaN fails every comparison and therefore scores 0.0.

    Args:
        count: Transit count for one row (int or float).

    Returns:
        float: The score of the highest tier whose threshold ``count`` reaches.
    """
    if count >= 3:
        return 1.0
    if count >= 2:
        return 0.8
    if count >= 1:
        return 0.4
    return 0.0

# Turn each row's TOTAL_TRANSIT count into its tiered transit score.
final_df['norm_transit'] = final_df['TOTAL_TRANSIT'].apply(score_transit)

# Min-max scale the 2024 violent (VIOL) and property (PROP) crime rates separately.
# The one shared scaler is refit by every fit_transform call: violent first, then property.
scaled_crime_viol, scaled_crime_prop = (
    scaler.fit_transform(final_df[[rate_col]])
    for rate_col in ('2024_CRIMERATE_VIOL', '2024_CRIMERATE_PROP')
)

# Invert (1 - scaled), as with rent, because high crime is undesirable:
# the lowest crime rate scores 1.0 and the highest scores 0.0.
final_df['norm_crime_rate_viol'] = 1 - scaled_crime_viol
final_df['norm_crime_rate_prop'] = 1 - scaled_crime_prop
final_df

Unnamed: 0,ZIP,Area__,Length__,geometry,CITY,RENT_STUDIO,RENT_STUDIO_90,RENT_STUDIO_110,RENT_1BD,RENT_1BD_90,...,DISPLAY_MEDIAN_INCOME_HOUSEHOLD_EST,norm_rent_studio,norm_rent_1bd,norm_rent_2bd,norm_rent_3bd,norm_rent_4bd,norm_transit,norm_income,norm_crime_rate_viol,norm_crime_rate_prop
0,94558,1.231326e+10,995176.225313,POLYGON ((-122.10329200180091 38.5132829986466...,Napa,2000,1800,2200,2210,1989,...,"$109,444",0.643443,0.665517,0.615160,0.631702,0.564334,0.0,0.297585,0.875720,0.953259
1,94558,1.231326e+10,995176.225313,POLYGON ((-122.10329200180091 38.5132829986466...,Vallejo,2000,1800,2200,2210,1989,...,"$109,444",0.643443,0.665517,0.615160,0.631702,0.564334,0.0,0.297585,0.875720,0.953259
2,95620,7.236950e+09,441860.201400,POLYGON ((-121.65335500334426 38.3133870006294...,Vallejo,1560,1404,1716,1710,1539,...,"$100,224",0.823770,0.837931,0.825073,0.811189,0.794582,0.0,0.251509,0.579807,0.643656
3,95476,3.001414e+09,311318.546326,POLYGON ((-122.40684300305698 38.1556819994163...,Napa,1970,1773,2167,2180,1962,...,"$108,322",0.655738,0.675862,0.626822,0.575758,0.591422,0.0,0.291978,0.937028,1.000000
4,95476,3.001414e+09,311318.546326,POLYGON ((-122.40684300305698 38.1556819994163...,Santa Rosa-Petaluma,1970,1773,2167,2180,1962,...,"$108,322",0.655738,0.675862,0.626822,0.575758,0.591422,0.0,0.291978,0.937028,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,94022,4.211365e+08,137369.776481,POLYGON ((-122.09598200263689 37.3860539999608...,San Jose-Sunnyvale-Santa Clara,3160,2844,3476,3540,3186,...,"$250,000+",0.168033,0.206897,0.239067,0.207459,0.191874,0.0,1.000000,0.666642,0.607739
180,94085,9.054792e+07,47415.860456,POLYGON ((-121.98688800346149 37.3886580004076...,San Jose-Sunnyvale-Santa Clara,3180,2862,3498,3610,3249,...,"$183,563",0.159836,0.182759,0.230321,0.191142,0.169300,0.0,0.667988,0.666642,0.607739
181,94086,1.289431e+08,65560.361543,POLYGON ((-122.05999500035823 37.3752980004996...,San Jose-Sunnyvale-Santa Clara,2910,2619,3201,3310,2979,...,"$180,217",0.270492,0.286207,0.332362,0.300699,0.282167,0.0,0.651266,0.666642,0.607739
182,94024,2.966973e+08,136286.642980,POLYGON ((-122.09598200263689 37.3860539999608...,San Jose-Sunnyvale-Santa Clara,3260,2934,3586,3710,3339,...,"$250,000+",0.127049,0.148276,0.195335,0.156177,0.130926,0.0,1.000000,0.666642,0.607739


In [64]:
# Inspect the crime columns side by side: the 2020 and 2024 rates plus the change
# columns, for violent (VIOL) and property (PROP) crime.
# NOTE(review): in the displayed rows the CHANGE_IN_CRIME_*% values equal the 2024 rate
# as a percentage of the 2020 rate (e.g. 0.003132 / 0.003905 ≈ 80.2), not a signed
# percent change — confirm before interpreting them.
final_df[['2020_CRIMERATE_VIOL', '2024_CRIMERATE_VIOL',
       'CHANGE_IN_CRIME_VIOL%', '2020_CRIMERATE_PROP', '2024_CRIMERATE_PROP',
       'CHANGE_IN_CRIME_PROP%']]

Unnamed: 0,2020_CRIMERATE_VIOL,2024_CRIMERATE_VIOL,CHANGE_IN_CRIME_VIOL%,2020_CRIMERATE_PROP,2024_CRIMERATE_PROP,CHANGE_IN_CRIME_PROP%
0,0.003905,0.003132,80.206877,0.016708,0.012410,74.272000
1,0.003905,0.003132,80.206877,0.016708,0.012410,74.272000
2,0.005181,0.004771,92.080192,0.023567,0.022392,95.011486
3,0.004525,0.002793,61.724677,0.013116,0.010903,83.124163
4,0.004525,0.002793,61.724677,0.013116,0.010903,83.124163
...,...,...,...,...,...,...
179,0.003172,0.004290,135.269837,0.022564,0.023550,104.368869
180,0.003172,0.004290,135.269837,0.022564,0.023550,104.368869
181,0.003172,0.004290,135.269837,0.022564,0.023550,104.368869
182,0.003172,0.004290,135.269837,0.022564,0.023550,104.368869
