In [1]:
# base libraries
import requests, json, os
import pandas as pd
import numpy as np
from collections import defaultdict

# graph libraries
import plotly.express as px
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [2]:
# list of attribute ids
attribute_lookup_df = pd.read_csv (r'./attribute_lookup.csv', dtype=str)
attribute_ids_extracted = attribute_lookup_df['attribute_id'].tolist()
attribute_ids = []
for attribute_id in attribute_ids_extracted:
    attribute_ids.extend(attribute_id.split(", "))
attribute_ids = list(set([x+"E" for x in attribute_ids]))

In [3]:
# function builds the api URL from tract_code, state_code, county_code, and attribute ids. 
def build_census_url(tract_code, state_code, county_code, attribute_ids, year):
    attributes = ','.join(attribute_ids)
    census_url = r'https://api.census.gov/data/{}/acs/acs5?get={}&for=tract:{}&in=state:{}&in=county:{}'\
                .format(year, attributes, tract_code, state_code, county_code)
    return census_url

In [4]:
# function makes a single api call and collects results in a pandas dataframe
def make_census_api_call(census_url):
    # make API call to Census
    resp = requests.get(census_url)
    if resp.status_code != 200:
        # this means something went wrong
        resp.raise_for_status()
       
    # retrieve data as json and convert to Pandas Dataframe
    data = resp.json()
    headers = data.pop(0)
    df = pd.DataFrame(data, columns=headers)

    # convert values that are not state, county, or tract to numeric type
    cols=[i for i in df.columns if i not in ["state","county","tract"]]
    for col in cols:
        df[col]=pd.to_numeric(df[col])
        
    return df

In [5]:
def return_merged(split_attribute_ids, tract_code, state_code, county_code, year):
    df=None
    first = True
    for ids in split_attribute_ids:
        census_url = build_census_url(tract_code, state_code, county_code,ids, year)
        returned_df = make_census_api_call(census_url)
        if first:
            df = returned_df
            first = False
        else:
            returned_df = returned_df.drop(columns=['state', 'county'])
            df = pd.merge(df, returned_df, on='tract', how='left')
    return df

In [11]:
# set geo variables for api call
tract_code = "*"
state_code = "06"
county_code = "075"
years = ["2015","2016","2017", "2018", "2019"]

# split attributes into groups of 45, run a census query for each, merge outputs into a single df
split_attribute_ids = [attribute_ids[i:i+45] for i in range(0, len(attribute_ids), 45)]
df_list = []
for year in years:
    print(year)
    df = return_merged(split_attribute_ids, tract_code, state_code, county_code, year)
    df['year'] = year
    df_list.append(df)

df_concat = pd.concat(df_list, ignore_index=True)


2015
2016
2017
2018
2019


In [12]:
# import geo_lookup csv
geo_lookup_df = pd.read_csv (r'./geo_lookup.csv', dtype=str)

tract_nb_lookup = defaultdict(list)
tract_sd_lookup = defaultdict(list)

# create tract lookup dictionary for neighborhoods
for i, j in zip(geo_lookup_df['neighborhood'], geo_lookup_df['tractid']):
    tract_nb_lookup[i].append(j)

# create tract lookup dictionary for supervisor districts
for i, j in zip(geo_lookup_df['supervisor_district'], geo_lookup_df['tractid']):
    tract_sd_lookup[i].append(j)

In [13]:
# import median tables from median_ranges csv and add empty columns for rows 'households and 'cumulative_totals'
range_df = pd.read_csv (r'./median_ranges.csv')
range_df['households']=0
range_df['cumulative_total']=0
range_df.head()

Unnamed: 0,name,id,range_start,range_end,households,cumulative_total
0,median_household_income,B19001_002E,2500.0,9999.0,0,0
1,median_household_income,B19001_003E,10000.0,14999.0,0,0
2,median_household_income,B19001_004E,15000.0,19999.0,0,0
3,median_household_income,B19001_005E,20000.0,24999.0,0,0
4,median_household_income,B19001_006E,25000.0,29999.0,0,0


In [14]:
# define median helper function
def calc_median(tract_df, range_df, median_to_calc):
    
    # subset range df for current median variable to calc
    range_df = range_df[range_df['name']==median_to_calc]
    
    # sort dataframe low to high by range start column
    range_df = range_df.sort_values(by=['range_start'])
    
    # calculate households as sum of tract level households for each row based on range id
    range_df['households'] = range_df.apply(lambda row : tract_df[row['id']].sum(), axis = 1)
    
    # calculate the cumulative total of households
    range_df['cumulative_total'] = range_df['households'].cumsum()
    
    # calculate total households and return 0 if total households is 0
    total_households = range_df['households'].sum()
    
    # if total households is 0 set median to 0
    if total_households == 0:
        return 0
    
    # calculate midpoint
    midpoint = total_households/2

    # if midpoint is below first range return median as end of first range value
    if midpoint < range_df['cumulative_total'].min():
        new_median = range_df['range_end'].min()
        return new_median
    
    # if midpoint is above last range set median to end of last range value
    if midpoint > range_df['cumulative_total'].max():
        new_median = range_df['range_end'].max()
        return new_median
    
    less_midpoint_df = range_df[range_df['cumulative_total']<midpoint]
    
    # get the single row containing the range just below the mid range by getting the row with the max range start from the subsetted median df
    range_below_mid_range_df = less_midpoint_df[less_midpoint_df['range_start'] == less_midpoint_df['range_start'].max()]
    
    # get the cumulative total value for the first row of the range below mid range dictionary
    total_hh_previous_range = range_below_mid_range_df['cumulative_total'].iloc[0]
    hh_to_mid_range = midpoint - total_hh_previous_range
    
    # extract rows above midrange by subsetting median df for rows with cumulative total grearter than midpoint.
    greater_midpoint_df = range_df[range_df['cumulative_total']>midpoint]
    
    # get the single row containing the mid range by getting the row with the min range start from the subsetted median df
    mid_range_df = greater_midpoint_df[greater_midpoint_df['range_start'] == greater_midpoint_df['range_start'].min()]
    
    # get the households value for the first row of the mid range dictionary
    hh_in_mid_range = mid_range_df['households'].iloc[0]
    
    # calculate proportion of number of households in the mid range that would be needed to get to the mid-point
    prop_of_hh = hh_to_mid_range/hh_in_mid_range
    
    # calculate width of the mid range
    width = (mid_range_df['range_end'].iloc[0]-mid_range_df['range_start'].iloc[0])+1
    
    # apply proportion to width of mid range
    prop_to_width = prop_of_hh*width
    beginning_of_mid_range = mid_range_df['range_start'].iloc[0]
    
    # calculate new median
    new_median = beginning_of_mid_range + prop_to_width
    
    return new_median

In [15]:
# define other helper functions
def calc_sum(df, attribute_id):
    return df[attribute_id].sum()

def calc_normalized(df, attribute_id, attribute_id2):
    if df[attribute_id2].sum() == 0:
        return 0
    else:
        return (df[attribute_id].sum()/df[attribute_id2].sum())

def calc_sum_normalized(df, attribute_list, attribute_id2):
    if df[attribute_id2].sum()==0:
        return 0
    else:
        sum_of_attributes = 0
        for attribute_id in attribute_list:
            sum_of_attributes+=df[attribute_id].sum()
        return sum_of_attributes/df[attribute_id2].sum()


In [16]:
# function runs all calcs for each neighborhood or supervisor district
def calc_socio_economic_data(df, tract_lookup):
    # create empty dictionary to add calculated attribute information to
    all_calc_data = defaultdict(dict) 
    # calculate all stats for each neighborhood
    for nb_name, tracts in tract_lookup.items():
        # extract attribute information for tracks associated with a neighborhood
        tract_df = df[df['tract'].isin(tracts)]
        # build dictionary with all stats for a neighborhood
        all_calc_data_nb = all_calc_data[nb_name]
        # population
        all_calc_data_nb["Total Population"] = calc_sum(tract_df, 'B01001_001E')
        all_calc_data_nb["Group Quarter Population"] = calc_sum(tract_df, 'B26001_001E')
        all_calc_data_nb["Percent Female"] = calc_normalized(tract_df, 'B01001_026E', 'B01001_001E')
        # household stats
        all_calc_data_nb["Housholds"] = calc_sum(tract_df, 'B11001_001E')
        all_calc_data_nb["Family Households"] = calc_normalized(tract_df, 'B11001_002E', 'B11001_001E')
        all_calc_data_nb["Non-Family Households"] = calc_normalized(tract_df, 'B11001_007E', 'B11001_001E')
        all_calc_data_nb["Single Person Households, % of Total"] = calc_normalized(tract_df, 'B11001_008E', 'B11001_001E')
        all_calc_data_nb["Households with Children, % of Total"] = calc_normalized(tract_df, 'B11005_002E', 'B11001_001E')
        all_calc_data_nb["Households with 60 years and older, % of Total"] = calc_normalized(tract_df, 'B11006_002E', 'B11001_001E')
        all_calc_data_nb["Average Household Size"] = calc_normalized(tract_df, 'B11002_001E', 'B11001_001E')
        all_calc_data_nb["Average Family Household Size"] = calc_normalized(tract_df, 'B11002_002E', 'B11001_002E')
        # race and ethnicity stats
        all_calc_data_nb["Asian"] = calc_normalized(tract_df, 'B02001_005E', 'B02001_001E')
        all_calc_data_nb["Black/African American"] = calc_normalized(tract_df, 'B02001_003E', 'B02001_001E')
        all_calc_data_nb["White"] = calc_normalized(tract_df, 'B02001_002E', 'B02001_001E')
        all_calc_data_nb["Native American Indian"] = calc_normalized(tract_df, 'B02001_005E', 'B02001_001E')
        all_calc_data_nb["Native Hawaiian/Pacific Islander"] = calc_normalized(tract_df, 'B02001_006E', 'B02001_001E')
        all_calc_data_nb["Other/Two or More Races"] = calc_sum_normalized(tract_df, ['B02001_008E', 'B02001_007E'], 'B02001_001E')
        all_calc_data_nb["% Latino (of Any Race)"] = calc_normalized(tract_df, 'B03001_003E', 'B03001_001E')
        # age
        all_calc_data_nb["0-4 Years"] = calc_sum_normalized(tract_df, ['B01001_003E', 'B01001_027E'], 'B01001_001E')
        all_calc_data_nb["5-17 Years"] = calc_sum_normalized(tract_df, ['B01001_004E', 'B01001_005E', 'B01001_006E', 'B01001_028E', 'B01001_029E', 'B01001_030E'],'B01001_001E')
        all_calc_data_nb["18-34 Years"] = calc_sum_normalized(tract_df, ['B01001_007E','B01001_008E','B01001_009E', 'B01001_010E', 'B01001_011E', 'B01001_012E','B01001_031E','B01001_032E','B01001_033E','B01001_034E','B01001_035E','B01001_036E'], 'B01001_001E')
        all_calc_data_nb["35-59 Years"] = calc_sum_normalized(tract_df, ['B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E', 'B01001_037E', 'B01001_038E', 'B01001_039E', 'B01001_040E', 'B01001_041E'], 'B01001_001E')
        all_calc_data_nb["60 and Older"] = calc_sum_normalized(tract_df, ['B01001_018E', 'B01001_019E', 'B01001_020E', 'B01001_021E', 'B01001_022E', 'B01001_023E', 'B01001_024E', 'B01001_025E', 'B01001_042E', 'B01001_043E', 'B01001_044E', 'B01001_045E', 'B01001_046E', 'B01001_047E', 'B01001_048E', 'B01001_049E'], 'B01001_001E')
        #all_calc_data_nb["Median Age"]
        # educationa attainment
        all_calc_data_nb["High School or Less"] = calc_sum_normalized(tract_df, ['B15003_002E', 'B15003_003E', 'B15003_004E', 'B15003_005E', 'B15003_006E', 'B15003_007E', 'B15003_008E', 'B15003_009E', 'B15003_010E', 'B15003_011E', 'B15003_012E', 'B15003_013E', 'B15003_014E', 'B15003_015E', 'B15003_016E', 'B15003_017E', 'B15003_018E'], 'B15003_001E')
        all_calc_data_nb["Some College/Associate Degree"] = calc_sum_normalized(tract_df, ['B15003_019E', 'B15003_020E', 'B15003_021E'], 'B15003_001E')
        all_calc_data_nb["College Degree"] = calc_normalized(tract_df, 'B15003_022E', 'B15003_001E')
        all_calc_data_nb["Graduate/Professional Degree"] = calc_sum_normalized(tract_df, ['B15003_023E', 'B15003_024E', 'B15003_025E'], 'B15003_001E')
        # nativity
        all_calc_data_nb["Foreign Born"] = calc_normalized(tract_df, 'B05002_013E', 'B05002_001E')
        # language spoken at home
        all_calc_data_nb["English Only"] = calc_sum_normalized(tract_df, ['B16007_003E', 'B16007_009E', 'B16007_015E'], 'B16007_001E')
        all_calc_data_nb["Spanish Only"] = calc_sum_normalized(tract_df, ['B16007_004E', 'B16007_010E', 'B16007_016E'], 'B16007_001E')
        all_calc_data_nb["Asian/Pacific Islander"] = calc_sum_normalized(tract_df, ['B16007_006E', 'B16007_012E', 'B16007_018E'], 'B16007_001E')
        all_calc_data_nb["Other European Languages"] = calc_sum_normalized(tract_df, ['B16007_005E', 'B16007_011E', 'B16007_017E'], 'B16007_001E')
        all_calc_data_nb["Other Languages"] = calc_sum_normalized(tract_df, ['B16007_007E', 'B16007_013E', 'B16007_019E'], 'B16007_001E')
        # linguistic isolation
        all_calc_data_nb["% of All Households"] = calc_sum_normalized(tract_df, ['B16003_002E', 'B16003_008E'], 'B16004_001E')
        all_calc_data_nb["% of Spanish-Speaking Households"] = calc_sum_normalized(tract_df, ['B16003_004E', 'B16003_009E'], 'B16004_001E')
        all_calc_data_nb["% of Asian-Speaking Households"] = calc_sum_normalized(tract_df, ['B16003_006E', 'B16003_011E'], 'B16004_001E')
        all_calc_data_nb["% of Other European-Speaking Households"] = calc_sum_normalized(tract_df, ['B16003_005E', 'B16003_010E'], 'B16004_001E')
        all_calc_data_nb["% of Households Speaking Other Languages"] = calc_sum_normalized(tract_df, ['B16003_007E', 'B16003_012E'], 'B16004_001E')
        # housing
        all_calc_data_nb["Total Number of Units"] = calc_sum(df, 'B25001_001E')
        all_calc_data_nb["Median Year Structure Built"] = calc_median(tract_df, range_df, 'median_year_structure_built')
        all_calc_data_nb["Owner Occupied"] = calc_normalized(tract_df, 'B25007_002E', 'B25007_001E')
        all_calc_data_nb["Renter Occupied"] = calc_normalized(tract_df, 'B25007_012E', 'B25007_001E')
        all_calc_data_nb["Vacant Units"] = calc_normalized(tract_df, 'B25004_001E', 'B25001_001E')
        all_calc_data_nb["For Rent"] = calc_normalized(tract_df, 'B25004_002E', 'B25004_001E')
        all_calc_data_nb["For Sale Only"] = calc_normalized(tract_df, 'B25004_004E', 'B25004_001E')
        all_calc_data_nb["Rented or Sold, Not Occupied"] = calc_sum_normalized(tract_df, ['B25004_003E', 'B25004_005E'], 'B25004_001E')
        all_calc_data_nb["For Seasonal, Recreation or Occasional Use"] = calc_normalized(tract_df, 'B25004_006E', 'B25004_001E')
        all_calc_data_nb["Other Vacant"] = calc_normalized(tract_df, 'B25004_008E', 'B25004_001E')
        all_calc_data_nb["Median Year Moved in to Unit (Own)"] = calc_median(tract_df, range_df, 'median_year_moved_owner')
        all_calc_data_nb["Median Year Moved in to Unit (Rent)"] = calc_median(tract_df, range_df, 'median_year_moved_renter')
        all_calc_data_nb["Percent in Same House Last Year"] = calc_normalized(tract_df, 'B07001_017E', 'B07001_001E')
        all_calc_data_nb["Percent Abroad Last Year"] = calc_normalized(tract_df, 'B07003_016E', 'B07003_001E')
        # structure type
        all_calc_data_nb["Single Family Housing"] = calc_sum_normalized(tract_df, ['B25024_002E', 'B25024_003E'], 'B25024_001E')
        all_calc_data_nb["2-4 Units"] = calc_sum_normalized(tract_df, ['B25024_004E', 'B25024_005E'], 'B25024_001E')
        all_calc_data_nb["5-9 Units"] = calc_normalized(tract_df, 'B25024_006E', 'B25024_001E')
        all_calc_data_nb["10-19 Units"] = calc_normalized(tract_df, 'B25024_007E', 'B25024_001E')
        all_calc_data_nb["20 Units or More"] = calc_sum_normalized(tract_df, ['B25024_008E', 'B25024_009E'], 'B25024_001E')
        all_calc_data_nb["Other"] = calc_sum_normalized(tract_df, ['B25024_010E', 'B25024_011E'], 'B25024_001E')
        # unit size
        all_calc_data_nb["No Bedroom"] = calc_normalized(tract_df,'B25041_002E', 'B25041_001E')
        all_calc_data_nb["1 Bedroom"] = calc_normalized(tract_df, 'B25041_003E', 'B25041_001E')
        all_calc_data_nb["2 Bedrooms"] = calc_normalized(tract_df, 'B25041_004E', 'B25041_001E')
        all_calc_data_nb["3-4 Bedrooms"] = calc_sum_normalized(tract_df, ['B25041_005E', 'B25041_006E'], 'B25041_001E')
        all_calc_data_nb["5 Bedrooms"] = calc_normalized(tract_df, 'B25041_007E', 'B25041_001E')
        # housing prices
        all_calc_data_nb["Median Rent"] = calc_median(tract_df, range_df, 'median_rent')
        all_calc_data_nb["Median Contract Rent"] = calc_median(tract_df, range_df, 'median_rent_contract')
        all_calc_data_nb["Median Rent as % of Household Income"] = calc_median(tract_df, range_df, 'median_rent_percent_of_income')
        all_calc_data_nb["Median Home Value"] = calc_median(tract_df, range_df, 'median_home_value')
        # vehicles available
        all_calc_data_nb["Vehicles Available"] = calc_sum(df, 'B25046_001E')
        all_calc_data_nb["Vehicles Homeowners"] = calc_normalized(tract_df, 'B25046_002E', 'B25046_001E')
        all_calc_data_nb["Vehicles Renters"] = calc_normalized(tract_df, 'B25046_003E', 'B25046_001E')
        all_calc_data_nb["Vehicles Per Capita"] = calc_normalized(tract_df, 'B25046_001E', 'B01001_001E')
        all_calc_data_nb["Households with no Vehicle"] = calc_sum_normalized(tract_df, ['B25044_003E', 'B25044_010E'], 'B25044_001E')
        all_calc_data_nb["Percent of Homeowning Households"] = calc_normalized(tract_df, 'B25044_003E', 'B25044_002E')
        all_calc_data_nb["Percent of Renting Households"] = calc_normalized(tract_df, 'B25044_010E', 'B25044_009E')
        # income
        all_calc_data_nb["Median Household Income"] = calc_median(tract_df, range_df, 'median_household_income')
        all_calc_data_nb["Median Family Income"] = calc_median(tract_df, range_df, 'median_family_income')
        all_calc_data_nb["Per Capita Income"] = calc_normalized(tract_df, 'B19025_001E', 'B01001_001E')
        all_calc_data_nb["Percent in Poverty"] = calc_normalized(tract_df, 'B17001_002E', 'B17001_001E')
        # employment
        all_calc_data_nb["Unemployment Rate"] = calc_normalized(tract_df, 'B23025_005E', 'B23025_002E')
        all_calc_data_nb["Percent Unemployment Female"] = calc_sum_normalized(tract_df, ['B23001_094E', 'B23001_101E', 'B23001_108E', 'B23001_115E', 'B23001_122E', 'B23001_129E', 'B23001_136E', 'B23001_143E', 'B23001_150E', 'B23001_157E', 'B23001_162E', 'B23001_167E', 'B23001_172E', 'B23001_090E', 'B23001_097E', 'B23001_104E', 'B23001_111E', 'B23001_118E', 'B23001_125E', 'B23001_132E', 'B23001_139E', 'B23001_146E', 'B23001_153E', 'B23001_160E', 'B23001_165E', 'B23001_170E'], 'B23025_002E')
        all_calc_data_nb["Percent Unemployment Male"] = calc_sum_normalized(tract_df, ['B23001_008E', 'B23001_015E', 'B23001_022E', 'B23001_029E', 'B23001_036E', 'B23001_043E', 'B23001_050E', 'B23001_057E', 'B23001_064E', 'B23001_071E', 'B23001_076E', 'B23001_081E', 'B23001_086E', 'B23001_004E', 'B23001_011E', 'B23001_018E', 'B23001_025E', 'B23001_032E', 'B23001_039E', 'B23001_046E', 'B23001_053E', 'B23001_060E', 'B23001_067E', 'B23001_074E', 'B23001_079E', 'B23001_084E'], 'B23025_002E')
        all_calc_data_nb["Employed Residents"] = calc_sum(df, 'C24050_001E')
        all_calc_data_nb["Managerial Professional"] = calc_normalized(tract_df, 'C24050_015E', 'C24050_001E')
        all_calc_data_nb["Services"] = calc_normalized(tract_df, 'C24050_029E', 'C24050_001E')
        all_calc_data_nb["Sales and Office"] = calc_normalized(tract_df, 'C24050_043E', 'C24050_001E')
        all_calc_data_nb["Natural Resources"] = calc_normalized(tract_df, 'C24050_057E', 'C24050_001E')
        all_calc_data_nb["Production Transport Materials"] = calc_normalized(tract_df, 'C24050_071E', 'C24050_001E')
        # journey to work
        all_calc_data_nb["Workers 16 Years and Older"] = calc_sum(df, 'B08006_001E')
        all_calc_data_nb["Car"] = calc_normalized(tract_df, 'B08006_002E', 'B08006_001E')
        all_calc_data_nb["Drove Alone"] = calc_normalized(tract_df, 'B08006_003E', 'B08006_001E')
        all_calc_data_nb["Carpooled"] = calc_normalized(tract_df, 'B08006_004E', 'B08006_001E')
        all_calc_data_nb["Transit"] = calc_normalized(tract_df, 'B08006_008E', 'B08006_001E')
        all_calc_data_nb["Bike"] = calc_normalized(tract_df, 'B08006_014E', 'B08006_001E')
        all_calc_data_nb["Walk"] = calc_normalized(tract_df, 'B08006_015E', 'B08006_001E')
        all_calc_data_nb["Other"] = calc_normalized(tract_df, 'B08006_016E', 'B08006_001E')
        all_calc_data_nb["Worked at Home"] = calc_normalized(tract_df, 'B08006_017E', 'B08006_001E')
        # population density
        all_calc_data_nb["Population Density per Acre"] = calc_sum(df, 'B01001_001E')
            
    #return calc dictionary
    return all_calc_data

In [17]:
# set geography to summarize by. If supervisor districts set geo_summary_variable to "Superisor District"
geo_summary_variable = 'Neighborhood' #'Supervisor District'

# set path to download csvs
download_path = r"./"

# sets geo variables based on above choice
if geo_summary_variable == 'Neighborhood':
    tract_lookup = tract_nb_lookup
    geo_path = os.path.join(os.path.dirname(os.path.abspath("__file__")), 'neighborhoods', 'neighborhoods.shp')
    geo_merge_variable = 'nhood'
elif geo_summary_variable == 'Supervisor District':
    tract_lookup = tract_sd_lookup
    geo_path = os.path.join(os.path.dirname(os.path.abspath("__file__")), 'supervisor_districts', 'supervisor_districts.shp')
    geo_merge_variable = 'sup_dist'

In [18]:
calc_list = []
for year in years:
    df_year = df_concat[df_concat['year']==year]
    
    all_calc_data = calc_socio_economic_data(df_year, tract_lookup)
    df_all_calcs = pd.DataFrame.from_dict(all_calc_data).reset_index()
    df_all_calcs.rename(columns = {'index':'Attribute'}, inplace = True) 
    df_all_calcs['year'] = year
    calc_list.append(df_all_calcs)

calc_concat = pd.concat(calc_list, ignore_index=True)
calc_concat.head()

Unnamed: 0,Attribute,North Beach,Russian Hill,Financial District,Chinatown,Nob Hill,Tenderloin,Marina,Pacific Heights,Presidio Heights,...,Lakeshore,Inner Richmond,Outer Richmond,Seacliff,Presidio,Mission Bay,Lincoln Park,Golden Gate Park,McLaren Park,year
0,Total Population,12550.0,18179.0,16735.0,14336.0,26382.0,28820.0,24915.0,24737.0,10577.0,...,13469.0,22425.0,45120.0,2491.0,3681.0,9979.0,330.0,78.0,880.0,2015
1,Group Quarter Population,17.0,5.0,539.0,49.0,1033.0,1337.0,99.0,518.0,284.0,...,2446.0,128.0,221.0,0.0,0.0,390.0,204.0,0.0,93.0,2015
2,Percent Female,0.455697,0.524946,0.47326,0.510045,0.484724,0.387023,0.535822,0.514856,0.544483,...,0.528102,0.522096,0.518639,0.538739,0.487639,0.46187,0.257576,0.423077,0.581818,2015
3,Housholds,6478.0,10051.0,9783.0,6678.0,15448.0,16944.0,13845.0,13706.0,4795.0,...,4736.0,9486.0,18519.0,920.0,1269.0,4794.0,70.0,65.0,274.0,2015
4,Family Households,0.357672,0.310118,0.333333,0.504942,0.230451,0.234242,0.315132,0.326572,0.467362,...,0.361698,0.45836,0.538204,0.743478,0.49803,0.440968,0.3,0.0,0.686131,2015


In [19]:
melted = pd.melt(calc_concat, id_vars=["Attribute", "year"], value_vars=calc_concat.columns[1:-1])
melted


Unnamed: 0,Attribute,year,variable,value
0,Total Population,2015,North Beach,12550.000000
1,Group Quarter Population,2015,North Beach,17.000000
2,Percent Female,2015,North Beach,0.455697
3,Housholds,2015,North Beach,6478.000000
4,Family Households,2015,North Beach,0.357672
...,...,...,...,...
19675,Transit,2019,McLaren Park,0.350000
19676,Bike,2019,McLaren Park,0.000000
19677,Walk,2019,McLaren Park,0.091667
19678,Worked at Home,2019,McLaren Park,0.000000


In [21]:
def f(Neighborhood, Attribute):
    melted = pd.melt(calc_concat, id_vars=["Attribute", "year"], value_vars=calc_concat.columns[1:-1])
    melted = melted[melted["Attribute"]==Attribute]
    melted = melted[melted["variable"]==Neighborhood]
    fig = px.line(melted, x="year", y="value", color='variable')
    fig.show()
    
neighborhood = list(set(melted['variable'].tolist()))
attribute = list(set(melted['Attribute'].tolist()))
neighborhood.sort()
attribute.sort()

interact(f, Neighborhood=neighborhood, Attribute=attribute)

interactive(children=(Dropdown(description='Neighborhood', options=('Bayview Hunters Point', 'Bernal Heights',…

<function __main__.f(Neighborhood, Attribute)>

In [22]:
def f(Attribute):
    melted = pd.melt(calc_concat, id_vars=["Attribute", "year"], value_vars=calc_concat.columns[1:-1])
    melted = melted[melted["Attribute"]==Attribute]
    
    fig = px.line(melted, x="year", y="value", color='variable')
    fig.show()
    
attribute = list(set(melted['Attribute'].tolist()))

attribute.sort()

interact(f, Attribute=attribute)

interactive(children=(Dropdown(description='Attribute', options=('% Latino (of Any Race)', '% of All Household…

<function __main__.f(Attribute)>