In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point, Polygon
import matplotlib.pyplot as plt
from fiona.crs import from_epsg

In [2]:
import urllib, json, requests 
import geojson

In [3]:
from ipyleaflet import Map, GeoData, GeoJSON, basemaps, basemap_to_tiles, Icon, Circle, Marker, LayerGroup, WidgetControl
import ipywidgets as widgets
from ipywidgets import Button, Layout
from IPython.display import display, clear_output, Markdown as md

In [4]:
# Raise NumPy's print threshold to the largest possible value so arrays are
# always rendered in full instead of being summarised with "...".
# Later cells convert arrays to strings (`astype(str)`) and search inside that
# text, so a truncated rendering would hide values.
import sys
np.set_printoptions(threshold=sys.maxsize)

# for automatic linebreaks and multi-line cells
# pd.options.display.max_colwidth = 10000

In [5]:
# Draw the basic map: a CartoDB Positron tile layer centred on `center`
# (latitude, longitude), opening at zoom 15 and limited to zoom levels 7-20.
# `center` is reused below as the initial marker location.
center = (40.7210907,-73.9877836)
basemap = basemap_to_tiles(basemaps.CartoDB.Positron)

m = Map(layers=(basemap, ), center=center, zoom=15, min_zoom = 7, max_zoom = 20)

In [6]:
def extract_location():
    """Turn the current marker position into a one-row point GeoDataFrame.

    Reads the module-level ``markerlocation`` (indexed as ``[0]`` = latitude,
    ``[1]`` = longitude) and publishes three globals:

    * ``lat`` / ``lon`` -- the coordinates as strings
    * ``gdf`` -- a GeoDataFrame with ``Latitude``/``Longitude`` columns and a
      point geometry, tagged ``epsg:4326``

    Returns:
        The same GeoDataFrame that is stored in the global ``gdf``.
    """
    global gdf, lat, lon

    lat, lon = str(markerlocation[0]), str(markerlocation[1])

    # A flat [lat, lon] sequence becomes a 2x1 frame; flip it into one row.
    coords = pd.DataFrame(markerlocation).T
    coords.columns = ['Latitude', 'Longitude']

    # points_from_xy expects x (longitude) first, then y (latitude).
    points = gpd.points_from_xy(coords.Longitude, coords.Latitude)
    gdf = gpd.GeoDataFrame(coords, geometry=points, crs='epsg:4326')

    return gdf

In [7]:
# Marker appearance settings; update_marker() reuses them for every marker it
# creates after a click.
draggable=False
marker_opacity=1
icon = Icon(icon_url='icon.png', icon_size=[15, 15])

# Initial marker sits at the map centre.
marker = Marker(location=center, draggable=draggable, icon=icon, opacity=marker_opacity)

# Module-level copy of the marker position; extract_location() reads it.
markerlocation = marker.location 

# All dynamic layers (marker and buffer mask) live in one group so they can be
# cleared together when the user clicks a new location.
layer_group = LayerGroup(layers=(marker, ))
m.add_layer(layer_group)
   
def update_marker(**kwargs):
    """Map interaction callback: move the marker to the clicked location.

    Only events whose ``type`` is ``'click'`` are handled; everything else is
    ignored. On a click the layer group is emptied, a new marker is placed at
    the event's ``coordinates``, the global ``markerlocation`` is updated, the
    buffer overlay is redrawn, and both output widgets are cleared.

    Args:
        **kwargs: Interaction event fields; ``type`` and ``coordinates`` are
            the ones read here, and all of them are forwarded to
            ``draw_update_buffer``.
    """
    global markerlocation

    if kwargs.get('type') != 'click':
        return

    layer_group.clear_layers()

    clicked_marker = Marker(
        location=kwargs.get('coordinates'),
        draggable=draggable,
        icon=icon,
        opacity=marker_opacity,
        options=['rise_on_hover'],
    )
    markerlocation = clicked_marker.location
    layer_group.add_layer(clicked_marker)

    draw_update_buffer(**kwargs)

    # The previously generated narrative/table no longer match the new site.
    for area in (data_output, output):
        area.clear_output()

In [8]:
def draw_update_buffer(**kwargs):
    """Redraw the shaded mask that surrounds the buffer around the marker.

    Registers ``update_marker`` as the map's interaction handler, refreshes the
    marker GeoDataFrame via ``extract_location()``, and stores the buffered
    site polygon in the global ``half_mi``. The layer that is added to
    ``layer_group`` is the *difference* between a very large buffer and the
    site buffer, i.e. everything outside the site is shaded.

    Buffer distances are expressed in the units of the ``epsg:4326`` frame
    (degrees): .004 for the site and 1 for the outer extent.
    NOTE(review): the name ``half_mi`` suggests .004 degrees is meant to
    approximate half a mile -- confirm.

    Args:
        **kwargs: Accepted so the function can be called with the interaction
            event fields; they are not used here.
    """
    global half_mi

    m.on_interaction(update_marker)
    site_point = extract_location()  # same object as the global ``gdf``

    half_mi = site_point.copy()
    half_mi['geometry'] = half_mi.geometry.buffer(.004, cap_style=1, join_style=1)

    map_extent = site_point.copy()
    map_extent['geometry'] = map_extent.buffer(1, cap_style=1, join_style=1)

    outside_site = gpd.overlay(map_extent, half_mi, how='difference')

    mask_style = {
        'color': "black",
        'fillColor': "#000000",
        'fillOpacity': .2,
        'opacity': 1,
        'weight': 2,
    }
    mask_layer = GeoData(geo_dataframe=outside_site, style=mask_style,
                         name="Test", crs='epsg:4326')

    layer_group.add_layer(mask_layer)

draw_update_buffer()

In [9]:
def import_censustracts():
    """Download the census tracts that intersect the site buffer's bounding box.

    Queries layer 4 of the TIGERweb ``Tracts_Blocks`` map service page by page
    (32 records per request) until an empty page is returned, then stores the
    collected features in the global GeoDataFrame ``tracts``.

    Side effects:
        * calls ``extract_location()`` (refreshes ``gdf``, ``lat``, ``lon``)
        * sets the global ``tracts``

    Raises:
        requests.HTTPError: if the service answers with an HTTP error status.
        requests.Timeout: if a page is not received within the timeout.
    """
    global tracts

    extract_location()

    # Bounding box of the buffered site. The values are converted to plain
    # Python floats and joined by hand: ``str()`` on a tuple of NumPy scalars
    # renders as "(np.float64(...), ...)" on NumPy >= 2, which is not a valid
    # envelope for the service.
    minx, miny, maxx, maxy = (float(v) for v in half_mi.envelope.total_bounds)
    envelope = f'{minx},{miny},{maxx},{maxy}'

    # census tracts link
    endpoint = 'https://tigerweb.geo.census.gov/arcgis/rest/services/TIGERweb/Tracts_Blocks/MapServer/4/query'
    page_size = 32

    features = []
    crs = None
    start = 0

    # The context manager closes the session's connections when done.
    with requests.session() as s:
        s.params = {
            'geometry': envelope,
            'geometryType': 'esriGeometryEnvelope',
            'inSR': 4326,
            'spatialRel': 'esriSpatialRelIntersects',
            'outFields': 'GEOID,STATE,COUNTY,TRACT,NAME,STGEOMETRY,OBJECTID',
            'returnGeometry': True,
            'f': 'geojson',
        }
        while True:
            r = s.get(endpoint, params={
                'resultOffset': start,
                'resultRecordCount': page_size,
            }, timeout=60)
            r.raise_for_status()

            censusgeo = geojson.loads(r.text)
            newfeats = censusgeo.__geo_interface__['features']
            if not newfeats:
                break  # an empty page marks the end of the result set

            features.extend(newfeats)
            crs = censusgeo.__geo_interface__['crs']
            start += len(newfeats)

    tracts = gpd.GeoDataFrame.from_features(features, crs=crs)

In [10]:
def download_acs():
    """Download ACS 2018 5-year subject-table estimates for the tracts in ``tracts``.

    The variable list is split across two requests (``acs_variables_initial``
    and ``acs_variables_additional``); the two responses are joined into the
    global DataFrame ``acs``. The full comma-separated variable list is stored
    in the global ``acs_variables``.

    The API key is read from the ``CENSUS_API_KEY`` environment variable when
    it is set; otherwise the key that was embedded in this notebook is used.
    NOTE(review): a credential committed to source should be rotated and the
    fallback removed.

    Raises:
        requests.HTTPError: if either request returns an HTTP error status.
    """
    import os

    global acs_variables, acs

    def _unique_codes(column):
        """Comma-separated unique values of a ``tracts`` column, without spaces."""
        values = tracts[column].unique().tolist()
        return ', '.join(map(str, values)).replace(" ", "")

    def _fetch(variables):
        """Request one batch of variables and return it as a DataFrame."""
        url = f'{acs_url}?&get={variables}&for=tract:{tract}&in=state:{state}%20county:{county}&key={api_key}'
        response = requests.get(url, timeout=60)
        response.raise_for_status()  # fail here rather than inside .json()
        rows = response.json()
        # First row of the payload is the header.
        return pd.DataFrame(rows[1:], columns=rows[0])

    state = _unique_codes("STATE")
    tract = _unique_codes("TRACT")
    county = _unique_codes("COUNTY")

    api_key = os.environ.get('CENSUS_API_KEY', '9330dc4bf086a84f19fb412bb15f232507301de6')
    acs_url = 'https://api.census.gov/data/2018/acs/acs5/subject/'

    acs_variables_initial = 'S1603_C02_002E,S1603_C02_003E,S1603_C02_004E,S1603_C04_002E,S1603_C04_003E,S1603_C04_004E,S1601_C01_005E,S1601_C01_006E,S1601_C01_007E,S1601_C01_009E,S1601_C01_010E,S1601_C01_011E,S1601_C01_013E,S1601_C01_014E,S1601_C01_015E,S1601_C01_017E,S1601_C01_018E,S1601_C01_019E,S1901_C01_002E,S1901_C01_003E,S1901_C01_004E,S1901_C01_005E,S1901_C01_006E,S1901_C01_007E,S1901_C01_008E,S1901_C01_009E,S1901_C01_010E,S1901_C01_011E,S1901_C04_002E,S1901_C04_003E,S1901_C04_004E,S1901_C04_005E,S1901_C04_006E,S1901_C04_007E,S1901_C04_008E,S1901_C04_009E,S1901_C04_010E,S1901_C04_011E'
    acs_variables_additional = 'S1501_C01_002E,S1501_C01_004E,S1501_C01_003E,S1501_C01_005E,S1501_C01_017E,S1501_C01_018E,S1501_C01_020E,S1501_C01_021E,S1501_C01_023E,S1501_C01_024E,S1501_C01_025E,S1501_C01_026E,S1501_C03_002E,S1501_C03_003E,S1501_C03_004E,S1501_C03_005E,S1501_C03_017E,S1501_C03_018E,S1501_C03_020E,S1501_C03_021E,S1501_C03_023E,S1501_C03_024E,S1501_C03_026E,S1501_C03_027E,S1501_C05_002E,S1501_C05_003E,S1501_C05_004E,S1501_C05_005E,S1501_C05_017E,S1501_C05_018E,S1501_C05_020E,S1501_C05_021E,S1501_C05_023E,S1501_C05_024E,S1501_C05_026E,S1501_C05_027E,S1401_C01_030E,S1401_C01_032E,S1401_C01_034E,S1101_C01_003E,S1101_C05_001E'
    acs_variables = acs_variables_initial + "," + acs_variables_additional

    acs_initial = _fetch(acs_variables_initial)
    acs_additional = _fetch(acs_variables_additional)

    # Join on every geography column the two responses share. Joining on
    # 'tract' alone pairs up unrelated tracts that happen to have the same
    # code in different counties/states and duplicates the state/county
    # columns with _x/_y suffixes.
    join_keys = [key for key in ('state', 'county', 'tract')
                 if key in acs_initial.columns and key in acs_additional.columns]

    acs = pd.merge(acs_initial, acs_additional, on=join_keys, how='left')

In [11]:
def clean_combine_census_and_geographic_data():
    """Apportion tract-level ACS estimates to the site buffer by area share.

    Steps:
        1. Refresh ``tracts`` and ``acs`` (``import_censustracts`` and
           ``download_acs``).
        2. Attach the ACS columns to each tract (join ``TRACT`` -> ``tract``).
        3. Intersect the tracts with ``half_mi`` and compute, per piece,
           ``ratio = clipped area / full tract area``.
        4. Convert every ACS variable to a number (non-numeric -> NaN) and
           scale it by ``ratio``.
        5. Sum each variable over all pieces.

    Sets the globals ``acs_site`` (the clipped, weighted pieces) and
    ``acs_site_sum`` (two columns: ``variables`` and ``sum_in_area``).
    Also adds an ``area`` column to the global ``tracts``.

    NOTE(review): the join in step 2 uses the tract code only; tract codes
    can presumably repeat across counties -- confirm for multi-county sites.
    """
    global acs_site_sum, acs_site

    import_censustracts()
    download_acs()

    tracts["area"] = tracts.area
    tracts_with_acs = pd.merge(tracts, acs, left_on='TRACT', right_on='tract', how='left')

    acs_site = gpd.overlay(half_mi, tracts_with_acs, how='intersection')
    acs_site["area_clipped"] = acs_site.area
    acs_site["ratio"] = acs_site["area_clipped"] / acs_site["area"]

    variable_columns = acs_variables.split(",")
    acs_site[variable_columns] = acs_site[variable_columns].apply(pd.to_numeric, errors='coerce', axis=1)

    # Weight every estimate by the share of its tract that lies inside the site.
    weighted = acs_site[variable_columns].mul(acs_site.ratio, axis=0)
    acs_site.update(weighted)

    acs_site_sum = pd.DataFrame(acs_site[variable_columns].sum()).reset_index()
    acs_site_sum.columns = ['variables', 'sum_in_area']

In [12]:
data_dict = pd.read_csv("data-dictionary.csv")
data_dict.head()

Unnamed: 0,sex,age_group,variable_group,variables,variable_name,ages
0,Male Female Both,5 to 17 years,Language Spoken At Home,S1603_C02_002E,Speak Only English at Home,"5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17"
1,Male Female Both,18 to 64 years,Language Spoken At Home,S1603_C02_003E,Speak Only English at Home,"18, 19, 20, 21, 22, 23, 24, 25,26, 27, 28, 29,..."
2,Male Female Both,65 years and over,Language Spoken At Home,S1603_C02_004E,Speak Only English at Home,"64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75..."
3,Male Female Both,5 to 17 years,Language Spoken At Home,S1601_C01_005E,Spanish,"5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17"
4,Male Female Both,18 to 64 years,Language Spoken At Home,S1601_C01_006E,Spanish,"18, 19, 20, 21, 22, 23, 24, 25,26, 27, 28, 29,..."


In [13]:
ALL = 'ALL'
def user_options_sorted_values_plus_ALL(array):
    """Build the option list for a selector: ``ALL`` first, then sorted values.

    The combined labels ``'Both'`` and ``'Male Female Both'`` are left out of
    the result because ``ALL`` stands in for them. Unlike ``list.remove``,
    filtering does not fail when one of those labels is absent from the data.

    Args:
        array: A pandas Series (anything exposing ``.unique()``).

    Returns:
        list: ``[ALL, <remaining unique values in ascending order>]``.
    """
    combined_labels = {'Both', 'Male Female Both'}
    options = sorted(value for value in array.unique().tolist()
                     if value not in combined_labels)
    return [ALL] + options

def user_options_sorted_values(array):
    """Return the distinct values of ``array`` as a list in ascending order.

    Args:
        array: A pandas Series (anything exposing ``.unique()``).
    """
    return sorted(array.unique().tolist())

In [14]:
def user_selection():
    """Create the input widgets and compute the initial variable selection.

    Sets these globals:
        * ``selected_age`` -- integer box, 5-99, initially 25
        * ``selected_gender`` -- toggle buttons built from ``data_dict.sex``,
          initially ``"Male"``
        * ``selected_percentile`` -- slider 0-100 in steps of 10, initially 50
        * ``selection_filter`` -- rows of ``data_dict`` whose ``ages`` list
          contains the selected age and whose ``sex`` label matches the
          selected sex (``'Both'`` when ``ALL`` is chosen)
        * ``variable_inputs`` -- the ``variables`` codes of those rows

    The age is matched as a whole number: a plain substring test would make
    age 5 match "15", "25", "50", and so on.
    """
    global selected_age, selected_gender, selected_percentile, text_generation_button, selection_filter, variable_inputs

    selected_age = widgets.BoundedIntText(min=5, max=99, value=25, step=1, description='AGE:')

    selected_gender = widgets.ToggleButtons(options=user_options_sorted_values_plus_ALL(data_dict.sex),
                                            value="Male",
                                            description='SEX:',
                                            disabled=False, button_style='')

    selected_percentile = widgets.IntSlider(min=0, max=100, step=10, value=50, description='Percentile:')

    # \b...\b keeps "5" from matching inside "15" or "50".
    age_pattern = rf'\b{selected_age.value}\b'
    sex_label = 'Both' if selected_gender.value == 'ALL' else selected_gender.value

    selection_filter = data_dict[data_dict.ages.str.contains(age_pattern, regex=True, na=False) &
                                 data_dict.sex.str.contains(sex_label, na=False)]

    list_of_variable_inputs = selection_filter["variables"].values[0:]
    variable_inputs = ', '.join(list_of_variable_inputs).replace(" ", "").split(',')

user_selection()

In [15]:
def selection_filtering(age_group, sex):
    """Recompute the variable selection after a widget value changed.

    Re-filters ``data_dict`` using the current values of the ``selected_age``
    and ``selected_gender`` widgets and publishes the result in the globals
    ``selection_filter`` and ``variable_inputs``, which the narrative pipeline
    reads. Previously these were assigned as locals and discarded, so the
    pipeline kept using the selection computed at start-up. Both output
    widgets are cleared because their content no longer matches the inputs.

    The age is matched as a whole number so that, for example, age 5 does not
    match "15" or "50" in the ``ages`` list.

    Args:
        age_group: Unused; kept for compatibility with the event handlers.
        sex: Unused; kept for compatibility with the event handlers.
    """
    global selection_filter, variable_inputs

    age_pattern = rf'\b{selected_age.value}\b'
    sex_label = 'Both' if selected_gender.value == 'ALL' else selected_gender.value

    selection_filter = data_dict[data_dict.ages.str.contains(age_pattern, regex=True, na=False) &
                                 data_dict.sex.str.contains(sex_label, na=False)]

    list_of_variable_inputs = selection_filter["variables"].values[0:]
    variable_inputs = ', '.join(list_of_variable_inputs).replace(" ", "").split(',')

    data_output.clear_output()
    output.clear_output()

def selected_age_eventhandler(change):
    """Observer for the age widget: re-run the selection filter."""
    new_value = change.new
    selection_filtering(new_value, selected_age.value)


def selected_gender_eventhandler(change):
    """Observer for the sex widget: re-run the selection filter."""
    current_gender = selected_gender.value
    selection_filtering(current_gender, change.new)


def selected_percentile_eventhandler(change):
    """Observer for the percentile widget: re-run the selection filter."""
    current_percentile = selected_percentile.value
    selection_filtering(current_percentile, change.new)

In [16]:
def get_demographics_for_selection():
    """Join site totals with the data dictionary and split them by topic.

    Builds the global ``data`` (outer join of the selected rows of
    ``acs_site_sum`` with ``selection_filter`` on ``variables``, sorted by
    ``sum_in_area`` ascending), then derives:

    * one global frame per topic, selected by a keyword in ``variable_group``:
      ``language``, ``education``, ``school_enrollment``,
      ``family_household_income``, ``nonfamily_household_income``,
      ``household_type``
    * ``percentile_input`` -- the slider value as a fraction
    * one ``sum_for_percentile_<topic>`` global per topic: the
      ``sum_in_area`` quantile at ``percentile_input``, truncated to an
      integer and rendered as a string

    The values are computed once; the former loop over the columns of
    ``data`` recomputed identical results on every pass.
    """
    global percentile_input, data
    global language, education, school_enrollment, family_household_income, nonfamily_household_income, household_type
    global sum_for_percentile_language, sum_for_percentile_education, sum_for_percentile_school_enrollment,\
            sum_for_percentile_family_household_income,\
            sum_for_percentile_nonfamily_household_income, sum_for_percentile_household_type

    data = pd.merge(acs_site_sum.loc[acs_site_sum['variables'].isin(variable_inputs)],
                    selection_filter, how="outer", on="variables")
    data["sum_in_area"] = data["sum_in_area"].astype(int)
    data = data.sort_values("sum_in_area", axis=0, ascending=True)

    percentile_input = selected_percentile.value / 100

    def _topic(keyword):
        """Rows of ``data`` whose variable_group contains ``keyword``."""
        return data[data["variable_group"].str.contains(keyword)]

    def _percentile_sum(topic_frame):
        """Quantile of sum_in_area at the chosen percentile, as an integer string."""
        return topic_frame.sum_in_area.quantile(percentile_input).astype(int).astype(str)

    # split these up into the diff bins for different types of variable groups
    language = _topic('Language')
    education = _topic('Educational Attainment')
    school_enrollment = _topic('School')
    family_household_income = _topic('Family')
    nonfamily_household_income = _topic('Nonfamily')
    household_type = _topic('Households')

    # Calculate individual percentile values
    sum_for_percentile_language = _percentile_sum(language)
    sum_for_percentile_education = _percentile_sum(education)
    sum_for_percentile_school_enrollment = _percentile_sum(school_enrollment)
    sum_for_percentile_family_household_income = _percentile_sum(family_household_income)
    sum_for_percentile_nonfamily_household_income = _percentile_sum(nonfamily_household_income)
    sum_for_percentile_household_type = _percentile_sum(household_type)

In [17]:
def parse_tables_for_percentile_value():
    """Reshape each topic table into a single row keyed by variable code.

    For every topic frame only ``variables`` and ``sum_in_area`` are kept
    (other columns make the transposition awkward). The frame is transposed,
    the ``variables`` row becomes the column header, and that header row is
    dropped, leaving one ``sum_in_area`` row with a column per variable.

    Sets the globals ``language_transposed``, ``education_transposed``,
    ``household_type_transposed``, ``family_household_income_transposed``,
    ``nonfamily_household_income_transposed`` and
    ``school_enrollment_transposed``.
    """
    global household_type_transposed, language_transposed, education_transposed, \
        family_household_income_transposed, nonfamily_household_income_transposed, \
        school_enrollment_transposed

    def _codes_as_columns(topic_frame):
        """Transpose one topic frame so variable codes become column labels."""
        flipped = topic_frame.filter(["variables", "sum_in_area"]).T
        flipped.columns = flipped.iloc[0]
        return flipped[1:]

    language_transposed = _codes_as_columns(language)
    education_transposed = _codes_as_columns(education)
    household_type_transposed = _codes_as_columns(household_type)
    family_household_income_transposed = _codes_as_columns(family_household_income)
    nonfamily_household_income_transposed = _codes_as_columns(nonfamily_household_income)
    school_enrollment_transposed = _codes_as_columns(school_enrollment)

In [18]:
def get_range_for_each_variable():
    """Assign every variable a contiguous integer range within its topic.

    Within each transposed topic table the first variable gets
    ``0 .. its value`` and every later variable gets
    ``previous value + 1 .. its value``. The ranges are collected in the
    order education, family income, household type, language, nonfamily
    income, school enrollment, and then joined *by position* to the global
    ``data`` after sorting it by ``variable_group`` and ``sum_in_area``.
    NOTE(review): the positional join assumes that sort order matches the
    topic order above -- confirm against the data dictionary.

    Sets the globals ``ranges``, ``range_table``, ``range_table_all`` (with
    ``range_per_variable`` converted to text), and ``first_range`` /
    ``other_ranges`` (the most recently computed arrays). Sorts the global
    ``data`` in place.
    """
    global range_table, range_table_all, ranges, first_range, other_ranges
    transposed = [education_transposed, family_household_income_transposed, household_type_transposed,
                  language_transposed, nonfamily_household_income_transposed, school_enrollment_transposed]
    # In-place on purpose: ``data`` is a module-level frame that is not
    # rebound in this function.
    data.sort_values(by=['variable_group', 'sum_in_area'], ascending=[True, True], inplace=True)
    data_sorted = data.reset_index()

    ranges = []

    for df in transposed:
        if df.shape[1] == 0:
            continue  # topic has no variables for this selection

        # Column-wise extremes, computed once per table rather than once per
        # column. Accessed with .iloc because the labels are variable codes.
        upper = df.max()
        lower = df.min()

        for item in range(len(df.columns)):
            if item == 0:
                first_range = np.arange(upper.iloc[item] + 1).astype(int)
                ranges.append([first_range])
            else:
                other_ranges = np.arange(lower.iloc[item - 1] + 1,
                                         upper.iloc[item] + 1).astype(int)
                ranges.append([other_ranges])

    # Built once after all ranges are known; this also defines range_table
    # when every topic table is empty.
    range_table = pd.DataFrame(data=ranges, index=None, columns=["range_per_variable"])
    range_table = range_table.reset_index(drop=True)

    range_table_all = pd.merge(range_table, data_sorted, left_index=True, right_index=True, on=None)
    range_table_all["range_per_variable"] = range_table_all["range_per_variable"].astype(str)

In [19]:
def generate_info_for_text():
    """Pick, per topic, the variable whose range contains the percentile value.

    For each topic the ``sum_in_area`` quantile at the selected percentile is
    truncated to an integer. When that value is positive, the rows of
    ``range_table_all`` belonging to the topic are searched for the range
    text that contains it. All matches are stored, de-duplicated, in the
    global ``result_df``. Topics are processed in the order education,
    language, household type, family income, nonfamily income, school
    enrollment.

    Changes from the earlier implementation:
        * rows are combined with ``pd.concat`` (``DataFrame.append`` no
          longer exists in pandas 2.x);
        * the value is matched as a whole number, so 12 no longer matches
          ranges that merely contain 120 or 312;
        * the work is done once instead of once per row of
          ``range_table_all``;
        * when nothing matches, ``result_df`` is an empty frame that still
          has the columns of ``range_table_all``.
    """
    global result_df

    quantile = selected_percentile.value / 100

    # (keyword looked up in variable_group, topic frame)
    topics = [
        ('Educational', education),
        ('Language', language),
        ('Households', household_type),
        ('Family', family_household_income),
        ('Nonfamily', nonfamily_household_income),
        ('School', school_enrollment),
    ]

    matches = []
    for keyword, topic_frame in topics:
        value = topic_frame.sum_in_area.quantile(quantile)
        # An empty topic yields NaN; a zero value has no meaningful range.
        if pd.isna(value) or int(value) <= 0:
            continue

        topic_rows = range_table_all[range_table_all["variable_group"].str.contains(keyword)]
        # \b...\b matches the number as a whole token of the range text.
        whole_number = rf'\b{int(value)}\b'
        matches.append(topic_rows[topic_rows["range_per_variable"].str.contains(whole_number, regex=True)])

    if matches:
        result_df = pd.concat(matches, ignore_index=True).drop_duplicates()
    else:
        result_df = range_table_all.iloc[0:0].copy()

In [20]:
# Output areas shared across the notebook: `output` receives the generated
# narrative and `data_output` receives the table shown on the dataset tab.
# Both are cleared whenever the marker or an input widget changes.
output = widgets.Output()
data_output = widgets.Output()

In [21]:
def _narrative_pronoun_and_noun(gender, age):
    """Return ``(pronoun, noun)`` describing a resident of the given sex and age."""
    is_minor = 5 <= age <= 17
    if gender == "Female":
        return "she", ("girl" if is_minor else "woman")
    if gender == "Male":
        return "he", ("boy" if is_minor else "man")
    # "ALL" (and any unexpected value) is described neutrally.
    return "he or she", "person"


def _narrative_age_label(age):
    """Return the adjective used for an age ('' when none applies)."""
    if 13 <= age <= 17:
        return 'teenage'
    if 18 <= age <= 34:
        return 'young'
    if 35 <= age <= 64:
        return 'middle-aged'
    if age >= 65:
        return 'senior'
    return ''


def _narrative_income_label(variable_name):
    """Return the income descriptor for a bracket name, or None if no token matches."""
    # Tokens are unchanged from the earlier implementation.
    # NOTE(review): '10,999' and '94,999' look like typos for other bracket
    # bounds -- confirm against the variable names in data-dictionary.csv.
    brackets = (
        (" lower income", ('10,999', '14,999')),
        (" middle income", ('24,999', '34,999', '49,999')),
        (" wealthy", ('74,999', '94,999', '149,999')),
        (" very wealthy", ('199,999', '200,000')),
    )
    label = None
    for text, tokens in brackets:
        if any(token in variable_name for token in tokens):
            # Later brackets win on purpose: '149,999' also contains '49,999'.
            label = text
    return label


def construct_narrative():
    """Run the data pipeline and display a short narrative about a resident.

    Refreshes the census data for the current site, registers the widget
    observers, rebuilds the selection tables, and then assembles one
    paragraph from the rows in ``result_df``: age and sex wording, household
    and income wording, language, education and school enrollment. The text
    is shown in the ``output`` widget and stored in the global
    ``resident_text_string``; the intermediate text fragments are published
    as globals as well.

    Fixes relative to the earlier implementation:
        * the age check for "girl" is tied to the Female branch, so males
          and ``ALL`` are no longer labelled "girl" or left without a pronoun;
        * income wording uses real membership tests (``'a' or 'b' in s`` is
          always true, which made every household " very wealthy");
        * text fragments start from defaults and are only overwritten by a
          matching row, instead of being reset by every non-matching row;
        * rows removed from ``result_df`` by an earlier iteration are
          skipped, so Family and Nonfamily rows no longer remove each other;
        * sentence fragments are separated by spaces;
        * a missing income row drops the income clause instead of raising
          ``IndexError``.
    """
    global result_df, resident_text, percentile_text, income_range, gender_text, subject_text, age_text,\
            household_type_text, income_text, school_text, language_text, education_text, resident_text_string

    clean_combine_census_and_geographic_data()

    selected_age.observe(selected_age_eventhandler, names='value')
    selected_gender.observe(selected_gender_eventhandler, names='value')
    selected_percentile.observe(selected_percentile_eventhandler, names='value')

    get_demographics_for_selection()
    parse_tables_for_percentile_value()
    get_range_for_each_variable()
    generate_info_for_text()

    percentile_text = " is representative of the top "+ str(selected_percentile.value) + "% of this area's residents. "

    gender_text, subject_text = _narrative_pronoun_and_noun(selected_gender.value, selected_age.value)
    age_text = _narrative_age_label(selected_age.value)

    # Defaults used when no row of result_df supplies the fragment.
    household_type_text = ' household '
    income_text = ''
    school_text = ''
    language_text = ''
    education_text = ''

    for i in result_df['variable_name'].tolist():
        # result_df is narrowed inside this loop; ignore names that an
        # earlier iteration already removed.
        if i not in result_df['variable_name'].values:
            continue

        if 'Nonfamily Household Income In ' in i:
            household_type_text = ' household consisting of non-family members '
            result_df = result_df.loc[~result_df["variable_name"].str.contains('Family Household Income')]
        elif 'Family Household Income In ' in i:
            result_df = result_df.loc[~result_df["variable_name"].str.contains('Nonfamily Household Income')]
            household_type_text = ' household consisting of family members '

        income_label = _narrative_income_label(i)
        if income_label is not None:
            income_text = income_label

        if 'Enrolled in college or graduate school' in i:
            school_text = gender_text.capitalize() + " is also a college or graduate school student."

        if 'English' in i:
            language_text = gender_text.capitalize() + " speaks only English at home."
        if 'Spanish' in i:
            language_text = "In addition to English, " + gender_text + " speaks Spanish at home."
        if 'Indo-European' in i:
            language_text = "In addition to English, " + gender_text + " speaks an Indo-European language at home."
        if 'Asian' in i:
            language_text = "In addition to English, " + gender_text + " speaks an Asian or Pacific Island language at home."
        if 'Other languages' in i:
            language_text = "In addition to English, " + gender_text + " speaks other languages at home."

        if 'Less than high school graduate' in i:
            education_text = gender_text.capitalize() + ' does not have a high school degree.'
        if 'High school graduate (includes equivalency)' in i:
            education_text = gender_text.capitalize() + ' is a high school graduate.'
        if 'Some college' in i:
            education_text = gender_text.capitalize() + ' has attended some form of college but does not have a degree.'
        if 'Bachelor' in i:
            education_text = gender_text.capitalize() + " is well-educated and has at least a bachelor's degree."

    # Income bracket text: whatever sits between the first and second '$'
    # of the remaining income rows' names.
    income = result_df.loc[result_df["variable_name"].str.contains('Income')]
    income_range = income["variable_name"].str.split('$').str[1].dropna()

    opening = " ".join(word for word in ("This", age_text, subject_text) if word)
    intro = opening + percentile_text + \
            gender_text.capitalize() + " lives in a" + income_text + household_type_text
    closing = " ".join(sentence for sentence in (language_text, education_text, school_text) if sentence)

    if income_range.empty:
        # No income row survived: finish the sentence without the income clause.
        resident_text = pd.Series([intro.rstrip() + ". " + closing])
    else:
        # str + Series broadcasts, so resident_text is a Series.
        resident_text = intro + "with an income in the $" + income_range + " range. " + closing
    resident_text_string = resident_text.values[0].strip('"\'')

    with output:
        display(resident_text_string)

construct_narrative()

In [22]:
# Button that triggers narrative generation; its click handler is attached
# in the next cell. Styled as a wide, bordered, light-blue button with bold text.
text_generation_button = Button(description="GENERATE NARRATIVE",\
                               layout=Layout(width='60%', height='50px', border='solid .5px #000'))
text_generation_button.style.button_color = '#EDF9FC'
text_generation_button.style.font_weight = 'bold'

In [23]:
#TEXT GENERATION
def text_generation(b):
    """Click handler for the narrative button.

    Clears both output widgets, rebuilds the narrative for the current
    inputs, and refreshes the dashboard.

    Args:
        b: The button instance passed by the click callback (unused).
    """
    for area in (output, data_output):
        area.clear_output()
    construct_narrative()
    show_dashboard()

text_generation_button.on_click(text_generation)

In [24]:
def show_dashboard():
    """Fill the output widgets and assemble the dashboard layout.

    Clears ``output`` and ``data_output``, then writes the narrative (as a
    Markdown block quote) into the former and a trimmed copy of
    ``range_table_all`` into the latter. Sets the globals ``input_widgets``
    (the input controls stacked vertically), ``tab`` (two tabs titled
    'Narrative' and 'Dataset') and ``dashboard`` (inputs above tabs).
    """
    global tab, input_widgets, dashboard

    for area in (output, data_output):
        area.clear_output()

    item_layout = widgets.Layout(margin='0 0 10px 0', align_items='stretch')
    item_layout_tab = widgets.Layout(margin='0 0 10px 0')

    shown_columns = ['sum_in_area', 'sex', 'age_group', 'variable_group', 'variable_name']
    explore_data = range_table_all.filter(shown_columns)
    explore_data['sum_in_area'] = explore_data['sum_in_area'].astype(int)

    narrative_markdown = f"> <font size = 3, font color = black> *{resident_text_string}*"
    with output:
        display(md(narrative_markdown))
    with data_output:
        display(explore_data)

    controls = [selected_age, selected_percentile, selected_gender, text_generation_button]
    input_widgets = widgets.VBox(controls, layout=item_layout)

    tab = widgets.Tab([output, data_output], layout=item_layout_tab)
    for position, title in enumerate(('Narrative', 'Dataset')):
        tab.set_title(position, title)

    dashboard = widgets.VBox([input_widgets, tab])

show_dashboard()


display(dashboard)
m

VBox(children=(VBox(children=(BoundedIntText(value=25, description='AGE:', max=99, min=5), IntSlider(value=50,…

Map(center=[40.7210907, -73.9877836], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_titl…