# Goal: Create the two heatmaps
Follow the instructions on the cell below to upload your correct files, set a save location on your computer, and download libraries. Then, run the entire code!
Code by Sejal Gupta

Right now, the lines that restrict the map to central Houston are commented out.

Note that the OY Code here maps OY by zipcode + training programs by location as briefcase markers. Use the https://github.com/sejal234/heatmaps_by/blob/main/OY_Trainings_Reproducible_Heatmap.ipynb code for OY and training programs by zipcode (it is zoomed in to central Houston). 

In [28]:
#libraries: download any libraries that may need to be downloaded. 
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
from folium.plugins import MarkerCluster
import geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent = 'example app')

#the opportunity youth file, a CSV made from my data cleaning doc with opp youth by zipcode
oy = pd.read_csv('https://raw.githubusercontent.com/sejal234/heatmaps_by/main/oy_zip_texas.csv')
    #mine is 2020 ACS data
    
#the training programs file, a CSV made from my data cleaning doc with programs by zipcode
#trainings_zips = pd.read_csv('https://raw.githubusercontent.com/sejal234/heatmaps_by/main/trainings_zips.csv')

#replace this variable with your path to training programs CSV
trainings_full = pd.read_csv("https://raw.githubusercontent.com/sejal234/heatmaps_by/main/data/trainings_full.csv", encoding = "ISO-8859-1")

#replace this variable with your path to schools CSV
schools = pd.read_csv("https://raw.githubusercontent.com/sejal234/heatmaps_by/main/data/CTD_sf_report.csv")

#the json file w the houston zipcode shapes, do not change unless COH updated the zipcode boundaries lol
geo = gpd.read_file('https://raw.githubusercontent.com/sejal234/heatmaps_by/main/Zip_Codes.geojson')

In [29]:
inner_zips = [77002, 77003, 77004, 77005, 77006,77007, 77008, 77009, 77010, 77011, 77012, 77019, 
77020, 77021, 77023, 77025, 77026, 77027, 77029, 77030, 77054, 77055, 77046, 77087, 77098]
              #77018, 77056,77024, #77057, 77079, 77082]
#Read more: http://www.city-data.com/forum/houston/1061817-houston-density-land-area-inner-loop.html

oy.head()

Unnamed: 0.1,Unnamed: 0,ZIPCODE,Total 16-19,Opportunity Youth
0,1,73949,94,0
1,2,75001,305,0
2,3,75002,4168,0
3,4,75006,2353,52
4,5,75007,3049,15


In [30]:
trainings_full = trainings_full[['Training: Training Name', 'TL Zipcode', 'Location (Latitude)', 'Location (Longitude)', 'Industry']]
#trainings_full.columns
trainings_full.head()

Unnamed: 0,Training: Training Name,TL Zipcode,Location (Latitude),Location (Longitude),Industry
0,Construction Trades Grant - Carpentry I NCCER,77088.0,31.968599,-99.901813,Construction
1,Construction Trades Grant - Electrical I NCCER,77073.0,31.968599,-99.901813,Construction
2,Construction Trades Grant - Field Service Tech...,77070.0,31.968599,-99.901813,
3,Construction Trades Grant - Construction Helper,77088.0,31.968599,-99.901813,Construction
4,Construction Trades Grant - Mechatronics Techn...,77070.0,31.968599,-99.901813,


In [31]:
schools.head()

Unnamed: 0,Career Test Drive Name,Num of Students Attended,Created Date,School Address,Record Type
0,Austin High School - February 2022,76,5/25/2022,"1700 Dumble Street<br>Houston, TX 77023",CTD Fair
1,Booker T. Washington High School - April 2022,84,6/7/2022,"4204 Yale St.<br>Houston, TX 77018",CTD Fair
2,Eisenhower 9th Grade Center - May 2022,165,5/25/2022,"3550 West Gulf Bank Road<br>Houston, TX 77088",CTD Fair
3,Eisenhower High School - May 2022,225,5/25/2022,"7922 Antoine Dr<br>Houston, Texas 77088",CTD Fair
4,Harmony Science Academy (Houston) - February 2...,200,1/31/2022,9431 West Sam Houston Parkway South<br>Houston...,CTD Fair


In [32]:
def clean_schools(schools):
    #remove schools w/o addresses
    schools = schools.dropna(subset = ['School Address'])
    
    #edit out <br>
    schools = schools.replace('<br>',' ', regex=True)
    
    #find long, lat coordinates
    schools["loc"] = schools["School Address"].apply(geolocator.geocode)
    schools["point"]= schools["loc"].apply(lambda loc: tuple(loc.point) if loc else None)
    
    #remove schools it did not find coordinates for 
        #(usually if address is incomplete)
    schools = schools.dropna(subset = ['point'])
    
    #split the .point into separate columns 'lat' 'lon' and 'altitude'
    schools[['lat', 'lon', 'altitude']] = pd.DataFrame(schools['point'].to_list(), index=schools.index)
    
    #keep just the columns i want
    schools = schools[["Career Test Drive Name", "School Address", "lat", "lon"]]
    
    #uncomment if you don't need
    print("Number of Schools:", len(schools))
    
    return schools

def clean_trainings(trainings):
    #theres a lot of repeats, need to only keep unique values
    #trainings = trainings.drop_duplicates()
    
    #some don't have locations
    #trainings = trainings.dropna(subset = ['Location (Latitude)'])
    
    #uncomment if you don't need
    print("Number of Trainings:", len(trainings))
    
    return trainings 

schools = clean_schools(schools)
trainings = clean_trainings(trainings_full)

Number of Schools: 51
Number of Trainings: 225


In [33]:
trainings.head()

Unnamed: 0,Training: Training Name,TL Zipcode,Location (Latitude),Location (Longitude),Industry
0,Construction Trades Grant - Carpentry I NCCER,77088.0,31.968599,-99.901813,Construction
1,Construction Trades Grant - Electrical I NCCER,77073.0,31.968599,-99.901813,Construction
2,Construction Trades Grant - Field Service Tech...,77070.0,31.968599,-99.901813,
3,Construction Trades Grant - Construction Helper,77088.0,31.968599,-99.901813,Construction
4,Construction Trades Grant - Mechatronics Techn...,77070.0,31.968599,-99.901813,


In [34]:
schools.head()

Unnamed: 0,Career Test Drive Name,School Address,lat,lon
0,Austin High School - February 2022,"1700 Dumble Street Houston, TX 77023",29.73164,-95.332032
1,Booker T. Washington High School - April 2022,"4204 Yale St. Houston, TX 77018",29.823152,-95.3979
2,Eisenhower 9th Grade Center - May 2022,"3550 West Gulf Bank Road Houston, TX 77088",29.884251,-95.458713
3,Eisenhower High School - May 2022,"7922 Antoine Dr Houston, Texas 77088",29.882121,-95.470761
4,Harmony Science Academy (Houston) - February 2...,"9431 West Sam Houston Parkway South Houston, T...",29.68076,-95.558675


In [35]:
#only keep values for certain zipcodes, filter the geo, oy, trainings, & school tables to only have vals in the inner loop 
#geo2 = geo.loc[geo['ZIP_CODE'].isin(inner_zips)]
geo2 = geo
geo2

Unnamed: 0,FID,OBJECTID_1,ZIP_CODE,Shape__Area,Shape__Length,geometry
0,1,1,77371,1.244441e+09,223749.362712,"POLYGON ((-95.00408 30.58015, -94.99703 30.579..."
1,2,2,77331,3.045327e+08,130259.009143,"POLYGON ((-95.00991 30.62802, -95.00944 30.627..."
2,3,3,77358,1.497502e+08,71529.419671,"POLYGON ((-95.35904 30.50447, -95.36196 30.508..."
3,4,4,77378,3.752378e+08,121150.753751,"POLYGON ((-95.48586 30.38496, -95.48662 30.384..."
4,5,5,77868,1.101315e+09,210542.360669,"POLYGON ((-95.87115 30.24176, -95.93710 30.237..."
...,...,...,...,...,...,...
208,209,209,77048,3.873736e+07,28967.889007,"POLYGON ((-95.29776 29.59669, -95.29817 29.596..."
209,210,210,77053,2.509866e+07,24896.081126,"POLYGON ((-95.49234 29.58049, -95.49209 29.584..."
210,211,211,77515,6.706386e+08,175527.496564,"POLYGON ((-95.56051 29.25154, -95.53037 29.253..."
211,212,212,77377,1.429633e+08,76348.236073,"POLYGON ((-95.78116 30.06273, -95.78066 30.068..."


In [36]:
#oy2 = oy.loc[oy['ZIPCODE'].isin(inner_zips)]
oy2 = oy
oy2

Unnamed: 0.1,Unnamed: 0,ZIPCODE,Total 16-19,Opportunity Youth
0,1,73949,94,0
1,2,75001,305,0
2,3,75002,4168,0
3,4,75006,2353,52
4,5,75007,3049,15
...,...,...,...,...
1934,1935,79938,4391,90
1935,1936,79942,0,0
1936,1937,88063,871,20
1937,1938,88220,2104,32


In [37]:
#lets map to see if this works
htx_map1 = folium.Map(location=[29.749907, -95.358421], tiles = 'cartodbpositron', zoom_start = 12)
layer1=folium.Choropleth(
            geo_data = geo2,
            data = oy2,
            columns=['ZIPCODE', 'Opportunity Youth'],  
            key_on='feature.properties.ZIP_CODE', 
            fill_color='Greens',
            nan_fill_color="White", #Use white color if there is no data available for the county
            nan_fill_opacity=0.5,
            fill_opacity=0.7,
            line_opacity=0.2,
            name='Opportunity Youth',
            legend_name='Opportunity Youth Per Zip',
            highlight=True,
            #overlay=False,
            line_color='black').add_to(htx_map1)
htx_map1

In [38]:
trainings

Unnamed: 0,Training: Training Name,TL Zipcode,Location (Latitude),Location (Longitude),Industry
0,Construction Trades Grant - Carpentry I NCCER,77088.0,31.968599,-99.901813,Construction
1,Construction Trades Grant - Electrical I NCCER,77073.0,31.968599,-99.901813,Construction
2,Construction Trades Grant - Field Service Tech...,77070.0,31.968599,-99.901813,
3,Construction Trades Grant - Construction Helper,77088.0,31.968599,-99.901813,Construction
4,Construction Trades Grant - Mechatronics Techn...,77070.0,31.968599,-99.901813,
...,...,...,...,...,...
220,Computer Support Technician,77550.0,29.283948,-94.807789,Information Technology
221,Diesel Engines Testing and Repair,77550.0,29.283948,-94.807789,"Trade, Transportation & Utilities"
222,Automotive Climate Repair,77550.0,29.283948,-94.807789,"Trade, Transportation & Utilities"
223,Logistics Technician,77550.0,29.283948,-94.807789,"Trade, Transportation & Utilities"


in the cell below, you can uncomment the lines to ensure only programs in central houston get mapped

In [39]:
trainings2 = trainings #just so i don't have to change variable names

#uncomment if you only want central houston to be mapped
#trainings2 = trainings.loc[trainings['TL Zipcode'].isin(inner_zips)]
#print("Num Programs", len(trainings2))
#trainings2

#i dont like this way of sorting bc it doesnt get programs w/o zipcodes

In [40]:
#where training programs are btwn a certain long and certain lat
#top right is 29.8153931, -95.2809456
#bottom left is 29.6803103, -95.4584231

#uncomment below to only map schools in central houston
#trainings3 = trainings[trainings['Location (Latitude)'].between(29.67, 29.82)]
#trainings3 = trainings3[trainings3['Location (Longitude)'].between(-95.46, -95.24)]
trainings3 = trainings

trainings3.columns = ['Trainings', 'Zipcode', 'lat', 'lon', 'Industry']
trainings3["Program Name, Industry"] = trainings3['Trainings'] + ", " + trainings3["Industry"]

print("Num Programs", len(trainings3))
trainings3

Num Programs 225


Unnamed: 0,Trainings,Zipcode,lat,lon,Industry,"Program Name, Industry"
0,Construction Trades Grant - Carpentry I NCCER,77088.0,31.968599,-99.901813,Construction,"Construction Trades Grant - Carpentry I NCCER,..."
1,Construction Trades Grant - Electrical I NCCER,77073.0,31.968599,-99.901813,Construction,Construction Trades Grant - Electrical I NCCER...
2,Construction Trades Grant - Field Service Tech...,77070.0,31.968599,-99.901813,,
3,Construction Trades Grant - Construction Helper,77088.0,31.968599,-99.901813,Construction,Construction Trades Grant - Construction Helpe...
4,Construction Trades Grant - Mechatronics Techn...,77070.0,31.968599,-99.901813,,
...,...,...,...,...,...,...
220,Computer Support Technician,77550.0,29.283948,-94.807789,Information Technology,"Computer Support Technician, Information Techn..."
221,Diesel Engines Testing and Repair,77550.0,29.283948,-94.807789,"Trade, Transportation & Utilities","Diesel Engines Testing and Repair, Trade, Tran..."
222,Automotive Climate Repair,77550.0,29.283948,-94.807789,"Trade, Transportation & Utilities","Automotive Climate Repair, Trade, Transportati..."
223,Logistics Technician,77550.0,29.283948,-94.807789,"Trade, Transportation & Utilities","Logistics Technician, Trade, Transportation & ..."


In [41]:
#add the markers

for i,r in trainings3.iterrows():
        #if the column names are diff in your input report, edit them below
        location = (r["lat"], r["lon"])
        folium.Marker(location=location,
                      popup = r["Program Name, Industry"],
                      tooltip=r["Program Name, Industry"],
                     icon=folium.Icon(color = 'red', icon = 'briefcase'))\
        .add_to(htx_map1)

        
folium.LayerControl().add_to(htx_map1)
htx_map1

In [42]:
#add labels (tooltips w hover functionality)
both = geo2.merge(oy2, left_on = 'ZIP_CODE', right_on = 'ZIPCODE', how = 'left')
both = both.replace(np.nan, 0)
both.head()

style_function = lambda x: {'fillColor': '#ffffff', 
                            'color':'#000000', 
                            'fillOpacity': 0.1, 
                            'weight': 0.1}
highlight_function = lambda x: {'fillColor': '#000000', 
                                'color':'#000000', 
                                'fillOpacity': 0.50, 
                                'weight': 0.1}
NIL = folium.features.GeoJson(
    both,
    style_function=style_function, 
    control=False,
    highlight_function=highlight_function, 
    tooltip=folium.features.GeoJsonTooltip(
        fields=['ZIP_CODE', 'Opportunity Youth', "Total 16-19"],  # use fields from the json file
        aliases=['Zipcode:', "Number of Opportunity Youth:", "Total Population Ages 16-19:"],
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;") 
    )
)
htx_map1.add_child(NIL)
htx_map1.keep_in_front(NIL)
htx_map1

In [44]:
#now do the same w schools
htx_map2 = folium.Map(location=[29.749907, -95.358421], tiles = 'cartodbpositron', zoom_start = 12)

#uncomment below to only map schools in central houston
#schools2 = schools[schools['lat'].between(29.69, 29.82)]
#schools2 = schools2[schools2['lon'].between(-95.46, -95.24)]
schools2=schools

for i,r in schools2.iterrows():
        location = (r["lat"], r["lon"])
        folium.Marker(location=location,
                          popup = r['Career Test Drive Name'],
                          tooltip=r["Career Test Drive Name"],
                     icon=folium.Icon(color = 'blue', icon = 'book'))\
        .add_to(htx_map2)
        
for i,r in trainings3.iterrows():
        #if the column names are diff in your input report, edit them below
        location = (r["lat"], r["lon"])
        folium.Marker(location=location,
                      popup = r["Program Name, Industry"],
                      tooltip=r["Program Name, Industry"],
                     icon=folium.Icon(color = 'red', icon = 'briefcase'))\
        .add_to(htx_map2)

htx_map2

In [45]:
#replace with the path is where the completed chloropleth will save onto your computer
save1 = '/Users/sejalgupta/Documents/heatmap_docs/inner_loop_OY.html'
save2 = '/Users/sejalgupta/Documents/heatmap_docs/inner_loop_schools.html'
save3 = '/Users/sejalgupta/Documents/heatmap_docs/zoom_in_school.html'
#htx_map1.save(save1)
htx_map2.save(save3)