In [1]:
import folium
import pandas as pd

### Starting to work on maps for our project

In [2]:
# Load the flattened per-zip dataset prepared for the folium maps and preview it
df = pd.read_csv(filepath_or_buffer='../Data/final_flat_for_folium.csv')
df.head(5)

Unnamed: 0,lat,lng,median_home_value,median_household_income,occupied_housing_units,population,Zip,City,State,Median_Home_Value_Z,Top_Tier_Median_Home_Value,Bottom_Tier_Median_Home_Value
0,40.8,-73.97,934600.0,68516.0,44432.0,94600.0,10025,New York,NY,1167725.0,1300000.0,0.0
1,41.94,-87.66,392500.0,75885.0,37865.0,65996.0,60657,Chicago,IL,450000.0,585000.0,0.0
2,29.74,-95.84,262100.0,126425.0,19282.0,61600.0,77494,Katy,TX,364575.0,399900.0,0.0
3,41.92,-87.65,489300.0,86215.0,34850.0,66617.0,60614,Chicago,IL,695000.0,799000.0,0.0
4,31.78,-106.31,121700.0,52024.0,34452.0,111086.0,79936,El Paso,TX,146500.0,220000.0,96750.0


### Testing

In [3]:
# Copy the coordinate columns to the names the map code uses; the new columns
# are appended, so they end up last in the frame.
df['Long'] = df['lng']
df['Lat'] = df['lat']
# Zip looks like it was parsed as an integer, which drops leading zeros
# (e.g. 02134 -> 2134). Left-pad back to the canonical 5-character form so that
# lookups by zero-padded zip code can match.
df['Zipcode'] = df['Zip'].astype(str).str.zfill(5)
# Drop the original columns without mutating the frame in place
df = df.drop(columns=['lng', 'lat', 'Zip'])

In [4]:
# Preview the frame again to confirm Long/Lat/Zipcode replaced lng/lat/Zip
df.head()

Unnamed: 0,median_home_value,median_household_income,occupied_housing_units,population,City,State,Median_Home_Value_Z,Top_Tier_Median_Home_Value,Bottom_Tier_Median_Home_Value,Long,Lat,Zipcode
0,934600.0,68516.0,44432.0,94600.0,New York,NY,1167725.0,1300000.0,0.0,-73.97,40.8,10025
1,392500.0,75885.0,37865.0,65996.0,Chicago,IL,450000.0,585000.0,0.0,-87.66,41.94,60657
2,262100.0,126425.0,19282.0,61600.0,Katy,TX,364575.0,399900.0,0.0,-95.84,29.74,77494
3,489300.0,86215.0,34850.0,66617.0,Chicago,IL,695000.0,799000.0,0.0,-87.65,41.92,60614
4,121700.0,52024.0,34452.0,111086.0,El Paso,TX,146500.0,220000.0,96750.0,-106.31,31.78,79936


In [5]:
# Inspect column dtypes; Zipcode should be object (string) after the conversion above
df.dtypes

median_home_value                float64
median_household_income          float64
occupied_housing_units           float64
population                       float64
City                              object
State                             object
Median_Home_Value_Z              float64
Top_Tier_Median_Home_Value       float64
Bottom_Tier_Median_Home_Value    float64
Long                             float64
Lat                              float64
Zipcode                           object
dtype: object

In [40]:
def _normalize_zip(entry):
    """Validate one user-supplied zip code and normalise it to a 5-character string.

    Returns a ``(zipcode, error)`` pair in which exactly one element is ``None``:
    ``zipcode`` is the 5-character string on success, ``error`` is the
    human-readable message on failure.
    """
    if isinstance(entry, str):
        if len(entry) != 5:
            # Distinguish "numeric but wrong length" from "not a number at all"
            try:
                int(entry)
            except ValueError:
                return None, 'Invalid Entry {}'.format(entry)  # ex: 'hey'
            return None, 'String zip {} must be 5 characters'.format(entry)  # ex: '674'
        zipcode = entry
    else:
        # Non-string input (normally an int): left-pad 3- and 4-digit values with zeros
        digits = str(entry)
        if not 3 <= len(digits) <= 5:
            return None, 'Integer zip {} must be 3 to 5 characters'.format(entry)  # ex: 654789
        zipcode = digits.rjust(5, '0')

    # Catches 5-character values that are not numeric
    try:
        int(zipcode)
    except ValueError:
        return None, 'Invalid Entry {}'.format(entry)  # ex: 'hello'
    return zipcode, None


def set_marker(*args):
    """Display a folium map with one circle marker per requested zip code.

    Parameters
    ----------
    *args : str or int
        Zip codes. Strings must be exactly 5 characters; integers may have
        3 to 5 digits and are left-padded with zeros.

    Returns
    -------
    str or None
        An error message describing the first invalid or unknown entry, or
        ``None`` after the map has been displayed. Invalid input is reported
        through the return value rather than by raising.

    Notes
    -----
    Reads the module-level ``df`` (columns ``Zipcode``, ``Lat``, ``Long`` and
    the demographic fields shown in the popup). The map is centred on the mean
    latitude/longitude of the requested zips. A bottom-tier home value of 0.0
    is shown as 'Not Available'.
    """
    # Guard against dividing by zero when computing the map centre
    if not args:
        return 'No zip codes provided'

    # Validate every entry and fetch its row once, instead of re-filtering df
    # for each individual field later on.
    rows = []
    for entry in args:
        zipcode, error = _normalize_zip(entry)
        if error is not None:
            return error
        matches = df.loc[df['Zipcode'] == zipcode]
        if matches.empty:
            return '{} not in US zip database'.format(zipcode)
        rows.append(matches.iloc[0])

    # Average latitude and longitude give the map centre
    lat_map = sum(row['Lat'] for row in rows) / len(rows)
    long_map = sum(row['Long'] for row in rows) / len(rows)
    folium_map = folium.Map(location=[lat_map, long_map],
                            tiles="CartoDB dark_matter",
                            zoom_start=4)

    # Popup html template for zip markers
    popup_template = """<b>Demographic Information</b><br>
                    Median Overall Home Value: {}<br>
                    Bottom Third Median Home Value: {}<br>
                    Top Third Median Home Value: {}<br>
                    Population: {}<br>
                    Median Household Income: {}"""

    for row in rows:
        # A bottom-tier value of 0.0 is treated as missing data
        bottom_tier = row['Bottom_Tier_Median_Home_Value']
        if bottom_tier == 0.0:
            bottom_tier = 'Not Available'

        popup_text = popup_template.format(row['Median_Home_Value_Z'],
                                           bottom_tier,
                                           row['Top_Tier_Median_Home_Value'],
                                           row['population'],
                                           row['median_household_income'])

        # Marker for this zip at its latitude/longitude
        marker = folium.CircleMarker(location=[row['Lat'], row['Long']],
                                     popup=popup_text,
                                     tooltip=row['Zipcode'],
                                     fill=True,
                                     color='#b0c4de',
                                     radius=10)
        marker.add_to(folium_map)

    display(folium_map)

In [41]:
# Example: map four zip codes passed as integers
set_marker(33480, 84098, 80424, 32407)

### Known problems: 

-Can't enter zip codes as ints if they start with 0 (Python does not allow integer literals with a leading zero, e.g. `02134`)

-Dataset is missing some zip information

-Missing bottom-tier (lower third) median home value for many zip codes; these are stored as 0.0 in the data

-If points are very far apart, initial map zoom level won't show them all