In [3]:
import pandas as pd
import numpy as np
from pathlib import Path
from uszipcode import SearchEngine
import mpu 
import math
import requests
import json
import plotly.express as px
import plotly.graph_objects as go
search = SearchEngine()
from sklearn.linear_model import LogisticRegression
import plotly.subplots as sp



In [47]:
def compare_zipcodes(zipcode1=None, zipcode2=None):
    """Compare two US zipcodes: map, distance, bar chart and summary table.

    Parameters
    ----------
    zipcode1, zipcode2 : str or int, optional
        The zipcodes to compare. Any value left as ``None`` is requested
        from the user with ``input``, so calling ``compare_zipcodes()`` with
        no arguments behaves interactively as before, while passing both
        values lets the function run without a prompt.

    Returns
    -------
    pandas.DataFrame
        One row per zipcode plus a final ``"Difference"`` row holding the
        absolute difference of each column (population, median household
        income, median home value, latitude, longitude).

    Raises
    ------
    ValueError
        If the search engine has no record for one of the zipcodes.

    Side effects
    ------------
    Shows the map figure returned by ``plot_zipcode_outline``, prints the
    distance returned by ``calculate_distance``, shows a bar chart of income
    and home value, and prints a ``Summary Information`` heading.
    """
    # Prompt only for the values the caller did not supply.
    if zipcode1 is None:
        zipcode1 = input("Enter the first zipcode: ")
    if zipcode2 is None:
        zipcode2 = input("Enter the second zipcode: ")
    # Stray whitespace from typed input would otherwise make the lookup miss.
    zipcode1 = str(zipcode1).strip()
    zipcode2 = str(zipcode2).strip()

    search = SearchEngine()
    zipcode1_info = search.by_zipcode(zipcode1)
    zipcode2_info = search.by_zipcode(zipcode2)

    # A failed lookup yields either None or a record without a zipcode
    # (depends on the uszipcode version); fail with a clear message instead
    # of an attribute/type error further down.
    for code, info in ((zipcode1, zipcode1_info), (zipcode2, zipcode2_info)):
        if getattr(info, "zipcode", None) is None:
            raise ValueError(f"Zipcode {code!r} was not found.")

    data = {
        "Population": [zipcode1_info.population, zipcode2_info.population],
        "Median Household Income": [zipcode1_info.median_household_income, zipcode2_info.median_household_income],
        "Median Home Value": [zipcode1_info.median_home_value, zipcode2_info.median_home_value],
        "Latitude": [zipcode1_info.lat, zipcode2_info.lat],
        "Longitude": [zipcode1_info.lng, zipcode2_info.lng]
    }

    # dtype=float turns missing figures (None) into NaN so the subtraction
    # below cannot fail on an object column; the final table was already
    # float once the difference row was appended.
    per_zipcode = pd.DataFrame(
        data,
        index=[f"Zipcode {zipcode1}", f"Zipcode {zipcode2}"],
        dtype=float,
    )

    differences = (per_zipcode.iloc[1] - per_zipcode.iloc[0]).abs()
    differences.name = "Difference"

    df = pd.concat([per_zipcode, differences.to_frame().T])

    fig = plot_zipcode_outline(zipcode1, zipcode2)
    fig.show()

    distance = calculate_distance(zipcode1, zipcode2)
    print(f"The distance between the two zipcodes is {distance:.2f} miles.")

    # The same dollar amounts drive both the bar heights and the bar colours.
    dollar_values = [
        zipcode1_info.median_household_income,
        zipcode1_info.median_home_value,
        zipcode2_info.median_household_income,
        zipcode2_info.median_home_value,
    ]
    fig2 = go.Figure()
    fig2.add_trace(go.Bar(x=[f"Zipcode {zipcode1} Median Income", f"Zipcode {zipcode1} Median Home Value", f"Zipcode {zipcode2} Median Income", f"Zipcode {zipcode2} Median Home Value"],
                      y=dollar_values,
                      name='Median Income and Home Value Comparison',
                      marker_color=dollar_values))
    fig2.update_layout(title='Comparison of Median Household Income and Home Value Between Zipcodes',
                   xaxis_title='Zipcodes', yaxis_title='Dollars')
    fig2.show()

    print('Summary Information')
    return df

# Run the comparison. compare_zipcodes calls plot_zipcode_outline and
# calculate_distance, which this notebook defines in later cells, so those
# cells must already have been executed (e.g. after a kernel restart) before
# this call can succeed.
compare_zipcodes()

The distance between the two zipcodes is 145.16 miles.


Summary Information


Unnamed: 0,Population,Median Household Income,Median Home Value,Latitude,Longitude
Zipcode 78704,42117.0,50930.0,347500.0,30.24,-97.77
Zipcode 77005,25528.0,159732.0,754000.0,29.72,-95.42
Difference,16589.0,108802.0,406500.0,0.52,2.35


In [10]:
def plot_zipcode_outline(zipcode1, zipcode2):
    """Build a map figure with the two zipcodes as markers joined by a line.

    Both zipcodes are looked up with ``SearchEngine.by_zipcode`` and the
    ``lat``/``lng`` attributes of the results are used as the two points.

    Parameters
    ----------
    zipcode1, zipcode2
        Zipcodes to look up; they are also used as the marker text and in
        the figure title.

    Returns
    -------
    The plotly figure. It is not shown here; the caller decides when to
    display it.
    """
    engine = SearchEngine()
    first = engine.by_zipcode(zipcode1)
    second = engine.by_zipcode(zipcode2)

    # Both layers plot the same two points with the same map settings.
    latitudes = [first.lat, second.lat]
    longitudes = [first.lng, second.lng]
    map_options = dict(zoom=10, mapbox_style="open-street-map")

    # Base figure: the connecting line.
    fig = px.line_mapbox(lat=latitudes, lon=longitudes, **map_options)

    # Marker layer: build a scatter figure only to borrow its single trace.
    marker_figure = px.scatter_mapbox(
        lat=latitudes,
        lon=longitudes,
        text=[zipcode1, zipcode2],
        **map_options,
    )
    fig.add_trace(marker_figure.data[0])

    # Centered title near the top, with a top margin large enough to hold it.
    centered_title = {
        'text': f"Zipcode Comparison: {zipcode1} vs {zipcode2}",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
    fig.update_layout(title=centered_title, margin=dict(l=20, r=20, t=50, b=20))

    return fig

In [7]:
def calculate_distance(zipcode1, zipcode2):
    """Return the haversine distance between two zipcodes, scaled to miles.

    Each zipcode is looked up with ``SearchEngine.by_zipcode`` and its
    ``lat``/``lng`` pair is passed to ``mpu.haversine_distance``. The result
    is multiplied by 0.621371, i.e. it is treated as kilometres and converted
    to miles.
    """
    miles_per_km = 0.621371

    engine = SearchEngine()
    origin, destination = (engine.by_zipcode(code) for code in (zipcode1, zipcode2))

    kilometres = mpu.haversine_distance(
        (origin.lat, origin.lng),
        (destination.lat, destination.lng),
    )
    return kilometres * miles_per_km

In [43]:
def predict_affordability(zipcodes, salaries):
    """Estimate, for each (zipcode, salary) pair, the probability of affordability.

    A zipcode counts as affordable for a salary when its median home value is
    at most four times that salary. One logistic regression is fitted over
    all pairs in the call (feature: the zipcode's median household income,
    label: the affordable flag) and then evaluated at each pair's salary.

    Why the model is fitted over the whole call: the earlier version built a
    separate model from each single pair. One observation only ever carries
    one label, so its "only one class" guard was always true, the fitting
    branch could never run, and every result was 0.5.

    NOTE(review): as in the earlier version, the model is trained on median
    household income but queried with the caller's salary; confirm that this
    is the intended modelling choice.

    Parameters
    ----------
    zipcodes : iterable
        Zipcodes to look up with ``SearchEngine.by_zipcode``.
    salaries : iterable of numbers
        Salaries paired positionally with ``zipcodes``. Surplus items in the
        longer iterable are ignored (``zip`` semantics).

    Returns
    -------
    list of float
        One value per pair, in input order. A pair gets 0.5 when nothing can
        be estimated for it: either its zipcode has no median household
        income / median home value available, or the usable pairs in this
        call do not contain both affordable and unaffordable examples, so no
        model can be fitted.
    """
    price_to_salary_limit = 4  # affordable when home value <= 4 x salary

    search = SearchEngine()
    pairs = list(zip(zipcodes, salaries))

    # Gather one training row per pair that has both figures available.
    incomes = []
    labels = []
    usable_positions = []
    for position, (zipcode, salary) in enumerate(pairs):
        zipcode_info = search.by_zipcode(zipcode)
        # getattr covers a failed lookup returning None as well as records
        # whose figures are missing.
        income = getattr(zipcode_info, "median_household_income", None)
        home_value = getattr(zipcode_info, "median_home_value", None)
        if income is None or home_value is None:
            continue
        usable_positions.append(position)
        incomes.append(income)
        labels.append(int(home_value <= price_to_salary_limit * salary))

    # 0.5 is the "no estimate" value for every pair until a model says otherwise.
    affordability_probs = [0.5] * len(pairs)

    # A classifier cannot be fitted unless both outcomes are present.
    if len(set(labels)) < 2:
        return affordability_probs

    # scikit-learn expects a 2-D feature matrix and a 1-D label vector.
    X = np.asarray(incomes, dtype=float).reshape(-1, 1)
    y = np.asarray(labels, dtype=int)
    model = LogisticRegression()
    model.fit(X, y)

    salary_features = np.asarray(
        [pairs[position][1] for position in usable_positions], dtype=float
    ).reshape(-1, 1)
    # Column 1 is the probability of label 1 ("affordable"): both labels are
    # present, so the fitted classes are ordered [0, 1].
    affordable_column = model.predict_proba(salary_features)[:, 1]
    for position, probability in zip(usable_positions, affordable_column):
        affordability_probs[position] = float(probability)

    return affordability_probs

