In [1]:
# Import necessary libraries
import os
import json
from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go
from dash import Dash, html, dcc
import dash_mantine_components as dmc
from dash.dependencies import Input, Output
from flask import Flask
import datetime as dt
from shapely.geometry import MultiPoint, mapping


In [2]:
# Make the school-district shapefile folder the working directory, then load
# the first "ohio*.shp" file found there into a GeoDataFrame.
fn = r'C:\Users\markd\projects\Hamilton County Homes\kx-ohio-school-districts-SHP'
os.chdir(fn)
district_shapefiles = list(Path.cwd().glob("ohio*.shp"))
# [0] raises IndexError when no matching shapefile exists in the folder
geodf = gpd.read_file(district_shapefiles[0])


In [3]:
# Switch to the project folder and read the home-sales CSV through GeoPandas.
# NOTE(review): later cells cast most columns explicitly, so values presumably
# arrive as strings from this reader — confirm before swapping the loader.
fn = r'C:\Users\markd\projects\Hamilton County Homes'
os.chdir(fn)
sales_csv_matches = list(Path.cwd().glob("finalsold2009_2023.csv"))
# [0] raises IndexError when the CSV is not present in the folder
homes = gpd.read_file(sales_csv_matches[0])


In [4]:
# Convert market land value from string to float, cleaning up currency formatting.
# The pattern is a raw string: '\$' is not a valid Python escape sequence, so a
# plain literal triggers an invalid-escape warning on recent Python versions.
homes['market_land_value_num'] = (
    homes['market_land_value'].replace(r'[\$,]', '', regex=True).astype(float)
)

# Group the data by 'latitude' and calculate the average market land value for each latitude
average_land_value_by_latitude = (
    homes.groupby('latitude')['market_land_value_num'].mean().reset_index()
)

# Find the latitudes with the highest average market land value
# (head() with no argument keeps the top five rows)
highest_land_values = average_land_value_by_latitude.sort_values(
    by='market_land_value_num', ascending=False
).head()

highest_land_values

Unnamed: 0,latitude,market_land_value_num
89,39.1465623,242610.0
27,39.1436349,239560.0
185,39.186685,221980.0
652,39.2495471,214620.0
91,39.1466163,209820.0


In [5]:
# Remove the listed columns from `homes`
columns_to_drop = [
    'foreclosure',
    'cauv_value',
    'tif_value',
    'exempt_value',
    'new_address',
    'missing_house_number',
    'num_amount',
]
homes = homes.drop(columns=columns_to_drop)

In [6]:
# Derive a "MM/YYYY" label from each sale's transfer date
transfer_dates = pd.to_datetime(homes['transfer_date'])
homes['month_yr'] = transfer_dates.dt.strftime('%m/%Y')

In [7]:
# Target dtype for each column that is cast in a single astype() call
convert_dict = {
    'year_built': int,
    'total_rooms': int,
    'bedrooms': int,
    'full_baths': int,
    'half_baths': int,
    'num_parcels_sold': int,
    'acreage': float,
    'effective_tax_rate': float,
    'market_land_value': int,
    'market_improvement_value': int,
    'market_total_value': int,
    'abated_value': int,
}
homes = homes.astype(convert_dict)
homes['transfer_date'] = pd.to_datetime(homes['transfer_date'])

# Strip "$" and "," before casting the money columns. The pattern is a raw
# string because '\$' is not a valid escape sequence in a plain literal.
currency_pattern = r'[\$,]'
homes['amount'] = homes['amount'].replace(currency_pattern, '', regex=True).astype('int')
homes['taxes_paid'] = homes['taxes_paid'].replace(currency_pattern, '', regex=True).astype('float')
homes['annual_tax'] = homes['annual_tax'].replace(currency_pattern, '', regex=True).astype('float')


In [8]:
# Age is measured against the calendar year in which the notebook is run
current_year = dt.datetime.today().year
homes['age_of_house'] = current_year - homes['year_built']

In [9]:
# Capture the run of letter-only words that directly precedes ", OH" in the
# formatted address; rows without a match get NaN.
city_pattern = r'(\b[a-zA-Z]+(?:\s+[a-zA-Z]+)*),\sOH\b'
homes['city'] = homes['formatted_address'].str.extract(city_pattern, expand=False)

In [10]:
# Give the shapefile's 'ID' column a descriptive name
renamed_columns = {'ID': 'district_id'}
geodf = geodf.rename(columns=renamed_columns)

In [11]:
# Parse the shapefile's two timestamp columns into pandas datetimes
for date_column in ('created_da', 'last_edi_1'):
    geodf[date_column] = pd.to_datetime(geodf[date_column])

In [12]:
# Discard the two timestamp columns from the district GeoDataFrame
timestamp_columns = ['created_da', 'last_edi_1']
geodf = geodf.drop(columns=timestamp_columns)

In [13]:
# # Define constants for the script
# FN = r'C:\Users\markd\projects\Hamilton County Homes\kx-ohio-school-districts-SHP'
# DISTRICT_NAMES = ['SYCAMORE', 'WYOMING', 'MADEIRA','MARIEMONT','LOVELAND']
# ODE_IRN_LIST = ['044867','045146','044289','044313','044271']
# MAP_CENTER = dict(lat=39.2127649, lon=-84.3831728)
# COLORSCALE = {
#      '044867':'rgba(0, 38, 66,.1)',    
#      '045146':'rgba(132, 0, 50,.1)',
#      '044289':'rgba(0, 187, 249,.1)',
#      '044313':'rgba(0, 245, 212,.1)',
#     '044271':'rgba(175, 43, 191,.1)',
# }

# district_color_map = {
#     'SYCAMORE CSD': ' rgba(132, 0, 50,1)',
#     'WYOMING CSD': 'rgba(0, 38, 66,1)',
#     'MADEIRA CSD': 'rgba(0, 187, 249,1)',
#     'MARIEMONT CSD': 'rgba(0, 245, 212,1)',
#     'LOVELAND CSD':'rgba(175, 43, 191,1)'
#     # Add more districts and colors as needed
# }

# def read_shapefile(directory, pattern):
#     """Read the shapefile and return a GeoDataFrame."""
#     os.chdir(directory)
#     shapefile_path = list(Path.cwd().glob(pattern))[0]
#     return gpd.read_file(shapefile_path)

# def filter_districts(geodf, district_names):
#     """Filter GeoDataFrame for specified districts and assign unique IDs."""
#     filtered = geodf[geodf.NAME.str.contains('|'.join(district_names))].copy()
#     unique_districts = filtered['NAME'].unique()
#     district_mapping = {name: i for i, name in enumerate(unique_districts, start=1)}
#     filtered['district_id'] = filtered['NAME'].map(district_mapping)
#     return filtered

# def create_choroplethmapbox(geodf, colorscale):
#     """Create a Choroplethmapbox trace from a GeoDataFrame."""
#     return go.Choroplethmapbox(
#         geojson=json.loads(geodf.to_json()),
#         locations=geodf.index,
#         z=geodf['district_id'],
#         text=geodf.NAME.str.title(),
#         colorscale=geodf.loc[:,'ODE_IRN'].map(COLORSCALE ).fillna('rgba(126, 232, 250, 0.25)'),
#         showscale=False,
#         marker_line_width=3,
#         marker_opacity=0.7
#     )

# def create_bounding_box(homes, district_color_map):
#     traces = []  # List to hold all plot traces
    
#     for year in homes['year'].unique():
#         filtered_homes = homes[homes['year'] == year]
        
#         for district in filtered_homes['school_district'].unique():
#             filtered_districts = filtered_homes[filtered_homes.school_district == district]
#             lat_list = filtered_districts.latitude.astype('float').tolist()
#             lng_list = filtered_districts.longitude.astype('float').tolist()
            
#             # Create a MultiPoint object from the points
#             points = list(zip(lng_list, lat_list))
#             if len(points) > 1:
#                 multi_point = MultiPoint(points)
                
#                 # Compute the convex hull
#                 convex_hull = multi_point.convex_hull
                
#                 # Check the type of convex hull
#                 if isinstance(convex_hull, Polygon):
#                     hull_x, hull_y = convex_hull.exterior.xy
#                     mode = 'lines'
#                     fill = 'toself'
#                 elif convex_hull.geom_type == 'LineString':
#                     hull_x, hull_y = zip(*list(convex_hull.coords))
#                     mode = 'lines'
#                     fill = None
#                 else:  # It's a Point or empty
#                     hull_x, hull_y = [convex_hull.x], [convex_hull.y]
#                     mode = 'markers'
#                     fill = None
                
#                 # Prepare trace
#                 traces.append(go.Scattermapbox(
#                     lon = list(hull_x),
#                     lat = list(hull_y),
#                     mode = mode,
#                     showlegend=False,
#                     line = dict(width = 3, color = district_color_map.get(district, 'rgba(126, 232, 250, 0.25)')),
#                     name = f'District {district} - {year}',
#                     fill = fill
#                 ))
#             else:
#                 # Handle case with less than two points
#                 print(f"Not enough points to form a convex hull for District {district} in Year {year}")
    
#     return traces
    
# def create_scattermapbox_traces(homes, district_color_map):
#     """Generate Scattermapbox traces for different years with district-specific marker colors."""
#     traces = []
#     for year in homes['year'].unique():
#         filtered_homes = homes[homes['year'] == year]
#         # Apply district color map
#         marker_colors = filtered_homes.loc[:,'school_district'].map(district_color_map).fillna('rgba(126, 232, 250, 0.25)')
#         trace = go.Scattermapbox(
#             lat=filtered_homes['latitude'],
#             lon=filtered_homes['longitude'],
#             customdata=np.stack(
#                 (filtered_homes['amount'], filtered_homes['finsqft'], filtered_homes['year'], filtered_homes['address']),
#                 axis=-1
#             ),
#             mode='markers',
#             marker={
#                 "size": 10,
#                 'color': marker_colors,  # Set marker colors based on the district
#             },
#             visible=False,
#             opacity=0.5,
#             showlegend=False,
#             hovertemplate='<br>'.join([
#                 'amount: %{customdata[0]}',
#                 'Square Ft: %{customdata[1]}',
#                 'year Sold: %{customdata[2]}',
#                 'address: %{customdata[3]}'
#             ])
#         )
#         traces.append(trace)
#     traces[0].visible = True  # Make the first year's trace visible
#     return traces
# def create_slider(years, num_traces):
#     """Create a slider for the figure with a white background and labeled steps."""
#     slider_steps = []
#     for i, year in enumerate(years):
#         # Create a visibility list for each step
#         # Ensure the Choroplethmapbox trace remains visible (first element is True)
#         # and set visibility for Scattermapbox traces based on the year
#         visibility = [True] + [(i + 1) == j for j in range(1, num_traces)]
        
#         step = dict(
#             method='update',
#             args=[{'visible': visibility},
#                   {'title': 'Year: ' + str(year)}],
#             label=str(year)
#         )

#         slider_steps.append(step)
    
#     visibility_all_years = [True] * num_traces  # Show all traces
#     step_all_years = dict(
#         method='update',
#         args=[{'visible': visibility_all_years},
#               {'title': 'All Years'}],
#         label='All Years'
#     )
#     slider_steps.append(step_all_years)
#     sliders = [dict(
#         active=0,
#         steps=slider_steps,
#         # currentvalue={"prefix": "Year: "},
#         pad={"t": 80, "b": 30},
#         bgcolor='white',
#         x=0.1,
#         xanchor='left',
#         len=0.8,
#         y=-0.15,
#         yanchor='bottom'
#     )]
#     return sliders
    
# def main():
#     geodf = read_shapefile(FN, "ohio*.shp")
#     geodf = filter_districts(geodf, DISTRICT_NAMES)
#     geodf = geodf.map(lambda x: x.strftime('%Y-%m-%d') if isinstance(x, pd.Timestamp) else x)
#     geodf = geodf.to_crs("WGS84").set_index("LEA_ID")
    
#     choropleth_trace = create_choroplethmapbox(geodf, COLORSCALE)
#     scatter_traces = create_scattermapbox_traces(homes,district_color_map)
#     # bounding_box_traces = create_bounding_box(homes,district_color_map)
#     fig = go.Figure([choropleth_trace] + scatter_traces)
    
#     years = homes['year'].unique()
#     # Pass the total number of traces to the slider creation function
#     sliders = create_slider(years, len(fig.data))
    
#     fig.update_layout(
#         mapbox_style="open-street-map",
#         sliders=sliders,
#         height=600,
#         showlegend=False,
#         autosize=True,
#         margin={"r": 0, "t": 0, "l": 0, "b": 0},
#         paper_bgcolor='#FFFFFF',
#         plot_bgcolor='#303030',
#         mapbox=dict(center=MAP_CENTER, zoom=10.5)
#     )
#     fig.show()

In [14]:
# main()

In [73]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import geopandas as gpd

# Constants (replace with your actual dataset paths)
FN = r'C:\Users\markd\projects\Hamilton County Homes\kx-ohio-school-districts-SHP'
DISTRICT_NAMES = [
    'SYCAMORE CSD',
    'WYOMING CSD',
    'MADEIRA CSD',
    'MARIEMONT CSD',
    'LOVELAND CSD',
]
ODE_IRN_LIST = [
    '044867',
    '045146',
    '044289',
    '044313',
    '044271',
]
MAP_CENTER = {'lat': 39.2127649, 'lon': -84.3831728}
# NOTE(review): this binds the Slider component class, not a year value, and
# nothing in this cell reads it — confirm intent.
MIN_YEAR = dcc.Slider

# Marker color for each school district; districts missing from this mapping
# receive a fallback color where the map is drawn.
district_color_map = {
    'SYCAMORE CSD': ' rgba(132, 0, 50,1)',
    'WYOMING CSD': 'rgba(0, 38, 66,1)',
    'MADEIRA CSD': 'rgba(0, 187, 249,1)',
    'MARIEMONT CSD': 'rgba(0, 245, 212,1)',
    'LOVELAND CSD': 'rgba(175, 43, 191,1)',
    # Add more districts and colors as needed
}


# `df` is an alias of `homes` (same object, not a copy)
df = homes
# One row per (school_district, year): median sale amount and number of parcels
df2 = homes.pivot_table(
    index=['school_district', 'year'],
    values=['amount', 'parcel_number'],
    aggfunc={'amount': 'median', 'parcel_number': 'count'},
).reset_index()

# Initialize the app
app = dash.Dash(__name__)

# Bounds of the sale-price filter. The slider's maximum now matches the upper
# end of its initial selection; previously the maximum was 300000 while the
# initial value reached 3000000, i.e. outside the slider's own range.
_SALE_PRICE_MIN = 100000
_SALE_PRICE_MAX = 3000000
_SALE_PRICE_STEP = 25000

# Layout: page title, sale-price range slider, then the map, stacked vertically
app.layout = html.Div([
    html.H2("2009 to 2023 Cincinnati Home Sales", style={'text-align': 'center', 'color': '#FFFFFF', 'align': 'center'}),
    html.Div([
        html.Label("Choose the range of home sale amount:", style={'color': '#FFFFFF'}),
        # `count` is intentionally not passed: it controls how many ranges the
        # slider renders and was previously set to the step size by mistake.
        dcc.RangeSlider(
            _SALE_PRICE_MIN, _SALE_PRICE_MAX, _SALE_PRICE_STEP,
            value=[_SALE_PRICE_MIN, _SALE_PRICE_MAX],
            id='sale-price-slider',
        )
    ], style={'display': 'inline-block', 'width': '75%', 'text-align': 'left'}),  # Slider row

    html.Div([
        dcc.Graph(id='map', style={'height': '400px'}),
    ], style={'display': 'inline-block', 'width': '100%'})
], style={'display': 'inline-block', 'width': '100%', 'text-align': 'center'})

@app.callback(
    Output('map', 'figure'),
    [Input('sale-price-slider', 'value')]
)
def update_map(value):
    """Redraw the sales map for the price range chosen on the slider.

    Args:
        value: Two-item sequence ``[low, high]`` from the range slider; both
            bounds are inclusive.

    Returns:
        A Plotly figure with one marker per sale whose ``amount`` lies in the
        range, colored by school district.
    """
    low, high = value[0], value[1]
    in_price_range = (df['amount'] >= low) & (df['amount'] <= high)
    filtered_df = df[in_price_range]

    # Districts without an entry in the color map fall back to a pale blue
    marker_colors = (
        filtered_df.loc[:, 'school_district']
        .map(district_color_map)
        .fillna('rgba(126, 232, 250, 0.25)')
    )

    # One customdata row per marker: amount, finished sq ft, year, address
    hover_columns = (
        filtered_df['amount'],
        filtered_df['finsqft'],
        filtered_df['year'],
        filtered_df['address'],
    )
    hover_lines = [
        'amount: %{customdata[0]}',
        'Square Ft: %{customdata[1]}',
        'year Sold: %{customdata[2]}',
        'address: %{customdata[3]}',
    ]

    sales_trace = go.Scattermapbox(
        lat=filtered_df['latitude'],
        lon=filtered_df['longitude'],
        text=filtered_df['school_district'],
        customdata=np.stack(hover_columns, axis=-1),
        mode='markers',
        marker=dict(size=10, color=marker_colors),
        opacity=0.5,
        showlegend=False,
        hovertemplate='<br>'.join(hover_lines),
    )

    map_figure = go.Figure(sales_trace)
    map_figure.update_layout(
        mapbox_style="open-street-map",
        mapbox_center=MAP_CENTER,
        mapbox_zoom=10,
        margin=dict(r=0, t=0, l=0, b=0),
        title="Map of Home Sales",
    )
    return map_figure

def create_bounding_box(homes, district_color_map):
    """Build one convex-hull outline trace per (year, school district) pair.

    Args:
        homes: Table with 'year', 'school_district', 'latitude' and
            'longitude' columns; the coordinates must be castable to float.
        district_color_map: Mapping of district name to a line color string.
            Districts missing from it use a pale-blue fallback.

    Returns:
        A list of ``go.Scattermapbox`` traces. A pair with fewer than two
        sales contributes no trace; a message is printed for it instead.
    """
    fallback_color = 'rgba(126, 232, 250, 0.25)'
    traces = []  # List to hold all plot traces

    for year in homes['year'].unique():
        filtered_homes = homes[homes['year'] == year]

        for district in filtered_homes['school_district'].unique():
            filtered_districts = filtered_homes[filtered_homes.school_district == district]
            lat_list = filtered_districts.latitude.astype('float').tolist()
            lng_list = filtered_districts.longitude.astype('float').tolist()

            # Points are (x, y) = (longitude, latitude)
            points = list(zip(lng_list, lat_list))
            if len(points) <= 1:
                # Handle case with less than two points
                print(f"Not enough points to form a convex hull for District {district} in Year {year}")
                continue

            convex_hull = MultiPoint(points).convex_hull

            # Branch on geom_type rather than isinstance(..., Polygon):
            # Polygon is never imported in this notebook, so the isinstance
            # check raised NameError on the first hull.
            hull_kind = convex_hull.geom_type
            if hull_kind == 'Polygon':
                hull_x, hull_y = convex_hull.exterior.xy
                mode = 'lines'
                fill = 'toself'
            elif hull_kind == 'LineString':
                hull_x, hull_y = zip(*list(convex_hull.coords))
                mode = 'lines'
                fill = None
            else:  # Remaining case is handled as a single point
                hull_x, hull_y = [convex_hull.x], [convex_hull.y]
                mode = 'markers'
                fill = None

            # Prepare trace
            traces.append(go.Scattermapbox(
                lon=list(hull_x),
                lat=list(hull_y),
                mode=mode,
                showlegend=False,
                line=dict(width=3, color=district_color_map.get(district, fallback_color)),
                name=f'District {district} - {year}',
                fill=fill
            ))

    return traces

# Run the app
if __name__ == '__main__':
    # Prefer Dash.run: run_server is deprecated and no longer available in
    # recent Dash releases. Fall back to run_server only when `run` is missing.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)


In [57]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
import geopandas as gpd

# Constants (replace with your actual dataset paths)
FN = r'C:\Users\markd\projects\Hamilton County Homes\kx-ohio-school-districts-SHP'
DISTRICT_NAMES = [
    'SYCAMORE CSD',
    'WYOMING CSD',
    'MADEIRA CSD',
    'MARIEMONT CSD',
    'LOVELAND CSD',
]
ODE_IRN_LIST = [
    '044867',
    '045146',
    '044289',
    '044313',
    '044271',
]
MAP_CENTER = {'lat': 39.2127649, 'lon': -84.3831728}
# NOTE(review): this binds the Slider component class, not a year value, and
# nothing in this cell reads it — confirm intent.
MIN_YEAR = dcc.Slider

# Marker color for each school district; districts missing from this mapping
# receive a fallback color where the map is drawn.
district_color_map = {
    'SYCAMORE CSD': ' rgba(132, 0, 50,1)',
    'WYOMING CSD': 'rgba(0, 38, 66,1)',
    'MADEIRA CSD': 'rgba(0, 187, 249,1)',
    'MARIEMONT CSD': 'rgba(0, 245, 212,1)',
    'LOVELAND CSD': 'rgba(175, 43, 191,1)',
    # Add more districts and colors as needed
}


# `df` is an alias of `homes` (same object, not a copy)
df = homes
# One row per (school_district, year): median sale amount and number of parcels
df2 = homes.pivot_table(
    index=['school_district', 'year'],
    values=['amount', 'parcel_number'],
    aggfunc={'amount': 'median', 'parcel_number': 'count'},
).reset_index()

# Initialize the app
app = dash.Dash(__name__)


def _price_dropdown(component_id, default_price):
    """Return a price dropdown with the fixed list of selectable price points."""
    price_points = [100000, 150000, 200000, 250000, 3000000]
    return dcc.Dropdown(
        id=component_id,
        options=[{'label': f'${price}', 'value': price} for price in price_points],
        value=default_price,
        style={'width': '100%'},
    )


def _price_column(label_text, component_id, default_price):
    """Return a labelled price dropdown wrapped in a 45%-wide inline column."""
    return html.Div(
        [
            html.Label(label_text, style={'color': '#FFFFFF'}),
            _price_dropdown(component_id, default_price),
        ],
        style={'width': '45%', 'display': 'inline-block'},
    )


def _chart_section(heading, graph_id, graph_height):
    """Return a centered heading followed by an empty graph of the given height."""
    return html.Div([
        html.H2(heading, style={'text-align': 'center', 'color': '#FFFFFF'}),
        dcc.Graph(id=graph_id, style={'height': graph_height}),
    ])


# Minimum and maximum price pickers side by side, separated by a dash
price_range_row = html.Div(
    [
        _price_column("Minimum Price", 'low-price-dropdown', 100000),
        html.Span(
            ' - ',
            style={'padding': '0 10px', 'color': '#FFFFFF', 'display': 'inline-block', 'vertical-align': 'middle'},
        ),
        _price_column("Maximum Price", 'high-price-dropdown', 3000000),
    ],
    style={'display': 'flex', 'align-items': 'center', 'margin-bottom': '20px'},
)

# School district picker
district_dropdown = dcc.Dropdown(
    id='district-dropdown',
    options=[{'label': district, 'value': district} for district in DISTRICT_NAMES],
    value='SYCAMORE CSD',
    style={'width': '80%', 'margin-bottom': '20px'},
)

# Left column: all filters
filters_panel = html.Div(
    [
        html.H2("Filters", style={'color': '#FFFFFF'}),
        price_range_row,
        html.Label("Select School District:", style={'color': '#FFFFFF'}),
        district_dropdown,
    ],
    style={'width': '20%', 'padding': '20px', 'background-color': '#2C3E50'},
)

# Right column: map stacked above the bar chart
charts_panel = html.Div(
    [
        _chart_section("Map of Home Sales", 'map', '400px'),
        _chart_section("Count of Home Sales", 'bar-chart', '300px'),
    ],
    style={'width': '75%', 'padding': '20px', 'background-color': '#34495E'},
)

# Flexbox row holds the two columns; the outer div sets the page background
app.layout = html.Div(
    [html.Div([filters_panel, charts_panel], style={'display': 'flex'})],
    style={'background-color': '#1C2833', 'height': '100vh'},
)


@app.callback(
    Output('bar-chart', 'figure'),
    [Input('district-dropdown', 'value')]
)
def update_bar_chart(selected_district):
    """Draw the number of home sales per year for the selected district.

    Args:
        selected_district: District name from the dropdown, or None when the
            dropdown is cleared; None totals the counts of all districts.

    Returns:
        A Plotly bar figure with year on the x axis and sales count on the
        y axis. Clicking a bar emits clickData for the map callback.
    """
    if selected_district is None:
        # df2 holds one row per (district, year); add the per-district counts
        # so each year is drawn as a single bar instead of overlapping ones.
        filtered_df = df2.groupby('year', as_index=False)['parcel_number'].sum()
    else:
        filtered_df = df2[df2['school_district'] == selected_district]

    bar_chart = go.Figure(go.Bar(
        x=filtered_df['year'],
        # 'parcel_number' is the per-year sale count in df2. The chart is
        # labelled as a count, but 'amount' (the median price) was plotted.
        y=filtered_df['parcel_number'],
        marker=dict(color='rgba(255, 100, 102, 0.7)', line=dict(color='rgba(255, 100, 102, 1.0)', width=1.5))
    ))

    # Single layout call. Two consecutive calls were made before and the
    # second overrode the first one's title and axis titles; the values kept
    # here are the ones that were effectively displayed.
    bar_chart.update_layout(
        title="Click on a bar to see map",
        xaxis_title="Year",
        yaxis_title="Sales Count",
        margin={"r": 0, "t": 40, "l": 40, "b": 40},
        clickmode='event+select'  # Enables click events
    )
    return bar_chart

def _build_sales_map(filtered_df, title):
    """Return a map figure with one district-colored marker per row of `filtered_df`."""
    # Districts without an entry in the color map fall back to a pale blue
    marker_colors = filtered_df.loc[:, 'school_district'].map(district_color_map).fillna('rgba(126, 232, 250, 0.25)')
    map_figure = go.Figure(go.Scattermapbox(
        lat=filtered_df['latitude'],
        lon=filtered_df['longitude'],
        text=filtered_df['school_district'],
        # One customdata row per marker: amount, finished sq ft, year, address
        customdata=np.stack(
            (filtered_df['amount'], filtered_df['finsqft'], filtered_df['year'], filtered_df['address']),
            axis=-1
        ),
        mode='markers',
        marker={
            "size": 10,
            'color': marker_colors,  # Set marker colors based on the district
        },
        opacity=0.5,
        showlegend=False,
        hovertemplate='<br>'.join([
            'amount: %{customdata[0]}',
            'Square Ft: %{customdata[1]}',
            'year Sold: %{customdata[2]}',
            'address: %{customdata[3]}'
        ])
    ))
    map_figure.update_layout(
        mapbox_style="open-street-map",
        mapbox_center=MAP_CENTER,  # Adjust map center based on your data
        mapbox_zoom=10,
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        title=title
    )
    return map_figure


@app.callback(
    Output('map', 'figure'),
    [Input('bar-chart', 'clickData'),
     Input('district-dropdown', 'value'),
     Input('low-price-dropdown', 'value'),
     Input('high-price-dropdown', 'value')]
)
def update_map(clickData, selected_district, selected_min, selected_max):
    """Redraw the sales map for the current filters and clicked bar, if any.

    Args:
        clickData: Click payload from the bar chart, or None before any click.
            When present, only sales from the clicked year are shown.
        selected_district: District name, or None when the dropdown is cleared.
        selected_min: Inclusive lower price bound, or None when cleared.
        selected_max: Inclusive upper price bound, or None when cleared.

    Returns:
        A Plotly map figure of the matching sales.

    A cleared dropdown (None) disables that filter. Previously a None price
    made the comparison raise, and a None district produced an empty map
    while the bar chart treated it as "all districts".
    """
    keep = pd.Series(True, index=df.index)
    if selected_district is not None:
        keep &= df['school_district'] == selected_district
    if selected_min is not None:
        keep &= df['amount'] >= selected_min
    if selected_max is not None:
        keep &= df['amount'] <= selected_max

    if clickData is None:
        title = "Map of Home Sales"
    else:
        # Get the year from the clicked bar; the 'year' column is compared
        # against its string form.
        clicked_year = clickData['points'][0]['x']
        keep &= df['year'] == str(clicked_year)
        title = f"Map of Home Sales for {clicked_year}"

    return _build_sales_map(df[keep], title)

# Run the app
if __name__ == '__main__':
    # Prefer Dash.run: run_server is deprecated and no longer available in
    # recent Dash releases. Fall back to run_server only when `run` is missing.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)


In [17]:
# How many rows share each formatted address (most frequent first)
homes['formatted_address'].value_counts()

formatted_address
1110 Springfield Pike, Wyoming, OH 45215, USA               5
9830 Union Cemetery Rd, Loveland, OH 45140, USA             3
4200 Grove Ave, Cincinnati, OH 45227, USA                   3
9084 Blue Ash Rd, Cincinnati, OH 45242, USA                 3
4514 Victor Ave, Blue Ash, OH 45242, USA                    3
                                                           ..
15 Evergreen Cir, Wyoming, OH 45215, USA                    1
10928 Brookgreen Ct, Cincinnati, OH 45242, USA              1
8065 Hetz Dr, Cincinnati, OH 45242, USA                     1
9755 Bunker Hill Ln, Montgomery, OH 45242, USA              1
830 Carrington Pl Apartment 208, Loveland, OH 45140, USA    1
Name: count, Length: 1117, dtype: int64

In [18]:
# import sweetviz as sv

# my_report = sv.analyze([homes, 'Name'])
# my_report.show_html() # Default arguments will generate to "SWEETVIZ_REPORT.html"


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

Done! Use 'show' commands to display/save.   |██████████| [100%]   00:01 -> (00:00 left)


Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


In [53]:
# Store finished square footage as 64-bit integers
homes['finsqft'] = homes['finsqft'].astype('int64')

In [87]:
import hdbscan

# Build the clustering input inside this cell. `coords_radians` used to be
# read before it was assigned here, and `coords` is only defined by a cell
# further down, so the cell failed on a fresh kernel.
coords = homes[['latitude', 'longitude']].astype('float')
coords_radians = np.radians(coords)

# Cluster the (latitude, longitude) pairs, given in radians, with the
# haversine metric
hdbscan_clusterer = hdbscan.HDBSCAN(min_cluster_size=5, metric='haversine')

# Fit the HDBSCAN model
hdbscan_labels = hdbscan_clusterer.fit_predict(coords_radians)

# Assign HDBSCAN cluster labels back to the rows they were computed from
homes.loc[coords_radians.index, 'cluster'] = hdbscan_labels

# Inspect the resulting clusters
print(homes[['latitude', 'longitude', 'cluster']].dropna())

       latitude  longitude  cluster
0     39.164736 -84.315106        0
1     39.146012 -84.393614        2
2     39.143411 -84.390158        2
3     39.146337 -84.390794        2
4     39.145544 -84.388793        2
...         ...        ...      ...
1265  39.286614 -84.301031       22
1266  39.286614 -84.301031       22
1267  39.286614 -84.301031       22
1268  39.286614 -84.301031       22
1269  39.285034 -84.300550       -1

[1270 rows x 3 columns]


In [73]:
# Cast both coordinate columns to floating point
for coordinate_column in ('latitude', 'longitude'):
    homes[coordinate_column] = homes[coordinate_column].astype('float')

In [74]:
# Coordinate pairs in degrees, and the same values converted to radians
coordinate_columns = ['latitude', 'longitude']
coords = homes[coordinate_columns]
coords_radians = np.radians(coords)

In [97]:
import folium
from scipy.spatial import ConvexHull
import numpy as np

# Create a map centered on an approximate location (change lat/lon as needed)
map_center = [39.16, -84.39]  # Adjust to your data's center
mymap = folium.Map(location=map_center, zoom_start=12)

# Keep only rows with a non-negative cluster label (negative labels and rows
# without a label are left off the map)
clustered_data = homes[homes['cluster'] >= 0]

folium.GeoJson(geodf, name="School Districts").add_to(mymap)

# Draw each cluster as a convex-hull polygon, or as red points when a hull
# cannot be built
for cluster_label in clustered_data['cluster'].unique():
    cluster_points = clustered_data[clustered_data['cluster'] == cluster_label][['latitude', 'longitude']].values

    hull = None
    if len(cluster_points) > 2:  # Convex hull requires at least 3 points
        try:
            hull = ConvexHull(cluster_points)
        except Exception:
            # Best-effort fallback (e.g. collinear points). Catching Exception
            # instead of a bare except keeps KeyboardInterrupt/SystemExit
            # from being swallowed.
            hull = None

    if hull is not None:
        hull_points = cluster_points[hull.vertices]  # Get the points that form the convex hull

        # Convert to list of (lat, lon) tuples for folium
        hull_coords = [(point[0], point[1]) for point in hull_points]

        # Create a polygon and add it to the map
        folium.Polygon(hull_coords, color='blue', fill=True, fill_opacity=0.4).add_to(mymap)
    else:
        # No hull available: plot just the points
        for point in cluster_points:
            folium.CircleMarker(location=(point[0], point[1]), radius=5, color='red').add_to(mymap)

# One marker per clustered sale. Iterating the already filtered frame's columns
# avoids building a Series for every row of `homes`.
for latitude, longitude, label in zip(
    clustered_data['latitude'], clustered_data['longitude'], clustered_data['cluster']
):
    folium.CircleMarker(
        location=(latitude, longitude),
        radius=5,
        color='blue' if label == 0 else 'green',  # Cluster 0 in blue, every other cluster in green
        fill=True,
        fill_opacity=0.7
    ).add_to(mymap)

# Write the map to an HTML file in the current working directory
mymap.save('clustered_map.html')