## heatmaps


In [2]:
import pandas as pd
import folium
from folium.plugins import HeatMap, MarkerCluster
import math

# ==============================================
# 1. LOAD DATA
# ==============================================
# Read the training CSV into a DataFrame.
csv_path = "/Users/malhar.inamdar/Desktop/cummins/train.csv"
train_data = pd.read_csv(csv_path)

# Quick summary of the dataset and of the codes found in its 'Neighborhood'
# column (these are the codes the coordinate table below is keyed by).
neighborhood_col = train_data['Neighborhood']
print(f"Total houses in dataset: {len(train_data)}")
print(f"Number of neighborhoods: {neighborhood_col.nunique()}")
print(f"Neighborhoods: {neighborhood_col.unique()}")
# ==============================================
# 2. DEFINE NEIGHBORHOOD COORDINATES
# ==============================================
# Approximate (latitude, longitude) for each Ames neighborhood code.
# Keys are the codes that are matched against train_data['Neighborhood'];
# each value is a (lat, lng) tuple.
#
# We used folium to define approximate latitudes and longitudes in the region
# of Ames, Iowa.
# NOTE(review): the values are approximations only — confirm against a real
# neighborhood map before relying on them for anything beyond visualisation.

neighborhood_coords = {
    'NAmes': (42.0576, -93.6154),
    'CollgCr': (42.0211, -93.6487),
    'OldTown': (42.0202, -93.6152),
    'Edwards': (42.0238, -93.6481),
    'Somerst': (42.0618, -93.6507),
    'NridgHt': (42.0625, -93.6283),
    'Gilbert': (42.1078, -93.6343),
    'NWAmes': (42.0528, -93.6544),
    'Sawyer': (42.0371, -93.6192),
    'Crawfor': (42.0173, -93.6123),
    'IDOTRR': (42.0259, -93.5910),
    'NoRidge': (42.0537, -93.6271),
    'BrkSide': (42.0289, -93.6194),
    'SawyerW': (42.0397, -93.6286),
    'Blmngtn': (42.0641, -93.6188),
    'Mitchel': (42.0299, -93.6327),
    'Timber': (42.0384, -93.6318),
    'StoneBr': (42.0548, -93.6384),
    'SWISU': (42.0170, -93.6522),
    'ClearCr': (42.0336, -93.6599),
    'NPkVill': (42.0558, -93.6188),
    'BrDale': (42.0522, -93.6287),
    'MeadowV': (42.0143, -93.6251),
    'Veenker': (42.0457, -93.6464),
    'Blueste': (42.0099, -93.6390),
}

# ==============================================
# 3. HELPER FUNCTION: GENERATE CIRCLE POINTS
# ==============================================
def generate_circle_points(center_lat, center_lng, count, radius_factor=0.003):
    """
    Spread 'count' coordinates around (center_lat, center_lng) so that
    markers placed on them do not sit on the exact same spot.

    Up to 30 points are laid out on a single ring of radius 'radius_factor'.
    Larger counts are split over two concentric rings: an inner ring at
    0.6 * radius_factor holding min(20, count // 2) points, followed by an
    outer ring at 1.2 * radius_factor holding the remainder.

    Returns a list of (lat, lng) tuples; for the two-ring case the inner
    ring's points come first.
    """
    def ring(size, radius):
        # 'size' points evenly spaced by angle, starting at angle 0.
        placed = []
        for step in range(size):
            theta = 2 * math.pi * step / size
            placed.append((
                center_lat + radius * math.cos(theta),
                center_lng + radius * math.sin(theta),
            ))
        return placed

    if count <= 30:
        return ring(count, radius_factor)

    inner_size = min(20, count // 2)
    outer_size = count - inner_size
    return ring(inner_size, radius_factor * 0.6) + ring(outer_size, radius_factor * 1.2)

# ==============================================
# 4. PREPARE HEAT DATA & HOUSE POINTS
# ==============================================
heat_data = []           # [lat, lng, weight] rows handed to the HeatMap layer
all_house_points = []    # one dict per house, read later to build marker popups
max_price = train_data['SalePrice'].max()

# Popup field name -> train_data column it is read from.
popup_columns = {
    'Neighborhood': 'Neighborhood',
    'SalePrice': 'SalePrice',
    'YearBuilt': 'YearBuilt',
    'Bedrooms': 'BedroomAbvGr',
    'FullBaths': 'FullBath',
    'LivingArea': 'GrLivArea',
}

# Walk the coordinate table and lay out that neighborhood's houses around
# its centre point.
for nbhd_code, (center_lat, center_lng) in neighborhood_coords.items():
    in_neighborhood = train_data['Neighborhood'] == nbhd_code
    nbhd_houses = train_data[in_neighborhood].copy()
    count = len(nbhd_houses)
    if not count:
        # No rows carry this code; nothing to place.
        continue

    # One heat point on the centre itself, weighted by house count
    # (count / 100 capped at 1.0, then doubled).
    intensity = min(1.0, count / 100)
    heat_data.append([center_lat, center_lng, intensity * 2])

    # One generated position per house in this neighborhood.
    circle_points = generate_circle_points(center_lat, center_lng, count)

    for (lat, lng), (idx, row) in zip(circle_points, nbhd_houses.iterrows()):
        # Per-house heat point, weighted by price relative to the maximum.
        heat_data.append([lat, lng, row['SalePrice'] / max_price])

        # Record the position plus the columns shown in the marker popup.
        house_info = {'lat': lat, 'lng': lng}
        for field, column in popup_columns.items():
            house_info[field] = row[column]
        all_house_points.append(house_info)

# ==============================================
# 5. CREATE THE CLUSTERED MAP WITH MARKERCLUSTER
# ==============================================
# Base map: OpenStreetMap tiles, opened at zoom level 13 on a fixed centre.
map_center = [42.034534, -93.620369]
ames_map2 = folium.Map(location=map_center, zoom_start=13, tiles="OpenStreetMap")

# Heat layer built from heat_data. The colour stops are declared with numeric
# keys and converted to string keys before being passed to HeatMap.
raw_gradient = dict(zip(
    (0.2, 0.4, 0.6, 0.8, 1.0),
    ('blue', 'purple', 'orange', 'red', 'darkred'),
))
gradient_dict = {str(stop): colour for stop, colour in raw_gradient.items()}

heat_layer = HeatMap(
    heat_data,
    radius=20,
    max_zoom=13,
    blur=15,
    gradient=gradient_dict,
)
heat_layer.add_to(ames_map2)

# Cluster layer that will hold one marker per house.
marker_cluster = MarkerCluster()
marker_cluster.add_to(ames_map2)

# HTML/CSS used as the icon of every house marker (Font Awesome home glyph
# inside a round blue badge).
home_icon_html = '''
<div style="font-size: 12pt; color: white; background-color: #4287f5;
            border-radius: 50%; width: 25px; height: 25px; line-height: 25px;
            text-align: center;">
    <i class="fa fa-home"></i>
</div>
'''


def _house_popup_html(house):
    """Return the popup HTML for one entry of all_house_points."""
    return (
        f"<b>Neighborhood:</b> {house['Neighborhood']}<br>"
        f"<b>Price:</b> ${house['SalePrice']:,.0f}<br>"
        f"<b>Year Built:</b> {house['YearBuilt']}<br>"
        f"<b>Bedrooms:</b> {house['Bedrooms']}<br>"
        f"<b>Full Baths:</b> {house['FullBaths']}<br>"
        f"<b>Living Area:</b> {house['LivingArea']} sq ft"
    )


# Place every recorded house on the cluster layer at its generated position.
for house in all_house_points:
    popup_text = _house_popup_html(house)
    house_marker = folium.Marker(
        location=[house['lat'], house['lng']],
        icon=folium.DivIcon(html=home_icon_html),
        popup=popup_text,
    )
    house_marker.add_to(marker_cluster)

# Add a summary circle and a house-count label for every neighborhood that
# has at least 20 houses in train_data.
for nbhd_code, (lat, lng) in neighborhood_coords.items():
    # Filter once and reuse the price column for both the count and the mean.
    nbhd_prices = train_data.loc[train_data['Neighborhood'] == nbhd_code, 'SalePrice']
    count = len(nbhd_prices)
    if count < 20:
        continue

    avg_price = nbhd_prices.mean()
    summary_html = f"{nbhd_code}<br>Count: {count}<br>Avg Price: ${avg_price:,.0f}"

    # Filled circle: orange for more than 100 houses, yellow otherwise.
    folium.CircleMarker(
        location=(lat, lng),
        radius=20,
        fill=True,
        fill_opacity=0.7,
        color='black',
        fill_color='orange' if count > 100 else 'yellow',
        weight=1,
        popup=summary_html
    ).add_to(ames_map2)

    # Count label centred on the circle. Leaflet draws markers above vector
    # overlays, so this 40x40 icon covers the 20px-radius circle and receives
    # the clicks aimed at it. The summary popup is therefore attached here as
    # well; otherwise it could not be opened.
    folium.Marker(
        [lat, lng],
        icon=folium.DivIcon(
            icon_size=(40, 40),
            icon_anchor=(20, 20),
            html=f'<div style="font-size: 10pt; font-weight: bold; text-align: center;">{count}</div>'
        ),
        popup=summary_html
    ).add_to(ames_map2)

# ==============================================
# 6. SAVE THE CLUSTERED MAP
# ==============================================
# Keep the output file name in one place so the confirmation message always
# names the file that was actually written (the message used to report a
# different name from the one passed to save()).
output_html = "ames_housing_clustered1_map.html"
ames_map2.save(output_html)
print(f"Clustered map saved as '{output_html}'")
# Bare expression as the last statement so the notebook renders the map inline.
ames_map2


Total houses in dataset: 1460
Number of neighborhoods: 25
Neighborhoods: ['CollgCr' 'Veenker' 'Crawfor' 'NoRidge' 'Mitchel' 'Somerst' 'NWAmes'
 'OldTown' 'BrkSide' 'Sawyer' 'NridgHt' 'NAmes' 'SawyerW' 'IDOTRR'
 'MeadowV' 'Edwards' 'Timber' 'Gilbert' 'StoneBr' 'ClearCr' 'NPkVill'
 'Blmngtn' 'BrDale' 'SWISU' 'Blueste']
Clustered map saved as 'ames_housing_clustered_map.html'
