In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import geopandas as gpd
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

In [4]:
# Set style for better visualizations
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# Load the violations dataset
print("Loading data...")
violations_df = pd.read_csv('data/PWV_processed.csv')

# Parse the status timestamp. errors='coerce' turns unparseable values into NaT
# and those rows are deliberately kept, so everything downstream has to
# tolerate a missing date.
violations_df['status_dttm'] = pd.to_datetime(violations_df['status_dttm'], errors='coerce')

# Display basic information about the dataset
print("\nDataset Information:")
print("Shape:", violations_df.shape)
print("\nColumns:", violations_df.columns.tolist())
print("\nMissing Values:")
print(violations_df.isnull().sum())

# Basic statistics
print("\nBasic Statistics:")
print(violations_df.describe())

# Create a base map centered on Boston
print("\nCreating base map...")
boston_map = folium.Map(location=[42.3601, -71.0589], zoom_start=12)

# A marker needs both coordinates; filter first so the sample only contains
# rows that can actually be drawn.
mappable_df = violations_df.dropna(subset=['latitude', 'longitude'])

# Add violation points to the map (sampling for better performance)
sample_size = min(1000, len(mappable_df))
sample_df = mappable_df.sample(n=sample_size, random_state=42)

print(f"Adding {sample_size} violation points to the map...")
for _, row in sample_df.iterrows():
    # NaT has no usable strftime (it raises ValueError), so fall back to a label.
    status_date = row['status_dttm']
    date_label = status_date.strftime('%Y-%m-%d') if pd.notna(status_date) else 'Unknown'
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=5,
        color='red',
        fill=True,
        popup=f"Violation: {row['description']}<br>Status: {row['status']}<br>Date: {date_label}",
    ).add_to(boston_map)

# Show the map
boston_map


Loading data...

Dataset Information:
Shape: (808691, 15)

Columns: ['case_no', 'status_dttm', 'status', 'code', 'value', 'description', 'violation_stno', 'violation_street', 'violation_city', 'violation_zip', 'ward', 'contact_addr1', 'sam_id', 'latitude', 'longitude']

Missing Values:
case_no             0
status_dttm         0
status              0
code                0
value               0
description         0
violation_stno      0
violation_street    0
violation_city      0
violation_zip       0
ward                0
contact_addr1       0
sam_id              0
latitude            0
longitude           0
dtype: int64

Basic Statistics:
                         status_dttm           code          value  \
count                         808691  808691.000000  808691.000000   
mean   2017-11-19 08:28:17.056338688       6.169793      58.491099   
min              1999-12-31 19:35:00       1.000000       0.000000   
25%              2014-06-06 08:39:30       1.000000      25.000000   
5

In [5]:
# Create a base map centered on Boston
print("\nCreating heatmap...")
boston_heatmap = folium.Map(location=[42.3601, -71.0589], zoom_start=12)

# Prepare heatmap data
# Group violations by location and count them; the count becomes the weight
# of each heatmap point.
heat_data = violations_df.groupby(['latitude', 'longitude']).size().reset_index(name='count')

# Build [lat, lon, weight] triples in bulk instead of row by row.
# Values that cannot be read as numbers become NaN and are dropped, which
# mirrors the skip-on-error behaviour of a per-row float() conversion.
heat_points = (
    heat_data[['latitude', 'longitude', 'count']]
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
    .astype(float)
    .to_numpy()
    .tolist()
)

print(f"Created {len(heat_points)} heatmap points")

# Add heatmap layer.
# `max_val` is intentionally not passed: folium warns that it is no longer
# necessary because the largest intensity is computed automatically, and
# computing it by hand with max() fails on an empty point list.
plugins.HeatMap(
    heat_points,
    radius=15,  # Radius of each point in pixels
    blur=10,    # Blur factor
    max_zoom=13,  # Maximum zoom level
    gradient={"0.2": 'blue', "0.4": 'lime', "0.6": 'yellow', "0.8": 'red', "1.0": 'red'},  # Color gradient
    min_opacity=0.3,  # Minimum opacity
).add_to(boston_heatmap)

# Add a layer control
folium.LayerControl().add_to(boston_heatmap)

# Show the map
boston_heatmap


Creating heatmap...
Created 73230 heatmap points


  plugins.HeatMap(
