In [1]:
import sys
import pandas as pd
import joblib

In [2]:
# Opposite corners of the grid extent handed to find_cells, each written as
# [latitude, longitude]: min_in holds the smaller values, max_in the larger.
min_in = [
    37.700090787,
    -122.517681208,
]
max_in = [
    37.82999,
    -122.33257462,
]

In [3]:
# Put the project's helper directories on the import path (relative to the
# notebook's working directory) so the modules below can be imported.
sys.path.extend(['../usage', '../data_preprocessing'])

from prepare_data import match_weather_data
from emt_data import get_emt_data
from weather_data import get_weather_data
from grid import find_cells

In [4]:
# Read the raw EMT records from the 2007 subset parquet file.
emt_data = pd.read_parquet('../data/2007_subset_raw_emt_data.parquet')

In [5]:
# Fetch the weather observations that get matched onto the EMT records next.
weather_data = get_weather_data()

In [6]:
# Attach the weather observations to the EMT records, then discard the
# columns that are not part of the model's feature list.
emt_weather_data = match_weather_data(
    emt_data=emt_data,
    weather_data=weather_data,
).drop(columns=['snwd_in', 'call_number', 'incident_number'])

In [7]:
# Preview the merged frame; pandas truncates the display to the head and tail rows.
emt_weather_data

Unnamed: 0,year,month,day,hour,longitude,latitude,fmax,fmin,prcp_in,snow_in
0,2007,9,20,11,-122.401490,37.785828,68.0,52.0,0.00,0.0
1,2007,9,15,20,-122.403710,37.784046,67.0,57.0,0.00,0.0
2,2007,9,29,11,-122.400215,37.750750,69.0,52.0,0.00,0.0
3,2007,9,17,23,-122.415695,37.782585,68.0,56.0,0.00,0.0
4,2007,9,18,17,-122.405830,37.785744,65.0,56.0,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...
235851,2007,4,17,18,-122.404366,37.753120,57.0,48.0,0.00,0.0
235852,2007,4,20,17,-122.392840,37.783690,59.0,46.0,0.10,0.0
235853,2007,4,22,13,-122.425766,37.758230,60.0,50.0,0.43,0.0
235854,2007,4,16,14,-122.395310,37.749805,55.0,51.0,0.00,0.0


In [8]:
# Assign every record to a grid cell (adds the `cell` column shown below);
# find_cells also returns `lats` and `lons` alongside the frame.
# NOTE(review): the two 32s are presumably the grid dimensions — confirm the
# argument order against grid.find_cells.
emt_weather_data, lats, lons = find_cells(emt_weather_data, 32, 32, min_in, max_in)
emt_weather_data

Unnamed: 0,year,month,day,hour,longitude,latitude,fmax,fmin,prcp_in,snow_in,cell
0,2007,9,20,11,-122.401490,37.785828,68.0,52.0,0.00,0.0,693
1,2007,9,15,20,-122.403710,37.784046,67.0,57.0,0.00,0.0,660
2,2007,9,29,11,-122.400215,37.750750,69.0,52.0,0.00,0.0,405
3,2007,9,17,23,-122.415695,37.782585,68.0,56.0,0.00,0.0,658
4,2007,9,18,17,-122.405830,37.785744,65.0,56.0,0.00,0.0,692
...,...,...,...,...,...,...,...,...,...,...,...
235851,2007,4,17,18,-122.404366,37.753120,57.0,48.0,0.00,0.0,436
235852,2007,4,20,17,-122.392840,37.783690,59.0,46.0,0.10,0.0,662
235853,2007,4,22,13,-122.425766,37.758230,60.0,50.0,0.43,0.0,464
235854,2007,4,16,14,-122.395310,37.749805,55.0,51.0,0.00,0.0,406


In [9]:
# Columns fed to the model, in the order they are passed to predict().
features_order = [
    'cell',
    'year',
    'month',
    'day',
    'hour',
    'fmax',
    'fmin',
    'prcp_in',
    'snow_in',
]

In [10]:
# Keep only the model inputs and collapse to a single row per
# (cell, year, month, day, hour) combination; the first occurrence wins.
emt_weather_data = (
    emt_weather_data
    .loc[:, features_order]
    .drop_duplicates(subset=['cell', 'year', 'month', 'day', 'hour'])
)

In [11]:
def load_model(model_path: str):
    """Deserialize the saved XGBoost model stored at ``model_path``.

    Progress messages are printed to stdout. When the file does not exist,
    an error line is printed and the ``FileNotFoundError`` is re-raised so
    the caller still sees the failure.
    """
    print(f"Loading model from {model_path}...")
    try:
        loaded = joblib.load(model_path)
    except FileNotFoundError:
        print(f"Error: Model file not found at '{model_path}'.")
        raise
    print("Model loaded successfully.")
    return loaded

In [12]:
# Load the v2 predictor artifact from the project's model directory.
model = load_model('../model/predictor_v2.joblib')

Loading model from ../model/predictor_v2.joblib...
Model loaded successfully.


In [13]:
# Score every row and floor the output at zero, since a negative prediction
# is not meaningful here; then store the result next to the features.
predictions = model.predict(emt_weather_data[features_order]).clip(0)
emt_weather_data['prediction'] = predictions

In [14]:
# Preview the feature rows together with their new `prediction` column.
emt_weather_data

Unnamed: 0,cell,year,month,day,hour,fmax,fmin,prcp_in,snow_in,prediction
0,693,2007,9,20,11,68.0,52.0,0.0,0.0,0.000000
1,660,2007,9,15,20,67.0,57.0,0.0,0.0,0.000000
2,405,2007,9,29,11,69.0,52.0,0.0,0.0,0.001026
3,658,2007,9,17,23,68.0,56.0,0.0,0.0,0.000000
4,692,2007,9,18,17,65.0,56.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...
235801,755,2007,4,18,17,55.0,45.0,0.0,0.0,0.001738
235806,653,2007,4,20,7,59.0,46.0,0.1,0.0,0.000000
235827,497,2007,4,17,7,57.0,48.0,0.0,0.0,0.000000
235830,268,2007,4,15,20,65.0,46.0,0.0,0.0,0.000000


In [28]:
import pandas as pd
import folium
from io import StringIO
from branca.colormap import linear

# Import your grid functions
from grid import create_grid_axes

# --- Step 1: Helper function to get cell boundaries ---
def get_cell_bounds(cell_id, lats, lons):
    """
    Return the corner coordinates of the grid cell identified by ``cell_id``.

    ``lats`` and ``lons`` are the grid edge sequences; the number of cells
    per row is ``len(lons) - 1``. The result has the shape Folium.Rectangle
    expects: ``[[south_lat, west_lon], [north_lat, east_lon]]``.
    """
    cells_per_row = len(lons) - 1

    # Cell IDs are 1-based and advance along longitude first, so shifting to
    # zero-based and splitting by the row width yields (row, column).
    row, col = divmod(cell_id - 1, cells_per_row)

    south_west = [lats[row], lons[col]]
    north_east = [lats[row + 1], lons[col + 1]]
    return [south_west, north_east]

# --- Step 2: Reusable Map Generation Function ---
def create_map(map_data, title, filename, lats=None, lons=None):
    """
    Build a Folium map with one shaded rectangle per grid cell and save it.

    Args:
        map_data: DataFrame with a 'cell' column (1-based cell IDs) and a
            'prediction' column; one rectangle is drawn per row.
        title: Heading rendered above the map.
        filename: Path the map is written to as HTML. Pass None or '' to
            skip saving.
        lats, lons: Optional grid-edge sequences in the form expected by
            get_cell_bounds. When either is omitted, the axes are rebuilt
            with create_grid_axes from the notebook-level min_in / max_in
            corners, i.e. the same corners that were passed to find_cells.

    Returns:
        The folium.Map, so a notebook cell can display it inline.
    """
    print(f"Generating map: '{title}'...")

    # Grid parameters
    num_lon_cells = 32
    num_lat_cells = 32

    if lats is None or lons is None:
        # The rectangles must sit on the grid that produced the cell IDs.
        # A separately rounded bounding box shifts every cell away from its
        # true position, so reuse the exact corners given to find_cells.
        min_lat, min_lon = min_in
        max_lat, max_lon = max_in
        lats, lons = create_grid_axes(min_lat, max_lat, min_lon, max_lon, num_lon_cells, num_lat_cells)

    # Create the base map
    sf_map = folium.Map(location=[37.76, -122.45], zoom_start=12, tiles="CartoDB positron")
    title_html = f'<h3 align="center" style="font-size:16px"><b>{title}</b></h3>'
    sf_map.get_root().html.add_child(folium.Element(title_html))

    # An empty frame would give NaN bounds for the colour scale, so the
    # legend and rectangles are only built when there is something to draw.
    if not map_data.empty:
        # Create a colormap to translate prediction values to colors
        min_pred = map_data['prediction'].min()
        max_pred = map_data['prediction'].max()
        colormap = linear.YlOrRd_09.scale(min_pred, max_pred)
        colormap.caption = 'Predicted Number of Emergencies'
        sf_map.add_child(colormap)

        # Draw one rectangle per row
        for cell, prediction in zip(map_data['cell'], map_data['prediction']):
            cell_id = int(cell)
            shade = colormap(prediction)

            folium.Rectangle(
                bounds=get_cell_bounds(cell_id, lats, lons),
                color=shade,
                fill=True,
                fill_color=shade,
                fill_opacity=0.7,
                popup=f"<b>Cell ID:</b> {cell_id}<br><b>Prediction:</b> {prediction:.3f}"
            ).add_to(sf_map)

    # Persist the map; previously `filename` was accepted but never used.
    if filename:
        sf_map.save(filename)

    return sf_map

df = emt_weather_data
# Coerce snow depth to numeric, treating anything unparsable as 0.
df['snow_in'] = pd.to_numeric(df['snow_in'], errors='coerce').fillna(0)

# TARGET_YEAR is interpolated into the title below, so it must be defined in
# this cell; otherwise a fresh-kernel run stops with a NameError.
TARGET_YEAR = 2007
TARGET_MONTH = 5

# Total predicted emergencies per cell for the selected year and month.
# NOTE(review): the title still says "Yearly Average" although the data is a
# one-month sum — confirm which aggregation is intended and align the two.
year_df = df[(df['year'] == TARGET_YEAR) & (df['month'] == TARGET_MONTH)]
yearly_data = year_df.groupby('cell')['prediction'].sum().reset_index()
yearly_title = f"Yearly Average Prediction for {TARGET_YEAR}"

sf_map = create_map(yearly_data, yearly_title, 'map_rect_yearly.html')
sf_map

Generating map: 'Yearly Average Prediction for 2007'...
