In [27]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point, MultiPoint
from shapely.ops import unary_union
import numpy as np
from sklearn.neighbors import BallTree
import math
from tqdm import tqdm
from sklearn.cluster import DBSCAN
from matplotlib.patches import Patch
from shapely import wkt

In [28]:
# Tunable analysis constants.
# FRP_FILTER is not referenced by any of the visible cells.
# NOTE(review): presumably a minimum-FRP threshold for filtering detections — confirm or remove.
FRP_FILTER = 25
# Search radius, passed as `radius_meters` to find_intersections_across_years.
RADIUS_FIRES = 150

In [29]:
# Read the Mapbox access token from a local file (so it is not hard-coded in the
# notebook) and register it with plotly express.
# The content is stripped because token files usually end with a newline, which
# is not part of the token itself.
with open('./token.txt', 'r') as f:
    TOKEN = f.read().strip()
px.set_mapbox_access_token(TOKEN)

In [30]:
# Load the fires dataset (parquet) into `df`.
df = pd.read_parquet('../data/fires_merged_comunas_timezone.parquet')

In [31]:
# Preview the first rows to check the available columns.
df.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,comuna,acq_datetime_gmt_3
0,-23.820446,-70.320282,301.51,0.74,0.76,2013-01-01,448,N,VIIRS,n,1,285.54,2.38,N,2.0,ANTOFAGASTA,2013-01-01 01:48:00-03:00
1,-23.823833,-70.318871,306.9,0.74,0.76,2013-01-01,448,N,VIIRS,n,1,285.8,2.33,N,2.0,ANTOFAGASTA,2013-01-01 01:48:00-03:00
2,-26.430983,-69.475632,299.73,0.58,0.7,2013-01-01,448,N,VIIRS,n,1,279.61,2.86,N,2.0,DIEGO DE ALMAGRO,2013-01-01 01:48:00-03:00
3,-32.760929,-71.47644,309.7,0.52,0.67,2013-01-01,448,N,VIIRS,n,1,285.42,2.5,N,3.0,PUCHUNCAVI,2013-01-01 01:48:00-03:00
4,-34.624073,-71.000023,319.97,0.44,0.63,2013-01-01,448,N,VIIRS,n,1,290.28,2.27,N,0.0,CHIMBARONGO,2013-01-01 01:48:00-03:00


In [32]:
# Parse `acq_datetime_gmt_3` into pandas datetimes (the column is overwritten in place).
df['acq_datetime_gmt_3'] = pd.to_datetime(df['acq_datetime_gmt_3'])

In [33]:
# Earliest and latest acquisition timestamps in the data.
df['acq_datetime_gmt_3'].min(), df['acq_datetime_gmt_3'].max()

(Timestamp('2013-01-01 01:48:00-0300', tz='America/Santiago'),
 Timestamp('2023-01-31 16:38:00-0300', tz='America/Santiago'))

In [34]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 367362 entries, 0 to 465149
Data columns (total 17 columns):
 #   Column              Non-Null Count   Dtype                           
---  ------              --------------   -----                           
 0   latitude            367362 non-null  float64                         
 1   longitude           367362 non-null  float64                         
 2   brightness          367362 non-null  float64                         
 3   scan                367362 non-null  float64                         
 4   track               367362 non-null  float64                         
 5   acq_date            367362 non-null  datetime64[ns]                  
 6   acq_time            367362 non-null  int64                           
 7   satellite           367362 non-null  object                          
 8   instrument          367362 non-null  object                          
 9   confidence          367362 non-null  object                     

In [35]:
# Disabled cell: the FRP histogram code below is wrapped in a string literal, so
# none of it runs; the notebook only echoes the string as the cell output.
"""plt.figure(figsize=(10, 6))
sns.histplot(data=df, x="frp", binrange=(df["frp"].quantile(0.01), df["frp"].quantile(0.99)), bins=100)
plt.title('Distribution of FRP Values')
plt.xlabel('FRP')
plt.ylabel('Count')
plt.tight_layout()
plt.show();"""

'plt.figure(figsize=(10, 6))\nsns.histplot(data=df, x="frp", binrange=(df["frp"].quantile(0.01), df["frp"].quantile(0.99)), bins=100)\nplt.title(\'Distribution of FRP Values\')\nplt.xlabel(\'FRP\')\nplt.ylabel(\'Count\')\nplt.tight_layout()\nplt.show();'

In [67]:
# Keep only rows with a strictly positive FRP; .copy() so that columns added to
# `a` later do not write into a slice of `df`.
# NOTE(review): FRP_FILTER is defined in the constants cell but is not used here,
# and a later recorded `a.shape` output shows far fewer rows (69283) than this
# filter yields — presumably `a` was once built with a stricter threshold; confirm.
a = df[df['frp'] > 0].copy()

In [68]:
# Calendar year of each row; find_intersections_across_years groups rows by this column.
a['year'] = a['acq_datetime_gmt_3'].dt.year

In [69]:
# Row and column counts after the FRP filter and the added `year` column.
a.shape

(367342, 18)

In [70]:
# Map the rows of `a` with type == 0 that fall in four selected comunas,
# coloured by FRP.
fig = px.scatter_mapbox(
    a[(a['type'] == 0) & a['comuna'].isin(['CONCEPCION', 'PENCO', 'FLORIDA', 'TOME'])],
    lat="latitude",
    lon="longitude",
    zoom=3,
    color='frp',
    title='Wildfires',
)
# The layout title set here replaces the one passed to scatter_mapbox above.
fig.update_layout(
    mapbox_style='satellite',
    title='Clusters Analysis for Static land source',
    height=800,
)
fig.show();


*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [40]:

def find_intersections_across_years(df, radius_meters=300):
    """
    Find points that lie within `radius_meters` of at least one point
    belonging to a different year.

    Distances are great-circle (haversine) distances on a sphere with a
    radius of 6,371,000 m.

    Parameters:
    -----------
    df : pandas DataFrame
        Contains latitude, longitude (in degrees) and year columns
    radius_meters : float
        Radius in meters to consider points as intersecting (default 300)

    Returns:
    --------
    pd.DataFrame : The rows of `df` that have a neighbour from another year
        within the radius, in their original row order. Rows are selected by
        position, so a non-unique index cannot duplicate or mis-select rows.
        Rows whose year is missing are never matched and never returned.
    """
    # Earth radius in meters
    EARTH_RADIUS = 6371000

    # BallTree's haversine metric works on the unit sphere, so the search
    # radius has to be expressed as an angle in radians.
    radius_radians = radius_meters / EARTH_RADIUS

    year_values = df['year'].to_numpy()
    has_year = df['year'].notna().to_numpy()
    years = sorted(df.loc[has_year, 'year'].unique())

    # With fewer than two distinct years nothing can intersect across years.
    if len(years) < 2:
        return df.iloc[[]]

    # Convert all coordinates to radians once instead of once per year pair.
    coords = np.radians(df[['latitude', 'longitude']].to_numpy(dtype=float))

    # Positional mask of the rows found to intersect with another year.
    intersecting = np.zeros(len(df), dtype=bool)

    for year in tqdm(years, desc="Processing years"):
        in_year = year_values == year
        year_positions = np.flatnonzero(in_year)

        # Every point of every *other* year is queried in a single call.
        other_coords = coords[has_year & ~in_year]

        tree = BallTree(coords[year_positions], metric='haversine')

        # One array of tree-point indices per query point.
        neighbours = tree.query_radius(other_coords, r=radius_radians)

        # Tree points that were hit by at least one point of another year.
        hits = np.unique(np.concatenate(list(neighbours))).astype(np.intp)
        intersecting[year_positions[hits]] = True

    return df.iloc[np.flatnonzero(intersecting)]

In [41]:
# Size of `a` right before the cross-year search.
# NOTE(review): the recorded output of this cell (69283 rows) differs from the
# earlier recorded `a.shape` (367342 rows), and the execution counts are out of
# order, so `a` apparently came from a different filter when this ran — confirm
# with Restart Kernel & Run All.
a.shape

(69283, 18)

In [42]:
# Keep only the rows of `a` that have a row from a different year within
# RADIUS_FIRES meters.
b = find_intersections_across_years(a, RADIUS_FIRES)

Processing years: 100%|██████████| 11/11 [00:06<00:00,  1.68it/s]


In [43]:
# Number of rows that intersect with another year.
b.shape

(6345, 18)

In [57]:
# Disabled cell: the PENCO map code below is wrapped in a string literal, so
# none of it runs; the notebook only echoes the string as the cell output.
"""fig = px.scatter_mapbox(b[(b['type'] == 0) & (b['comuna'] == 'PENCO')], lat="latitude", lon="longitude", zoom=3, color='frp', title='Wildfires')
fig.update_layout(mapbox_style='satellite', title='Clusters Analysis for Static land source', height=800)
fig.show();"""

'fig = px.scatter_mapbox(b[(b[\'type\'] == 0) & (b[\'comuna\'] == \'PENCO\')], lat="latitude", lon="longitude", zoom=3, color=\'frp\', title=\'Wildfires\')\nfig.update_layout(mapbox_style=\'satellite\', title=\'Clusters Analysis for Static land source\', height=800)\nfig.show();'

In [45]:
def getClusterData(df, epsilon=0.028, min_samples=2):
    """
    Cluster points with DBSCAN and compute one bounding rectangle per cluster.

    DBSCAN runs directly on the raw (latitude, longitude) values with its
    default metric, so `epsilon` is expressed in the same units as those
    columns.

    Parameters:
    -----------
    df : pandas DataFrame
        Contains latitude and longitude columns; other columns are ignored
    epsilon : float
        DBSCAN `eps` neighbourhood size (default 0.028)
    min_samples : int
        DBSCAN `min_samples` (default 2)

    Returns:
    --------
    (pd.DataFrame, pd.DataFrame) :
        - clustered points with columns latitude, longitude, cluster
          (label -1 marks noise), re-indexed from 0
        - one row per non-noise cluster with columns cluster, mbr, where mbr
          is the shapely envelope of the cluster's points. The geometry
          coordinates are in (latitude, longitude) order, i.e. x = latitude.
        When `df` has fewer than `min_samples` rows, or no cluster is found,
        the corresponding frame is empty but still has these columns.
    """
    coordinate_columns = ['latitude', 'longitude']
    subset = df[coordinate_columns].copy()

    # Too few points to cluster: return empty frames that keep the expected
    # columns so that callers can still select them.
    if len(subset) < min_samples:
        return (
            pd.DataFrame(columns=coordinate_columns + ['cluster']),
            pd.DataFrame(columns=['cluster', 'mbr']),
        )

    db = DBSCAN(eps=epsilon, min_samples=min_samples).fit(subset)
    subset['cluster'] = db.labels_

    # Collect the records in a list and build the frame once, instead of
    # growing a DataFrame with pd.concat on every iteration.
    mbr_records = []
    for cluster_label in np.unique(db.labels_):
        # -1 is DBSCAN's label for noise points; they get no rectangle.
        if cluster_label == -1:
            continue
        cluster_points = subset.loc[subset['cluster'] == cluster_label, coordinate_columns]
        mbr_records.append({
            'cluster': cluster_label,
            'mbr': MultiPoint(cluster_points.values).envelope,
        })

    mbr_data = pd.DataFrame(mbr_records, columns=['cluster', 'mbr'])

    return (subset.reset_index(drop=True), mbr_data)

In [46]:
# Cluster the cross-year rows and get one bounding rectangle per cluster.
clusters, mbrs = getClusterData(b)

In [47]:
# Preview the per-cluster bounding rectangles.
mbrs.head()

Unnamed: 0,cluster,mbr
0,0,"POLYGON ((-34.2969 -71.38, -34.2966 -71.38, -3..."
1,1,"POLYGON ((-38.4867 -72.8717, -38.400776 -72.87..."
2,2,"POLYGON ((-35.5701 -72.2132, -35.426 -72.2132,..."
3,3,"POLYGON ((-37.1843 -72.0726, -36.861359 -72.07..."
4,4,"POLYGON ((-36.841114 -71.947823, -36.7516 -71...."


In [48]:
# Wrap the bounding rectangles in a GeoDataFrame: `mbr` becomes the geometry
# column and `cluster` is kept as attribute data. No CRS is assigned here.
# The geometries store x = latitude and y = longitude, because getClusterData
# builds them from the columns in that order.
geo_df = gpd.GeoDataFrame(geometry=mbrs['mbr'], data=mbrs.drop(columns=['mbr']))

In [49]:
# Preview the GeoDataFrame of cluster rectangles.
geo_df.head()

Unnamed: 0,cluster,geometry
0,0,"POLYGON ((-34.2969 -71.38, -34.2966 -71.38, -3..."
1,1,"POLYGON ((-38.4867 -72.8717, -38.40078 -72.871..."
2,2,"POLYGON ((-35.5701 -72.2132, -35.426 -72.2132,..."
3,3,"POLYGON ((-37.1843 -72.0726, -36.86136 -72.072..."
4,4,"POLYGON ((-36.84111 -71.94782, -36.7516 -71.94..."


In [50]:
# Number of cluster rectangles and the column dtypes.
geo_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 479 entries, 0 to 478
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   cluster   479 non-null    int64   
 1   geometry  479 non-null    geometry
dtypes: geometry(1), int64(1)
memory usage: 7.6 KB


In [51]:
# Preview the clustered points with their DBSCAN labels.
clusters.head()

Unnamed: 0,latitude,longitude,cluster
0,-34.2969,-71.38,0
1,-38.4417,-72.7954,1
2,-35.4271,-72.1955,2
3,-36.9934,-71.9466,3
4,-36.7554,-71.8931,4


In [52]:
# Row count and dtypes of the clustered points.
clusters.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6345 entries, 0 to 6344
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   latitude   6345 non-null   float64
 1   longitude  6345 non-null   float64
 2   cluster    6345 non-null   int64  
dtypes: float64(2), int64(1)
memory usage: 148.8 KB


In [53]:
# Plot only a random subset of the points to keep the map light.
# The sample is seeded so a re-run draws the same points, and capped at the
# frame size so a `clusters` frame with fewer than 1000 rows does not raise.
clusters_sample = clusters.sample(n=min(1000, len(clusters)), random_state=42)

In [54]:
# Base layer: the sampled points.
fig = px.scatter_mapbox(
    clusters_sample, lat="latitude", lon="longitude",
    color_continuous_scale=px.colors.cyclical.IceFire, size_max=15,
    zoom=5, title='Fires Clustered by DBSCAN',
)

# Overlay one filled rectangle per cluster. The geometries built by
# getClusterData store x = latitude and y = longitude, so `.bounds` unpacks as
# (min_lat, min_lon, max_lat, max_lon).
for geom in geo_df['geometry']:
    lat_lo, lon_lo, lat_hi, lon_hi = geom.bounds
    fig.add_trace(
        go.Scattermapbox(
            # Closed ring: the first corner is repeated at the end.
            lat=[lat_lo, lat_hi, lat_hi, lat_lo, lat_lo],
            lon=[lon_lo, lon_lo, lon_hi, lon_hi, lon_lo],
            mode="lines",
            line=dict(color='white'),
            fill='toself',
            fillcolor='rgba(255,0,0,0.3)',
            showlegend=False,
        )
    )

fig.update_layout(mapbox_style='satellite', title='Fires Clustered by DBSCAN', height=800)

# Render the figure
fig.show()

  fig = px.scatter_mapbox(

*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/

