In [None]:
import pandas as pd
import numpy as np
import folium
import xgboost as xgb
from sklearn.cluster import KMeans
from sklearn.multioutput import MultiOutputRegressor
from scipy.interpolate import make_interp_spline
import os

In [None]:
# Species whose migration this notebook models.
target_bird = 'Collared flycatcher'

# Read the complete sightings table, then narrow it to the target species.
df = pd.read_csv('birds_db.csv')
print(f"Processing: {target_bird}")

# .copy() detaches the subset so later column assignments do not act on a
# slice of the full table.
species_mask = df['species'].eq(target_bird)
df = df.loc[species_mask].copy()

Processing: Collared flycatcher


In [None]:
# Only rows that have both coordinates can take part in clustering.
coords = df[['latitude', 'longitude']].dropna()

if len(coords) <= 5:
    # Too few located sightings for five clusters: put every row in zone 0.
    df['cluster_label'] = 0
    print("  - Not enough data for robust clustering. Using a single cluster.")
else:
    kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
    zone_ids = kmeans.fit_predict(coords)
    # Write labels back by index so rows without coordinates stay unlabelled ...
    df.loc[coords.index, 'cluster_label'] = zone_ids
    # ... and then receive the sentinel label -1.
    df['cluster_label'] = df['cluster_label'].fillna(-1).astype(int)
    print("  - Clustering completed: Identified 5 migration zones.")

  - Clustering completed: Identified 5 migration zones.


In [None]:
# Model inputs and the two coordinates the model is trained to predict.
feature_cols = ['year', 'day_of_year', 'cluster_label', 'ndvi', 'ndwi', 'mndwi']
target_cols = ['latitude', 'longitude']

# Assemble a calendar date from its parts and derive the day-of-year feature.
df['date'] = pd.to_datetime(df[['year', 'month', 'day']])
df['day_of_year'] = df['date'].dt.dayofyear

# Keep only rows where every feature and both targets are present.
required_cols = [*feature_cols, *target_cols]
data_clean = df[df[required_cols].notna().all(axis=1)]

In [None]:
print("  - Training XGBoost model...")
X, y = data_clean[feature_cols], data_clean[target_cols]

# MultiOutputRegressor fits one copy of this regressor per target column
# (latitude and longitude).
base_regressor = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=200,
    learning_rate=0.05,
    max_depth=5,
    random_state=42,
)
model = MultiOutputRegressor(base_regressor)
model.fit(X, y)

  - Training XGBoost model...


0,1,2
,estimator,"XGBRegressor(...ree=None, ...)"
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [None]:
def get_smooth_path_from_predictions(prediction_df, smooth_factor=300):
    """
    Build a smooth (lat, lon) path from per-row positions.

    Positions are averaged per calendar month to obtain anchor points, and a
    cubic interpolating spline is fitted through the anchors in month order.

    Parameters
    ----------
    prediction_df : pandas.DataFrame
        Must contain the columns 'month', 'pred_lat' and 'pred_lon'.
    smooth_factor : int, default 300
        Number of points sampled along the spline.

    Returns
    -------
    list of (lat, lon) tuples
        ``smooth_factor`` points along the curve, or an empty list when fewer
        than four usable monthly anchors are available (a cubic spline needs
        at least four points).
    """
    monthly = prediction_df.groupby('month')[['pred_lat', 'pred_lon']].mean().reset_index()
    # A month whose positions are all missing averages to NaN, and
    # make_interp_spline rejects non-finite input; drop such anchors instead
    # of letting the whole path fail.
    monthly = monthly.dropna(subset=['pred_lat', 'pred_lon']).sort_values('month')

    if len(monthly) < 4:
        return []

    lat = monthly['pred_lat'].values
    lon = monthly['pred_lon'].values

    # Anchors are spaced evenly along the curve parameter (by position in the
    # sorted list, not by month number).
    t = np.arange(len(monthly))
    t_smooth = np.linspace(t.min(), t.max(), smooth_factor)

    spl_lat = make_interp_spline(t, lat, k=3)
    spl_lon = make_interp_spline(t, lon, k=3)

    lat_smooth = spl_lat(t_smooth)
    lon_smooth = spl_lon(t_smooth)

    return list(zip(lat_smooth, lon_smooth))

In [None]:
# Forecast grid: one row per day of the (non-leap) year 2030.
future_days = np.arange(1, 366)
future_data = pd.DataFrame({'year': [2030]*365, 'day_of_year': future_days})
# `origin` is itself day 1 of the year, so the 1-based day number must be
# shifted to a 0-based offset. Without the shift day 1 maps to Jan 2 and
# day 365 spills into January of the following year.
future_data['month'] = pd.to_datetime(
    future_data['day_of_year'] - 1, unit='D', origin='2030-01-01'
).dt.month

# Future environmental values are unknown, so each day borrows the mean of
# the values recorded on that day of the year.
# NOTE(review): averaging 'cluster_label' can yield fractional ids that match
# no single cluster - consider using the per-day most frequent label instead.
avg_env = data_clean.groupby('day_of_year')[['ndvi', 'ndwi', 'mndwi', 'cluster_label']].mean().reset_index()
future_data = future_data.merge(avg_env, on='day_of_year', how='left')
# Days without any record take the next available day's values, then the
# previous one. (.bfill()/.ffill() replace the deprecated fillna(method=...).)
future_data = future_data.bfill().ffill()

preds = model.predict(future_data[feature_cols])
future_data['pred_lat'] = preds[:, 0]
future_data['pred_lon'] = preds[:, 1]

path_2030 = get_smooth_path_from_predictions(future_data)

def get_historical_smooth_path(year):
    """
    Return the smoothed recorded path for one year of sightings.

    Selects the rows of the module-level ``df`` for ``year``, exposes their
    recorded coordinates under the column names expected by
    ``get_smooth_path_from_predictions`` and delegates to it. Returns an
    empty list when the year has no rows.
    """
    observed = df.loc[df['year'] == year]
    if len(observed) == 0:
        return []
    # rename() returns a new frame, so the shared `df` is left untouched.
    as_predictions = observed.rename(columns={'latitude': 'pred_lat', 'longitude': 'pred_lon'})
    return get_smooth_path_from_predictions(as_predictions)

# Recorded routes for the two comparison years drawn on the map.
path_2016, path_2024 = (get_historical_smooth_path(yr) for yr in (2016, 2024))

  future_data = future_data.fillna(method='bfill').fillna(method='ffill')


In [None]:
# Centre the map on the mean recorded position of the cleaned sightings.
center_lat = data_clean['latitude'].mean()
center_lon = data_clean['longitude'].mean()
m = folium.Map(location=[center_lat, center_lon], zoom_start=4, tiles='CartoDB dark_matter')

fg = folium.FeatureGroup(name=target_bird)

# A path is an empty list when it could not be built, so each route is drawn
# only when it has points.
if path_2016:
    folium.PolyLine(path_2016, color='#00FFFF', weight=2, opacity=0.7, tooltip='2016 (Oldest)').add_to(fg)

if path_2024:
    folium.PolyLine(path_2024, color='#FF00FF', weight=3, opacity=0.8, tooltip='2024 (Newest)').add_to(fg)

if path_2030:
    folium.PolyLine(
        path_2030, color='#32CD32', weight=4, opacity=1.0, 
        dash_array='5, 10', tooltip='2030 (Predicted)'
    ).add_to(fg)
    
    # Mark the first point of the predicted route.
    folium.CircleMarker(path_2030[0], radius=5, color='#32CD32', fill=True, popup='Start').add_to(fg)

fg.add_to(m)

# Fixed-position HTML legend overlaid on the map.
legend_html = f'''
 <div style="position: fixed; bottom: 50px; left: 50px; width: 230px; height: 110px; 
 border:1px solid grey; z-index:9999; font-size:13px;
 background-color:rgba(255, 255, 255, 0.9);
 padding: 10px; border-radius: 5px; font-family: sans-serif;">
 <b>{target_bird} Migration</b><br>
 <i style="background:#00FFFF; width: 20px; height: 3px; display:inline-block;"></i> 2016 (Recorded Path)<br>
 <i style="background:#FF00FF; width: 20px; height: 3px; display:inline-block;"></i> 2024 (Recorded Path)<br>
 <span style="color:#32CD32; font-weight:bold;">- - -</span> 2030 (Predicted Path)<br>
 </div>
 '''
m.get_root().html.add_child(folium.Element(legend_html))

output_file = f"{target_bird}.html"
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race of os.path.exists() followed by os.mkdir().
os.makedirs('Samples', exist_ok=True)
output_path = os.path.join('Samples', output_file)
m.save(output_path)

print(f"Map generated: {output_path}")

Map generated: Samples\Collared flycatcher.html
