In [85]:
import pandas as pd
import datetime as dt

import rasterio
from pyproj import Transformer

In [86]:
# Load the training dataset built in the previous step, then report (rows, columns)
df = pd.read_csv(
    filepath_or_buffer='/Users/joeypaulelihaynes/ALAN/Data/training_dataset_02072024.csv',
    low_memory=False,
)
df.shape

(78063, 5)

In [87]:
# Preview the first five rows of the loaded dataset (head() defaults to n=5)
df.head()

Unnamed: 0,INCIDENT_DATE,LATITUDE,LONGITUDE,NUMBER_OF_STRIKES,LIGHT_POLLUTION_LEVEL
0,2000-01-01,9.071364,-79.383453,1,11.0
1,2000-01-01,12.141494,-86.168178,1,10.0
2,2000-01-01,15.452639,-87.923556,1,10.0
3,2000-01-01,18.429664,-69.668925,2,11.0
4,2000-01-01,18.43942,-66.00183,1,11.0


In [88]:
# Parse INCIDENT_DATE into datetimes so the .dt accessor can be used on it
df['INCIDENT_DATE'] = df['INCIDENT_DATE'].pipe(pd.to_datetime)

In [89]:
# BirdCast has no data for February 29, so keep only records that fall on
# any other day (i.e. month is not February OR day is not the 29th)
df = df[(df['INCIDENT_DATE'].dt.month != 2) | (df['INCIDENT_DATE'].dt.day != 29)]
df.shape

(77999, 5)

In [90]:
# BirdCast has no data for December 25, so keep only records that fall on
# any other day (i.e. month is not December OR day is not the 25th)
df = df[(df['INCIDENT_DATE'].dt.month != 12) | (df['INCIDENT_DATE'].dt.day != 25)]
df.shape

(77911, 5)

In [91]:
# Small test sample: copy the first 35 rows, then keep the last 25 of those
# (positions 10-34 whenever the frame holds at least 35 rows)
df_reduced = df.iloc[:35].copy().iloc[-25:]
df_reduced.iloc[:25]

Unnamed: 0,INCIDENT_DATE,LATITUDE,LONGITUDE,NUMBER_OF_STRIKES,LIGHT_POLLUTION_LEVEL
10,2000-01-01,25.79325,-80.29056,4,14.0
11,2000-01-01,25.99503,-81.67253,1,8.0
12,2000-01-01,26.07258,-80.15275,4,14.0
13,2000-01-01,26.15247,-81.77544,1,12.0
14,2000-01-01,26.17583,-98.23861,1,13.0
15,2000-01-01,26.19728,-80.17071,1,14.0
16,2000-01-01,26.24714,-80.11106,1,14.0
17,2000-01-01,26.53617,-81.75517,4,11.0
18,2000-01-01,27.39533,-82.55411,5,12.0
19,2000-01-01,27.77036,-97.50122,1,12.0


In [81]:
# Function to convert coordinates from EPSG:4326 to EPSG:3395
# Building a pyproj Transformer is costly, so a single instance is created
# lazily on first use and shared by every later convert_coords call instead
# of being rebuilt for each record.
_EPSG4326_TO_EPSG3395 = None


def convert_coords(lat, lon):
    """Project a latitude/longitude pair from EPSG:4326 into EPSG:3395.

    Args:
        lat: Latitude in EPSG:4326.
        lon: Longitude in EPSG:4326.

    Returns:
        Tuple ``(x, y)`` of the transformed coordinates in EPSG:3395.
    """
    global _EPSG4326_TO_EPSG3395
    if _EPSG4326_TO_EPSG3395 is None:
        # always_xy=True: the transformer takes (lon, lat) and returns (x, y)
        _EPSG4326_TO_EPSG3395 = Transformer.from_crs("epsg:4326", "epsg:3395", always_xy=True)

    # Transform coordinates (note the lon-first argument order)
    x, y = _EPSG4326_TO_EPSG3395.transform(lon, lat)
    return x, y


# Function to extract all variables for a given date, latitude, and longitude
def extract_birdcast_features(month, day_of_month, lat, lon, birdcast_file_path):
    """Sample the eight BirdCast variables for one day at one location.

    The monthly raster stores 8 consecutive bands per day, in the order
    ``mtr, vid, u, v, mtr_var, vid_var, u_var, v_var``. The December file has
    no bands for Dec. 25, so days after the 25th are shifted back by one day.

    Args:
        month: Calendar month number of the record (12 triggers the Dec. 25
            handling).
        day_of_month: 1-based day within the month.
        lat: Latitude of the location (EPSG:4326).
        lon: Longitude of the location (EPSG:4326).
        birdcast_file_path: Path of the monthly BirdCast raster to read.

    Returns:
        dict mapping each band name to the pixel value at the location.
        Every value is ``None`` when the location falls outside the raster,
        or when the date is Dec. 25 (no BirdCast data exist for that day).
    """
    # rasterio is already a dependency of this notebook; the submodule is
    # imported here so the block does not rely on another cell having done it.
    from rasterio.windows import Window

    band_names = ['mtr', 'vid', 'u', 'v', 'mtr_var', 'vid_var', 'u_var', 'v_var']
    bands_per_day = len(band_names)

    # Dec. 25 has no bands in the file. Without this guard the band arithmetic
    # below would silently return the values stored for Dec. 26.
    if month == 12 and day_of_month == 25:
        return dict.fromkeys(band_names)

    # Convert coordinates to match BirdCast CRS
    x, y = convert_coords(lat, lon)

    # Calculate the starting band for the given day
    start_band = (day_of_month - 1) * bands_per_day

    # Adjust start band to account for missing day on Dec. 25
    if month == 12 and day_of_month > 25:
        start_band -= bands_per_day

    # rasterio band indexes are 1-based
    band_indexes = [start_band + offset for offset in range(1, bands_per_day + 1)]

    with rasterio.open(birdcast_file_path) as dataset:
        # The pixel position is the same for every band, so resolve it once
        row, col = dataset.index(x, y)

        # Handle cases where the indices are outside the raster bounds
        if not ((0 <= row < dataset.height) and (0 <= col < dataset.width)):
            return dict.fromkeys(band_names)

        # Read only the single pixel needed, for all 8 bands in one call,
        # instead of loading each full band into memory
        pixel = dataset.read(indexes=band_indexes, window=Window(int(col), int(row), 1, 1))

    # pixel has shape (8, 1, 1); flatten to one value per band
    return dict(zip(band_names, pixel[:, 0, 0]))

# Lookup table from calendar month number (1-12) to that month's BirdCast
# raster; the file names end in the zero-padded month (birdcast_2023MM.tif)
birdcast_file_paths = {
    month_number: f'/Users/joeypaulelihaynes/ALAN/birdcast_2023/birdcast_2023{month_number:02d}.tif'
    for month_number in range(1, 13)
}


# Integrate BirdCast data, record by record
# Collect one feature dict per record first and attach the columns afterwards:
# writing cell-by-cell with .at inside iterrows() builds a Series per row and
# grows the new columns one scalar at a time, which does not scale to the
# full dataset.
feature_records = []
for incident_date, lat, lon in zip(df_reduced['INCIDENT_DATE'],
                                   df_reduced['LATITUDE'],
                                   df_reduced['LONGITUDE']):
    month = incident_date.month
    feature_records.append(
        extract_birdcast_features(month, incident_date.day, lat, lon, birdcast_file_paths[month])
    )

# Reuse df_reduced's own index so the new columns line up row-for-row
birdcast_features = pd.DataFrame(feature_records, index=df_reduced.index)
for name in birdcast_features.columns:
    df_reduced[name] = birdcast_features[name]

In [82]:
# Show the sample again, now including the BirdCast feature columns added by the loop above
df_reduced.head(25)

Unnamed: 0,INCIDENT_DATE,LATITUDE,LONGITUDE,NUMBER_OF_STRIKES,LIGHT_POLLUTION_LEVEL,mtr,vid,u,v,mtr_var,vid_var,u_var,v_var
10,2000-01-01,25.79325,-80.29056,4,14.0,6.620008,1.000095,0.711346,2.720662,0.035522,0.035522,0.035532,0.035532
11,2000-01-01,25.99503,-81.67253,1,8.0,5.018477,0.563174,-0.861042,3.928374,0.12509,0.12509,0.125185,0.125185
12,2000-01-01,26.07258,-80.15275,4,14.0,9.02692,1.361774,1.041099,3.123626,0.082118,0.082118,0.082166,0.082166
13,2000-01-01,26.15247,-81.77544,1,12.0,5.55026,0.613371,-0.466661,4.182549,0.13084,0.13084,0.130959,0.130959
14,2000-01-01,26.17583,-98.23861,1,13.0,72.032127,2.901743,0.203458,7.687656,0.12487,0.12487,0.124906,0.124906
15,2000-01-01,26.19728,-80.17071,1,14.0,10.275393,1.52665,1.161296,3.317141,0.096498,0.096498,0.096564,0.096564
16,2000-01-01,26.24714,-80.11106,1,14.0,10.803743,1.606483,1.247532,3.350317,0.104018,0.104018,0.10409,0.10409
17,2000-01-01,26.53617,-81.75517,4,11.0,7.09603,0.782544,0.356791,4.617605,0.128909,0.128909,0.12905,0.12905
18,2000-01-01,27.39533,-82.55411,5,12.0,5.682663,0.343708,2.087463,5.989912,0.058782,0.058782,0.058822,0.058822
19,2000-01-01,27.77036,-97.50122,1,12.0,268.509644,6.756544,-0.246343,11.019956,0.008164,0.008164,0.008164,0.008164
