## load libraries

In [1]:
import pandas as pd
import geopandas as gpd
import folium

from shapely.geometry import Polygon, Point
import numpy as np
import xarray as xr
import random
import os
import time
import re
import math

import matplotlib.pyplot as plt
import altair as alt

In [2]:
# Use pyogrio as the GeoPandas I/O engine for the rest of the notebook.
gpd.options.io_engine = "pyogrio"

## load crop sequence boundaries

In [3]:
# Load the sampled crop sequence boundary fields.
# Paths previously tried under ../data/agricultural/CSB/siads696/ are no longer used.
sample_data_file = '../data/fields/geo_balanced_sample.parquet'
geo_balanced_csb_samples = gpd.read_parquet(sample_data_file)
geo_balanced_csb_samples

Unnamed: 0,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,CDL2022,...,INSIDE_X,INSIDE_Y,Shp_Len,Shp_Area,geometry,Longitude,Latitude,Elevation,color,Crop
478801,081623012787392,1623,3.791540,4,24,61,61,24,24,61,...,-6.035779e+05,1.704774e+06,532.211689,15343.877410,"MULTIPOLYGON (((-603525.799 1704829.066, -6035...",-102.962080,38.176922,1231,#bfbf7a,Fallow/Idle Cropland
107446,351623001627247,1623,4.325446,225,1,225,152,152,152,36,...,-7.967410e+05,1.149389e+06,723.964572,17504.529797,"MULTIPOLYGON (((-796679.747 1149498.895, -7966...",-104.600667,33.089835,1138,#ffa8e3,Alfalfa
121162,351623002777545,1623,3.222010,61,176,1,176,36,36,1,...,-1.086137e+06,1.590736e+06,549.623304,13039.063523,"MULTIPOLYGON (((-1086091.187 1590803.212, -108...",-108.309751,36.728108,1676,#ffd400,Corn
88813,351623000046581,1623,10.204036,36,36,36,36,152,37,37,...,-9.788489e+05,1.595846e+06,1274.741723,41294.433943,"MULTIPOLYGON (((-978851.432 1595726.11, -97884...",-107.112180,36.891083,2252,#a5f58d,Other Hay/Non Alfalfa
516266,081623013377366,1623,2.918481,24,29,29,24,1,61,61,...,-6.203751e+05,1.992249e+06,556.566951,11810.721459,"MULTIPOLYGON (((-620252.778 1992330.141, -6202...",-103.412893,40.723858,1330,#bfbf7a,Fallow/Idle Cropland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119071,351623002568175,1623,2.957066,176,176,176,61,24,24,24,...,-9.138551e+05,1.348095e+06,599.379620,11966.871324,"MULTIPOLYGON (((-913829.788 1348147.68, -91380...",-106.082615,34.756107,1877,#e9ffbe,Grass/Pasture
540278,081623014401456,1623,9.374885,61,24,61,1,1,1,1,...,-6.892023e+05,1.934718e+06,988.353218,37938.965442,"MULTIPOLYGON (((-689103.22 1934811.717, -68911...",-104.172849,40.161427,1417,#a5f58d,Other Hay/Non Alfalfa
591949,041623013556134,1623,3.770585,72,72,72,190,190,190,190,...,-1.707418e+06,1.273220e+06,523.387689,15259.076277,"MULTIPOLYGON (((-1707373.732 1273277.961, -170...",-114.508535,33.028054,101,#80b3b3,Woody Wetlands
217842,081623005639708,1623,4.307316,61,24,1,61,24,29,61,...,-6.004245e+05,1.898598e+06,634.862780,17431.160158,"MULTIPOLYGON (((-600373.153 1898692.428, -6003...",-103.092389,39.903658,1396,#a87000,Winter Wheat


## load ecocrop data

Imputed, includes USDA Hardiness Zones

In [4]:
# EcoCrop crop table (imputed, includes USDA hardiness zones).
# NOTE: pd.read_pickle can execute arbitrary code while unpickling, so only
# point this at a pickle file produced by this project.
ecocrop_file = '../data/crops/EcoCrop_Clean_Imputed_All.pickle'
ecocrop_all = pd.read_pickle(ecocrop_file)
ecocrop_all

Unnamed: 0,Crop_Code,Scientific_Name,Genus,Species,Variety,Life_Form,Habit,Life_Span,Physiology,Category,...,Crop_Cycle_Max,Use_Main,Use_Detailed,Use_Part,Climate_Zone_Trewartha,USDA_Hardiness_Zone,USDA_Hardiness_Zone_Min,USDA_Hardiness_Zone_Max,Datasheet_URL,PFAF_URL
0,289,Abelmoschus esculentus,Abelmoschus,esculentus,,herb,erect,annual,single stem,vegetables,...,180.0,food & beverage,vitamins,fruits,"tropical wet & dry (Aw), tropical wet (Ar), st...",5-11,5,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...
1,290,Abelmoschus manihot,Abelmoschus,manihot,,shrub,erect,"annual, perennial","deciduous, multi stem","vegetables, ornamentals/turf, medicinals & aro...",...,365.0,food & beverage,vitamins,leaves,"tropical wet & dry (Aw), tropical wet (Ar)",8-11,8,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...
2,291,Abelmoschus moschatus,Abelmoschus,moschatus,,"herb, sub-shrub",prostrate/procumbent/semi-erect,"annual, biennial, perennial","deciduous, multi stem","ornamentals/turf, medicinals & aromatic",...,0.0,environmental,ornamental/turf,entire plant,tropical wet & dry (Aw),8-11,8,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...
3,295,Acacia auriculiformis,Acacia,auriculiformis,,tree,erect,perennial,single stem,forest/wood,...,240.0,material,dye/tannin,stems,"tropical wet & dry (Aw), tropical wet (Ar)",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...
4,297,Acacia farnesiana,Acacia,farnesiana,,tree,erect,perennial,single stem,"materials, ornamentals/turf, medicinals & arom...",...,240.0,environmental,ornamental/turf,entire plant,"tropical wet & dry (Aw), steppe or semiarid (B...",9-11,9,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,400000,Chamaecrista rotundifolia,Chamaecrista,rotundifolia,,herb,prostrate/procumbent/semi-erect,"annual, perennial","deciduous, multi stem","forage/pasture, medicinals & aromatic",...,0.0,animal food (feed),vitamins,entire plant,"tropical wet & dry (Aw), tropical wet (Ar), su...",9-11,9,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Cha...
2564,400001,Acacia polyacantha,Acacia,polyacantha,,"shrub, tree",erect,perennial,"single stem, multi stem","forest/wood, environmental",...,0.0,fuels,fuelwood,bark,"desert or arid (Bw), steppe or semiarid (Bs)",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...
2565,400002,Prosopis affinis,Prosopis,affinis,,tree,erect,perennial,"deciduous, single stem, C3 photosynthesis",forest/wood,...,0.0,food & beverage,vitamins,bark,"tropical wet & dry (Aw), desert or arid (Bw), ...",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Pro...
2566,400003,Vicia dasycarpa,Vicia,dasycarpa,,herb,climber/scrambler/scadent,annual,multi stem,forage/pasture,...,0.0,animal food (feed),minerals,entire plant,"tropical wet & dry (Aw), tropical wet (Ar), st...",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Vic...


In [5]:
list(ecocrop_all.columns)

['Crop_Code',
 'Scientific_Name',
 'Genus',
 'Species',
 'Variety',
 'Life_Form',
 'Habit',
 'Life_Span',
 'Physiology',
 'Category',
 'Plant_Attributes',
 'Temp_Opt_Min',
 'Temp_Opt_Max',
 'Temp_Opt_Min_F',
 'Temp_Opt_Max_F',
 'Temp_Abs_Min',
 'Temp_Abs_Max',
 'Temp_Abs_Min_F',
 'Temp_Abs_Max_F',
 'Killing_Temp_Rest',
 'Killing_Temp_Growth',
 'Killing_Temp_Rest_F',
 'Killing_Temp_Growth_F',
 'Rain_Opt_Min',
 'Rain_Opt_Max',
 'Rain_Abs_Min',
 'Rain_Abs_Max',
 'Lat_Opt_Min',
 'Lat_Opt_Max',
 'Lat_Abs_Min',
 'Lat_Abs_Max',
 'Alt_Opt_Min',
 'Alt_Opt_Max',
 'Alt_Abs_Min',
 'Alt_Abs_Max',
 'pH_Opt_Min',
 'pH_Opt_Max',
 'pH_Abs_Min',
 'pH_Abs_Max',
 'Light_Opt_Min',
 'Light_Opt_Max',
 'Light_Abs_Min',
 'Light_Abs_Max',
 'Depth_Opt',
 'Depth_Abs',
 'Texture_Ops',
 'Texture_Abs',
 'Fertility_Ops',
 'Fertility_Abs',
 'Al_Toxicity_Opt',
 'Al_Toxicity_Abs',
 'Salinity_Ops',
 'Salinity_Abs',
 'Drainage_Opt',
 'Drainage_Abs',
 'Photoperiod',
 'Abiotic_Tolererance',
 'Abiotic_Susceptibility',
 'Intr

In [6]:
# EcoCrop columns kept for the rainfall score: crop identifiers, the optimal and
# absolute rainfall limits, and the crop cycle bounds.
# All other EcoCrop columns are left out; `ecocrop_all.columns` lists them.
crop_columns = [
    'Crop_Code',
    'Scientific_Name',
    'Genus',
    'Species',
    'Rain_Opt_Min',
    'Rain_Opt_Max',
    'Rain_Abs_Min',
    'Rain_Abs_Max',
    'Crop_Cycle_Min',
    'Crop_Cycle_Max',
]

In [7]:
# Restrict the EcoCrop table to the columns listed in `crop_columns`.
crops = ecocrop_all[crop_columns]

In [8]:
crops

Unnamed: 0,Crop_Code,Scientific_Name,Genus,Species,Rain_Opt_Min,Rain_Opt_Max,Rain_Abs_Min,Rain_Abs_Max,Crop_Cycle_Min,Crop_Cycle_Max
0,289,Abelmoschus esculentus,Abelmoschus,esculentus,600.000000,1200.000000,300.000000,2500.000000,50.0,180.0
1,290,Abelmoschus manihot,Abelmoschus,manihot,1200.000000,2000.000000,1000.000000,3000.000000,365.0,365.0
2,291,Abelmoschus moschatus,Abelmoschus,moschatus,1000.000000,1400.000000,900.000000,1500.000000,0.0,0.0
3,295,Acacia auriculiformis,Acacia,auriculiformis,1200.000000,2500.000000,500.000000,5000.000000,180.0,240.0
4,297,Acacia farnesiana,Acacia,farnesiana,400.000000,1400.000000,150.000000,4000.000000,60.0,240.0
...,...,...,...,...,...,...,...,...,...,...
2563,400000,Chamaecrista rotundifolia,Chamaecrista,rotundifolia,1000.000000,1600.000000,550.000000,2000.000000,0.0,0.0
2564,400001,Acacia polyacantha,Acacia,polyacantha,554.076087,976.358696,365.869565,1302.173913,0.0,0.0
2565,400002,Prosopis affinis,Prosopis,affinis,320.000000,640.000000,174.000000,862.500000,0.0,0.0
2566,400003,Vicia dasycarpa,Vicia,dasycarpa,577.272727,863.636364,323.636364,1425.454545,0.0,0.0


In [9]:
crops.columns

Index(['Crop_Code', 'Scientific_Name', 'Genus', 'Species', 'Rain_Opt_Min',
       'Rain_Opt_Max', 'Rain_Abs_Min', 'Rain_Abs_Max', 'Crop_Cycle_Min',
       'Crop_Cycle_Max'],
      dtype='object')

In [10]:
def mm_to_inches(mm):
    """Convert a length in millimetres to inches (1 inch = 25.4 mm).

    The conversion is a single division, so `mm` may be a scalar or any
    object that supports division by a float.
    """
    millimetres_per_inch = 25.4
    return mm / millimetres_per_inch

In [11]:
# Add a `<column>_In` copy of every rainfall column, converted from mm to inches.
rainfall_columns = [
    'Rain_Opt_Min',
    'Rain_Opt_Max',
    'Rain_Abs_Min',
    'Rain_Abs_Max',
]

# mm_to_inches is plain arithmetic, so it is applied to whole columns at once
# rather than element by element. `assign` returns a new frame, which makes
# this cell safe to re-run and avoids writing into a slice of `ecocrop_all`.
crops = crops.assign(
    **{f'{col}_In': mm_to_inches(crops[col]) for col in rainfall_columns}
)

In [12]:
crops

Unnamed: 0,Crop_Code,Scientific_Name,Genus,Species,Rain_Opt_Min,Rain_Opt_Max,Rain_Abs_Min,Rain_Abs_Max,Crop_Cycle_Min,Crop_Cycle_Max,Rain_Opt_Min_In,Rain_Opt_Max_In,Rain_Abs_Min_In,Rain_Abs_Max_In
0,289,Abelmoschus esculentus,Abelmoschus,esculentus,600.000000,1200.000000,300.000000,2500.000000,50.0,180.0,23.622047,47.244094,11.811024,98.425197
1,290,Abelmoschus manihot,Abelmoschus,manihot,1200.000000,2000.000000,1000.000000,3000.000000,365.0,365.0,47.244094,78.740157,39.370079,118.110236
2,291,Abelmoschus moschatus,Abelmoschus,moschatus,1000.000000,1400.000000,900.000000,1500.000000,0.0,0.0,39.370079,55.118110,35.433071,59.055118
3,295,Acacia auriculiformis,Acacia,auriculiformis,1200.000000,2500.000000,500.000000,5000.000000,180.0,240.0,47.244094,98.425197,19.685039,196.850394
4,297,Acacia farnesiana,Acacia,farnesiana,400.000000,1400.000000,150.000000,4000.000000,60.0,240.0,15.748031,55.118110,5.905512,157.480315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,400000,Chamaecrista rotundifolia,Chamaecrista,rotundifolia,1000.000000,1600.000000,550.000000,2000.000000,0.0,0.0,39.370079,62.992126,21.653543,78.740157
2564,400001,Acacia polyacantha,Acacia,polyacantha,554.076087,976.358696,365.869565,1302.173913,0.0,0.0,21.814019,38.439319,14.404314,51.266689
2565,400002,Prosopis affinis,Prosopis,affinis,320.000000,640.000000,174.000000,862.500000,0.0,0.0,12.598425,25.196850,6.850394,33.956693
2566,400003,Vicia dasycarpa,Vicia,dasycarpa,577.272727,863.636364,323.636364,1425.454545,0.0,0.0,22.727273,34.001432,12.741589,56.120258


In [13]:
crops.columns

Index(['Crop_Code', 'Scientific_Name', 'Genus', 'Species', 'Rain_Opt_Min',
       'Rain_Opt_Max', 'Rain_Abs_Min', 'Rain_Abs_Max', 'Crop_Cycle_Min',
       'Crop_Cycle_Max', 'Rain_Opt_Min_In', 'Rain_Opt_Max_In',
       'Rain_Abs_Min_In', 'Rain_Abs_Max_In'],
      dtype='object')

In [14]:
# Midpoint of the crop cycle bounds; the scoring step multiplies it by the average daily precipitation.
# NOTE(review): the table shown above contains rows with Crop_Cycle_Min == Crop_Cycle_Max == 0.0,
# which average to 0 and therefore give zero rainfall in that product. Presumably these are
# missing cycle lengths; confirm how such crops should be treated.
crops['Crop_Cycle_Avg'] = (crops['Crop_Cycle_Min']+crops['Crop_Cycle_Max'])/2

In [15]:
# crops.columns
crops['Crop_Cycle_Avg'].min(), crops['Crop_Cycle_Avg'].max(), crops['Crop_Cycle_Avg'].mean()

(np.float64(0.0), np.float64(365.0), np.float64(159.7249707830152))

## look up rainfall data

In [16]:
! ls '../data/climate/ClimateToolbox/'

mean_summer_temperature_1970_2000_historical.nc
mean_summer_temperature_2040_2069_rcp45.nc
mean_summer_temperature_2040_2069_rcp85.nc
minimum_annual_temperature_1970_2000_historical.nc
minimum_annual_temperature_2040_2069_rcp45.nc
minimum_annual_temperature_2040_2069_rcp85.nc
minimum_summer_temperature_1970_2000_historical.nc
minimum_summer_temperature_2040_2069_rcp45.nc
minimum_summer_temperature_2040_2069_rcp85.nc
precipitation_total_1971_2000_historical_JJA.nc
precipitation_total_1971_2000_historical_MAM.nc
precipitation_total_2040_2069_rcp45_JJA.nc
precipitation_total_2040_2069_rcp45_MAM.nc
precipitation_total_2040_2069_rcp85_JJA.nc
precipitation_total_2040_2069_rcp85_MAM.nc


In [17]:
# Folder holding the ClimateToolbox NetCDF files (the trailing slash is required:
# file names are appended to this string directly).
climate_toolbox_data_folder = '../data/climate/ClimateToolbox/'

# Precipitation files: the three MAM files first, then the three JJA files.
# The order matters because these are paired position-by-position with
# `precipitation_scenarios`.
precipitation_filenames = [   
    'precipitation_total_1971_2000_historical_MAM.nc',
    'precipitation_total_2040_2069_rcp45_MAM.nc',
    'precipitation_total_2040_2069_rcp85_MAM.nc',
    'precipitation_total_1971_2000_historical_JJA.nc',
    'precipitation_total_2040_2069_rcp45_JJA.nc',
    'precipitation_total_2040_2069_rcp85_JJA.nc', 
]

In [18]:
# Prefix every precipitation file name with the ClimateToolbox data folder.
precipitation_filepaths = []
for filename in precipitation_filenames:
    precipitation_filepaths.append(climate_toolbox_data_folder + filename)
precipitation_filepaths

['../data/climate/ClimateToolbox/precipitation_total_1971_2000_historical_MAM.nc',
 '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp45_MAM.nc',
 '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp85_MAM.nc',
 '../data/climate/ClimateToolbox/precipitation_total_1971_2000_historical_JJA.nc',
 '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp45_JJA.nc',
 '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp85_JJA.nc']

In [19]:
# Column names for the interpolated precipitation values, one per NetCDF file.
# Listed in the same order as `precipitation_filenames` (MAM files, then JJA files)
# because the two lists are zipped together position-by-position.
precipitation_scenarios = [
    'precipitation_total_March_April_May_1971_2000_historical',
    'precipitation_total_March_April_May_2040_2069_rcp45',
    'precipitation_total_March_April_May_2040_2069_rcp85',
    
    'precipitation_total_June_July_August_1971_2000_historical',
    'precipitation_total_June_July_August_2040_2069_rcp45',
    'precipitation_total_June_July_August_2040_2069_rcp85', 
]

In [20]:
# Map each scenario column name to the NetCDF file that provides its values.
# zip() silently stops at the shorter input, so first check that the two
# lists pair up one-to-one; otherwise a scenario would be dropped unnoticed.
if len(precipitation_scenarios) != len(precipitation_filepaths):
    raise ValueError(
        f"{len(precipitation_scenarios)} scenario names but "
        f"{len(precipitation_filepaths)} precipitation files"
    )
precipitation_scenarios_data = dict(zip(precipitation_scenarios, precipitation_filepaths))
precipitation_scenarios_data

{'precipitation_total_March_April_May_1971_2000_historical': '../data/climate/ClimateToolbox/precipitation_total_1971_2000_historical_MAM.nc',
 'precipitation_total_March_April_May_2040_2069_rcp45': '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp45_MAM.nc',
 'precipitation_total_March_April_May_2040_2069_rcp85': '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp85_MAM.nc',
 'precipitation_total_June_July_August_1971_2000_historical': '../data/climate/ClimateToolbox/precipitation_total_1971_2000_historical_JJA.nc',
 'precipitation_total_June_July_August_2040_2069_rcp45': '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp45_JJA.nc',
 'precipitation_total_June_July_August_2040_2069_rcp85': '../data/climate/ClimateToolbox/precipitation_total_2040_2069_rcp85_JJA.nc'}

In [21]:
# Represent each field by its (Longitude, Latitude) point, tagged as NAD83 (EPSG:4269).
# set_geometry (inplace defaults to False) returns a new GeoDataFrame, so
# `geo_balanced_csb_samples` and its polygon geometry are left untouched.
gdf = geo_balanced_csb_samples.set_geometry(
    gpd.points_from_xy(geo_balanced_csb_samples.Longitude, geo_balanced_csb_samples.Latitude),
    crs='EPSG:4269',  # NAD83
)

latitudes = gdf['Latitude']
longitudes = gdf['Longitude']

In [22]:
def extract_data_variables(file_path):
    """Print and return the names of the data variables in a NetCDF file.

    Parameters
    ----------
    file_path : str
        Path to a file that `xr.open_dataset` can read.

    Returns
    -------
    list
        The data variable names, in the order the dataset reports them.
    """
    # The context manager closes the file once the names have been read.
    with xr.open_dataset(file_path) as ds:
        variable_names = list(ds.data_vars)
    print(variable_names)
    return variable_names

def get_climate_values_for_locations(latitudes, longitudes, file_path):
    """Interpolate the first data variable of a NetCDF file at the given points.

    Parameters
    ----------
    latitudes, longitudes : array-like of numbers
        Point coordinates, paired element by element. Longitudes are shifted
        into [0, 360) when the file's `lon` axis extends past 180.
    file_path : str
        Path to a file readable by `xr.open_dataset` that has `lat` and `lon`
        coordinates.

    Returns
    -------
    numpy.ndarray
        One linearly interpolated value per input point, in input order.

    Raises
    ------
    ValueError
        If the file contains no data variables.
    """
    point_lats = np.asarray(latitudes, dtype=float)
    point_lons = np.asarray(longitudes, dtype=float)

    # The context manager releases the file handle even if interpolation fails.
    with xr.open_dataset(file_path) as ds:
        variable_names = list(ds.data_vars)
        if not variable_names:
            raise ValueError(f"No data variables found in {file_path}.")
        variable_name = variable_names[0]
        print(f"Variable name: {variable_name}")

        # Only the first time slice is used when the variable has a time dimension.
        grid = ds[variable_name]
        if 'time' in grid.dims:
            grid = grid.isel(time=0)

        # Match the file's longitude convention (0..360 instead of -180..180).
        if ds.lon.max().values > 180:
            point_lons = (point_lons + 360) % 360

        # Indexers sharing the "points" dimension make xarray interpolate at each
        # (lat, lon) pair rather than on the outer product of the two arrays.
        lat_indexer = xr.DataArray(point_lats, dims="points")
        lon_indexer = xr.DataArray(point_lons, dims="points")

        # `.values` is read inside the `with` block so the data is loaded before the file closes.
        climate_values = grid.interp(lat=lat_indexer, lon=lon_indexer, method='linear').values

    print(climate_values[:10])
    return climate_values

In [23]:
# Interpolate every precipitation scenario at the field locations and store
# each result as a new column of `gdf`, timing the whole step.

latitudes = gdf['Latitude']
longitudes = gdf['Longitude']

# perf_counter is monotonic, so the elapsed time cannot be skewed by clock adjustments.
start_time = time.perf_counter()
for scenario_name, file_path in precipitation_scenarios_data.items():
    # The scenario name doubles as the column name.
    column_name = scenario_name
    print(f"Column name: {column_name}")

    interpolated_values = get_climate_values_for_locations(
        latitudes,
        longitudes,
        file_path,
    )
    # Guard against a silent misalignment between the interpolated values and the fields.
    if len(interpolated_values) != len(gdf):
        raise ValueError(f"Length of interpolated values ({len(interpolated_values)}) does not match the length of the DataFrame's index ({len(gdf)})")
    gdf[column_name] = interpolated_values

end_time = time.perf_counter()

# Print the execution time
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")

Column name: precipitation_total_March_April_May_1971_2000_historical
Variable name: precipitation
[3.95848933 2.35397949 1.76678236 3.97474549 4.99427215 5.58500854
 2.77882269 5.52458908 2.88362363 2.23617936]
Column name: precipitation_total_March_April_May_2040_2069_rcp45
Variable name: precipitation
[4.2434159  2.42320219 1.78424851 3.94694474 5.43343577 5.97909502
 2.79877316 5.6701984  3.04521063 2.27114543]
Column name: precipitation_total_March_April_May_2040_2069_rcp85
Variable name: precipitation
[4.05646487 2.30650401 1.74270474 3.78955674 5.47546328 5.99982015
 2.64627242 5.74176951 3.0454236  2.3100151 ]
Column name: precipitation_total_June_July_August_1971_2000_historical
Variable name: precipitation
[6.30716856 5.76073833 2.12763352 5.44689995 7.1200938  7.48951238
 5.63635978 2.92744404 3.18681683 2.38703255]
Column name: precipitation_total_June_July_August_2040_2069_rcp45
Variable name: precipitation
[6.02310383 5.79362486 2.20010473 5.57933011 6.91899618 7.19804657

In [24]:
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1200 entries, 478801 to 308295
Data columns (total 32 columns):
 #   Column                                                     Non-Null Count  Dtype   
---  ------                                                     --------------  -----   
 0   CSBID                                                      1200 non-null   object  
 1   CSBYEARS                                                   1200 non-null   object  
 2   CSBACRES                                                   1200 non-null   float64 
 3   CDL2016                                                    1200 non-null   int64   
 4   CDL2017                                                    1200 non-null   int64   
 5   CDL2018                                                    1200 non-null   int64   
 6   CDL2019                                                    1200 non-null   int64   
 7   CDL2020                                                    1200 non-null   in

In [25]:
# gdf

In [26]:
# Work on a copy so the derived precipitation columns added to `fields` do not modify `gdf`.
fields = gdf.copy()

In [27]:
# Print the minimum and maximum of each interpolated precipitation column.
for column in precipitation_scenarios:
    print(f'{column:>60} {gdf[column].min():>8.2f} (Min),   {gdf[column].max():>8.1f} (Max)')

    precipitation_total_March_April_May_1971_2000_historical     0.63 (Min),       13.2 (Max)
         precipitation_total_March_April_May_2040_2069_rcp45     0.59 (Min),       14.8 (Max)
         precipitation_total_March_April_May_2040_2069_rcp85     0.56 (Min),       15.0 (Max)
   precipitation_total_June_July_August_1971_2000_historical     0.66 (Min),       11.3 (Max)
        precipitation_total_June_July_August_2040_2069_rcp45     0.73 (Min),       11.3 (Max)
        precipitation_total_June_July_August_2040_2069_rcp85     0.78 (Min),       11.2 (Max)


In [28]:
# Print the minimum and maximum of each inch-denominated crop rainfall limit column.
crop_precipitation_columns = ['Rain_Opt_Min_In', 'Rain_Opt_Max_In', 'Rain_Abs_Min_In', 'Rain_Abs_Max_In']

for column in crop_precipitation_columns:
    print(f'{column:>60} {crops[column].min():>8.2f} (Min),   {crops[column].max():>8.1f} (Max)')

                                             Rain_Opt_Min_In     0.87 (Min),      354.3 (Max)
                                             Rain_Opt_Max_In     1.18 (Min),      472.4 (Max)
                                             Rain_Abs_Min_In     0.71 (Min),      118.1 (Max)
                                             Rain_Abs_Max_In     1.34 (Min),      389.8 (Max)


In [29]:
# Average daily precipitation over March-August for each scenario:
# (March-May total + June-August total) / number of days in those six months.
DAYS_MARCH_TO_AUGUST = 184  # 31 + 30 + 31 + 30 + 31 + 31

for period_and_scenario in ('1971_2000_historical', '2040_2069_rcp45', '2040_2069_rcp85'):
    fields[f'precipitation_average_daily_March_to_August_{period_and_scenario}'] = (
        gdf[f'precipitation_total_March_April_May_{period_and_scenario}']
        + gdf[f'precipitation_total_June_July_August_{period_and_scenario}']
    ) / DAYS_MARCH_TO_AUGUST

In [30]:
# Short scenario labels and the `fields` columns that hold their average daily
# precipitation. The two lists are parallel: entry i of one belongs to entry i
# of the other (historical, rcp45 -> medium CO2, rcp85 -> high CO2).
scenario_names = [
    'historical', 
    'mid_century_medium_CO2', 
    'mid_century_high_CO2'
]

scenario_columns = [
    'precipitation_average_daily_March_to_August_1971_2000_historical',
    'precipitation_average_daily_March_to_August_2040_2069_rcp45',
    'precipitation_average_daily_March_to_August_2040_2069_rcp85',
]

In [31]:
# Score every (field, crop) pair in every scenario from the rainfall accumulated
# over the crop's average cycle: 1 inside the optimal range, 0.5 inside only the
# absolute range, 0 otherwise.
# NOTE(review): the rainfall limits are compared against rainfall accumulated over
# the average crop cycle only; confirm the EcoCrop limits are defined on that basis.
num_fields, num_crops, num_scenarios = len(fields), len(crops), len(scenario_names)
rainfall_scores = np.zeros((num_fields, num_crops, num_scenarios))

# Crop-side inputs as row vectors, shape (1, num_crops), so they broadcast against the fields.
crop_cycle_avg = crops['Crop_Cycle_Avg'].values[None, :]
rain_opt_min, rain_opt_max, rain_abs_min, rain_abs_max = (
    crops[limit_column].values[None, :]
    for limit_column in ('Rain_Opt_Min_In', 'Rain_Opt_Max_In', 'Rain_Abs_Min_In', 'Rain_Abs_Max_In')
)

for scenario_idx, scenario_pair in enumerate(zip(scenario_names, scenario_columns)):
    scenario_name, scenario_column = scenario_pair
    start_time = time.time()

    # Field-side input as a column vector, shape (num_fields, 1).
    average_daily_precip = fields[scenario_column].values[:, None]

    # Rainfall accumulated over each crop's average cycle, shape (num_fields, num_crops).
    rainfall_total = average_daily_precip * crop_cycle_avg

    within_opt_rain_range = (rain_opt_min <= rainfall_total) & (rainfall_total <= rain_opt_max)
    within_abs_rain_range = (rain_abs_min <= rainfall_total) & (rainfall_total <= rain_abs_max)

    # The optimal range takes precedence; the absolute range only counts outside it.
    scores = np.where(
        within_opt_rain_range,
        1.0,
        np.where(within_abs_rain_range, 0.5, 0.0),
    )
    rainfall_scores[:, :, scenario_idx] = scores

    end_time = time.time()
    execution_time = end_time - start_time
    print(f"Execution time: {execution_time:.6f} seconds for scenario {scenario_name}")

Execution time: 0.044834 seconds for scenario historical
Execution time: 0.029599 seconds for scenario mid_century_medium_CO2
Execution time: 0.024636 seconds for scenario mid_century_high_CO2


In [32]:
np.unique(rainfall_scores, return_counts=True)

(array([0. , 0.5, 1. ]), array([8467948,  626858,  149994]))

In [33]:
# Label the score cube so each axis can be selected by name.
# The scenario labels reuse `scenario_names`, the list that fixed the order of the
# third axis when `rainfall_scores` was filled, so the labels cannot drift from the data.
scenarios = list(scenario_names)

results_da = xr.DataArray(
    rainfall_scores,
    dims=['fields', 'crops', 'scenarios'],
    coords={
        'fields': fields['CSBID'].to_numpy(),
        'crops': crops['Scientific_Name'].to_numpy(),
        'scenarios': scenarios,
    },
    name='crop_suitability_rainfall',
)

In [34]:
# Save the DataArray to a NetCDF file, creating the output folder if it does not exist yet.
scores_file = '../data/scores/score_matrix_rainfall.nc'
os.makedirs(os.path.dirname(scores_file), exist_ok=True)
results_da.to_netcdf(scores_file)

## save field precipitation data

In [35]:
# Make sure the saved geometry is in EPSG:4269. `fields` is a copy of `gdf`, which was
# created with this same CRS, so this is expected to leave the coordinates unchanged.
fields.to_crs(crs='EPSG:4269', inplace=True)

In [36]:
# Persist the fields together with the interpolated and derived precipitation columns.
fields.to_parquet('../data/fields/csb_sample_with_rainfall_data.parquet')

In [37]:
fields['CSBID'].info()

<class 'pandas.core.series.Series'>
Index: 1200 entries, 478801 to 308295
Series name: CSBID
Non-Null Count  Dtype 
--------------  ----- 
1200 non-null   object
dtypes: object(1)
memory usage: 18.8+ KB


In [38]:
rainfall_scores.shape

(1200, 2568, 3)

## analyze rainfall recommendations

In [40]:
def get_crops_with_score_1(scores_matrix, crop_data):
    """Return, for every field, the names of the crops whose score is exactly 1.

    Parameters
    ----------
    scores_matrix : 2-D array-like
        Scores with one row per field and one column per crop.
    crop_data : pandas.DataFrame
        Must contain a 'Scientific_Name' column whose rows are in the same
        order as the columns of `scores_matrix`.

    Returns
    -------
    dict
        Maps each field's row position (0, 1, ...) to the list of matching
        crop names, in crop order.
    """
    scores_matrix = np.asarray(scores_matrix)
    # Names are looked up by position, not by index label, so the result does not
    # depend on `crop_data` having a default 0..n-1 index.
    crop_names = crop_data['Scientific_Name'].to_numpy()[:scores_matrix.shape[1]]
    return {
        field_idx: crop_names[field_scores == 1].tolist()
        for field_idx, field_scores in enumerate(scores_matrix)
    }

In [41]:
rainfall_scores

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       ...,

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [42]:
# One row per field: for every scenario, the crops scoring exactly 1 and how many there are.
# Rows follow the first axis of `rainfall_scores`, which is the row order of `fields`,
# so each field's CSBID is attached directly instead of being patched in after merging
# on a placeholder key.
crops_with_score_1_df = pd.DataFrame({'CSBID': list(fields['CSBID'])})

for scenario_idx, scenario in enumerate(scenario_names):
    # Crops with a score of 1 for this scenario, keyed by field row position.
    crops_with_score_1 = get_crops_with_score_1(rainfall_scores[:, :, scenario_idx], crops)

    scenario_column = f'Suitable_Crops_{scenario}'
    number_column = f'Number_of_Suitable_Crops_{scenario}'

    suitable_lists = [
        crops_with_score_1[field_idx] for field_idx in range(rainfall_scores.shape[0])
    ]
    # dtype=object keeps each cell as one Python list.
    crops_with_score_1_df[scenario_column] = pd.Series(
        suitable_lists, index=crops_with_score_1_df.index, dtype=object
    )
    crops_with_score_1_df[number_column] = [len(crop_list) for crop_list in suitable_lists]

In [43]:
crops_with_score_1_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1200 entries, 0 to 1199
Data columns (total 7 columns):
 #   Column                                           Non-Null Count  Dtype 
---  ------                                           --------------  ----- 
 0   CSBID                                            1200 non-null   object
 1   Suitable_Crops_historical                        1200 non-null   object
 2   Number_of_Suitable_Crops_historical              1200 non-null   int64 
 3   Suitable_Crops_mid_century_medium_CO2            1200 non-null   object
 4   Number_of_Suitable_Crops_mid_century_medium_CO2  1200 non-null   int64 
 5   Suitable_Crops_mid_century_high_CO2              1200 non-null   object
 6   Number_of_Suitable_Crops_mid_century_high_CO2    1200 non-null   int64 
dtypes: int64(3), object(4)
memory usage: 65.8+ KB


In [44]:
# Keep only the field id and the per-scenario counts for plotting.
data_for_plot = crops_with_score_1_df[['CSBID', 'Number_of_Suitable_Crops_historical', 'Number_of_Suitable_Crops_mid_century_medium_CO2', 'Number_of_Suitable_Crops_mid_century_high_CO2',]]
data_for_plot

Unnamed: 0,CSBID,Number_of_Suitable_Crops_historical,Number_of_Suitable_Crops_mid_century_medium_CO2,Number_of_Suitable_Crops_mid_century_high_CO2
0,081623012787392,48,48,45
1,351623001627247,23,23,23
2,351623002777545,1,3,3
3,351623000046581,30,30,30
4,081623013377366,85,85,85
...,...,...,...,...
1195,351623002568175,23,23,21
1196,081623014401456,50,58,58
1197,041623013556134,0,0,0
1198,081623005639708,88,88,88


In [45]:
# Print the minimum, maximum and mean number of suitable crops per field for each scenario.
crop_suitability_columns = [
    'Number_of_Suitable_Crops_historical', 
    'Number_of_Suitable_Crops_mid_century_medium_CO2', 
    'Number_of_Suitable_Crops_mid_century_high_CO2', 
]

for column in crop_suitability_columns:
    print(f'{column:>50}  {crops_with_score_1_df[column].min():>8.2f} (Min),  {crops_with_score_1_df[column].max():>8.1f} (Max),  {crops_with_score_1_df[column].mean():>8.1f} (Avg)')

               Number_of_Suitable_Crops_historical      0.00 (Min),     348.0 (Max),      40.5 (Avg)
   Number_of_Suitable_Crops_mid_century_medium_CO2      0.00 (Min),     366.0 (Max),      43.0 (Avg)
     Number_of_Suitable_Crops_mid_century_high_CO2      0.00 (Min),     365.0 (Max),      41.5 (Avg)


In [47]:
# Histogram settings shared by the three scenario charts
bin_size = 10  # You can adjust the bin size as needed
color = 'cornflowerblue'
opacity = 0.6


def _suitable_crops_histogram(count_column, axis_title, chart_title):
    """Build a histogram of fields binned by their number of suitable crops.

    `count_column` is the `data_for_plot` column to bin; `axis_title` and
    `chart_title` are the x-axis and chart titles.
    """
    return alt.Chart(data_for_plot).mark_bar(color=color, opacity=opacity).encode(
        x=alt.X(f'{count_column}:Q', bin=alt.Bin(step=bin_size), title=axis_title, axis=alt.Axis(offset=10)),
        y=alt.Y('count()', title='Number of fields', axis=alt.Axis(offset=10)),
    ).properties(
        title=chart_title,
        width=600,
        height=200
    )


# The periods in the titles match the precipitation inputs used for the scores:
# the 1971_2000 historical files and the 2040_2069 rcp45 / rcp85 files.
chart_h = _suitable_crops_histogram(
    'Number_of_Suitable_Crops_historical',
    'Number of Suitable Crops (Historical 1971-2000)',
    'Historical Scenario (1971-2000)',
)

chart_45 = _suitable_crops_histogram(
    'Number_of_Suitable_Crops_mid_century_medium_CO2',
    'Number of Suitable Crops (RCP 4.5, 2040-2069)',
    'RCP 4.5 Scenario (2040-2069)',
)

chart_85 = _suitable_crops_histogram(
    'Number_of_Suitable_Crops_mid_century_high_CO2',
    'Number of Suitable Crops (RCP 8.5, 2040-2069)',
    'RCP 8.5 Scenario (2040-2069)',
)

# Combine the charts with shared axis domains
combined_chart = alt.vconcat(
    chart_h, 
    chart_45, 
    chart_85
).resolve_scale(
    x='shared',
    y='shared'
)

combined_chart