# calculate total score for each field

## load libraries

In [1]:
import pandas as pd
import geopandas as gpd

import folium
from folium import Map, Element
import folium.plugins
# from folium.plugins import SideBySideLayers
from folium.plugins.treelayercontrol import TreeLayerControl
from folium import GeoJson

import branca
import branca.colormap as cm
from branca.element import MacroElement
from branca.utilities import legend_scaler

from jinja2 import Template

from shapely.geometry import Polygon, Point
import numpy as np
import xarray as xr
import random
import os
import time
import re
import math
import datetime
import itertools

import matplotlib.pyplot as plt
import altair as alt

from bs4 import BeautifulSoup
from IPython.display import display, HTML

import os
from http.server import HTTPServer, SimpleHTTPRequestHandler
import threading
import subprocess

import pprint

In [2]:
# Set GeoPandas to use pyogrio
gpd.options.io_engine = "pyogrio"

In [3]:
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

## folium map generator function

In [4]:
# List of available tiles with their attributions and layer names (including USGS maps)
tiles_list = [
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Topo"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery Topo"},
    {"tiles": "OpenStreetMap", "attr": "© OpenStreetMap contributors", "name": "OpenStreetMap"},
    {"tiles": "https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png", "attr": '&copy; <a href="https://stadiamaps.com/" target="_blank">Stadia Maps</a> <a href="https://stamen.com/" target="_blank">&copy; Stamen Design</a> &copy; <a href="https://openmaptiles.org/" target="_blank">OpenMapTiles</a> &copy; <a href="https://www.openstreetmap.org/copyright" target="_blank">OpenStreetMap</a>', "name": "Stamen Toner Lite"},
]

def generate_folium_map_with_csb_centroids_and_colors(gdf, zoom=6, decimal_places=3):
    # Check CRS and convert to EPSG:4326 if needed
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)
    
    # Format latitude and longitude to the desired number of decimal places
    gdf['Longitude_formatted'] = gdf['Longitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    gdf['Latitude_formatted'] = gdf['Latitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    
    # Ensure necessary columns are included in the properties
    gdf = gdf[['geometry', 'CDL2023', 'Longitude', 'Latitude', 'Longitude_formatted', 'Latitude_formatted', 'color', 'Crop', 'Elevation']]
    
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        }

    # Create a separate GeoDataFrame for the centroids
    centroids_gdf = gdf.copy()
    # centroids_gdf['geometry'] = centroids_gdf.apply(lambda row: Point(row['Longitude'], row['Latitude']), axis=1)
    centroids_gdf['geometry'] = gpd.points_from_xy(centroids_gdf['Longitude'], centroids_gdf['Latitude'])
    
    # Convert geometries to GeoJSON
    geojson_data = gdf.to_json()
    centroids_geojson_data = centroids_gdf.to_json()
    
    # Calculate map center
    minx, miny, maxx, maxy = gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2
    
    # Initialize map
    m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, tiles=None)
    
    # Add each tile layer to the map
    for tile_info in tiles_list:
        tiles = tile_info["tiles"]
        attr = tile_info["attr"]
        layer_name = tile_info["name"]
        folium.TileLayer(tiles=tiles, attr=attr, name=layer_name).add_to(m)
    
    # Add GeoJSON layer with custom popups and styles for crop sequence boundaries
    folium.GeoJson(
        geojson_data,
        name='Crop Sequence Boundaries',
        style_function=style_function,
        popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted','Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:'])
    ).add_to(m)
    
    # Add GeoJSON layer for centroids with custom CircleMarkers
    folium.GeoJson(
        centroids_geojson_data,
        name='CSB Centroids',
        # marker=folium.CircleMarker(radius=4, fill_color='grey', fill_opacity=1, color='grey', weight=0.2),
        marker=folium.CircleMarker(
            radius=4, 
            fill=True,
            fill_opacity=0.8,
            weight=0.2
        ),
        # marker=folium.CircleMarker(radius=4),
        style_function=lambda x: {'fillColor': x['properties']['color'], 'color': 'grey'},
        popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:']),
    ).add_to(m)
    
    folium.plugins.Fullscreen(
        position="topleft",
        title="Fullscreen",
        title_cancel="Exit Fullscreen",
        force_separate_button=True,
    ).add_to(m)

    # Add layer control to the map
    folium.LayerControl().add_to(m)
    
    return m

## load files

In [5]:
# load crops
# crop_file = '../data/agricultural/EcoCrop/siads699/EcoCrop_Clean_Imputed_All.pickle'
# crop_file = '../data/crops/EcoCrop_Clean_Imputed_All.pickle'
crop_file = '../data/crops/EcoCrop_Complete.pickle'
crops = pd.read_pickle(crop_file)

# load fields
# csb_sample_file = '../data/agricultural/CSB/siads696/geo_balanced_sample.parquet'
csb_sample_file = '../data/fields/geo_balanced_sample.parquet'
fields = gpd.read_parquet(csb_sample_file)

In [6]:
# crops

In [7]:
# fields

In [8]:
# Load the score matrices from the NetCDF files
score_matrix_photoperiod = xr.open_dataarray('../data/scores/score_matrix_photoperiod.nc')
score_matrix_climate_zone = xr.open_dataarray('../data/scores/score_matrix_climate_zone.nc')
score_matrix_temperature = xr.open_dataarray('../data/scores/score_matrix_temperature.nc')
score_matrix_rainfall = xr.open_dataarray('../data/scores/score_matrix_rainfall.nc')
score_matrix_hardiness = xr.open_dataarray('../data/scores/score_matrix_hardiness.nc')
score_matrix_pH = xr.open_dataarray('../data/scores/score_matrix_pH.nc')

In [9]:
score_matrix_photoperiod

In [10]:
score_matrix_climate_zone

In [11]:
score_matrix_temperature

In [12]:
score_matrix_rainfall

In [13]:
score_matrix_hardiness

In [14]:
score_matrix_pH

In [15]:
# Inspect the coordinates to see where they differ
for dim in score_matrix_photoperiod.dims:
    print(f"Dimension: {dim}")
    print("Photoperiod coords:", score_matrix_photoperiod.coords[dim].values)
    print("Climate zone coords:", score_matrix_climate_zone.coords[dim].values)
    print("Temperature coords:", score_matrix_temperature.coords[dim].values)
    print("Rainfall coords:", score_matrix_rainfall.coords[dim].values)
    print()

Dimension: fields
Photoperiod coords: ['081623012787392' '351623001627247' '351623002777545' ...
 '041623013556134' '081623005639708' '081623008142809']
Climate zone coords: ['081623012787392' '351623001627247' '351623002777545' ...
 '041623013556134' '081623005639708' '081623008142809']
Temperature coords: ['081623012787392' '351623001627247' '351623002777545' ...
 '041623013556134' '081623005639708' '081623008142809']
Rainfall coords: ['081623012787392' '351623001627247' '351623002777545' ...
 '041623013556134' '081623005639708' '081623008142809']

Dimension: crops
Photoperiod coords: ['Abelmoschus esculentus' 'Abelmoschus manihot' 'Abelmoschus moschatus'
 ... 'Prosopis affinis' 'Vicia dasycarpa' 'Camelina sativa']
Climate zone coords: ['Abelmoschus esculentus' 'Abelmoschus manihot' 'Abelmoschus moschatus'
 ... 'Prosopis affinis' 'Vicia dasycarpa' 'Camelina sativa']
Temperature coords: ['Abelmoschus esculentus' 'Abelmoschus manihot' 'Abelmoschus moschatus'
 ... 'Prosopis affinis' 'Vi

In [16]:
# Inspect the data types of the coordinate values
for dim in score_matrix_photoperiod.dims:
    print(f"Dimension: {dim}")
    print("Photoperiod coords dtype:", score_matrix_photoperiod.coords[dim].values.dtype)
    print("Climate zone coords dtype:", score_matrix_climate_zone.coords[dim].values.dtype)
    print("Temperature coords dtype:", score_matrix_temperature.coords[dim].values.dtype)
    print("Rainfall coords dtype:", score_matrix_rainfall.coords[dim].values.dtype)
    print()

Dimension: fields
Photoperiod coords dtype: <U15
Climate zone coords dtype: <U15
Temperature coords dtype: <U15
Rainfall coords dtype: <U15

Dimension: crops
Photoperiod coords dtype: <U46
Climate zone coords dtype: <U46
Temperature coords dtype: <U46
Rainfall coords dtype: <U46

Dimension: scenarios
Photoperiod coords dtype: <U21
Climate zone coords dtype: <U20
Temperature coords dtype: <U22
Rainfall coords dtype: <U22



In [17]:
# Reindex the scenarios dimension to ensure consistent labels
# Here we are assuming the dimension to be reindexed is named 'scenarios'
# and that the scenarios should be ordered consistently
common_scenarios = ['historical', 'mid_century_medium_carbon', 'mid_century_high_carbon']

score_matrix_photoperiod = score_matrix_photoperiod.assign_coords(scenarios=common_scenarios)
score_matrix_climate_zone = score_matrix_climate_zone.assign_coords(scenarios=common_scenarios)
score_matrix_temperature = score_matrix_temperature.assign_coords(scenarios=common_scenarios)
score_matrix_rainfall = score_matrix_rainfall.assign_coords(scenarios=common_scenarios)
score_matrix_hardiness = score_matrix_hardiness.assign_coords(scenarios=common_scenarios)
score_matrix_pH = score_matrix_pH.assign_coords(scenarios=common_scenarios)

## sum matrix

In [18]:
# Ensure that the dimensions and coordinates match
assert score_matrix_photoperiod.shape == score_matrix_climate_zone.shape
assert score_matrix_photoperiod.dims == score_matrix_climate_zone.dims
# assert all(score_matrix_photoperiod.coords[dim].equals(score_matrix_climate_zone.coords[dim]) for dim in score_matrix_photoperiod.dims)

# Perform the sum operation to get the sum_matrix
sum_matrix = score_matrix_photoperiod + score_matrix_climate_zone + score_matrix_temperature + score_matrix_rainfall + score_matrix_hardiness + score_matrix_pH

# save_file = '../data/scores/score_matrix_overall3.nc'
# save_file = '../data/scores/score_matrix_overall4.nc'
# save_file = '../data/scores/score_matrix_overall6.nc'
save_file = '../data/scores/score_matrix_overall_sum.nc'
# Save the sum_matrix to a new NetCDF file
sum_matrix.to_netcdf(save_file)

print(f"Sum matrix saved to '{save_file} ")

Sum matrix saved to '../data/scores/score_matrix_overall_sum.nc 


In [19]:
sum_matrix.shape

(1200, 2568, 3)

In [20]:
sum_matrix_no_rainfall = score_matrix_photoperiod + score_matrix_climate_zone + score_matrix_temperature + score_matrix_hardiness + score_matrix_pH
sum_matrix_no_rainfall.shape

(1200, 2568, 3)

In [21]:
max_scores = sum_matrix.max(dim='crops')
scores, counts = np.unique(max_scores, return_counts=True)

In [22]:
scores, counts

(array([2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ]),
 array([  13,  254,  227, 1812,  207,  594,  413,   80]))

In [23]:
plot_df = pd.DataFrame({'Scores': scores, 'Counts': counts})

In [24]:
# Set bin size
bin_size = 0.2  # You can adjust the bin size as needed
color='cornflowerblue'
opacity=0.6

# Create Altair histogram of suitable crops
chart_h = alt.Chart(
    plot_df
).encode(
    x=alt.X('Scores:Q', 
            bin=alt.Bin(step=bin_size), 
            title='Crop Score', 
            scale=alt.Scale(domain = [2.2,6.2]), 
            axis=alt.Axis(offset=10)
           ),
    y=alt.Y('Counts:Q', title='Frequency', axis=alt.Axis(offset=10)),
).properties(
    title='Crop Score Distribution',
    width=600,
    height=200
).mark_bar(
    color=color, 
    opacity=opacity
)

chart_h

In [25]:
# Find all fields with scores of 6
fields_with_score_6 = sum_matrix.where(sum_matrix == 6, drop=True)

In [26]:
fields_with_score_6

In [27]:
# Find all fields with scores of 4
fields_with_score_4 = sum_matrix.where(sum_matrix == 4, drop=True)

In [28]:
fields_with_score_4

In [29]:
# Initialize an empty list to store the results
results = []

# Iterate over each scenario
for scenario in sum_matrix.scenarios.values:
    # Select the data for the current scenario
    sum_matrix_scenario = sum_matrix.sel(scenarios=scenario)
    
    # Find the crops with a score of 6 for each field
    score_6_mask = (sum_matrix_scenario == 6)
    
    # Count the number of crops with a score of 2 for each field
    score_6_count = score_6_mask.sum(dim='crops')
    
    # Convert to a DataFrame and add the scenario column
    df_scenario = score_6_count.to_dataframe(name='count_of_crops_with_score_6').reset_index()
    df_scenario['scenario'] = scenario
    
    # Append to the results list
    results.append(df_scenario)


In [30]:
results[0]

Unnamed: 0,fields,scenarios,count_of_crops_with_score_6,scenario
0,081623012787392,historical,0,historical
1,351623001627247,historical,0,historical
2,351623002777545,historical,0,historical
3,351623000046581,historical,0,historical
4,081623013377366,historical,0,historical
...,...,...,...,...
1195,351623002568175,historical,0,historical
1196,081623014401456,historical,0,historical
1197,041623013556134,historical,0,historical
1198,081623005639708,historical,0,historical


In [31]:
results[1].sort_values('count_of_crops_with_score_6', ascending=False)

Unnamed: 0,fields,scenarios,count_of_crops_with_score_6,scenario
291,351623003238389,mid_century_medium_carbon,1,mid_century_medium_carbon
967,081623004923368,mid_century_medium_carbon,1,mid_century_medium_carbon
402,081623003252660,mid_century_medium_carbon,1,mid_century_medium_carbon
799,491623000170214,mid_century_medium_carbon,0,mid_century_medium_carbon
805,041623007204976,mid_century_medium_carbon,0,mid_century_medium_carbon
...,...,...,...,...
400,081623009661584,mid_century_medium_carbon,0,mid_century_medium_carbon
399,351623000931998,mid_century_medium_carbon,0,mid_century_medium_carbon
398,081623004923222,mid_century_medium_carbon,0,mid_century_medium_carbon
397,081623003253198,mid_century_medium_carbon,0,mid_century_medium_carbon


In [32]:
n_recommendations, counts = np.unique(results[1]['count_of_crops_with_score_6'], return_counts=True)

In [33]:
n_recommendations

array([0, 1])

In [34]:
list(zip(n_recommendations, counts))

[(np.int64(0), np.int64(1197)), (np.int64(1), np.int64(3))]

In [35]:
sum(counts)

np.int64(1200)

In [36]:
dataarray = sum_matrix 

In [37]:
dataarray.dims

('fields', 'crops', 'scenarios')

In [38]:
historical_data = dataarray[:, :, 0]
historical_data

In [39]:
historical_data.argsort?

[0;31mSignature:[0m [0mhistorical_data[0m[0;34m.[0m[0margsort[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m:[0m [0;34m'Any'[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m:[0m [0;34m'Any'[0m[0;34m)[0m [0;34m->[0m [0;34m'Self'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
a.argsort(axis=-1, kind=None, order=None)

Returns the indices that would sort this array.

Refer to `numpy.argsort` for full documentation.

See Also
--------
numpy.argsort : equivalent function
[0;31mFile:[0m      ~/miniforge3/envs/siads699b/lib/python3.12/site-packages/xarray/core/_typed_ops.py
[0;31mType:[0m      method

In [40]:
# Find maximum scores for each field and scenario
max_scores = dataarray.max(dim='crops')

# Find the crop indices where the scores equal the maximum scores
max_score_indices = dataarray == max_scores

In [41]:
# Find maximum scores for each field and scenario
max_scores_no_rainfall = sum_matrix_no_rainfall.max(dim='crops')

# Find the crop indices where the scores equal the maximum scores
max_scores_no_rainfall_indices = sum_matrix_no_rainfall == max_scores_no_rainfall

In [42]:
common_scenarios = ['historical', 'mid_century_medium_carbon', 'mid_century_high_carbon']
# Convert the results to a dictionary
top_scores_dict = {}
for scenario_idx in range(dataarray.sizes['scenarios']):
    # scenario_key = f'Scenario_{scenario_idx}'
    scenario_key = common_scenarios[scenario_idx]
    top_scores_dict[scenario_key] = {}
    scenario_data = max_score_indices.isel(scenarios=scenario_idx)
    max_scenario_scores = max_scores.isel(scenarios=scenario_idx)

    for field_idx in range(dataarray.sizes['fields']):
        field_key = field_idx
        field_crops = np.where(scenario_data.isel(fields=field_idx).values)[0]
        top_scores_dict[scenario_key][field_key] = {
            'Crop_Indices': field_crops.tolist(),
            'Max_Score': max_scenario_scores.isel(fields=field_idx).item(),
            'N_Crops': len(field_crops.tolist())
        }

In [43]:
common_scenarios = ['historical', 'mid_century_medium_carbon', 'mid_century_high_carbon']
# Convert the results to a dictionary
top_scores_no_rainfall_dict = {}
for scenario_idx in range(sum_matrix_no_rainfall.sizes['scenarios']):
    # scenario_key = f'Scenario_{scenario_idx}'
    scenario_key = common_scenarios[scenario_idx]
    top_scores_no_rainfall_dict[scenario_key] = {}
    scenario_data = max_scores_no_rainfall_indices.isel(scenarios=scenario_idx)
    max_scenario_scores = max_scores_no_rainfall.isel(scenarios=scenario_idx)

    for field_idx in range(sum_matrix_no_rainfall.sizes['fields']):
        field_key = field_idx
        field_crops = np.where(scenario_data.isel(fields=field_idx).values)[0]
        top_scores_no_rainfall_dict[scenario_key][field_key] = {
            'Crop_Indices': field_crops.tolist(),
            'Max_Score': max_scenario_scores.isel(fields=field_idx).item(),
            'N_Crops': len(field_crops.tolist())
        }

In [44]:
# Convert to DataFrame if needed
top_scores_df = pd.concat({k: pd.DataFrame.from_dict(v, orient='index') for k, v in top_scores_dict.items()}, axis=1)

In [45]:
# Convert to DataFrame if needed
top_scores_no_rainfall_df = pd.concat({k: pd.DataFrame.from_dict(v, orient='index') for k, v in top_scores_no_rainfall_dict.items()}, axis=1)

In [46]:
top_scores_df

Unnamed: 0_level_0,historical,historical,historical,mid_century_medium_carbon,mid_century_medium_carbon,mid_century_medium_carbon,mid_century_high_carbon,mid_century_high_carbon,mid_century_high_carbon
Unnamed: 0_level_1,Crop_Indices,Max_Score,N_Crops,Crop_Indices,Max_Score,N_Crops,Crop_Indices,Max_Score,N_Crops
0,"[26, 643, 1487]",4.5,3,[2140],4.0,1,[2140],4.0,1
1,"[1186, 1187]",5.0,2,[1187],3.5,1,[1187],3.5,1
2,"[26, 69, 420, 643, 1070, 1468, 1487]",4.0,7,"[368, 1187, 2201, 2417]",3.0,4,"[368, 1187, 2201, 2417]",3.0,4
3,"[625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...",4.0,9,"[625, 885, 1095, 1505, 1587, 1724, 2008, 2134,...",4.0,11,[26],5.5,1
4,"[643, 647, 1496, 1729]",5.0,4,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
...,...,...,...,...,...,...,...,...,...
1195,[786],4.5,1,"[368, 786, 1095, 2417]",3.0,4,"[26, 420]",4.0,2
1196,"[647, 1496]",5.0,2,"[1187, 1714, 2140]",4.0,3,"[1187, 1714, 2140]",4.0,3
1197,[487],4.5,1,"[553, 940]",3.5,2,"[26, 553, 555, 568, 940]",3.5,5
1198,"[643, 647, 651, 1496, 1729]",5.0,5,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2


In [47]:
top_scores_no_rainfall_df

Unnamed: 0_level_0,historical,historical,historical,mid_century_medium_carbon,mid_century_medium_carbon,mid_century_medium_carbon,mid_century_high_carbon,mid_century_high_carbon,mid_century_high_carbon
Unnamed: 0_level_1,Crop_Indices,Max_Score,N_Crops,Crop_Indices,Max_Score,N_Crops,Crop_Indices,Max_Score,N_Crops
0,"[21, 26, 67, 68, 107, 135, 137, 199, 243, 261,...",4.0,24,[2140],4.0,1,[2140],4.0,1
1,[1186],5.0,1,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7
2,"[26, 69, 420, 643, 1070, 1468, 1487]",4.0,7,"[368, 1187, 2201, 2417]",3.0,4,"[368, 1187, 2201, 2417]",3.0,4
3,"[625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...",4.0,9,"[625, 885, 1095, 1505, 1587, 1724, 2008, 2134,...",4.0,11,"[26, 129, 199, 420, 696]",5.0,5
4,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
...,...,...,...,...,...,...,...,...,...
1195,[786],4.5,1,"[368, 786, 1095, 2417]",3.0,4,"[26, 420]",4.0,2
1196,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
1197,[487],4.5,1,"[553, 940]",3.5,2,"[26, 553, 555, 568, 940]",3.5,5
1198,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2


In [48]:
top_scores_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1200 entries, 0 to 1199
Data columns (total 9 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   (historical, Crop_Indices)                 1200 non-null   object 
 1   (historical, Max_Score)                    1200 non-null   float64
 2   (historical, N_Crops)                      1200 non-null   int64  
 3   (mid_century_medium_carbon, Crop_Indices)  1200 non-null   object 
 4   (mid_century_medium_carbon, Max_Score)     1200 non-null   float64
 5   (mid_century_medium_carbon, N_Crops)       1200 non-null   int64  
 6   (mid_century_high_carbon, Crop_Indices)    1200 non-null   object 
 7   (mid_century_high_carbon, Max_Score)       1200 non-null   float64
 8   (mid_century_high_carbon, N_Crops)         1200 non-null   int64  
dtypes: float64(3), int64(3), object(3)
memory usage: 93.8+ KB


In [49]:
top_scores_no_rainfall_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1200 entries, 0 to 1199
Data columns (total 9 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   (historical, Crop_Indices)                 1200 non-null   object 
 1   (historical, Max_Score)                    1200 non-null   float64
 2   (historical, N_Crops)                      1200 non-null   int64  
 3   (mid_century_medium_carbon, Crop_Indices)  1200 non-null   object 
 4   (mid_century_medium_carbon, Max_Score)     1200 non-null   float64
 5   (mid_century_medium_carbon, N_Crops)       1200 non-null   int64  
 6   (mid_century_high_carbon, Crop_Indices)    1200 non-null   object 
 7   (mid_century_high_carbon, Max_Score)       1200 non-null   float64
 8   (mid_century_high_carbon, N_Crops)         1200 non-null   int64  
dtypes: float64(3), int64(3), object(3)
memory usage: 93.8+ KB


In [50]:
top_scores_df.columns = [f'{column[0]}_{column[1]}' for column in top_scores_df.columns]

In [51]:
top_scores_no_rainfall_df.columns = [f'{column[0]}_{column[1]}_no_rainfall' for column in top_scores_no_rainfall_df.columns]

In [52]:
top_scores_df

Unnamed: 0,historical_Crop_Indices,historical_Max_Score,historical_N_Crops,mid_century_medium_carbon_Crop_Indices,mid_century_medium_carbon_Max_Score,mid_century_medium_carbon_N_Crops,mid_century_high_carbon_Crop_Indices,mid_century_high_carbon_Max_Score,mid_century_high_carbon_N_Crops
0,"[26, 643, 1487]",4.5,3,[2140],4.0,1,[2140],4.0,1
1,"[1186, 1187]",5.0,2,[1187],3.5,1,[1187],3.5,1
2,"[26, 69, 420, 643, 1070, 1468, 1487]",4.0,7,"[368, 1187, 2201, 2417]",3.0,4,"[368, 1187, 2201, 2417]",3.0,4
3,"[625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...",4.0,9,"[625, 885, 1095, 1505, 1587, 1724, 2008, 2134,...",4.0,11,[26],5.5,1
4,"[643, 647, 1496, 1729]",5.0,4,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
...,...,...,...,...,...,...,...,...,...
1195,[786],4.5,1,"[368, 786, 1095, 2417]",3.0,4,"[26, 420]",4.0,2
1196,"[647, 1496]",5.0,2,"[1187, 1714, 2140]",4.0,3,"[1187, 1714, 2140]",4.0,3
1197,[487],4.5,1,"[553, 940]",3.5,2,"[26, 553, 555, 568, 940]",3.5,5
1198,"[643, 647, 651, 1496, 1729]",5.0,5,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2


In [53]:
top_scores_no_rainfall_df

Unnamed: 0,historical_Crop_Indices_no_rainfall,historical_Max_Score_no_rainfall,historical_N_Crops_no_rainfall,mid_century_medium_carbon_Crop_Indices_no_rainfall,mid_century_medium_carbon_Max_Score_no_rainfall,mid_century_medium_carbon_N_Crops_no_rainfall,mid_century_high_carbon_Crop_Indices_no_rainfall,mid_century_high_carbon_Max_Score_no_rainfall,mid_century_high_carbon_N_Crops_no_rainfall
0,"[21, 26, 67, 68, 107, 135, 137, 199, 243, 261,...",4.0,24,[2140],4.0,1,[2140],4.0,1
1,[1186],5.0,1,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7
2,"[26, 69, 420, 643, 1070, 1468, 1487]",4.0,7,"[368, 1187, 2201, 2417]",3.0,4,"[368, 1187, 2201, 2417]",3.0,4
3,"[625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...",4.0,9,"[625, 885, 1095, 1505, 1587, 1724, 2008, 2134,...",4.0,11,"[26, 129, 199, 420, 696]",5.0,5
4,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
...,...,...,...,...,...,...,...,...,...
1195,[786],4.5,1,"[368, 786, 1095, 2417]",3.0,4,"[26, 420]",4.0,2
1196,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
1197,[487],4.5,1,"[553, 940]",3.5,2,"[26, 553, 555, 568, 940]",3.5,5
1198,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2


In [54]:
# sorted([int(value) for value in np.unique(top_scores_df['historical_N_Crops'], return_counts=True)])
n_recommendations, counts = np.unique(top_scores_df['historical_N_Crops'], return_counts=True)

[[f'{int(n):>3}', f'{int(count):>3}'] for n, count in list(zip(n_recommendations, counts))][:10]

[['  1', '379'],
 ['  2', '232'],
 ['  3', '121'],
 ['  4', ' 83'],
 ['  5', ' 66'],
 ['  6', ' 63'],
 ['  7', ' 31'],
 ['  8', ' 22'],
 ['  9', ' 14'],
 [' 10', '  5']]

In [55]:
[[f'{int(n):>3}', f'{int(count):>3}'] for n, count in list(zip(n_recommendations, counts))][-10:]

[[' 36', '  5'],
 [' 37', '  2'],
 [' 41', '  3'],
 [' 42', '  3'],
 [' 47', '  2'],
 [' 52', '  1'],
 [' 53', '  1'],
 [' 54', '  1'],
 [' 55', '  1'],
 [' 59', '  1']]

In [56]:
# crops.columns

## add Common_Name and Notes columns

In [57]:
# additional_crop_info = pd.read_csv('../data/agricultural/EcoCrop/ScrapeR/crop_view_data.csv')
additional_crop_info = pd.read_csv('../data/crops/crop_view_data.csv')
additional_crop_info

Unnamed: 0,Ecocrop_code,Authority,Family,Synonyms,Common_names,Editor,Notes,Sources
0,289,(L.) Moench,Magnoliopsida:Dilleniidae:Malvales:Malvaceae,Hibiscus esculentus L.,"abelmoskus, america-neri, bakhua mun, bamia, b...",,"BRIEF DESCRIPTION A herbaceous, upright, ofte...",SOURCES (A. esculentus (L.) Moench)\nSims D (p...
1,290,(L.) Medic.,Magnoliopsida:Dilleniidae:Malvales:Malvaceae,"Hibiscus manihot L. (1753), Abelmoschus maniho...","neka (Simbo), bele (Fiji), pele (Tonga, Tuvalu...",,BRIEF DESCRIPTION A shallow rooted shrub reac...,SOURCES (A. manihot (L.) Medikus)\nTindall H 1...
2,291,Medic.,Magnoliopsida:Dilleniidae:Malvales:Malvaceae,Hibiscus abelmoschus L. (1753).,"abelmosk, musk mallow, mushkdan, muskdana, kas...",,DESCRIPTION: It is a coarse herb or prostrate ...,SOURCE: NewCrop Database (24.07.02) E291
3,295,Cunn. ex Benth.,Magnoliopsida:Rosidae:Fabales:Leguminosae,Racosperma auriculiforme (A. Cunn. ex Benth.) ...,"Papuan wattle, auri, ear leaf acacia, tuhkehn ...",,BRIEF DESCRIPTION A low to medium-sized fast ...,SOURCES (A. auriculiformis A. Cunn. ex Benth.)...
4,297,(L.) Willd.,Magnoliopsida:Rosidae:Fabales:Leguminosae,"Mimosa farnesiana L., Mimosa acicularis Poir.,...","Ellington curse, klu, cassie, espino blanco, a...",,BRIEF DESCRIPTION A thorny bush or small tree...,Grassland Index\nSkerman P 1988 pp 512-514 [TE...
...,...,...,...,...,...,...,...,...
2562,400000,Persoon,Leguminosae,Cassia rotundifolia,Pasto rastiero,,,Grassland Index
2563,400001,Willd.,Leguminosae,Acacia catechu,Black catechu,,,Grassland Index
2564,400002,(Sw.) DC.,Leguminosae,Prosopis algarrobilla,Algarobilla,,,Grassland Index
2565,400003,Roth.,Leguminosae,Vicia villosa ssp. varia,Hairy vetch,,,Grassland Index


In [58]:
crops['Common_Name'] = additional_crop_info['Common_names']
crops['Notes']       = additional_crop_info['Notes']
crops

Unnamed: 0,Crop_Code,Scientific_Name,Genus,Species,Variety,Life_Form,Habit,Life_Span,Physiology,Category,...,Use_Detailed,Use_Part,Climate_Zone_Trewartha,USDA_Hardiness_Zone,USDA_Hardiness_Zone_Min,USDA_Hardiness_Zone_Max,Datasheet_URL,PFAF_URL,Common_Name,Notes
0,289,Abelmoschus esculentus,Abelmoschus,esculentus,,herb,erect,annual,single stem,vegetables,...,vitamins,fruits,"tropical wet & dry (Aw), tropical wet (Ar), st...",5-11,5,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...,"abelmoskus, america-neri, bakhua mun, bamia, b...","BRIEF DESCRIPTION A herbaceous, upright, ofte..."
1,290,Abelmoschus manihot,Abelmoschus,manihot,,shrub,erect,"annual, perennial","deciduous, multi stem","vegetables, ornamentals/turf, medicinals & aro...",...,vitamins,leaves,"tropical wet & dry (Aw), tropical wet (Ar)",8-11,8,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...,"neka (Simbo), bele (Fiji), pele (Tonga, Tuvalu...",BRIEF DESCRIPTION A shallow rooted shrub reac...
2,291,Abelmoschus moschatus,Abelmoschus,moschatus,,"herb, sub-shrub",prostrate/procumbent/semi-erect,"annual, biennial, perennial","deciduous, multi stem","ornamentals/turf, medicinals & aromatic",...,ornamental/turf,entire plant,tropical wet & dry (Aw),8-11,8,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...,"abelmosk, musk mallow, mushkdan, muskdana, kas...",DESCRIPTION: It is a coarse herb or prostrate ...
3,295,Acacia auriculiformis,Acacia,auriculiformis,,tree,erect,perennial,single stem,forest/wood,...,dye/tannin,stems,"tropical wet & dry (Aw), tropical wet (Ar)",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...,"Papuan wattle, auri, ear leaf acacia, tuhkehn ...",BRIEF DESCRIPTION A low to medium-sized fast ...
4,297,Acacia farnesiana,Acacia,farnesiana,,tree,erect,perennial,single stem,"materials, ornamentals/turf, medicinals & arom...",...,ornamental/turf,entire plant,"tropical wet & dry (Aw), steppe or semiarid (B...",9-11,9,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...,"Ellington curse, klu, cassie, espino blanco, a...",BRIEF DESCRIPTION A thorny bush or small tree...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,400000,Chamaecrista rotundifolia,Chamaecrista,rotundifolia,,herb,prostrate/procumbent/semi-erect,"annual, perennial","deciduous, multi stem","forage/pasture, medicinals & aromatic",...,vitamins,entire plant,"tropical wet & dry (Aw), tropical wet (Ar), su...",9-11,9,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Cha...,Black catechu,
2564,400001,Acacia polyacantha,Acacia,polyacantha,,"shrub, tree",erect,perennial,"single stem, multi stem","forest/wood, environmental",...,fuelwood,bark,"desert or arid (Bw), steppe or semiarid (Bs)",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...,Algarobilla,
2565,400002,Prosopis affinis,Prosopis,affinis,,tree,erect,perennial,"deciduous, single stem, C3 photosynthesis",forest/wood,...,vitamins,bark,"tropical wet & dry (Aw), desert or arid (Bw), ...",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Pro...,Hairy vetch,
2566,400003,Vicia dasycarpa,Vicia,dasycarpa,,herb,climber/scrambler/scadent,annual,multi stem,forage/pasture,...,minerals,entire plant,"tropical wet & dry (Aw), tropical wet (Ar), st...",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Vic...,"camelina, false flax, gold-of-pleasure, gergel...",DESCRIPTION: It attains heights of 30 to 90 cm...


In [59]:
crops['Variety'    ] = crops['Variety'    ].replace(np.nan, '')
crops['Common_Name'] = crops['Common_Name'].replace(np.nan, '')
crops['Notes'      ] = crops['Notes'      ].replace(np.nan, '')
crops

Unnamed: 0,Crop_Code,Scientific_Name,Genus,Species,Variety,Life_Form,Habit,Life_Span,Physiology,Category,...,Use_Detailed,Use_Part,Climate_Zone_Trewartha,USDA_Hardiness_Zone,USDA_Hardiness_Zone_Min,USDA_Hardiness_Zone_Max,Datasheet_URL,PFAF_URL,Common_Name,Notes
0,289,Abelmoschus esculentus,Abelmoschus,esculentus,,herb,erect,annual,single stem,vegetables,...,vitamins,fruits,"tropical wet & dry (Aw), tropical wet (Ar), st...",5-11,5,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...,"abelmoskus, america-neri, bakhua mun, bamia, b...","BRIEF DESCRIPTION A herbaceous, upright, ofte..."
1,290,Abelmoschus manihot,Abelmoschus,manihot,,shrub,erect,"annual, perennial","deciduous, multi stem","vegetables, ornamentals/turf, medicinals & aro...",...,vitamins,leaves,"tropical wet & dry (Aw), tropical wet (Ar)",8-11,8,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...,"neka (Simbo), bele (Fiji), pele (Tonga, Tuvalu...",BRIEF DESCRIPTION A shallow rooted shrub reac...
2,291,Abelmoschus moschatus,Abelmoschus,moschatus,,"herb, sub-shrub",prostrate/procumbent/semi-erect,"annual, biennial, perennial","deciduous, multi stem","ornamentals/turf, medicinals & aromatic",...,ornamental/turf,entire plant,tropical wet & dry (Aw),8-11,8,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Abe...,"abelmosk, musk mallow, mushkdan, muskdana, kas...",DESCRIPTION: It is a coarse herb or prostrate ...
3,295,Acacia auriculiformis,Acacia,auriculiformis,,tree,erect,perennial,single stem,forest/wood,...,dye/tannin,stems,"tropical wet & dry (Aw), tropical wet (Ar)",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...,"Papuan wattle, auri, ear leaf acacia, tuhkehn ...",BRIEF DESCRIPTION A low to medium-sized fast ...
4,297,Acacia farnesiana,Acacia,farnesiana,,tree,erect,perennial,single stem,"materials, ornamentals/turf, medicinals & arom...",...,ornamental/turf,entire plant,"tropical wet & dry (Aw), steppe or semiarid (B...",9-11,9,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...,"Ellington curse, klu, cassie, espino blanco, a...",BRIEF DESCRIPTION A thorny bush or small tree...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,400000,Chamaecrista rotundifolia,Chamaecrista,rotundifolia,,herb,prostrate/procumbent/semi-erect,"annual, perennial","deciduous, multi stem","forage/pasture, medicinals & aromatic",...,vitamins,entire plant,"tropical wet & dry (Aw), tropical wet (Ar), su...",9-11,9,11,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Cha...,Black catechu,
2564,400001,Acacia polyacantha,Acacia,polyacantha,,"shrub, tree",erect,perennial,"single stem, multi stem","forest/wood, environmental",...,fuelwood,bark,"desert or arid (Bw), steppe or semiarid (Bs)",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Aca...,Algarobilla,
2565,400002,Prosopis affinis,Prosopis,affinis,,tree,erect,perennial,"deciduous, single stem, C3 photosynthesis",forest/wood,...,vitamins,bark,"tropical wet & dry (Aw), desert or arid (Bw), ...",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Pro...,Hairy vetch,
2566,400003,Vicia dasycarpa,Vicia,dasycarpa,,herb,climber/scrambler/scadent,annual,multi stem,forage/pasture,...,minerals,entire plant,"tropical wet & dry (Aw), tropical wet (Ar), st...",10-12,10,12,http://ecocrop.fao.org/ecocrop/srv/en/dataShee...,https://pfaf.org/user/Plant.aspx?LatinName=Vic...,"camelina, false flax, gold-of-pleasure, gergel...",DESCRIPTION: It attains heights of 30 to 90 cm...


In [60]:
# crops.to_pickle('../data/agricultural/EcoCrop/siads699/EcoCrop_Clean_Imputed_All_with_Common_Name_and_Notes.pickle')
# crops.to_pickle('../data/crops/EcoCrop_Clean_Imputed_All_with_Common_Name_and_Notes.pickle')
crops.to_pickle('../data/crops/EcoCrop_Complete_with_Common_Name_and_Notes.pickle')
crops.to_csv('../data/crops/EcoCrop_Complete_with_Common_Name_and_Notes.pickle')

## generate a dataframe of the recommended crops for each field in a column recommended_crops_df

In [61]:
fields = fields.reset_index()
fields = fields.rename(columns={'index': 'Original_Index'})
fields

Unnamed: 0,Original_Index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,INSIDE_X,INSIDE_Y,Shp_Len,Shp_Area,geometry,Longitude,Latitude,Elevation,color,Crop
0,478801,081623012787392,1623,3.791540,4,24,61,61,24,24,...,-6.035779e+05,1.704774e+06,532.211689,15343.877410,"MULTIPOLYGON (((-603525.799 1704829.066, -6035...",-102.962080,38.176922,1231,#bfbf7a,Fallow/Idle Cropland
1,107446,351623001627247,1623,4.325446,225,1,225,152,152,152,...,-7.967410e+05,1.149389e+06,723.964572,17504.529797,"MULTIPOLYGON (((-796679.747 1149498.895, -7966...",-104.600667,33.089835,1138,#ffa8e3,Alfalfa
2,121162,351623002777545,1623,3.222010,61,176,1,176,36,36,...,-1.086137e+06,1.590736e+06,549.623304,13039.063523,"MULTIPOLYGON (((-1086091.187 1590803.212, -108...",-108.309751,36.728108,1676,#ffd400,Corn
3,88813,351623000046581,1623,10.204036,36,36,36,36,152,37,...,-9.788489e+05,1.595846e+06,1274.741723,41294.433943,"MULTIPOLYGON (((-978851.432 1595726.11, -97884...",-107.112180,36.891083,2252,#a5f58d,Other Hay/Non Alfalfa
4,516266,081623013377366,1623,2.918481,24,29,29,24,1,61,...,-6.203751e+05,1.992249e+06,556.566951,11810.721459,"MULTIPOLYGON (((-620252.778 1992330.141, -6202...",-103.412893,40.723858,1330,#bfbf7a,Fallow/Idle Cropland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,119071,351623002568175,1623,2.957066,176,176,176,61,24,24,...,-9.138551e+05,1.348095e+06,599.379620,11966.871324,"MULTIPOLYGON (((-913829.788 1348147.68, -91380...",-106.082615,34.756107,1877,#e9ffbe,Grass/Pasture
1196,540278,081623014401456,1623,9.374885,61,24,61,1,1,1,...,-6.892023e+05,1.934718e+06,988.353218,37938.965442,"MULTIPOLYGON (((-689103.22 1934811.717, -68911...",-104.172849,40.161427,1417,#a5f58d,Other Hay/Non Alfalfa
1197,591949,041623013556134,1623,3.770585,72,72,72,190,190,190,...,-1.707418e+06,1.273220e+06,523.387689,15259.076277,"MULTIPOLYGON (((-1707373.732 1273277.961, -170...",-114.508535,33.028054,101,#80b3b3,Woody Wetlands
1198,217842,081623005639708,1623,4.307316,61,24,1,61,24,29,...,-6.004245e+05,1.898598e+06,634.862780,17431.160158,"MULTIPOLYGON (((-600373.153 1898692.428, -6003...",-103.092389,39.903658,1396,#a87000,Winter Wheat


In [62]:
fields[top_scores_df.columns] = top_scores_df[top_scores_df.columns]
# fields

In [63]:
top_scores_no_rainfall_df

Unnamed: 0,historical_Crop_Indices_no_rainfall,historical_Max_Score_no_rainfall,historical_N_Crops_no_rainfall,mid_century_medium_carbon_Crop_Indices_no_rainfall,mid_century_medium_carbon_Max_Score_no_rainfall,mid_century_medium_carbon_N_Crops_no_rainfall,mid_century_high_carbon_Crop_Indices_no_rainfall,mid_century_high_carbon_Max_Score_no_rainfall,mid_century_high_carbon_N_Crops_no_rainfall
0,"[21, 26, 67, 68, 107, 135, 137, 199, 243, 261,...",4.0,24,[2140],4.0,1,[2140],4.0,1
1,[1186],5.0,1,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7
2,"[26, 69, 420, 643, 1070, 1468, 1487]",4.0,7,"[368, 1187, 2201, 2417]",3.0,4,"[368, 1187, 2201, 2417]",3.0,4
3,"[625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...",4.0,9,"[625, 885, 1095, 1505, 1587, 1724, 2008, 2134,...",4.0,11,"[26, 129, 199, 420, 696]",5.0,5
4,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
...,...,...,...,...,...,...,...,...,...
1195,[786],4.5,1,"[368, 786, 1095, 2417]",3.0,4,"[26, 420]",4.0,2
1196,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
1197,[487],4.5,1,"[553, 940]",3.5,2,"[26, 553, 555, 568, 940]",3.5,5
1198,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2


### add no rainfall top scores

In [64]:
crops.index.dtype

dtype('int64')

In [65]:
crops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2568 entries, 0 to 2567
Data columns (total 78 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Crop_Code                2568 non-null   int64  
 1   Scientific_Name          2568 non-null   object 
 2   Genus                    2568 non-null   object 
 3   Species                  2568 non-null   object 
 4   Variety                  2568 non-null   object 
 5   Life_Form                2568 non-null   object 
 6   Habit                    2568 non-null   object 
 7   Life_Span                2568 non-null   object 
 8   Physiology               2568 non-null   object 
 9   Category                 2568 non-null   object 
 10  Plant_Attributes         2568 non-null   object 
 11  Temp_Opt_Min             2568 non-null   float64
 12  Temp_Opt_Max             2568 non-null   float64
 13  Temp_Opt_Min_F           2568 non-null   float64
 14  Temp_Opt_Max_F          

In [66]:
[fields['historical_Crop_Indices']]

[0                                         [26, 643, 1487]
 1                                            [1186, 1187]
 2                    [26, 69, 420, 643, 1070, 1468, 1487]
 3       [625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...
 4                                  [643, 647, 1496, 1729]
                               ...                        
 1195                                                [786]
 1196                                          [647, 1496]
 1197                                                [487]
 1198                          [643, 647, 651, 1496, 1729]
 1199                                     [643, 647, 1496]
 Name: historical_Crop_Indices, Length: 1200, dtype: object]

In [67]:
fields[top_scores_no_rainfall_df.columns] = top_scores_no_rainfall_df[top_scores_no_rainfall_df.columns]
fields

Unnamed: 0,Original_Index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,mid_century_high_carbon_N_Crops,historical_Crop_Indices_no_rainfall,historical_Max_Score_no_rainfall,historical_N_Crops_no_rainfall,mid_century_medium_carbon_Crop_Indices_no_rainfall,mid_century_medium_carbon_Max_Score_no_rainfall,mid_century_medium_carbon_N_Crops_no_rainfall,mid_century_high_carbon_Crop_Indices_no_rainfall,mid_century_high_carbon_Max_Score_no_rainfall,mid_century_high_carbon_N_Crops_no_rainfall
0,478801,081623012787392,1623,3.791540,4,24,61,61,24,24,...,1,"[21, 26, 67, 68, 107, 135, 137, 199, 243, 261,...",4.0,24,[2140],4.0,1,[2140],4.0,1
1,107446,351623001627247,1623,4.325446,225,1,225,152,152,152,...,1,[1186],5.0,1,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7
2,121162,351623002777545,1623,3.222010,61,176,1,176,36,36,...,4,"[26, 69, 420, 643, 1070, 1468, 1487]",4.0,7,"[368, 1187, 2201, 2417]",3.0,4,"[368, 1187, 2201, 2417]",3.0,4
3,88813,351623000046581,1623,10.204036,36,36,36,36,152,37,...,1,"[625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...",4.0,9,"[625, 885, 1095, 1505, 1587, 1724, 2008, 2134,...",4.0,11,"[26, 129, 199, 420, 696]",5.0,5
4,516266,081623013377366,1623,2.918481,24,29,29,24,1,61,...,2,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,119071,351623002568175,1623,2.957066,176,176,176,61,24,24,...,2,[786],4.5,1,"[368, 786, 1095, 2417]",3.0,4,"[26, 420]",4.0,2
1196,540278,081623014401456,1623,9.374885,61,24,61,1,1,1,...,3,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2
1197,591949,041623013556134,1623,3.770585,72,72,72,190,190,190,...,5,[487],4.5,1,"[553, 940]",3.5,2,"[26, 553, 555, 568, 940]",3.5,5
1198,217842,081623005639708,1623,4.307316,61,24,1,61,24,29,...,2,[1496],5.0,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2


In [68]:
# Function to get the topN recommended crops for a field
def get_recommended_crops(topN_indices, crops_df):
    return crops_df.iloc[topN_indices][['Scientific_Name', 'Common_Name', 'Notes']]

In [69]:
[get_recommended_crops(indices, crops) for indices in fields['historical_Crop_Indices'][-1:]]

[           Scientific_Name                                        Common_Name  \
 643    Agropyron cristatum             crested wheatgrass, agropiro de crista   
 647   Agropyron mongolicum         Mongolian wheatgrass, agropiro da Mongolia   
 1496      Lotus uliginosus  marsh bird's-foot trefoil, big trefoil, erva c...   
 
                                                   Notes  
 643   BRIEF DESCRIPTION  A tufted bunchgrass reachin...  
 647   BRIEF DESCRIPTION\nBunchgrass.\nGROWING PERIOD...  
 1496  BRIEF DESCRIPTION  A spreading to erect legume...  ]

In [70]:
fields['historical_Crop_Indices'].iloc[-1]

[643, 647, 1496]

In [71]:
# Add the recommended crops sub-dataframes to the fields dataframe
recommended_indices = fields['historical_Crop_Indices']
fields['Recommended_Crops_Historical'] = [get_recommended_crops(indices, crops) for indices in recommended_indices]

recommended_indices = fields['mid_century_medium_carbon_Crop_Indices']
fields['Recommended_Crops_Mid_Century_Medium_Carbon'] = [get_recommended_crops(indices, crops) for indices in recommended_indices]

recommended_indices = fields['mid_century_high_carbon_Crop_Indices']
fields['Recommended_Crops_Mid_Century_High_Carbon'] = [get_recommended_crops(indices, crops) for indices in recommended_indices]

fields

Unnamed: 0,Original_Index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,historical_N_Crops_no_rainfall,mid_century_medium_carbon_Crop_Indices_no_rainfall,mid_century_medium_carbon_Max_Score_no_rainfall,mid_century_medium_carbon_N_Crops_no_rainfall,mid_century_high_carbon_Crop_Indices_no_rainfall,mid_century_high_carbon_Max_Score_no_rainfall,mid_century_high_carbon_N_Crops_no_rainfall,Recommended_Crops_Historical,Recommended_Crops_Mid_Century_Medium_Carbon,Recommended_Crops_Mid_Century_High_Carbon
0,478801,081623012787392,1623,3.791540,4,24,61,61,24,24,...,24,[2140],4.0,1,[2140],4.0,1,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
1,107446,351623001627247,1623,4.325446,225,1,225,152,152,152,...,1,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7,"[368, 1187, 1651, 2201, 2237, 2399, 2544]",3.0,7,Scientific_Name \ 1186 Erag...,Scientific_Name ...,Scientific_Name ...
2,121162,351623002777545,1623,3.222010,61,176,1,176,36,36,...,7,"[368, 1187, 2201, 2417]",3.0,4,"[368, 1187, 2201, 2417]",3.0,4,Scientific_Name \ 26 ...,Scientific_Name \ 368 Sor...,Scientific_Name \ 368 Sor...
3,88813,351623000046581,1623,10.204036,36,36,36,36,152,37,...,9,"[625, 885, 1095, 1505, 1587, 1724, 2008, 2134,...",4.0,11,"[26, 129, 199, 420, 696]",5.0,5,Scientific_Name ...,Scientific_Name ...,Scientific_Name Common_Na...
4,516266,081623013377366,1623,2.918481,24,29,29,24,1,61,...,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,119071,351623002568175,1623,2.957066,176,176,176,61,24,24,...,1,"[368, 786, 1095, 2417]",3.0,4,"[26, 420]",4.0,2,Scientific_Name Common_Name \ 786 At...,Scientific_Name ...,Scientific_Name ...
1196,540278,081623014401456,1623,9.374885,61,24,61,1,1,1,...,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2,Scientific_Name ...,Scientific_Name ...,Scientific_Name ...
1197,591949,041623013556134,1623,3.770585,72,72,72,190,190,190,...,1,"[553, 940]",3.5,2,"[26, 553, 555, 568, 940]",3.5,5,Scientific_Name ...,Scientific_Name ...,Scientific_Name ...
1198,217842,081623005639708,1623,4.307316,61,24,1,61,24,29,...,1,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...


In [72]:
# Set the pandas display options for column width and wrapping text
pd.set_option('display.max_colwidth', None)  # Set no limit for max column width
fields['Recommended_Crops_Historical'][0]#.to_html()

Unnamed: 0,Scientific_Name,Common_Name,Notes
26,Amaranthus sp.,"amaranth, kata natay, chulai","SOURCES (Amaranthus spp.)\nSims D 1993 (pers. comm.)\nPlucknett D 1985 pp 14\nTindall H 1983 pp 36-41 [FER, TEXT, PH, TEMP, PHO]\nHackett C 1982 pp 25 [FER, PHO, DEP, PH, TEXT, TEMP]\nJanick J 1991 pp 211\nWestphal E 1989 pp 34-37 [USE, LIG, TEMP, PHO, RAIN, FER, DRA, TEXT]\nGROWING PERIOD Usually short-lived annuals, 30-50 days to first harvest. Growing period 120-300 days. COMMON NAMES African Spinach, Indian Spinach, Spinach, Amaranth, Bush Greens Chinese Spinach, Green leaf, Spinach Greens, Amarante, Bledo, Badi Chauli, Chota Chaulai, Kulitis. FURTHER INF Found at elevations between sea level and 1200 m. Best adapted to low to medium humidity. Photosynthesis pathway C4. With a normal yield of 25 t/ha in 8 weeks about 125 kg N, 25 kg P, 250 kg K, 75 kg Ca and 40 kg Mg may be taken up from the soil."
643,Agropyron cristatum,"crested wheatgrass, agropiro de crista","BRIEF DESCRIPTION A tufted bunchgrass reaching a height of up to 60 cm. USES Used for regrassing abandoned and depleted croplands, and used for pasture. KILLING T Very frost resistant. GROWING PERIOD Long-lived perennial. Productive stands can be up to 20-35 years old. COMMON NAMES\nCrested wheatgrass (Fairway), Fairway crested wheatgrass. FURTHER INF Scientific synonym: Bromus cristatum. Have certain winter requirements. In the subtropics the plant is adapted to altitudes between 1500-2800 m."
1487,Lolium multiflorum,"annual ryegrass, Italian ryegrass, ryegrass, raigras Italiano, ray-grass d'Italie, Italienisches raigras, vielblütiges weidelgras, Italienskt rajgräs, borstrepe, Italiensk raigras, Italiensk rajgræs, Italianraiheinä, acevén","BRIEF DESCRIPTION It has a bunch-type growth, it is leafy and has a dark green color, and can become up to 120 cm tall. USES It is used as a winter forage grass in Europe. Used in meadows, pastures, and lawns. GROWING PERIOD Annual or biennial. COMMON NAMES Italian ryegrass, Annual ryegrass, Australian ryegrass, Ray-grass Italie, Khortane, Maddoun, Mandjour, Noussel, Zamma. FURTHER INF Scientific synonym: L. scabrum, L. italicum. In Kenya it can be grown at altitudes above 2350 m."


In [73]:
pd.reset_option('display.max_colwidth')  # Revert to the default setting for max column width
# crops

### generate html for popup contents

In [74]:
# Function to get the topN recommended crops for a field
def get_recommended_crops_html(topN_indices, crops_df):
    # Define your CSS styles
    css = """
    <style>
    .scrollable-popup {
        width: 800px;
        /* height: 300px; */
        max-height: 300px;  /* Set maximum height */
        overflow-y: auto;
        overflow-x: auto;
        display: inline-block;
    }
    .scrollable-cell {
        max-height: 100px;
        overflow-y: auto;
        display: inline-block;
        width: 100%;
    }
    .common-name-cell {
        width: 200px;  /* Increased width for Common_Name */
    }
    .scientific-name-cell {
        width: 100px;  /* Increased width for Scientific_Name */
    }
    .sticky-header {
        position: sticky;
        top: 0;
        background-color: white;
        border-bottom: 1px solid #f0f0f0;  /* Light grey border */
        z-index: 1;
    }
    .sticky-index {
        position: sticky;
        left: 0;
        background-color: white;
        border-right: 1px solid #f0f0f0;  /* Light grey border */
        z-index: 2;
    }
    /* Add alternating row shading */
    .styled-table tr:nth-child(even) {
        background-color: #f9f9f9;  /* Light grey for even rows */
    }
    .styled-table tr:nth-child(odd) {
        background-color: #ffffff;  /* White for odd rows */
    }
    </style>
    """
    
    # Select the relevant rows from the DataFrame
    selected_df = crops_df.iloc[topN_indices][['Scientific_Name', 'Common_Name', 'Notes']]

    # Apply styling using Pandas Styler
    styled_df = selected_df.style.set_table_styles({
        'Scientific_Name': [{'selector': 'th', 'props': [('text-align', 'left'), ('vertical-align', 'top')]},  # Top-aligned headers
                            {'selector': 'td', 'props': [('text-align', 'left'), ('vertical-align', 'top'), ('width', '100px')]}],  # Increased width
        'Common_Name': [{'selector': 'th', 'props': [('text-align', 'left'), ('vertical-align', 'top')]},  # Top-aligned headers
                        {'selector': 'td', 'props': [('text-align', 'left'), ('vertical-align', 'top'), ('width', '200px')]}],  # Increased width
        'Notes': [{'selector': 'th', 'props': [('text-align', 'left'), ('vertical-align', 'top')]},  # Top-aligned headers
                  {'selector': 'td', 'props': [('text-align', 'left'), ('vertical-align', 'top'), ('max-height', 'auto'), ('overflow-y', 'auto'), ('display', 'inline-block')]}],  # Top-aligned and scrollable cells
    })

    # Set sticky headers with a light grey border
    styled_df = styled_df.hide(axis=0)
    
    # # Add custom CSS for sticky headers with light grey border
    styled_df = styled_df.set_table_styles([
        {'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:#ddd; border-bottom:1px solid #f0f0f0'},
    ], overwrite=False)
    
    # Convert the styled DataFrame to HTML
    # df_html = styled_df.to_html(classes="table table-striped table-hover table-condensed table-responsive", index=False)
    df_html = styled_df.set_table_attributes('class="styled-table"').to_html()
    
    # Combine with the custom CSS for the scrollable popup
    html = css + '<div class="scrollable-popup">' + df_html + '</div>'
    
    return html

In [75]:
recommended_indices = fields['historical_Crop_Indices']
fields['Recommended_Crops_Historical_html'] = [get_recommended_crops_html(indices, crops) for indices in recommended_indices]

recommended_indices = fields['mid_century_medium_carbon_Crop_Indices']
fields['Recommended_Crops_Mid_Century_Medium_Carbon_html'] = [get_recommended_crops_html(indices, crops) for indices in recommended_indices]

recommended_indices = fields['mid_century_high_carbon_Crop_Indices']
fields['Recommended_Crops_Mid_Century_High_Carbon_html'] = [get_recommended_crops_html(indices, crops) for indices in recommended_indices]

In [76]:
HTML(fields['Recommended_Crops_Historical_html'][3])

Scientific_Name,Common_Name,Notes
Adonis vernalis,"adonis, pheasant's eye, spring pheasant's eye, Adonisröschen, Herba Adonis vernalis","DESCRIPTION: It is a herbaceous plant reaching up to 30 cm in height. The stem is branching, and the leaves many-cleft and sessile. The flowers are large, yellow, and attractive. USE: A toxic principle is present in very small quantities in the plant. It is poorly absorbed so poisoning is unlikely. The plant contains cardiac glycosides similar to those found in the foxglove (Digitalis purpurea). These substances improve the heart's efficiency, increasing its output at the same time as slowing its rate. It also has a sedative action and so is generally prescribed for patients whose hearts are beating too fast or irregularly. The herb is not often prescribed, however, due to irregular absorption. The herb is cardiotonic, diuretic, sedative and vasoconstrictor. It has sometimes been used internally as a cardiotonic with success where the better known foxglove (Digitalis purpurea) has failed - especially where there is also kidney disease. The herb is also used in the treatment of low blood pressure and its strong diuretic action can be used to counter water retention. It is included in many proprietary medicines, especially since its effects are not cumulative. The plants are harvested every third year as they come into flower, they are dried for use in tinctures and liquid extracts. The herb does not store well so stocks should be replaced every year. The plant is used in homeopathy as a treatment for angina. GROWING PERIOD: Perennial. COMMON NAMES: Pheasant's eye. FURTHER INF: It can be found in eastern South Europe and into Russia. Naturalized in north-eastern United States. Found on sunny grassy hills on dry calcareous soils. A rare plant in most of its range, it has legal protection from gathering in most countries."
Butomus umbellatus,"flowering rush, grassy rush, water gladiolus, junco florido, butomo, brudelys, sarjarimpi, schwanenblume","DESCRIPTION: It is a perennial, monocotyledon. It has slender, triangular, erect and 1-1.5 m tall stems, a stout rhizome, stiff, grass-like leaves and pink bitter almond scented flowers. USE: The tubers, which contain over 50% starch, are edible when cooked. Also the seed is edible and the plant is grown as an ornamental in garden ponds. GROWING PERIOD: Perennial. COMMON NAMES: Flowering rush, Brudelys, Sarjarimpi, Schwanenblume. FURTHER INF: It occurs as submersed plants in still or slowly-moving water in lakes, bogs, ponds, ditches and canals and as emerged plants in marshes and on shorelines. It grows in moist to wet soil or water up to 30 cm deep. It is native of can be found in central Europe from eastern England and France through southern Scandinavia and the northern Mediterranean into Russia. It has been introduced in northern United States (Vermont to Idaho) and in Asia. Seed germination is enhanced by wet cold stratification and long days. Best germination occurs on a wet emerged surface or in shallow water."
Cyperus longus,"galingale, cyperus, English galingale, sweet cyperus, sweet galingale, souchet odorant, hohes Zypergras, langes Zypergras, fladaks, zigolo commune","DESCRIPTION: It is an evergreen, perennial, herbaceous plant reaching 0.7-1.5 m in height. It can grow and spread quite fast. USE: The root is edible and it can be used as a spice in soups, pies and sweets. The leaves are used in basketry and for weaving hats, matting etc. The root and stem have the scent of violets and are used in perfumery. The root has medicinal properties and the plant is used as an ornamental. GROWING PERIOD: Perennial. COMMON NAMES: Galingale, Cyperus, English galingale, Sweet cyperus. FURTHER INF: It grows by or in water in ditches, ponds, bog gardens and marshy soils. It thrives in moist sandy loam but is not very particular to soil type and succeeds in any good garden soil so long as it does not dry out. It however requires moist or wet soil and can grow in water to a depth of 30 cm. Withstands some salinity and poor soils. It can be found in northern and central Africa to Kenya and Nigeria, around the Mediterranean, southern and central Europe, western Asia and central Asia to India."
Lycium barbarum,,
Typha domingensis,,"DESCRIPTION: It is a herbaceous, rhizomatous plant with long, slender green stalks topped with brown, fluffy, sausage-shaped flowering heads. It grows 1.5-4 m in height. USE: The root are edible raw or cooked. They are rich in starch and protein and can be boiled and eaten like potatoes. The root can also be dried, ground into a powder and then used as a thickener in soups etc or added to cereal flours. Young shoots in spring are edible raw or cooked. The seed can be consumed cooked. An edible oil is obtained from the seed. The pollen is rich in protein and is edible raw or cooked. The leaves are diuretic. The pollen is astringent, desiccant, diuretic, haemostatic and vulnerary. It is used in the treatment of nose bleeds, haematemesis, haematuria, uterine bleeding, dysmenorrhoea, postpartum abdominal pain and gastralgia, scrofula and abscesses. It is contraindicated for pregnant women. The seed down is haemostatic. The rootstock is astringent and diuretic. The stems and leaves make a good thatch, can be used in making paper, can be woven into mats, chairs, etc. The pollen is highly inflammable and is used in making fireworks. This plants extensive root system makes it very good for stabilizing wet banks of rivers, lakes etc. GROWING PERIOD: Perennial. COMMON NAMES: Southern cattail. FURTHER INF: It can be found in Europe, Asia, and North America. Found in shallow water to 15 cm in and along brackish to fresh marshes, ponds and pools. It requires wet soil and can grow in water. Plants can be very invasive, spreading freely at the roots when in a suitable site."
Asclepias incarnata,"swamp milkweed, swamp silkweed, white indian hemp","DESCRIPTION: It is a semiaquatic, herbaceous, perennial reaching 30-90 cm in height. All plant parts contain with milky sap. USE: Young shoots, flowers and leaves may be cooked with several changes of water and eaten. Stem fibres have been suggested as substitutes for flax and hemp. The roots have medicinal properties. It is grown as garden ornamental attracting butterflies. It is poisonous to sheep. GROWING PERIOD: Perennial. COMMON NAMES: Swamp milkweed, Swamp silkweed, White Indian hemp. FURTHER INF: It occurs in a range of wet conditions from standing water to saturated soil. It is common in wet places, such as low swampy ground, sphagnum bogs, stream banks, pond shores, and floodplains of lakes, waterways, marshes, swamps, wet areas in grassland and in low wet woods. It is a colonizer. It has wind-dispersed seeds and can self-fertilize."
Caragana arborescens,Siberian peashrub,"DESCRIPTION: It is a deciduous shrub or small tree growing to up to 6 m in height and 4 m in width at a fast rate. The leaves are alternate, 8-12 cm long, with each leaf composed of eight to twelve oval leaflets. The flowers are yellow and appear early in the season forming pods in late June or early July. As the pods ripen, they crack and burst, spreading the seeds. The young bark is smooth and olive green and becomes less vivid in colour as the bark ages. The flowers are hermaphrodite are pollinated by bees. USE: The seed are edible cooked. Small but produced in abundance, there are 4 - 6 seeds per pod. A bland flavour, it is best used in spicy dishes. The seed contains 12.4% of fatty oil and up to 36% protein. The young pods can be cooked and used as a vegetable. The whole plant is used in the treatment of cancer of the breast, and the orifice to the womb, and for dysmenorrhoea and other gynaecological problems. A fibre obtained from the bark is used for making cordage. A blue dye is obtained from the leaves. The plant can be grown as a hedge. It is quite wind-resistant and can also be planted in a shelterbelt. The plant has an extensive root system and can be used for erosion control, especially on marginal land. Because of its nitrogen-fixing capacity, it is valued as a soil-improving plant. GROWING PERIOD: Perennial. COMMON NAMES: Siberian pea tree, Pea shrub, Pea tree, Siberian pea shrub, Siberian peashrub. FURTHER INF: It can be found in East Asia from Siberia to Mongolia. Occasionally naturalized in France. Grows on river banks, pebbles, sands, open forests and forest edges, gully slopes and stony slopes. It can fix nitrogen and it is noted for attracting wildlife. The plant can tolerates strong winds but not maritime exposure."
Samolus valerandi,"seaside brookweed, brookweed, salzbunge, strandsamel, bunge, samel, suolapunka","DESCRIPTION: It is a short creeping, perennial, herbaceous, amphibious plant reaching 5-30 cm in height and 15-20 cm in width. In a basal rosette of 7-14 leaves looking rather like that of a daisy, and also up the stem, shiny, light green, 10 cm long and 3-4 cm wide, slightly fleshy, with rounded tip and narrowing to a stalk-like base. USE: The young leaves can be eaten raw or cooked. The plant is very ornamental as a foreground plant, in shallow aquariums as a swamp plant in a terrarium and as an indoor plant if placed in a north-facing window. GROWING PERIOD: Perennial. COMMON NAMES: Seaside brookweed, Brookweed, Salzbunge, Strandsamel. FURTHER INF: It can be found in wet soil, among wet rocks, in wetlands and creeks near where burns enter the sea and in shallow water. It can be found in North America from southern Canada to the Caribbean and in Europe from Southern Scandinavia to northern Africa."
Thymus maroccanus,"melia, boba","DESCRIPTION: It is a deciduous tree reaching 6-20 m in height with an open crown and laxly branched. Trees with 25 cm in stem diameter are common. The bark is grey, fairly smooth, furrowing with age. Leaves are a light, bright green, bipinnate with (sub)opposite leaflets, 3-7 per pinna and up to 35 cm long. The fruit is drupe-like and oval; colour changes from green to pale grey as the fruit matures. Fruit size is normally 4 cm long with a very thick, bony endocarp. USE: Fruit and leaves are used as fodder. The flowers are a source of honey. The wood is easily worked and shaped, making it suitable for making acoustic drums, containers, log bee hives and mortars. Leaf preparations are used as flea, mosquito and fly repellents. Useful in agroforestry since its deep rooting nature makes little interference with ox-plough cultivation. GROWING PERIOD: Perennial. COMMON NAMES: melia. FURTHER INF: It is common in association with acacia-commiphora vegetation. It is an emergent in acacia-commiphora deciduous bushland, sometimes fringing seasonal watercourses or appearing on rock outcrops. Physical characteristics of the soils in its natural range are sandy, clay and shallow stony. Good drainage is a common characteristic, although stands on sites classified as imperfectly drained soils in Tharaka-Nithi and Isiolo in Kenya have been reported."


## put recommended crops into a popup for each field

In [77]:
# List of available tiles with their attributions and layer names (including USGS maps)
tiles_list = [
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Topo"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery Topo"},
    
    {"tiles": "OpenStreetMap", "attr": "© OpenStreetMap contributors", "name": "OpenStreetMap"},
    
    # {"tiles": "https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png", "attr": '&copy; <a href="https://stadiamaps.com/" target="_blank">Stadia Maps</a> <a href="https://stamen.com/" target="_blank">&copy; Stamen Design</a> &copy; <a href="https://openmaptiles.org/" target="_blank">OpenMapTiles</a> &copy; <a href="https://www.openstreetmap.org/copyright" target="_blank">OpenStreetMap</a>', "name": "Stamen Toner Lite"},
    
    # {"tiles": "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', "name": "OpenStreetMap Standard"},
    # {"tiles": "https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png", "attr": 'Map data: &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors, SRTM | Map style: &copy; <a href="https://opentopomap.org">OpenTopoMap</a> (CC-BY-SA)', "name": "OpenTopoMap"}

    # {"tiles": "https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Voyager"},
    {"tiles": "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Light"},
    # {"tiles": "https://{s}.basemaps.cartocdn.com/rastertiles/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Positron"},

]

# def generate_folium_map_with_recommended_crops_popup(gdf, zoom=6, decimal_places=3):
def generate_folium_map_with_recommended_crops_popup(gdf, zoom=6, max_zoom=12, decimal_places=3):
    # Check CRS and convert to EPSG:4326 if needed
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)
    
    # Format latitude and longitude to the desired number of decimal places
    gdf['Longitude_formatted'] = gdf['Longitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    gdf['Latitude_formatted'] = gdf['Latitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    
    # Ensure necessary columns are included in the properties
    gdf = gdf[['geometry', 'CDL2023', 'Longitude', 'Latitude', 'Longitude_formatted', 'Latitude_formatted', 'color', 'Crop', 'Elevation', 'Recommended_Crops_Historical_html']]
    
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        }

    # Create a separate GeoDataFrame for the centroids
    centroids_gdf = gdf.copy()
    # centroids_gdf['geometry'] = centroids_gdf.apply(lambda row: Point(row['Longitude'], row['Latitude']), axis=1)
    centroids_gdf['geometry'] = gpd.points_from_xy(centroids_gdf['Longitude'], centroids_gdf['Latitude'])
    
    # Convert geometries to GeoJSON
    geojson_data = gdf.to_json()
    centroids_geojson_data = centroids_gdf.to_json()
    
    # Calculate map center
    minx, miny, maxx, maxy = gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2
    
    # # Initialize map
    # m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, max_zoom = 7, tiles=None)
    # Initialize map
    m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, max_zoom=max_zoom, tiles=None)
    
    # Add each tile layer to the map
    for tile_info in tiles_list:
        tiles = tile_info["tiles"]
        attr = tile_info["attr"]
        layer_name = tile_info["name"]
        folium.TileLayer(tiles=tiles, attr=attr, name=layer_name).add_to(m)
    
    # Add GeoJSON layer with custom popups and styles for crop sequence boundaries
    folium.GeoJson(
        geojson_data,
        name='Field Boundaries',
        style_function=style_function,
        # popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted','Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:'])
        popup=folium.GeoJsonPopup(fields=['Recommended_Crops_Historical_html'], aliases=['Recommended Crops (Historical Scenario)'], parse_html=True, max_width="100%"),
        zoom_on_click=True,
    ).add_to(m)
    
    # Add GeoJSON layer for centroids with custom CircleMarkers
    folium.GeoJson(
        centroids_geojson_data,
        name='Field Locations',
        # marker=folium.CircleMarker(radius=4, fill_color='grey', fill_opacity=1, color='grey', weight=0.2),
        marker=folium.CircleMarker(
            radius=6, 
            fill=True,
            fill_opacity=0.8,
            weight=0.2
        ),
        # marker=folium.CircleMarker(radius=4),
        style_function=lambda x: {'fillColor': x['properties']['color'], 'color': 'grey'},
        popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:']),
        # popup=folium.GeoJsonPopup(fields=['Recommended_Crops_Historical_html'], aliases=['Recommended Crops (Historical Scenario)'], parse_html=True, max_width="100%"),
        zoom_on_click=True,
    ).add_to(m)

    # folium.FitOverlays().add_to(m)

    folium.plugins.Fullscreen(
        position="topleft",
        title="Fullscreen",
        title_cancel="Exit Fullscreen",
        force_separate_button=True,
    ).add_to(m)
    
    # Add layer control to the map
    folium.LayerControl().add_to(m)
    
    return m

In [78]:
# m = generate_folium_map_with_recommended_crops_popup(fields)
# m.save('../data/public/maph a_with_crop_recommendations.html')

In [79]:
# m

#### generate colors for each set of recommended crops.

In [80]:
# list(fields.columns)

In [81]:
# cm.linear

In [82]:
historical_set = set(fields["historical_Crop_Indices"].apply(tuple).unique())
mcm_set = set(fields["mid_century_medium_carbon_Crop_Indices"].apply(tuple).unique())
mch_set = set(fields["mid_century_high_carbon_Crop_Indices"].apply(tuple).unique())

In [83]:
len(historical_set), len(mcm_set), len(mch_set)

(264, 212, 188)

In [84]:
combined_set = historical_set | mcm_set | mch_set
len(combined_set)

541

In [85]:
# Map each unique set to a unique color
# suitable_cropsets_colormap = cm.linear.Pastel1_09.to_step(index=[i for i in range(len(combined_set))]) #Set1_08
suitable_cropsets_colormap = cm.linear.Set1_08.to_step(index=[i for i in range(len(combined_set))]) #Set1_08
suitable_cropsets_colormap

In [86]:
# Create mappings from each set to a unique ID and a unique color
set_id_mapping = {set_: i for i, set_ in enumerate(combined_set)}
set_color_mapping = {set_: suitable_cropsets_colormap(i) for i, set_ in enumerate(combined_set)}

In [87]:
# Define a function to apply the mappings to each row
def get_set_id_and_color(crop_indices_list):
    crop_tuple = tuple(crop_indices_list)  # Convert list to tuple for lookup
    set_id = set_id_mapping[crop_tuple]  # Get the unique ID
    set_color = set_color_mapping[crop_tuple]  # Get the corresponding color
    return set_id, set_color

In [88]:
# Apply the function to the DataFrame columns
fields['historical_cropset_id'], fields['historical_cropset_color'] = zip(
    *fields['historical_Crop_Indices'].apply(get_set_id_and_color))

fields['mid_century_medium_carbon_cropset_id'], fields['mid_century_medium_carbon_cropset_color'] = zip(
    *fields['mid_century_medium_carbon_Crop_Indices'].apply(get_set_id_and_color))

fields['mid_century_high_carbon_cropset_id'], fields['mid_century_high_carbon_cropset_color'] = zip(
    *fields['mid_century_high_carbon_Crop_Indices'].apply(get_set_id_and_color))

# Now, the DataFrame `fields` contains the new ID and color columns

In [89]:
# fields['historical_cropset_color'].unique()

## start httpd server to view maps outside of notebook to save memory

In [90]:
# Change to the directory containing your files
os.chdir('/Users/jwhite/Library/Mobile Documents/com~apple~CloudDocs/UMich_MADS/Semester_06/SIADS_699_Capstone/future_fields/datasets/maps')
# os.chdir('../datesets/maps/')

# # Start the server in the background
# ! python -m http.server 8000 &

# Start the server in the background
# server_process = subprocess.Popen(['python', '-m', 'http.server', '8000'])
# Start the HTTP server and suppress its output
server_process = subprocess.Popen(
    ['python', '-m', 'http.server', '8000'],
    stdout=subprocess.DEVNULL,  # Suppress standard output
    stderr=subprocess.DEVNULL   # Suppress standard error
)

print("HTTP server started on port 8000")

os.chdir('/Users/jwhite/Library/Mobile Documents/com~apple~CloudDocs/UMich_MADS/Semester_06/SIADS_699_Capstone/future_fields/notebooks')
# os.chdir('../../notebooks/')

HTTP server started on port 8000


In [91]:
# # uncomment to terminate httpd
# server_process.terminate()
# print("HTTP server stopped")

In [92]:
# Ensure the directory exists
os.makedirs('../datasets/maps/', exist_ok=True)

timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
# map_file = f'../datasets/maps/{timestamp}_map_with_crop_recommendations.html'
map_file = f'../datasets/maps/map_with_crop_recommendations.html'

generate_folium_map_with_recommended_crops_popup(fields).save(map_file)

In [93]:
# Get the absolute path of the file
map_file_base = os.path.basename(map_file)

print()
HTML(f'<b><a href="http://localhost:8000/{map_file_base}" target="_blank">[ Map with crop recommendations in a popup ]</a></b>')




## are CDL crops in recommendations?

In [94]:
# cdl_to_ecocrop_crop_code_translator_df = pd.read_pickle('../data/agricultural/EcoCrop/siads699/cdl_to_ecocrop_code_translator.pickle')
cdl_to_ecocrop_crop_code_translator_df = pd.read_pickle('../data/crops/cdl_to_ecocrop_code_translator.pickle')
# crops.to_pickle('../data/crops/cdl_to_ecocrop_code_translator.pickle')

In [95]:
cdl_to_ecocrop_crop_code_translator_df

Unnamed: 0,CDL_Code,Crop_Name,Likely_Crop_Species,Likely_Crop_Species_Normalized,Likely_EcoCrop_IDs,Discussion
1,1,Corn,[Zea mays],[Zea mays],[2411],"When a field has 'corn' growing in it, the spe..."
2,2,Cotton,"[Gossypium hirsutum, Gossypium barbadense, Gos...","[Gossypium hirsutum, Gossypium barbadense, Gos...","[209, 208, 1360, 1361]","When a field has 'cotton' growing in it, the s..."
3,3,Rice,"[Oryza sativa, Oryza glaberrima]","[Oryza sativa, Oryza glaberrima]","[1608, 1605]","When a field has 'rice' growing in it, the spe..."
4,4,Sorghum,[Sorghum bicolor],[Sorghum bicolor],[2281],"When a field has 'sorghum' growing in it, the ..."
5,5,Soybeans,[Glycine max],[Glycine max],[206],"When a field has 'soybeans' growing in it, the..."
...,...,...,...,...,...,...
247,247,Turnips,[Brassica rapa subsp. rapa],[Brassica rapa],[869],"When a field has 'turnips' growing in it, the ..."
248,248,Eggplants,[Solanum melongena],[Solanum melongena],[360],"When a field has 'eggplants' growing in it, th..."
249,249,Gourds,"[Cucurbita pepo, Lagenaria siceraria]","[Cucurbita pepo, Lagenaria siceraria]","[137, 236]","When a field has 'gourds' growing in it, the s..."
250,250,Cranberries,[Vaccinium macrocarpon],[Vaccinium macrocarpon],[2021],"When a field has 'cranberries' growing in it, ..."


In [96]:
fields

Unnamed: 0,Original_Index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,Recommended_Crops_Mid_Century_High_Carbon,Recommended_Crops_Historical_html,Recommended_Crops_Mid_Century_Medium_Carbon_html,Recommended_Crops_Mid_Century_High_Carbon_html,historical_cropset_id,historical_cropset_color,mid_century_medium_carbon_cropset_id,mid_century_medium_carbon_cropset_color,mid_century_high_carbon_cropset_id,mid_century_high_carbon_cropset_color
0,478801,081623012787392,1623,3.791540,4,24,61,61,24,24,...,Scientific_Name Common_Name ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,249,#b0597dff,480,#b9604bff,480,#b9604bff
1,107446,351623001627247,1623,4.325446,225,1,225,152,152,152,...,Scientific_Name ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,469,#ae5a35ff,325,#ff9b0bff,325,#ff9b0bff
2,121162,351623002777545,1623,3.222010,61,176,1,176,36,36,...,Scientific_Name \ 368 Sor...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,481,#ba604dff,228,#9552a0ff,228,#9552a0ff
3,88813,351623000046581,1623,10.204036,36,36,36,36,152,37,...,Scientific_Name Common_Na...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,54,#6b6089ff,459,#aa5c28ff,47,#7a577bff
4,516266,081623013377366,1623,2.918481,24,29,29,24,1,61,...,Scientific_Name Common_Name ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,145,#4aa957ff,358,#ffd221ff,358,#ffd221ff
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,119071,351623002568175,1623,2.957066,176,176,176,61,24,24,...,Scientific_Name ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,372,#ffea2aff,256,#ba5e6eff,219,#8c5d95ff
1196,540278,081623014401456,1623,9.374885,61,24,61,1,1,1,...,Scientific_Name ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,390,#faf432ff,105,#3f9090ff,105,#3f9090ff
1197,591949,041623013556134,1623,3.770585,72,72,72,190,190,190,...,Scientific_Name ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,474,#b35d3fff,382,#fffa31ff,197,#77797cff
1198,217842,081623005639708,1623,4.307316,61,24,1,61,24,29,...,Scientific_Name Common_Name ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,\n <style>\n .scrollable-popup {\n ...,50,#745b81ff,358,#ffd221ff,358,#ffd221ff


In [97]:
fields.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1200 entries, 0 to 1199
Data columns (total 57 columns):
 #   Column                                              Non-Null Count  Dtype   
---  ------                                              --------------  -----   
 0   Original_Index                                      1200 non-null   int64   
 1   CSBID                                               1200 non-null   object  
 2   CSBYEARS                                            1200 non-null   object  
 3   CSBACRES                                            1200 non-null   float64 
 4   CDL2016                                             1200 non-null   int64   
 5   CDL2017                                             1200 non-null   int64   
 6   CDL2018                                             1200 non-null   int64   
 7   CDL2019                                             1200 non-null   int64   
 8   CDL2020                                             1200 non

In [98]:
fields['CDL2016_2023'] = fields[['CDL2016', 'CDL2017', 'CDL2018', 'CDL2019', 'CDL2020', 'CDL2021', 'CDL2022', 'CDL2023']].values.tolist()
fields['CDL2016_2023']

0             [4, 24, 61, 61, 24, 24, 61, 61]
1        [225, 1, 225, 152, 152, 152, 36, 36]
2             [61, 176, 1, 176, 36, 36, 1, 1]
3           [36, 36, 36, 36, 152, 37, 37, 37]
4             [24, 29, 29, 24, 1, 61, 61, 61]
                        ...                  
1195     [176, 176, 176, 61, 24, 24, 24, 176]
1196             [61, 24, 61, 1, 1, 1, 1, 37]
1197    [72, 72, 72, 190, 190, 190, 190, 190]
1198          [61, 24, 1, 61, 24, 29, 61, 24]
1199    [24, 24, 24, 176, 176, 176, 176, 176]
Name: CDL2016_2023, Length: 1200, dtype: object

In [99]:
# First, create a mapping dictionary from cdl_to_ecocrop_crop_code_translator_df
cdl_to_ecocrop_mapping = dict(zip(
    cdl_to_ecocrop_crop_code_translator_df['CDL_Code'],
    cdl_to_ecocrop_crop_code_translator_df['Likely_EcoCrop_IDs']
))

# Define a function that will translate CDL codes to EcoCrop IDs
def translate_cdl_to_ecocrop(cdl_list):
    # Use the mapping to translate each CDL code to a list of EcoCrop IDs
    return [cdl_to_ecocrop_mapping.get(cdl_code, []) for cdl_code in cdl_list]

# Apply the function to the 'CDL2016_2023' column and create the 'EcoCrop2016_2023' column
fields['EcoCrop2016_2023'] = fields['CDL2016_2023'].apply(translate_cdl_to_ecocrop)

# Now fields['EcoCrop2016_2023'] contains lists of EcoCrop indices corresponding to each field's CDL codes
# Flatten the list of lists using list comprehension
fields['EcoCrop2016_2023'] = [list(itertools.chain.from_iterable(lst)) for lst in fields['EcoCrop2016_2023']]
fields['EcoCrop2016_2023'] = [list(set(crop_list)) for crop_list in fields['EcoCrop2016_2023']]

In [100]:
fields['EcoCrop2016_2023']

0                                             [2281, 404]
1                                        [2411, 404, 261]
2                                             [2411, 261]
3       [261, 400, 401, 402, 403, 2476, 2477, 2478, 24...
4                                 [2411, 404, 1621, 1863]
                              ...                        
1195                                                [404]
1196    [400, 401, 402, 403, 404, 2476, 2477, 2478, 24...
1197                                 [114, 116, 117, 118]
1198                              [2411, 404, 1621, 1863]
1199                                                [404]
Name: EcoCrop2016_2023, Length: 1200, dtype: object

In [101]:
fields['historical_Crop_Indices']

0                                         [26, 643, 1487]
1                                            [1186, 1187]
2                    [26, 69, 420, 643, 1070, 1468, 1487]
3       [625, 885, 1095, 1505, 2008, 2134, 2140, 2187,...
4                                  [643, 647, 1496, 1729]
                              ...                        
1195                                                [786]
1196                                          [647, 1496]
1197                                                [487]
1198                          [643, 647, 651, 1496, 1729]
1199                                     [643, 647, 1496]
Name: historical_Crop_Indices, Length: 1200, dtype: object

In [102]:
# Use sets to check if there is any overlap
fields['cdl_crops_in_historical_recommendations'] = fields.apply(
    lambda row: int(bool(set(row['EcoCrop2016_2023']) & set(row['historical_Crop_Indices']))), axis=1
)

# Use sets to check if there is any overlap
fields['cdl_crops_in_mid_century_medium_recommendations'] = fields.apply(
    lambda row: int(bool(set(row['EcoCrop2016_2023']) & set(row['mid_century_medium_carbon_Crop_Indices']))), axis=1
)

# Use sets to check if there is any overlap
fields['cdl_crops_in_mid_century_high_recommendations'] = fields.apply(
    lambda row: int(bool(set(row['EcoCrop2016_2023']) & set(row['mid_century_high_carbon_Crop_Indices']))), axis=1
)


In [103]:
# Generate 'cdl_crops_in_historical_recommendations_yes_no' column
fields['cdl_crops_in_historical_recommendations_yes_no'] = np.where(fields['cdl_crops_in_historical_recommendations'] == 1, 'Yes', 'No')

# Generate 'cdl_crops_in_mid_century_medium_recommendations_yes_no' column
fields['cdl_crops_in_mid_century_medium_recommendations_yes_no'] = np.where(fields['cdl_crops_in_mid_century_medium_recommendations'] == 1, 'Yes', 'No')

# Generate 'cdl_crops_in_mid_century_high_recommendations_yes_no' column
fields['cdl_crops_in_mid_century_high_recommendations_yes_no'] = np.where(fields['cdl_crops_in_mid_century_high_recommendations'] == 1, 'Yes', 'No')

In [104]:
fields[fields[['cdl_crops_in_historical_recommendations', 'cdl_crops_in_mid_century_medium_recommendations', 'cdl_crops_in_mid_century_high_recommendations']].any(axis=1)]

Unnamed: 0,Original_Index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,mid_century_high_carbon_cropset_id,mid_century_high_carbon_cropset_color,CDL2016_2023,EcoCrop2016_2023,cdl_crops_in_historical_recommendations,cdl_crops_in_mid_century_medium_recommendations,cdl_crops_in_mid_century_high_recommendations,cdl_crops_in_historical_recommendations_yes_no,cdl_crops_in_mid_century_medium_recommendations_yes_no,cdl_crops_in_mid_century_high_recommendations_yes_no
152,589456,41623011637460,1623,4.600101,36,36,36,36,36,22,...,74,#3d7ab2ff,"[36, 36, 36, 36, 36, 22, 36, 2]","[261, 208, 209, 1360, 2003, 1361]",1,0,0,Yes,No,No
272,596521,41623013868437,1623,8.773638,2,61,61,152,152,152,...,47,#7a577bff,"[2, 61, 61, 152, 152, 152, 152, 152]","[208, 209, 1361, 1360]",1,0,0,Yes,No,No
314,590186,41623011638195,1623,4.368546,36,36,2,2,2,2,...,74,#3d7ab2ff,"[36, 36, 2, 2, 2, 2, 36, 36]","[261, 208, 209, 1361, 1360]",1,0,0,Yes,No,No
343,597145,41623013869061,1623,3.629369,2,61,61,152,152,51,...,269,#cb6652ff,"[2, 61, 61, 152, 152, 51, 61, 61]","[487, 208, 209, 1361, 1360]",1,0,0,Yes,No,No
384,128332,351623003520488,1623,69.182339,36,36,1,1,42,24,...,273,#d0694aff,"[36, 36, 1, 1, 42, 24, 1, 1]","[2211, 261, 2411, 404, 309]",1,0,0,Yes,No,No
396,591476,41623011640410,1623,21.566905,37,44,37,2,2,2,...,197,#77797cff,"[37, 44, 37, 2, 2, 2, 2, 2]","[400, 401, 402, 403, 2476, 2477, 2478, 2479, 2...",1,0,0,Yes,No,No
400,392909,81623009661584,1623,3.346989,142,176,190,205,37,23,...,114,#419683ff,"[142, 176, 190, 205, 37, 23, 36, 37]","[261, 400, 401, 402, 403, 404, 2476, 2477, 247...",0,0,1,No,No,Yes
410,578898,41623010293816,1623,10.417035,61,2,2,61,61,61,...,380,#fff72fff,"[61, 2, 2, 61, 61, 61, 61, 61]","[208, 209, 1361, 1360]",1,0,0,Yes,No,No
486,85168,491623015988285,1623,3.909715,36,36,36,36,36,36,...,129,#469f6eff,"[36, 36, 36, 36, 36, 36, 36, 36]",[261],1,0,0,Yes,No,No
655,40971,491623012656946,1623,2.695439,195,190,190,190,190,205,...,350,#ffc51bff,"[195, 190, 190, 190, 190, 205, 205, 36]","[353, 261, 2001, 2002, 2003, 404, 2004, 2550, ...",0,1,0,No,Yes,No


In [105]:
fields[fields['cdl_crops_in_historical_recommendations']==1]['cdl_crops_in_historical_recommendations'].sum()

np.int64(14)

In [106]:
fields[fields['cdl_crops_in_mid_century_medium_recommendations']==1]['cdl_crops_in_mid_century_medium_recommendations'].sum()

np.int64(1)

In [107]:
fields[fields['cdl_crops_in_mid_century_high_recommendations']==1]['cdl_crops_in_mid_century_high_recommendations'].sum()

np.int64(1)

In [108]:
# no rainfall scores
# Use sets to check if there is any overlap
fields['cdl_crops_in_historical_recommendations_no_rainfall'] = fields.apply(
    lambda row: int(bool(set(row['EcoCrop2016_2023']) & set(row['historical_Crop_Indices_no_rainfall']))), axis=1
)

# Use sets to check if there is any overlap
fields['cdl_crops_in_mid_century_medium_recommendations_no_rainfall'] = fields.apply(
    lambda row: int(bool(set(row['EcoCrop2016_2023']) & set(row['mid_century_medium_carbon_Crop_Indices_no_rainfall']))), axis=1
)

# Use sets to check if there is any overlap
fields['cdl_crops_in_mid_century_high_recommendations_no_rainfall'] = fields.apply(
    lambda row: int(bool(set(row['EcoCrop2016_2023']) & set(row['mid_century_high_carbon_Crop_Indices_no_rainfall']))), axis=1
)


## map with crop recommendations in a popup and comparison layer

In [109]:
# List of available tiles with their attributions and layer names (including USGS maps)
tiles_list = [
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Topo"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery Topo"},
    
    {"tiles": "OpenStreetMap", "attr": "© OpenStreetMap contributors", "name": "OpenStreetMap"},
    
    # {"tiles": "https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png", "attr": '&copy; <a href="https://stadiamaps.com/" target="_blank">Stadia Maps</a> <a href="https://stamen.com/" target="_blank">&copy; Stamen Design</a> &copy; <a href="https://openmaptiles.org/" target="_blank">OpenMapTiles</a> &copy; <a href="https://www.openstreetmap.org/copyright" target="_blank">OpenStreetMap</a>', "name": "Stamen Toner Lite"},
    
    # {"tiles": "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', "name": "OpenStreetMap Standard"},
    # {"tiles": "https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png", "attr": 'Map data: &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors, SRTM | Map style: &copy; <a href="https://opentopomap.org">OpenTopoMap</a> (CC-BY-SA)', "name": "OpenTopoMap"}

    # {"tiles": "https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Voyager"},
    {"tiles": "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Light"},
    # {"tiles": "https://{s}.basemaps.cartocdn.com/rastertiles/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Positron"},

]

# def generate_folium_map_with_recommended_crops_popup(gdf, zoom=6, decimal_places=3):
def generate_folium_map_with_recommended_crops_popup_cdl_recommendation_comparison(gdf, zoom=6, max_zoom=12, decimal_places=3):
    # Check CRS and convert to EPSG:4326 if needed
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)
    
    # Format latitude and longitude to the desired number of decimal places
    gdf['Longitude_formatted'] = gdf['Longitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    gdf['Latitude_formatted'] = gdf['Latitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    
    # Ensure necessary columns are included in the properties
    gdf = gdf[[
        'geometry', 
        'CDL2023', 
        'Longitude', 
        'Latitude', 
        'Longitude_formatted', 
        'Latitude_formatted', 
        'color', 
        'Crop', 
        'Elevation', 
        'Recommended_Crops_Historical_html', 
        'cdl_crops_in_historical_recommendations', 
        'cdl_crops_in_historical_recommendations_no_rainfall', 
        'cdl_crops_in_mid_century_medium_recommendations_no_rainfall', 
        'cdl_crops_in_mid_century_high_recommendations_no_rainfall',
    ]]
    
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        }

    # Create a separate GeoDataFrame for the centroids
    centroids_gdf = gdf.copy()
    # centroids_gdf['geometry'] = centroids_gdf.apply(lambda row: Point(row['Longitude'], row['Latitude']), axis=1)
    centroids_gdf['geometry'] = gpd.points_from_xy(centroids_gdf['Longitude'], centroids_gdf['Latitude'])
    
    # Convert geometries to GeoJSON
    geojson_data = gdf.to_json()
    centroids_geojson_data = centroids_gdf.to_json()
    
    # Calculate map center
    minx, miny, maxx, maxy = gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2
    
    # # Initialize map
    # m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, max_zoom = 7, tiles=None)
    # Initialize map
    m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, max_zoom=max_zoom, tiles=None)
    
    # Add each tile layer to the map
    for tile_info in tiles_list:
        tiles = tile_info["tiles"]
        attr = tile_info["attr"]
        layer_name = tile_info["name"]
        folium.TileLayer(tiles=tiles, attr=attr, name=layer_name).add_to(m)
    
    # Add GeoJSON layer with custom popups and styles for crop sequence boundaries
    folium.GeoJson(
        geojson_data,
        name='Field boundaries',
        style_function=style_function,
        # popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted','Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:'])
        popup=folium.GeoJsonPopup(
            fields=['Recommended_Crops_Historical_html'], 
            aliases=['Suitable crops (historical scenario)'], 
            parse_html=True, 
            max_width="100%",
            lazy=True  # Enable lazy loading for popups
        ),
        zoom_on_click=True,
        show=False,
    ).add_to(m)
    
    # Add GeoJSON layer for centroids with custom CircleMarkers
    folium.GeoJson(
        centroids_geojson_data,
        name='Field locations',
        # marker=folium.CircleMarker(radius=4, fill_color='grey', fill_opacity=1, color='grey', weight=0.2),
        marker=folium.CircleMarker(
            radius=6, 
            fill=True,
            fill_opacity=0.8,
            weight=0.2
        ),
        # marker=folium.CircleMarker(radius=4),
        style_function=lambda x: {'fillColor': x['properties']['color'], 'color': 'grey'},
        popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation'], 
                                  aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:']),
        # popup=folium.GeoJsonPopup(fields=['Recommended_Crops_Historical_html'], aliases=['Recommended Crops (Historical Scenario)'], parse_html=True, max_width="100%"),
        zoom_on_click=True,
    ).add_to(m)


    # Add GeoJSON layer for centroids with conditional coloring
    folium.GeoJson(
        centroids_geojson_data,
        name='Suitable crops (historical) match actual?',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': 'cornflowerblue' if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else '#b22222',
            'fillOpacity': 0.8 if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else 0.4,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'cdl_crops_in_historical_recommendations'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'Suitable (historical) matches actual?'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)

    # Add GeoJSON layer for centroids with conditional coloring
    folium.GeoJson(
        centroids_geojson_data,
        name='Suitable crops (historical, w/o rainfall) match actual?',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': 'cornflowerblue' if feature['properties']['cdl_crops_in_historical_recommendations_no_rainfall'] == 1 else '#b22222',
            'fillOpacity': 0.8 if feature['properties']['cdl_crops_in_historical_recommendations_no_rainfall'] == 1 else 0.4,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'cdl_crops_in_historical_recommendations_no_rainfall'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'Suitable (w/o rainfall) matches actual?'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)
    

    folium.plugins.Fullscreen(
        position="topleft",
        title="Fullscreen",
        title_cancel="Exit Fullscreen",
        force_separate_button=True,
    ).add_to(m)

    # folium.FitOverlays().add_to(m)
    
    # Add layer control to the map
    folium.LayerControl().add_to(m)
    
    return m

In [110]:
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# map_file = f'../data/maps/{timestamp}_map_with_crop_recommendations_popup_and_comparison_layer_no_rainfall.html'
map_file = '../datasets/maps/map_with_crop_recommendations_popup_and_comparison_layer_no_rainfall'

generate_folium_map_with_recommended_crops_popup_cdl_recommendation_comparison(fields).save(map_file)

In [111]:
# Display the link
# Get the absolute path of the file
map_file_base = os.path.basename(map_file)

print()
HTML(f'<b><a href="http://localhost:8000/{map_file_base}" target="_blank">[ Map with crop recommendations in a popup and comparison layer ]</a></b>')




## jaccard similarity between historical and mid-century scenarios

In [112]:
# Convert lists to sets and then to arrays of sets
historical_sets = np.array([set(indices) for indices in fields['historical_Crop_Indices']])
mid_century_medium_sets = np.array([set(indices) for indices in fields['mid_century_medium_carbon_Crop_Indices']])
mid_century_high_sets = np.array([set(indices) for indices in fields['mid_century_high_carbon_Crop_Indices']])

# Calculate intersection and union for medium carbon scenario
intersection_medium = np.array([len(h_set & m_set) for h_set, m_set in zip(historical_sets, mid_century_medium_sets)])
union_medium = np.array([len(h_set | m_set) for h_set, m_set in zip(historical_sets, mid_century_medium_sets)])

# Calculate Jaccard similarity for medium carbon scenario
fields['jaccard_hist_mid_medium'] = np.divide(intersection_medium, union_medium, out=np.zeros_like(intersection_medium, dtype=float), where=union_medium != 0)

# Calculate intersection and union for high carbon scenario
intersection_high = np.array([len(h_set & h_set2) for h_set, h_set2 in zip(historical_sets, mid_century_high_sets)])
union_high = np.array([len(h_set | h_set2) for h_set, h_set2 in zip(historical_sets, mid_century_high_sets)])

# Calculate Jaccard similarity for high carbon scenario
fields['jaccard_hist_mid_high'] = np.divide(intersection_high, union_high, out=np.zeros_like(intersection_high, dtype=float), where=union_high != 0)

In [113]:
fields

Unnamed: 0,Original_Index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,cdl_crops_in_mid_century_medium_recommendations,cdl_crops_in_mid_century_high_recommendations,cdl_crops_in_historical_recommendations_yes_no,cdl_crops_in_mid_century_medium_recommendations_yes_no,cdl_crops_in_mid_century_high_recommendations_yes_no,cdl_crops_in_historical_recommendations_no_rainfall,cdl_crops_in_mid_century_medium_recommendations_no_rainfall,cdl_crops_in_mid_century_high_recommendations_no_rainfall,jaccard_hist_mid_medium,jaccard_hist_mid_high
0,478801,081623012787392,1623,3.791540,4,24,61,61,24,24,...,0,0,No,No,No,0,0,0,0.000000,0.0
1,107446,351623001627247,1623,4.325446,225,1,225,152,152,152,...,0,0,No,No,No,0,0,0,0.500000,0.5
2,121162,351623002777545,1623,3.222010,61,176,1,176,36,36,...,0,0,No,No,No,0,0,0,0.000000,0.0
3,88813,351623000046581,1623,10.204036,36,36,36,36,152,37,...,0,0,No,No,No,0,0,0,0.818182,0.0
4,516266,081623013377366,1623,2.918481,24,29,29,24,1,61,...,0,0,No,No,No,0,0,0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,119071,351623002568175,1623,2.957066,176,176,176,61,24,24,...,0,0,No,No,No,0,0,0,0.250000,0.0
1196,540278,081623014401456,1623,9.374885,61,24,61,1,1,1,...,0,0,No,No,No,0,0,0,0.000000,0.0
1197,591949,041623013556134,1623,3.770585,72,72,72,190,190,190,...,0,0,No,No,No,0,0,0,0.000000,0.0
1198,217842,081623005639708,1623,4.307316,61,24,1,61,24,29,...,0,0,No,No,No,0,0,0,0.000000,0.0


In [114]:
field_file = '../data/fields/csb_sample_with_jaccard_comparisons6.parquet'
fields.to_pickle(field_file)

## jaccard similarity between mid-century medium and mid-century high scenarios

In [115]:
# Convert lists to sets and then to arrays of sets
# historical_sets = np.array([set(indices) for indices in fields['historical_Crop_Indices']])
# mid_century_medium_sets = np.array([set(indices) for indices in fields['mid_century_medium_carbon_Crop_Indices']])
# mid_century_high_sets = np.array([set(indices) for indices in fields['mid_century_high_carbon_Crop_Indices']])

# # Calculate intersection and union for medium carbon scenario
# intersection_medium = np.array([len(h_set & m_set) for h_set, m_set in zip(historical_sets, mid_century_medium_sets)])
# union_medium = np.array([len(h_set | m_set) for h_set, m_set in zip(historical_sets, mid_century_medium_sets)])

# # Calculate Jaccard similarity for medium carbon scenario
# fields['jaccard_hist_mid_medium'] = np.divide(intersection_medium, union_medium, out=np.zeros_like(intersection_medium, dtype=float), where=union_medium != 0)

# # Calculate intersection and union for high carbon scenario
# intersection_high = np.array([len(h_set & h_set2) for h_set, h_set2 in zip(historical_sets, mid_century_high_sets)])
# union_high = np.array([len(h_set | h_set2) for h_set, h_set2 in zip(historical_sets, mid_century_high_sets)])

# # Calculate Jaccard similarity for high carbon scenario
# fields['jaccard_hist_mid_high'] = np.divide(intersection_high, union_high, out=np.zeros_like(intersection_high, dtype=float), where=union_high != 0)

intersection_med_high =  np.array([len(h_set & h_set2) for h_set, h_set2 in zip(mid_century_medium_sets, mid_century_high_sets)])
union_med_high =  np.array([len(h_set | h_set2) for h_set, h_set2 in zip(mid_century_medium_sets, mid_century_high_sets)])
fields['jaccard_mid_medium_mid_high'] = np.divide(intersection_med_high, union_med_high, out=np.zeros_like(intersection_med_high, dtype=float), where=union_med_high != 0)

In [116]:
fields['jaccard_mid_medium_mid_high'].min(), fields['jaccard_mid_medium_mid_high'].max(), fields['jaccard_mid_medium_mid_high'].mean()

(np.float64(0.0), np.float64(1.0), np.float64(0.62719724179859))

In [117]:
# list(fields.columns)

## generate_folium_map_with_scenario_comparisons 

In [118]:
colormap = cm.LinearColormap(["#b22222", 'white', "cornflowerblue"], vmin=0, vmax=1)
colormap.caption = 'Similarity between climate scenarios'
colormap.width=200
# colormap

In [119]:
colormap_similarities = cm.LinearColormap(["#b22222", 'white', "cornflowerblue"], vmin=0, vmax=1)
colormap_similarities.caption = 'Similarity between climate scenarios'
colormap_similarities.width=200
# colormap_similarities

In [120]:
colormap_score = cm.LinearColormap(["#b22222", 'white', "cornflowerblue"], vmin=-1, vmax=1,)
colormap_score.caption = 'Category score'
colormap_score.width=200
# colormap_score

In [121]:
colormap_score_overall = cm.LinearColormap(["#b22222", 'white', "cornflowerblue"], vmin=-6, vmax=6)
colormap_score_overall.caption = 'Overall score'
colormap_score_overall.width=200
# colormap_score_overall

In [122]:
colormap_match = cm.StepColormap(["#b22222", 'white', "cornflowerblue"])
colormap_match.caption = 'Match between actual and suitable'
colormap_match.width=200
# colormap_match

In [123]:
# List of available tiles with their attributions and layer names (including USGS maps)
tiles_list = [
    # {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Topo"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery Topo"},
    
    {"tiles": "OpenStreetMap", "attr": "© OpenStreetMap contributors", "name": "OpenStreetMap"},
    
    # {"tiles": "https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png", "attr": '&copy; <a href="https://stadiamaps.com/" target="_blank">Stadia Maps</a> <a href="https://stamen.com/" target="_blank">&copy; Stamen Design</a> &copy; <a href="https://openmaptiles.org/" target="_blank">OpenMapTiles</a> &copy; <a href="https://www.openstreetmap.org/copyright" target="_blank">OpenStreetMap</a>', "name": "Stamen Toner Lite"},
    
    # {"tiles": "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors', "name": "OpenStreetMap Standard"},
    # {"tiles": "https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png", "attr": 'Map data: &copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors, SRTM | Map style: &copy; <a href="https://opentopomap.org">OpenTopoMap</a> (CC-BY-SA)', "name": "OpenTopoMap"}

    # {"tiles": "https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Voyager"},
    {"tiles": "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Light"},
    # {"tiles": "https://{s}.basemaps.cartocdn.com/rastertiles/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Positron"},

]

# def generate_folium_map_with_recommended_crops_popup(gdf, zoom=6, decimal_places=3):
def generate_folium_map_with_scenario_comparisons(gdf, zoom=6, max_zoom=12, decimal_places=3):
    # Check CRS and convert to EPSG:4326 if needed
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)
    
    # Format latitude and longitude to the desired number of decimal places
    gdf['Longitude_formatted'] = gdf['Longitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    gdf['Latitude_formatted'] = gdf['Latitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    
    # Ensure necessary columns are included in the properties
    gdf = gdf[['geometry', 
               'CDL2023', 
               'Longitude', 
               'Latitude', 
               'Longitude_formatted', 
               'Latitude_formatted', 
               'color', 
               'Crop', 
               'Elevation', 
               'Recommended_Crops_Historical_html', 
               'cdl_crops_in_historical_recommendations',
               'cdl_crops_in_mid_century_medium_recommendations',
               'cdl_crops_in_mid_century_high_recommendations',
               'jaccard_hist_mid_medium', 
               'jaccard_hist_mid_high', 
               'jaccard_mid_medium_mid_high',
              ]]


    # Define a colormap using RdBu_03 from branca
    # colormap = cm.linear.RdBu_03.scale(0, 1)
    colormap = cm.LinearColormap([ "#b22222", "white", "cornflowerblue"], vmin=0, vmax=1)
    colormap.caption = 'Similarity between climate scenarios'
    colormap.width=200
        
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        }

    # Create a separate GeoDataFrame for the centroids
    centroids_gdf = gdf.copy()
    # centroids_gdf['geometry'] = centroids_gdf.apply(lambda row: Point(row['Longitude'], row['Latitude']), axis=1)
    centroids_gdf['geometry'] = gpd.points_from_xy(centroids_gdf['Longitude'], centroids_gdf['Latitude'])
    
    # Convert geometries to GeoJSON
    geojson_data = gdf.to_json()
    centroids_geojson_data = centroids_gdf.to_json()
    
    # Calculate map center
    minx, miny, maxx, maxy = gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2
    
    # # Initialize map
    # m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, max_zoom = 7, tiles=None)
    # Initialize map
    m = folium.Map(
        location=[center_latitude, center_longitude], 
        zoom_start=zoom, 
        max_zoom=max_zoom, 
        tiles=None,
        control_scale=True,
    )
    
    # Add each tile layer to the map
    for tile_info in tiles_list:
        tiles = tile_info["tiles"]
        attr = tile_info["attr"]
        layer_name = tile_info["name"]
        folium.TileLayer(tiles=tiles, attr=attr, name=layer_name).add_to(m)
    
    # Add GeoJSON layer with custom popups and styles for crop sequence boundaries
    folium.GeoJson(
        geojson_data,
        # name='Crop Sequence Boundaries',
        name='Field boundaries',
        style_function=style_function,
        # popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted','Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:'])
        # popup=folium.GeoJsonPopup(
        #     fields=['Recommended_Crops_Historical_html'], 
        #     aliases=['Suitable Crops (Recent Scenario)'], 
        #     parse_html=True, 
        #     max_width="100%",
        #     lazy=True  # Enable lazy loading for popups
        # ),
         popup=folium.GeoJsonPopup(
            fields=['Crop', 'Latitude_formatted', 'Longitude_formatted','Elevation', 'Recommended_Crops_Historical_html'], 
            aliases=['Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'Suitable Crops (Recent Scenario)'], 
            parse_html=True, 
            max_width="100%",
            lazy=True  # Enable lazy loading for popups
        ),
        # zoom_on_click=True,
        show=False,
    ).add_to(m)
    
    # Add GeoJSON layer for centroids with custom CircleMarkers
    folium.GeoJson(
        centroids_geojson_data,
        # name='CSB Centroids',
        name='Field locations',
        # marker=folium.CircleMarker(radius=4, fill_color='grey', fill_opacity=1, color='grey', weight=0.2),
        marker=folium.CircleMarker(
            radius=6, 
            fill=True,
            fill_opacity=0.8,
            weight=0.2
        ),
        # marker=folium.CircleMarker(radius=4),
        style_function=lambda x: {'fillColor': x['properties']['color'], 'color': 'grey'},
        popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:']),
        # popup=folium.GeoJsonPopup(fields=['Recommended_Crops_Historical_html'], aliases=['Recommended Crops (Historical Scenario)'], parse_html=True, max_width="100%"),
        # zoom_on_click=True,
    ).add_to(m)


    # Add GeoJSON layer for centroids with conditional coloring
    folium.GeoJson(
        centroids_geojson_data,
        name='Do actual crops (2016-2023) match suitable (recent scenario)?',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': 'cornflowerblue' if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else '#b22222',
            'fillOpacity': 0.8 if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else 0.4,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'cdl_crops_in_historical_recommendations'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'Do suitable crops (recent) contain the actual crop(s)?'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)

    # Add GeoJSON layer for centroids with conditional coloring
    folium.GeoJson(
        centroids_geojson_data,
        name='Do actual crops (2016-2023) match suitable (MCM scenario)?',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': 'cornflowerblue' if feature['properties']['cdl_crops_in_mid_century_medium_recommendations'] == 1 else '#b22222',
            'fillOpacity': 0.8 if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else 0.4,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'cdl_crops_in_mid_century_medium_recommendations'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'Do suitable crops (MCM scenario) contain the actual crop(s)?'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)

    # Add GeoJSON layer for centroids with conditional coloring
    folium.GeoJson(
        centroids_geojson_data,
        name='Do actual crops (2016-2023) match suitable (MCH scenario)?',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': 'cornflowerblue' if feature['properties']['cdl_crops_in_mid_century_high_recommendations'] == 1 else '#b22222',
            'fillOpacity': 0.8 if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else 0.4,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'cdl_crops_in_mid_century_high_recommendations'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'Do suitable crops (MCH scenario) contain the actual crop(s)?'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)
    

    # Add GeoJSON layer for centroids with conditional coloring based on Jaccard Similarity
    folium.GeoJson(
        centroids_geojson_data,
        name='Suitable crops: Mid-century medium-carbon compared to recent',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': colormap(feature['properties']['jaccard_hist_mid_medium']),
            'fillOpacity': 0.6,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'jaccard_hist_mid_medium'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'MCM vs Recent:'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)

    #  Recent compared to mid-century high-carbon
    folium.GeoJson(
        centroids_geojson_data,
        name='Suitable crops: Mid-century high-carbon compared to recent',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': colormap(feature['properties']['jaccard_hist_mid_high']),
            'fillOpacity': 0.6,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'jaccard_mid_medium_mid_high'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'MCH vs Recent:'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)
    
    #  mid-century medium-carbon compared to mid-century high-carbon
    folium.GeoJson(
        centroids_geojson_data,
        name='Suitable crops: Mid-century medium- compared to high-carbon',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': colormap(feature['properties']['jaccard_hist_mid_high']),
            'fillOpacity': 0.6,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'jaccard_hist_mid_high'], 
            aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:', 'MCM vs MCH:'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)
    
    # Add the colormap legend to the map
    # colormap.add_to(m)
    colormap_html_class = 'my_colormap'  # Ensure this matches the JavaScript

    folium.plugins.Fullscreen(
        position="topleft",
        title="Fullscreen",
        title_cancel="Exit Fullscreen",
        force_separate_button=True,
    ).add_to(m)

    # folium.FitOverlays().add_to(m)
    
    # Add layer control to the map
    folium.LayerControl().add_to(m)

    # Add the colormap manually
    m.get_root().html.add_child(folium.Element(f'''
        <div style="
            position: fixed;
            bottom: 40px;
            left: 20px;
            width: 100px;
            z-index: 1000;
        " class="{colormap_html_class}">
            {colormap._repr_html_()}
        </div>
    '''))


    return m

In [124]:
# Generate a timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# map_file = f'../data/public/{timestamp}_map_with_crop_recommendations_popup_and_scenario_comparisons.html'
map_file = '../datasets/maps/map_with_crop_recommendations_popup_and_scenario_comparisons.html'

generate_folium_map_with_scenario_comparisons(fields).save(map_file)

In [125]:
# Display the link
# Get the absolute path of the file
map_file_base = os.path.basename(map_file)

print()
HTML(f'<b><a href="http://localhost:8000/{map_file_base}" target="_blank">[ Map with crop recommendations in a popup and scenario comparisons ]</a></b>')




## test map

In [126]:
# List of available tiles with their attributions and layer names (including USGS maps)
tiles_list = [
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Topo"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery Topo"},
    
    {"tiles": "OpenStreetMap", "attr": "© OpenStreetMap contributors", "name": "OpenStreetMap"},
    
    {"tiles": "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Light"},
    
]

# def generate_folium_map_with_recommended_crops_popup(gdf, zoom=6, decimal_places=3):
def generate_folium_sample_map(gdf, zoom=6, max_zoom=12, decimal_places=3):
    # Check CRS and convert to EPSG:4326 if needed
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)
    
    # Format latitude and longitude to the desired number of decimal places
    gdf['Longitude_formatted'] = gdf['Longitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    gdf['Latitude_formatted'] = gdf['Latitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    
    # Ensure necessary columns are included in the properties
    gdf = gdf[['geometry', 'CDL2023', 'Longitude', 'Latitude', 'Longitude_formatted', 'Latitude_formatted', 'color', 'Crop', 'Elevation', 'Recommended_Crops_Historical_html', 'cdl_crops_in_historical_recommendations']]
    
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        }

    # Create a separate GeoDataFrame for the centroids
    centroids_gdf = gdf.copy()
    # centroids_gdf['geometry'] = centroids_gdf.apply(lambda row: Point(row['Longitude'], row['Latitude']), axis=1)
    centroids_gdf['geometry'] = gpd.points_from_xy(centroids_gdf['Longitude'], centroids_gdf['Latitude'])
    
    # Convert geometries to GeoJSON
    geojson_data = gdf.to_json()
    centroids_geojson_data = centroids_gdf.to_json()
    
    # Calculate map center
    minx, miny, maxx, maxy = gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2
    
    # Initialize map
    m = folium.Map(location=[center_latitude, center_longitude], zoom_start=zoom, max_zoom=max_zoom, tiles=None)
    
    # Add each tile layer to the map
    for tile_info in tiles_list:
        tiles = tile_info["tiles"]
        attr = tile_info["attr"]
        layer_name = tile_info["name"]
        folium.TileLayer(tiles=tiles, attr=attr, name=layer_name).add_to(m)

    
    # Add GeoJSON layer for centroids with custom CircleMarkers
    folium.GeoJson(
        centroids_geojson_data,
        name='CSB Centroids',
        # marker=folium.CircleMarker(radius=4, fill_color='grey', fill_opacity=1, color='grey', weight=0.2),
        marker=folium.CircleMarker(
            radius=6, 
            fill=True,
            fill_opacity=0.8,
            weight=0.2
        ),
        # marker=folium.CircleMarker(radius=4),
        style_function=lambda x: {'fillColor': x['properties']['color'], 'color': 'grey'},
        popup=folium.GeoJsonPopup(fields=['CDL2023', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation'], aliases=['Crop ID:', 'Crop:', 'Latitude:', 'Longitude:', 'Elevation:']),
        # popup=folium.GeoJsonPopup(fields=['Recommended_Crops_Historical_html'], aliases=['Recommended Crops (Historical Scenario)'], parse_html=True, max_width="100%"),
        zoom_on_click=True,
    ).add_to(m)

    folium.plugins.Fullscreen(
        position="topleft",
        title="Fullscreen",
        title_cancel="Exit Fullscreen",
        force_separate_button=True,
    ).add_to(m)


    # folium.FitOverlays().add_to(m)
    
    # Add layer control to the map
    folium.LayerControl().add_to(m)
    
    return m

In [127]:
# Generate a timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# map_file = f'../datasets/maps/{timestamp}_sample_map.html'
map_file = f'../datasets/maps/sample_map.html'

generate_folium_sample_map(fields).save(map_file)

In [128]:
# Display the link
# Get the absolute path of the file
map_file_base = os.path.basename(map_file)

print()
HTML(f'<b><a href="http://localhost:8000/{map_file_base}" target="_blank">[ Sample Map ]</a></b>')




## score analysis

In [129]:
# fields.columns

In [130]:
fields.CDL2016_2023

0             [4, 24, 61, 61, 24, 24, 61, 61]
1        [225, 1, 225, 152, 152, 152, 36, 36]
2             [61, 176, 1, 176, 36, 36, 1, 1]
3           [36, 36, 36, 36, 152, 37, 37, 37]
4             [24, 29, 29, 24, 1, 61, 61, 61]
                        ...                  
1195     [176, 176, 176, 61, 24, 24, 24, 176]
1196             [61, 24, 61, 1, 1, 1, 1, 37]
1197    [72, 72, 72, 190, 190, 190, 190, 190]
1198          [61, 24, 1, 61, 24, 29, 61, 24]
1199    [24, 24, 24, 176, 176, 176, 176, 176]
Name: CDL2016_2023, Length: 1200, dtype: object

In [131]:
# fields.info()

In [132]:
# Create a list of the matrices
matrices = [
    score_matrix_photoperiod,
    score_matrix_climate_zone,
    score_matrix_temperature,
    score_matrix_rainfall,
    score_matrix_hardiness,
    score_matrix_pH
]

# Create the 'scores' dimension
score_names = ['Photoperiod', 'Climate_Zone', 'Temperature', 'Rainfall', 'Hardiness', 'pH']

# # Stack the matrices along a new dimension 'scores'
# stacked_matrix = xr.concat(matrices, dim=pd.Index(score_names, name='scores'))

# Concatenate along a new dimension 'scores'
stacked_matrix = xr.concat(matrices, dim=pd.Index(score_names, name='scores')).transpose('fields', 'crops', 'scores', 'scenarios')

# Now, the shape should be (1200, 2568, 6, 3)
stacked_matrix.shape

(1200, 2568, 6, 3)

In [133]:
stacked_matrix = stacked_matrix.reset_index('crops')
stacked_matrix['crops'] = np.arange(len(stacked_matrix['crops']))
stacked_matrix

In [134]:
for dim in stacked_matrix.dims:
    print(f"Dimension: {dim}")
    print(f"Coordinate values dtype for '{dim}': {stacked_matrix.coords[dim].dtype}")
    print()

Dimension: fields
Coordinate values dtype for 'fields': <U15

Dimension: crops
Coordinate values dtype for 'crops': int64

Dimension: scores
Coordinate values dtype for 'scores': object

Dimension: scenarios
Coordinate values dtype for 'scenarios': <U25



In [135]:
save_file = '../datasets/scores/score_matrix_overall_stacked.nc'
# Save the sum_matrix to a new NetCDF file
stacked_matrix.to_netcdf(save_file)

print(f"Stacked matrix saved to '{save_file} ")

  exec(code_obj, self.user_global_ns, self.user_ns)


Stacked matrix saved to '../datasets/scores/score_matrix_overall_stacked.nc 


In [136]:
# Reshape the stacked_matrix into a DataFrame
stacked_df = stacked_matrix.to_dataframe(name='score').reset_index()
# Convert 'fields' and 'crops' into integers to allow easy matching
stacked_df['fields'] = stacked_df['fields'].astype(str)
stacked_df['crops'] = stacked_df['crops'].astype(int)

In [137]:
# Inspect the DataFrame
stacked_df

Unnamed: 0,fields,crops,scores,scenarios,score
0,041623001737287,0,Photoperiod,historical,-1.0
1,041623001737287,0,Photoperiod,mid_century_medium_carbon,-1.0
2,041623001737287,0,Photoperiod,mid_century_high_carbon,-1.0
3,041623001737287,0,Climate_Zone,historical,-1.0
4,041623001737287,0,Climate_Zone,mid_century_medium_carbon,-1.0
...,...,...,...,...,...
55468795,491623016053632,2567,Hardiness,mid_century_medium_carbon,1.0
55468796,491623016053632,2567,Hardiness,mid_century_high_carbon,1.0
55468797,491623016053632,2567,pH,historical,0.0
55468798,491623016053632,2567,pH,mid_century_medium_carbon,0.0


In [138]:
stacked_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55468800 entries, 0 to 55468799
Data columns (total 5 columns):
 #   Column     Dtype  
---  ------     -----  
 0   fields     object 
 1   crops      int64  
 2   scores     object 
 3   scenarios  object 
 4   score      float64
dtypes: float64(1), int64(1), object(3)
memory usage: 2.1+ GB


In [139]:
pd.set_option('future.no_silent_downcasting', True)

# Expand the 'EcoCrop2016_2023' and 'historical_Crop_Indices' columns in the fields DataFrame
fields_expanded_actual = fields.explode('EcoCrop2016_2023').reset_index()
# Replace NaN values in the 'EcoCrop2016_2023' column with 9999
fields_expanded_actual['EcoCrop2016_2023'] = fields_expanded_actual['EcoCrop2016_2023'].fillna(9999)
# Infer the appropriate data types to take care of the future warning
fields_expanded_actual['EcoCrop2016_2023'] = fields_expanded_actual['EcoCrop2016_2023'].infer_objects(copy=False)
fields_expanded_actual['EcoCrop2016_2023'] = fields_expanded_actual['EcoCrop2016_2023'].astype(int)
fields_expanded_actual['CSBID'] = fields_expanded_actual['CSBID'].astype(str)

fields_expanded_suitable_historical = fields.explode('historical_Crop_Indices').reset_index()
fields_expanded_suitable_historical['historical_Crop_Indices'] = fields_expanded_suitable_historical['historical_Crop_Indices'].astype(int)
fields_expanded_suitable_historical['CSBID'] = fields_expanded_suitable_historical['CSBID'].astype(str)

fields_expanded_suitable_MCM = fields.explode('mid_century_medium_carbon_Crop_Indices').reset_index()
fields_expanded_suitable_MCM['mid_century_medium_carbon_Crop_Indices'] = fields_expanded_suitable_MCM['mid_century_medium_carbon_Crop_Indices'].astype(int)
fields_expanded_suitable_MCM['CSBID'] = fields_expanded_suitable_MCM['CSBID'].astype(str)

fields_expanded_suitable_MCH = fields.explode('mid_century_high_carbon_Crop_Indices').reset_index()
fields_expanded_suitable_MCH['mid_century_high_carbon_Crop_Indices'] = fields_expanded_suitable_MCH['mid_century_high_carbon_Crop_Indices'].astype(int)
fields_expanded_suitable_MCH['CSBID'] = fields_expanded_suitable_MCH['CSBID'].astype(str)

In [140]:
expanded_columns_to_keep = [
    'CSBID',  
    'CDL2016_2023',
    'EcoCrop2016_2023',
    'geometry', 
    'Longitude',
    'Latitude', 
    'Elevation', 
    'color', 
    'Crop', 
    'historical_Crop_Indices',
    'historical_Max_Score', 
    'historical_N_Crops',
    'mid_century_medium_carbon_Crop_Indices',
    'mid_century_medium_carbon_Max_Score',
    'mid_century_medium_carbon_N_Crops',
    'mid_century_high_carbon_Crop_Indices',
    'mid_century_high_carbon_Max_Score', 
    'mid_century_high_carbon_N_Crops',
    'Recommended_Crops_Historical',
    'Recommended_Crops_Mid_Century_Medium_Carbon',
    'Recommended_Crops_Mid_Century_High_Carbon',
]

In [141]:
fields_expanded_actual = fields_expanded_actual[expanded_columns_to_keep]
fields_expanded_suitable_historical = fields_expanded_suitable_historical[expanded_columns_to_keep]
fields_expanded_suitable_MCM = fields_expanded_suitable_MCM[expanded_columns_to_keep]
fields_expanded_suitable_MCH = fields_expanded_suitable_MCH[expanded_columns_to_keep]
# len(fields_expanded_suitable.CSBID.unique())

fields_expanded_suitable_historical.info()
print()
fields_expanded_suitable_historical

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 6929 entries, 0 to 6928
Data columns (total 21 columns):
 #   Column                                       Non-Null Count  Dtype   
---  ------                                       --------------  -----   
 0   CSBID                                        6929 non-null   object  
 1   CDL2016_2023                                 6929 non-null   object  
 2   EcoCrop2016_2023                             6929 non-null   object  
 3   geometry                                     6929 non-null   geometry
 4   Longitude                                    6929 non-null   float64 
 5   Latitude                                     6929 non-null   float64 
 6   Elevation                                    6929 non-null   int32   
 7   color                                        6929 non-null   object  
 8   Crop                                         6929 non-null   object  
 9   historical_Crop_Indices                      6929 non-n

Unnamed: 0,CSBID,CDL2016_2023,EcoCrop2016_2023,geometry,Longitude,Latitude,Elevation,color,Crop,historical_Crop_Indices,...,historical_N_Crops,mid_century_medium_carbon_Crop_Indices,mid_century_medium_carbon_Max_Score,mid_century_medium_carbon_N_Crops,mid_century_high_carbon_Crop_Indices,mid_century_high_carbon_Max_Score,mid_century_high_carbon_N_Crops,Recommended_Crops_Historical,Recommended_Crops_Mid_Century_Medium_Carbon,Recommended_Crops_Mid_Century_High_Carbon
0,081623012787392,"[4, 24, 61, 61, 24, 24, 61, 61]","[2281, 404]","MULTIPOLYGON (((-603525.799 1704829.066, -6035...",-102.962080,38.176922,1231,#bfbf7a,Fallow/Idle Cropland,26,...,3,[2140],4.0,1,[2140],4.0,1,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
1,081623012787392,"[4, 24, 61, 61, 24, 24, 61, 61]","[2281, 404]","MULTIPOLYGON (((-603525.799 1704829.066, -6035...",-102.962080,38.176922,1231,#bfbf7a,Fallow/Idle Cropland,643,...,3,[2140],4.0,1,[2140],4.0,1,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
2,081623012787392,"[4, 24, 61, 61, 24, 24, 61, 61]","[2281, 404]","MULTIPOLYGON (((-603525.799 1704829.066, -6035...",-102.962080,38.176922,1231,#bfbf7a,Fallow/Idle Cropland,1487,...,3,[2140],4.0,1,[2140],4.0,1,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
3,351623001627247,"[225, 1, 225, 152, 152, 152, 36, 36]","[2411, 404, 261]","MULTIPOLYGON (((-796679.747 1149498.895, -7966...",-104.600667,33.089835,1138,#ffa8e3,Alfalfa,1186,...,2,[1187],3.5,1,[1187],3.5,1,Scientific_Name \ 1186 Erag...,Scientific_Name ...,Scientific_Name ...
4,351623001627247,"[225, 1, 225, 152, 152, 152, 36, 36]","[2411, 404, 261]","MULTIPOLYGON (((-796679.747 1149498.895, -7966...",-104.600667,33.089835,1138,#ffa8e3,Alfalfa,1187,...,2,[1187],3.5,1,[1187],3.5,1,Scientific_Name \ 1186 Erag...,Scientific_Name ...,Scientific_Name ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6924,081623005639708,"[61, 24, 1, 61, 24, 29, 61, 24]","[2411, 404, 1621, 1863]","MULTIPOLYGON (((-600373.153 1898692.428, -6003...",-103.092389,39.903658,1396,#a87000,Winter Wheat,1496,...,5,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
6925,081623005639708,"[61, 24, 1, 61, 24, 29, 61, 24]","[2411, 404, 1621, 1863]","MULTIPOLYGON (((-600373.153 1898692.428, -6003...",-103.092389,39.903658,1396,#a87000,Winter Wheat,1729,...,5,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
6926,081623008142809,"[24, 24, 24, 176, 176, 176, 176, 176]",[404],"MULTIPOLYGON (((-630497.949 1799492.797, -6305...",-103.356445,39.001119,1504,#e9ffbe,Grass/Pasture,643,...,3,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...
6927,081623008142809,"[24, 24, 24, 176, 176, 176, 176, 176]",[404],"MULTIPOLYGON (((-630497.949 1799492.797, -6305...",-103.356445,39.001119,1504,#e9ffbe,Grass/Pasture,647,...,3,"[1714, 2140]",4.0,2,"[1714, 2140]",4.0,2,Scientific_Name ...,Scientific_Name Common_Name ...,Scientific_Name Common_Name ...


In [142]:
fields_expanded_actual[['CSBID', 'EcoCrop2016_2023']].info()
print('**********************************************************************')
fields_expanded_suitable_historical[['CSBID', 'historical_Crop_Indices']].info()
print('**********************************************************************')
fields_expanded_suitable_MCM[['CSBID', 'mid_century_medium_carbon_Crop_Indices']].info()
print('**********************************************************************')
fields_expanded_suitable_MCH[['CSBID', 'mid_century_high_carbon_Crop_Indices']].info()
print('**********************************************************************')
stacked_df[['fields', 'crops']].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12146 entries, 0 to 12145
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   CSBID             12146 non-null  object
 1   EcoCrop2016_2023  12146 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 189.9+ KB
**********************************************************************
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6929 entries, 0 to 6928
Data columns (total 2 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   CSBID                    6929 non-null   object
 1   historical_Crop_Indices  6929 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 108.4+ KB
**********************************************************************
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6221 entries, 0 to 6220
Data columns (total 2 columns):
 #   Column                           

### create dataframes to analyze score breakdown

In [143]:
# Merge fields_expanded_actual with stacked_df to get the actual scores
actual_scores_df = pd.merge(
    fields_expanded_actual,
    stacked_df,
    left_on=['CSBID', 'EcoCrop2016_2023'],
    right_on=['fields', 'crops'],
    how='left'
)

# Merge fields_expanded_suitable with stacked_df to get the suitable scores
suitable_scores_df_historical = pd.merge(
    fields_expanded_suitable_historical,
    stacked_df,
    left_on=['CSBID', 'historical_Crop_Indices'],
    right_on=['fields', 'crops'],
    how='left'
)


# Merge fields_expanded_suitable with stacked_df to get the suitable scores
suitable_scores_df_MCM = pd.merge(
    fields_expanded_suitable_MCM,
    stacked_df,
    left_on=['CSBID', 'mid_century_medium_carbon_Crop_Indices'],
    right_on=['fields', 'crops'],
    how='left'
)

# Merge fields_expanded_suitable with stacked_df to get the suitable scores
suitable_scores_df_MCH = pd.merge(
    fields_expanded_suitable_MCH,
    stacked_df,
    left_on=['CSBID', 'mid_century_high_carbon_Crop_Indices'],
    right_on=['fields', 'crops'],
    how='left'
)

#### check merges

In [144]:
# Check if CSBID and fields match after the merge
matching_actual = actual_scores_df['CSBID'] == actual_scores_df['fields']
print(f"Actual scores - Matching CSBID and fields: {matching_actual.sum()} out of {len(actual_scores_df)}")

matching_historical = suitable_scores_df_historical['CSBID'] == suitable_scores_df_historical['fields']
print(f"Historical suitable scores - Matching CSBID and fields: {matching_historical.sum()} out of {len(suitable_scores_df_historical)}")

matching_MCM = suitable_scores_df_MCM['CSBID'] == suitable_scores_df_MCM['fields']
print(f"Mid-century medium carbon suitable scores - Matching CSBID and fields: {matching_MCM.sum()} out of {len(suitable_scores_df_MCM)}")

matching_MCH = suitable_scores_df_MCH['CSBID'] == suitable_scores_df_MCH['fields']
print(f"Mid-century high carbon suitable scores - Matching CSBID and fields: {matching_MCH.sum()} out of {len(suitable_scores_df_MCH)}")

Actual scores - Matching CSBID and fields: 218592 out of 218594
Historical suitable scores - Matching CSBID and fields: 124722 out of 124722
Mid-century medium carbon suitable scores - Matching CSBID and fields: 111978 out of 111978
Mid-century high carbon suitable scores - Matching CSBID and fields: 84780 out of 84780


In [145]:
# Check for missing values in key columns after the merge
print("Actual scores missing values:")
print(actual_scores_df.isnull().sum())

# print("Historical suitable scores missing values:")
# print(suitable_scores_df_historical.isnull().sum())

# print("Mid-century medium carbon suitable scores missing values:")
# print(suitable_scores_df_MCM.isnull().sum())

# print("Mid-century high carbon suitable scores missing values:")
# print(suitable_scores_df_MCH.isnull().sum())

# # missing two 'fields' entries: in the actual_scores_df; makes sense because there were two fields where we could not identify a EcoCrop ID from the CDL_Code

Actual scores missing values:
CSBID                                          0
CDL2016_2023                                   0
EcoCrop2016_2023                               0
geometry                                       0
Longitude                                      0
Latitude                                       0
Elevation                                      0
color                                          0
Crop                                           0
historical_Crop_Indices                        0
historical_Max_Score                           0
historical_N_Crops                             0
mid_century_medium_carbon_Crop_Indices         0
mid_century_medium_carbon_Max_Score            0
mid_century_medium_carbon_N_Crops              0
mid_century_high_carbon_Crop_Indices           0
mid_century_high_carbon_Max_Score              0
mid_century_high_carbon_N_Crops                0
Recommended_Crops_Historical                   0
Recommended_Crops_Mid_Century_Medium_Ca

### create overall dataframe to analyze overall scores

#### check dfs for merges

In [146]:
for dim in sum_matrix.dims:
    print(f"Dimension: {dim}")
    print(f"Coordinate values dtype for '{dim}': {stacked_matrix.coords[dim].dtype}")
    print()

Dimension: fields
Coordinate values dtype for 'fields': <U15

Dimension: crops
Coordinate values dtype for 'crops': int64

Dimension: scenarios
Coordinate values dtype for 'scenarios': <U25



In [147]:
overall_matrix = sum_matrix.reset_index('crops')
overall_matrix['crops'] = np.arange(len(overall_matrix['crops']))

In [148]:
# Reshape the sum_matrix into a DataFrame
overall_df = overall_matrix.to_dataframe(name='score').reset_index()

# don't do this.  'fields'
# # Convert 'fields' and 'crops' into integers to allow easy matching
# overall_df['fields'] = overall_df['fields'].astype(int)
# overall_df['crops'] = overall_df['crops'].astype(int)

overall_df['scores'] = 'Overall'

In [149]:
overall_df

Unnamed: 0,fields,crops,scenarios,score,scores
0,081623012787392,0,historical,2.0,Overall
1,081623012787392,0,mid_century_medium_carbon,0.0,Overall
2,081623012787392,0,mid_century_high_carbon,0.0,Overall
3,081623012787392,1,historical,-5.0,Overall
4,081623012787392,1,mid_century_medium_carbon,-5.0,Overall
...,...,...,...,...,...
9244795,081623008142809,2566,mid_century_medium_carbon,0.5,Overall
9244796,081623008142809,2566,mid_century_high_carbon,0.5,Overall
9244797,081623008142809,2567,historical,-2.0,Overall
9244798,081623008142809,2567,mid_century_medium_carbon,0.0,Overall


In [150]:
overall_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9244800 entries, 0 to 9244799
Data columns (total 5 columns):
 #   Column     Dtype  
---  ------     -----  
 0   fields     object 
 1   crops      int64  
 2   scenarios  object 
 3   score      float64
 4   scores     object 
dtypes: float64(1), int64(1), object(3)
memory usage: 352.7+ MB


In [151]:
fields_expanded_actual.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 12146 entries, 0 to 12145
Data columns (total 21 columns):
 #   Column                                       Non-Null Count  Dtype   
---  ------                                       --------------  -----   
 0   CSBID                                        12146 non-null  object  
 1   CDL2016_2023                                 12146 non-null  object  
 2   EcoCrop2016_2023                             12146 non-null  int64   
 3   geometry                                     12146 non-null  geometry
 4   Longitude                                    12146 non-null  float64 
 5   Latitude                                     12146 non-null  float64 
 6   Elevation                                    12146 non-null  int32   
 7   color                                        12146 non-null  object  
 8   Crop                                         12146 non-null  object  
 9   historical_Crop_Indices                      12146 no

In [152]:
overall_df[['fields', 'crops']]

Unnamed: 0,fields,crops
0,081623012787392,0
1,081623012787392,0
2,081623012787392,0
3,081623012787392,1
4,081623012787392,1
...,...,...
9244795,081623008142809,2566
9244796,081623008142809,2566
9244797,081623008142809,2567
9244798,081623008142809,2567


In [153]:
fields_expanded_actual[['CSBID', 'EcoCrop2016_2023']]

Unnamed: 0,CSBID,EcoCrop2016_2023
0,081623012787392,2281
1,081623012787392,404
2,351623001627247,2411
3,351623001627247,404
4,351623001627247,261
...,...,...
12141,081623005639708,2411
12142,081623005639708,404
12143,081623005639708,1621
12144,081623005639708,1863


In [154]:
overall_df[['fields', 'crops']].info()
print('******************************************************')
fields_expanded_actual[['CSBID', 'EcoCrop2016_2023']].info()
print('******************************************************')
fields_expanded_suitable_historical[['CSBID', 'historical_Crop_Indices']].info()
print('******************************************************')
fields_expanded_suitable_MCM[['CSBID', 'EcoCrop2016_2023']].info()
print('******************************************************')
fields_expanded_suitable_MCH[['CSBID', 'EcoCrop2016_2023']].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9244800 entries, 0 to 9244799
Data columns (total 2 columns):
 #   Column  Dtype 
---  ------  ----- 
 0   fields  object
 1   crops   int64 
dtypes: int64(1), object(1)
memory usage: 141.1+ MB
******************************************************
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12146 entries, 0 to 12145
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   CSBID             12146 non-null  object
 1   EcoCrop2016_2023  12146 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 189.9+ KB
******************************************************
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6929 entries, 0 to 6928
Data columns (total 2 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   CSBID                    6929 non-null   object
 1   historical_Crop_Indices  6929 no

#### merges

In [155]:
####### after consistency check of entire notebook, do not convert types ######

# # Convert 'CSBID' to int64 in fields_expanded_actual
# fields_expanded_actual['CSBID'] = fields_expanded_actual['CSBID'].astype(int)
# fields_expanded_suitable_historical['CSBID'] = fields_expanded_suitable_historical['CSBID'].astype(int)
# fields_expanded_suitable_MCM['CSBID'] = fields_expanded_suitable_MCM['CSBID'].astype(int)
# fields_expanded_suitable_MCH['CSBID'] = fields_expanded_suitable_MCH['CSBID'].astype(int)
# Convert 'fields' in overall_df to int64
# overall_df['fields'] = overall_df['fields'].astype(int)

# Merge fields_expanded_actual with stacked_df to get the actual scores
actual_scores_overall_df = pd.merge(
    fields_expanded_actual,
    overall_df,
    left_on=['CSBID', 'EcoCrop2016_2023'],
    right_on=['fields', 'crops'],
    how='left'
)

# Merge fields_expanded_suitable with stacked_df to get the suitable scores
suitable_scores_overall_df_historical = pd.merge(
    fields_expanded_suitable_historical,
    overall_df,
    left_on=['CSBID', 'historical_Crop_Indices'],
    right_on=['fields', 'crops'],
    how='left'
)


# Merge fields_expanded_suitable with stacked_df to get the suitable scores
suitable_scores_overall_df_MCM = pd.merge(
    fields_expanded_suitable_MCM,
    overall_df,
    left_on=['CSBID', 'mid_century_medium_carbon_Crop_Indices'],
    right_on=['fields', 'crops'],
    how='left'
)

# Merge fields_expanded_suitable with stacked_df to get the suitable scores
suitable_scores_overall_df_MCH = pd.merge(
    fields_expanded_suitable_MCH,
    overall_df,
    left_on=['CSBID', 'mid_century_high_carbon_Crop_Indices'],
    right_on=['fields', 'crops'],
    how='left'
)

In [156]:
actual_scores_df.groupby('CSBID')['score'].max()

CSBID
041623001737287    1.0
041623001737315    1.0
041623001737326    1.0
041623001737406    1.0
041623004560244    1.0
                  ... 
491623016053358    1.0
491623016053395    1.0
491623016053447    1.0
491623016053449    1.0
491623016053632    1.0
Name: score, Length: 1200, dtype: float64

In [157]:
overall_df.groupby('fields')['score'].first()

fields
041623001737287    0.0
041623001737315    0.0
041623001737326    0.0
041623001737406    0.0
041623004560244    0.0
                  ... 
491623016053358    0.0
491623016053395    0.0
491623016053447    0.0
491623016053449    0.0
491623016053632    1.5
Name: score, Length: 1200, dtype: float64

In [158]:
fields_expanded_actual.groupby('CSBID')['EcoCrop2016_2023'].first()

CSBID
041623001737287     434
041623001737315     434
041623001737326     208
041623001737406     434
041623004560244    2211
                   ... 
491623016053358     404
491623016053395     261
491623016053447     261
491623016053449    2411
491623016053632     404
Name: EcoCrop2016_2023, Length: 1200, dtype: int64

In [159]:
actual_scores_overall_df.groupby('CSBID')['score'].max()

CSBID
041623001737287   -3.0
041623001737315   -1.0
041623001737326    2.0
041623001737406   -1.0
041623004560244    2.5
                  ... 
491623016053358    1.5
491623016053395    2.0
491623016053447    2.0
491623016053449    2.0
491623016053632    0.0
Name: score, Length: 1200, dtype: float64

In [160]:
actual_scores_overall_df.groupby('fields')['score'].max()

fields
041623001737287   -3.0
041623001737315   -1.0
041623001737326    2.0
041623001737406   -1.0
041623004560244    2.5
                  ... 
491623016053358    1.5
491623016053395    2.0
491623016053447    2.0
491623016053449    2.0
491623016053632    0.0
Name: score, Length: 1198, dtype: float64

In [161]:
actual_scores_overall_df[['fields', 'CSBID']]

Unnamed: 0,fields,CSBID
0,081623012787392,081623012787392
1,081623012787392,081623012787392
2,081623012787392,081623012787392
3,081623012787392,081623012787392
4,081623012787392,081623012787392
...,...,...
36429,081623005639708,081623005639708
36430,081623005639708,081623005639708
36431,081623008142809,081623008142809
36432,081623008142809,081623008142809


In [162]:
(actual_scores_overall_df['fields']).info()

<class 'pandas.core.series.Series'>
RangeIndex: 36434 entries, 0 to 36433
Series name: fields
Non-Null Count  Dtype 
--------------  ----- 
36432 non-null  object
dtypes: object(1)
memory usage: 284.8+ KB


In [163]:
fields_expanded_actual.columns

Index(['CSBID', 'CDL2016_2023', 'EcoCrop2016_2023', 'geometry', 'Longitude',
       'Latitude', 'Elevation', 'color', 'Crop', 'historical_Crop_Indices',
       'historical_Max_Score', 'historical_N_Crops',
       'mid_century_medium_carbon_Crop_Indices',
       'mid_century_medium_carbon_Max_Score',
       'mid_century_medium_carbon_N_Crops',
       'mid_century_high_carbon_Crop_Indices',
       'mid_century_high_carbon_Max_Score', 'mid_century_high_carbon_N_Crops',
       'Recommended_Crops_Historical',
       'Recommended_Crops_Mid_Century_Medium_Carbon',
       'Recommended_Crops_Mid_Century_High_Carbon'],
      dtype='object')

In [164]:
actual_scores_df['fields']
# actual_scores_df

0         081623012787392
1         081623012787392
2         081623012787392
3         081623012787392
4         081623012787392
               ...       
218589    081623008142809
218590    081623008142809
218591    081623008142809
218592    081623008142809
218593    081623008142809
Name: fields, Length: 218594, dtype: object

In [165]:
# Check for matching values between the two columns
common_values = fields_expanded_actual['CSBID'].isin(overall_df['fields'])
print(f"Matching CSBID values: {common_values.sum()} out of {len(fields_expanded_actual)}")

common_values = overall_df['fields'].isin(fields_expanded_actual['CSBID'])
print(f"Matching fields values: {common_values.sum()} out of {len(overall_df)}")

Matching CSBID values: 12146 out of 12146
Matching fields values: 9244800 out of 9244800


In [166]:
# Ensure there are no leading zeros or inconsistencies
print(fields_expanded_actual['CSBID'].apply(lambda x: len(str(x))).value_counts())  # Should be consistent
print(overall_df['fields'].apply(lambda x: len(str(x))).value_counts())            # Should be consistent
print(fields['CSBID'].apply(lambda x: len(str(x))).value_counts())  

CSBID
15    12146
Name: count, dtype: int64
fields
15    9244800
Name: count, dtype: int64
CSBID
15    1200
Name: count, dtype: int64


In [167]:
# cdl_to_ecocrop_crop_code_translator_df

### create grouped comparison dataframes

In [168]:
# Function to check and print data types for specified columns
def check_dtypes(df, columns):
    print(f"Data types for {df.name}:")
    for column in columns:
        print(f"  {column}: {df[column].dtype}")
    print()

# List of columns to check
columns_to_check = ['CSBID', 'Crop', 'scores', 'score']

# Assign names to dataframes for easier identification
actual_scores_df.name = "actual_scores_df"
suitable_scores_df_historical.name = "suitable_scores_df_historical"
suitable_scores_df_MCM.name = "suitable_scores_df_MCM"
suitable_scores_df_MCH.name = "suitable_scores_df_MCH"
actual_scores_overall_df.name = "actual_scores_overall_df"
suitable_scores_overall_df_historical.name = "suitable_scores_overall_df_historical"
suitable_scores_overall_df_MCM.name = "suitable_scores_overall_df_MCM"
suitable_scores_overall_df_MCH.name = "suitable_scores_overall_df_MCH"

# # Check data types before merges
# check_dtypes(actual_scores_df, columns_to_check)
# check_dtypes(suitable_scores_df_historical, columns_to_check)
# check_dtypes(suitable_scores_df_MCM, columns_to_check)
# check_dtypes(suitable_scores_df_MCH, columns_to_check)
# check_dtypes(actual_scores_overall_df, columns_to_check)
# check_dtypes(suitable_scores_overall_df_historical, columns_to_check)
# check_dtypes(suitable_scores_overall_df_MCM, columns_to_check)
# check_dtypes(suitable_scores_overall_df_MCH, columns_to_check)

In [169]:
comparison_df_historical = pd.merge(
    actual_scores_df[['CSBID', 'Crop', 'scores', 'score']], 
    suitable_scores_df_historical[['CSBID', 'Crop', 'scores', 'score']], 
    on=['CSBID', 'Crop', 'scores'],
    suffixes=('_actual', '_suitable')
)

comparison_df_MCM = pd.merge(
    actual_scores_df[['CSBID', 'Crop', 'scores', 'score']], 
    suitable_scores_df_MCM[['CSBID', 'Crop', 'scores', 'score']], 
    on=['CSBID', 'Crop', 'scores'],
    suffixes=('_actual', '_suitable')
)

comparison_df_MCH = pd.merge(
    actual_scores_df[['CSBID', 'Crop', 'scores', 'score']], 
    suitable_scores_df_MCH[['CSBID', 'Crop', 'scores', 'score']], 
    on=['CSBID', 'Crop', 'scores'],
    suffixes=('_actual', '_suitable')
)

In [170]:
comparison_df_overall_historical = pd.merge(
    actual_scores_overall_df[['CSBID', 'Crop', 'scores', 'score']], 
    suitable_scores_overall_df_historical[['CSBID', 'Crop', 'scores', 'score']], 
    on=['CSBID', 'Crop', 'scores'],
    suffixes=('_actual', '_suitable')
)

comparison_df_overall_MCM = pd.merge(
    actual_scores_overall_df[['CSBID', 'Crop', 'scores', 'score']], 
    suitable_scores_overall_df_MCM[['CSBID', 'Crop', 'scores', 'score']], 
    on=['CSBID', 'Crop', 'scores'],
    suffixes=('_actual', '_suitable')
)

comparison_df_overall_MCH = pd.merge(
    actual_scores_overall_df[['CSBID', 'Crop', 'scores', 'score']], 
    suitable_scores_overall_df_MCH[['CSBID', 'Crop', 'scores', 'score']], 
    on=['CSBID', 'Crop', 'scores'],
    suffixes=('_actual', '_suitable')
)

In [171]:
# # Group by the actual and suitable scores and count occurrences
comparison_df_historical_grouped = comparison_df_historical.groupby(['scores', 'score_actual', 'score_suitable']).size().reset_index(name='count')
comparison_df_MCM_grouped = comparison_df_MCM.groupby(['scores', 'score_actual', 'score_suitable']).size().reset_index(name='count')
comparison_df_MCH_grouped = comparison_df_MCH.groupby(['scores', 'score_actual', 'score_suitable']).size().reset_index(name='count')

In [172]:
# # Group by the actual and suitable scores and count occurrences
comparison_df_overall_historical_grouped = comparison_df_overall_historical.groupby(['scores', 'score_actual', 'score_suitable']).size().reset_index(name='count')
comparison_df_overall_MCM_grouped = comparison_df_overall_MCM.groupby(['scores', 'score_actual', 'score_suitable']).size().reset_index(name='count')
comparison_df_overall_MCH_grouped = comparison_df_overall_MCH.groupby(['scores', 'score_actual', 'score_suitable']).size().reset_index(name='count')

In [173]:
# comparison_df_historical_grouped['scenario'] = 'Historical'
# comparison_df_MCM_grouped['scenario'] = 'Mid-Century_Medium-Carbon'
# comparison_df_MCH_grouped['scenario'] = 'Mid-Century_High-Carbon'
comparison_df_historical_grouped['scenario'] = 'Recent'
comparison_df_MCM_grouped['scenario'] = 'Mid-century_medium-carbon'
comparison_df_MCH_grouped['scenario'] = 'Mid-century_high-carbon'

In [174]:
# comparison_df_historical_grouped
# comparison_df_MCM_grouped
# comparison_df_MCH_grouped

In [175]:
# comparison_df_overall_historical_grouped['scenario'] = 'Historical'
# comparison_df_overall_MCM_grouped['scenario'] = 'Mid-Century_Medium-Carbon'
# comparison_df_overall_MCH_grouped['scenario'] = 'Mid-Century_High-Carbon'
comparison_df_overall_historical_grouped['scenario'] = 'Recent'
comparison_df_overall_MCM_grouped['scenario'] = 'Mid-century_medium-carbon'
comparison_df_overall_MCH_grouped['scenario'] = 'Mid-century_high-carbon'

In [176]:
# comparison_df_overall_historical_grouped
# comparison_df_overall_MCM_grouped
# comparison_df_overall_MCH_grouped

In [177]:
# Concatenate the DataFrames
comparison_df_grouped = pd.concat([comparison_df_historical_grouped, comparison_df_MCM_grouped, comparison_df_MCH_grouped])

In [178]:
comparison_df_grouped

Unnamed: 0,scores,score_actual,score_suitable,count,scenario
0,Climate_Zone,-1.0,-1.0,50339,Recent
1,Climate_Zone,-1.0,1.0,335950,Recent
2,Climate_Zone,1.0,-1.0,47509,Recent
3,Climate_Zone,1.0,1.0,296012,Recent
4,Hardiness,-1.0,-1.0,9433,Recent
...,...,...,...,...,...
37,pH,0.0,-1.0,54,Mid-century_high-carbon
38,pH,0.0,0.0,31221,Mid-century_high-carbon
39,pH,0.0,1.0,267066,Mid-century_high-carbon
40,pH,1.0,0.0,1881,Mid-century_high-carbon


In [179]:
# Define all possible score values
score_values = [-1, -0.5, 0, 0.5, 1]

# Create a DataFrame for all combinations of scores and scenarios
all_combinations = pd.MultiIndex.from_product(
    [comparison_df_grouped['scores'].unique(), score_values, score_values, comparison_df_grouped['scenario'].unique()],
    names=['scores', 'score_actual', 'score_suitable', 'scenario']
).to_frame(index=False)

# Merge with the existing comparison_df_grouped
comparison_df_grouped_filled = pd.merge(
    all_combinations,
    comparison_df_grouped,
    how='left',
    on=['scores', 'score_actual', 'score_suitable', 'scenario']
).fillna({'count': 0})

In [180]:
comparison_df_grouped_filled

Unnamed: 0,scores,score_actual,score_suitable,scenario,count
0,Climate_Zone,-1.0,-1.0,Recent,50339.0
1,Climate_Zone,-1.0,-1.0,Mid-century_medium-carbon,11151.0
2,Climate_Zone,-1.0,-1.0,Mid-century_high-carbon,7158.0
3,Climate_Zone,-1.0,-0.5,Recent,0.0
4,Climate_Zone,-1.0,-0.5,Mid-century_medium-carbon,0.0
...,...,...,...,...,...
445,pH,1.0,0.5,Mid-century_medium-carbon,0.0
446,pH,1.0,0.5,Mid-century_high-carbon,0.0
447,pH,1.0,1.0,Recent,175437.0
448,pH,1.0,1.0,Mid-century_medium-carbon,179163.0


In [181]:
single_chart = alt.Chart(
    comparison_df_grouped_filled[comparison_df_grouped_filled['scenario'] == 'Recent']
).encode(
    x=alt.X('score_actual:N'),
    y=alt.Y('score_suitable:N', scale=alt.Scale(reverse=True)),
    size='count:Q',
    color='scores:N',
    tooltip=['score_actual', 'score_suitable', 'count']
).mark_square().properties(
    width=400,
    height=400
)

# single_chart

In [182]:
comparison_df_grouped_filled.dtypes

scores             object
score_actual      float64
score_suitable    float64
scenario           object
count             float64
dtype: object

In [183]:
print(comparison_df_grouped_filled['scores'].unique())
print(comparison_df_grouped_filled['scenario'].unique())
print(comparison_df_grouped_filled['score_actual'].unique())
print(comparison_df_grouped_filled['score_suitable'].unique())

['Climate_Zone' 'Hardiness' 'Photoperiod' 'Rainfall' 'Temperature' 'pH']
['Recent' 'Mid-century_medium-carbon' 'Mid-century_high-carbon']
[-1.  -0.5  0.   0.5  1. ]
[-1.  -0.5  0.   0.5  1. ]


In [184]:
print(comparison_df_grouped_filled.isnull().sum())

scores            0
score_actual      0
score_suitable    0
scenario          0
count             0
dtype: int64


In [185]:
simple_facet_chart = alt.Chart(
    comparison_df_grouped_filled
).encode(
    x='score_actual:N',
    y='score_suitable:N',
    size='count:Q',
    color='scores:N',
    tooltip=['score_actual', 'score_suitable', 'count']
).mark_square(
    opacity=0.5,
).facet(
    row='scores:N',
    column='scenario:N'
)

# simple_facet_chart

In [186]:
test_facet_chart = alt.Chart(
    comparison_df_grouped_filled
).encode(
    x='score_actual:N',
    y='score_suitable:N',
    size=alt.Size('count:Q'),
    color='scores:N',
    tooltip=['score_actual', 'score_suitable', 'count']
).mark_square(
    opacity=0.5,
).facet(
    row='scores:N',
    column='scenario:N'
)

# test_facet_chart

In [187]:
# comparison_df_grouped_filled['count_sqrt'] = comparison_df_grouped_filled['count'].apply(lambda x: x ** 0.5)
comparison_df_grouped_filled['count_sqrt'] = np.sqrt(comparison_df_grouped_filled['count'])

facet_column_order = ['Recent', 'Mid-century_medium-carbon', 'Mid-century_high-carbon',]

facet_chart = alt.Chart(
    comparison_df_grouped_filled,
).encode(
    x=alt.X('score_actual:N', title='', axis=alt.Axis(offset=1, domain=False, labelAngle=0)),
    y=alt.Y('score_suitable:N', title='', scale=alt.Scale(reverse=True,), axis=alt.Axis(offset=1, domain=False, labelAngle=0,)),
    # size=alt.Size('count:Q', scale=alt.Scale(type='log', range=[10, 1000])),
    # size=alt.Size('count:Q',),
    # size=alt.Size('count_sqrt:Q', scale=alt.Scale(range=[10, 1000])),
    size=alt.Size(
        'count_sqrt:Q',
        # scale=alt.Scale(type='log', range=[1, 1000]),
        legend=alt.Legend(
            title="Number of Scores",  # Set the legend title
            titleColor='black',  # Set the title color
            titleFontSize=13, 
            labelColor='black',  # Set the label color
            labelFontSize=12,
            symbolStrokeColor='lightgrey',  # Set the symbol border color
            symbolFillColor='whitesmoke',  # Set the symbol fill color
            labelExpr="format(datum.value * datum.value, ',')",  # Square the value to show the original count
            # values=[10**0.5000, 100**0.5, 1000**0.5, 10_000**0.5, 50_000**0.5, 100_000**0.5],
            values=[200**0.5, 2000**0.5, 20_000**0.5, 200_000**0.5, 2_000_000**0.5],
            # labelExpr="datum.value"  # Display actual counts
            # values=[0, 300],
            # labelExpr="datum.value"  # Display actual counts
        )
    ),
    opacity=alt.condition(
        alt.datum.count_sqrt > 0,  # If count_sqrt > 0
        alt.value(0.8),  # Set opacity to 0.5
        alt.value(0)  # Otherwise, set opacity to 0
    ),
    color=alt.Color(
        'scores:N',
        legend=alt.Legend(
            title='Score',
            titleFontSize=13,
            labelFontSize=12,
        ),
    ),  # Use color to distinguish between score categories
    # tooltip=['scores', 'score_actual', 'score_suitable', 'count']
    tooltip=[
        alt.Tooltip('scores:N', title='Score: '),
        alt.Tooltip('score_actual:N', title='Actual Score: '),
        alt.Tooltip('score_suitable:N', title='Suitable Score: '),
        alt.Tooltip('count:Q', title='Number of Scores: ', format=',')
    ]
).mark_square(
    # opacity=0.8,
).properties(
    width=200,
    height=200
).facet(
    row=alt.Row(
        'scores:N',
        header=alt.Header(
            labelFontSize=13,
            labelFontWeight='bold',
            labelAngle=0,  # Keep the labels horizontal
            labelAlign='left',  # Align labels to the left
            labelColor='darkblue',  # Customize label color
            title=None  # Remove the title
        )
    ),
    column=alt.Column(
        'scenario:N',
        sort = facet_column_order,
        header=alt.Header(
            labelFontSize=13,
            labelFontWeight='bold',
            labelAngle=0,  # Keep the labels horizontal
            labelAlign='center',  # Align labels to the center
            labelColor='darkblue',  # Customize label color
            title=None  # Remove the title
        )
    ),
    title=['Distribution of Suitable vs. Actual Crop Scores',""],
    spacing=10  # Adjust spacing between facets
).resolve_scale(
    x='shared',
    y='shared'
)

facet_chart.configure_facet(
    spacing=10  # Adjust spacing between facets
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16,
    anchor='middle',
    color='black'
)#.interactive()

In [188]:
# # Aggregate the data to get the total counts for each scenario
# total_scores_df = comparison_df_grouped_filled.groupby(
#     ['score_actual', 'score_suitable', 'scenario']
# )[['count', 'count_sqrt']].sum().reset_index()

In [189]:
# Add a "scores" column to total_scores_df with the value "Total"
# total_scores_df['scores'] = 'Total'

In [190]:
# Append total_scores_df to comparison_df_grouped_filled
# comparison_df_with_totals = pd.concat([comparison_df_grouped_filled, total_scores_df], ignore_index=True)
# comparison_df_overall_historical_grouped
# comparison_df_overall_MCM_grouped
# comparison_df_overall_MCH_grouped

overall_scores_comparison_df = pd.concat([comparison_df_overall_historical_grouped,
                                          comparison_df_overall_MCM_grouped,
                                          comparison_df_overall_MCH_grouped
                                          ], ignore_index=True)

In [191]:
overall_scores_comparison_df['score_actual'].unique()

array([-5. , -4.5, -4. , -3.5, -3. , -2.5, -2. , -1.5, -1. , -0.5,  0. ,
        0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ])

In [192]:
# Define all possible score values
score_values = [-6., -5.5, -5., -4.5, -4. , -3.5, -3. , -2.5, -2. , -1.5, -1. , -0.5,  0. ,
        0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5., 5.5, 6. ]

# Create a DataFrame for all combinations of scores and scenarios
all_combinations = pd.MultiIndex.from_product(
    [overall_scores_comparison_df['scores'].unique(), score_values, score_values, overall_scores_comparison_df['scenario'].unique()],
    names=['scores', 'score_actual', 'score_suitable', 'scenario']
).to_frame(index=False)

# Merge with the existing comparison_df_grouped
overall_scores_comparison_filled = pd.merge(
    all_combinations,
    overall_scores_comparison_df,
    how='left',
    on=['scores', 'score_actual', 'score_suitable', 'scenario']
).fillna({'count': 0})

In [193]:
# overall_scores_comparison_filled['score_actual'].unique()
# overall_scores_comparison_filled['score_suitable'].unique()
# overall_scores_comparison_filled

In [194]:
overall_scores_comparison_filled_normalized = overall_scores_comparison_filled.copy()
overall_scores_comparison_filled_normalized['score_actual'] = overall_scores_comparison_filled_normalized['score_actual']/6
overall_scores_comparison_filled_normalized['score_suitable'] = overall_scores_comparison_filled_normalized['score_suitable']/6

In [195]:
comparison_df_with_overall = pd.concat([comparison_df_grouped_filled,
                                        overall_scores_comparison_filled_normalized
                                       ], ignore_index=True)

In [196]:
overall_scores_comparison_filled['count_sqrt'] = np.sqrt(overall_scores_comparison_filled['count'])
comparison_df_with_overall['count_sqrt'] = np.sqrt(comparison_df_with_overall['count'])
overall_scores_comparison_filled_normalized['count_sqrt'] = np.sqrt(overall_scores_comparison_filled_normalized['count'])

In [197]:
overall_scores_comparison_filled

Unnamed: 0,scores,score_actual,score_suitable,scenario,count,count_sqrt
0,Overall,-6.0,-6.0,Recent,0.0,0.0
1,Overall,-6.0,-6.0,Mid-century_medium-carbon,0.0,0.0
2,Overall,-6.0,-6.0,Mid-century_high-carbon,0.0,0.0
3,Overall,-6.0,-5.5,Recent,0.0,0.0
4,Overall,-6.0,-5.5,Mid-century_medium-carbon,0.0,0.0
...,...,...,...,...,...,...
1870,Overall,6.0,5.5,Mid-century_medium-carbon,0.0,0.0
1871,Overall,6.0,5.5,Mid-century_high-carbon,0.0,0.0
1872,Overall,6.0,6.0,Recent,0.0,0.0
1873,Overall,6.0,6.0,Mid-century_medium-carbon,0.0,0.0


In [198]:
comparison_df_with_overall

Unnamed: 0,scores,score_actual,score_suitable,scenario,count,count_sqrt
0,Climate_Zone,-1.0,-1.000000,Recent,50339.0,224.363544
1,Climate_Zone,-1.0,-1.000000,Mid-century_medium-carbon,11151.0,105.598295
2,Climate_Zone,-1.0,-1.000000,Mid-century_high-carbon,7158.0,84.604964
3,Climate_Zone,-1.0,-0.500000,Recent,0.0,0.000000
4,Climate_Zone,-1.0,-0.500000,Mid-century_medium-carbon,0.0,0.000000
...,...,...,...,...,...,...
2320,Overall,1.0,0.916667,Mid-century_medium-carbon,0.0,0.000000
2321,Overall,1.0,0.916667,Mid-century_high-carbon,0.0,0.000000
2322,Overall,1.0,1.000000,Recent,0.0,0.000000
2323,Overall,1.0,1.000000,Mid-century_medium-carbon,0.0,0.000000


In [199]:
overall_scores_simple = alt.Chart(
    overall_scores_comparison_filled
).encode(
    x='score_actual:N',
    y=alt.Y('score_suitable:N', 
            scale=alt.Scale(reverse=True)
           ),
    size='count_sqrt:Q',
    column='scenario:N',
    tooltip=[
        alt.Tooltip('score_actual:N', title='Actual Score'),
        alt.Tooltip('score_suitable:N', title='Suitable Score'),
        alt.Tooltip('count:Q', title='Number of Scores', format=',')
    ]
).mark_square()
# ).mark_circle()

# overall_scores_simple

In [200]:
values = [-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6]

facet_column_order = ['Recent', 'Mid-century_medium-carbon', 'Mid-century_high-carbon',]

overall_scores_chart = alt.Chart(
    
    overall_scores_comparison_filled
    
).encode(
    
    x=alt.X('score_actual:N', 
            title='Actual Score', 
            axis=alt.Axis(titleFontSize=13,
                          titleY=30,
                          offset=1, 
                          domain=False,
                          labelFontSize=12,
                          labelAngle=0,
                          values = values,
                         ),
           ),
    
    y=alt.Y('score_suitable:N', 
            title='Suitable Score', 
            scale=alt.Scale(reverse=True,), 
            axis=alt.Axis(titleFontSize=13,
                          titleX=-30,
                          offset=1, 
                          domain=False,
                          labelFontSize=12,
                          labelAngle=0,
                          values = values,
                         ),
           ),
    
     size=alt.Size(
        'count_sqrt:Q',
        scale=alt.Scale(range=[10,500]),
        legend=alt.Legend(title="Number of Scores",  # Set the legend title
                          titleColor='black',  # Set the title color
                          titleFontSize=13,
                          labelColor='black',  # Set the label color
                          labelFontSize=12,
                          symbolStrokeColor='lightgrey',  # Set the symbol border color
                           symbolFillColor='whitesmoke',  # Set the symbol fill color
                          labelExpr="format(datum.value * datum.value, ',')",  # Square the value to show the original count
                          values=[10**0.5, 100**0.5, 1000**0.5, 10_000**0.5, 100_000**0.5],
        )
    ),
    opacity=alt.condition(
        alt.datum.count_sqrt > 0,  # If count_sqrt > 0
        alt.value(0.8),  # Set opacity to 0.5
        alt.value(0.1)  # Otherwise, set opacity to 0
    ),
    tooltip=[
        alt.Tooltip('score_actual:N', title='Actual Score'),
        alt.Tooltip('score_suitable:N', title='Suitable Score'),
        alt.Tooltip('count:Q', title='Number of Scores', format=',')
    ]
).mark_square(
    opacity=0.5,
# ).mark_circle(
#     opacity=0.5,
).facet(
    column=alt.Column(
        'scenario:N',
        sort=facet_column_order,
        header=alt.Header(
            title='',
            labelFontSize=14,
            labelFontWeight='bold',
            labelAngle=0,
            labelAlign='center',
            labelColor='black',
        )
    ),
    title="Overall score distribution for each scenario: actual vs. suitable"  # Set the overall title
)

overall_scores_chart.configure_title(
    fontSize=18,  # Set the font size of the title
    anchor='middle',  # Position the title (start, middle, or end)
    align='center',  # Align the title horizontally (left, center, or right)
    dx=-50,
    dy=-20,  # Adjust the vertical position (negative values move the title up)
    color='black' # Set the color of the title text
)

In [201]:
# Define a custom sort order for the 'scores' column
sort_order = alt.Sort(['Overall', 'Climate_Zone', 'Hardiness', 'Photoperiod', 'Rainfall', 'Temperature', 'pH'])

# If you want "Total" at the bottom, just place it at the end of the list
sort_order_bottom = alt.Sort(['Climate_Zone', 'Hardiness', 'Photoperiod', 'Rainfall', 'Temperature', 'pH', 'Overall'])

facet_column_order = ['Recent', 'Mid-century_medium-carbon', 'Mid-century_high-carbon',]

facet_chart_with_totals = alt.Chart(
    # comparison_df_with_overall
    comparison_df_grouped_filled
).encode(
    x=alt.X('score_actual:N', 
            title='', 
            axis=alt.Axis(values=[-1, 0, 1],
                          offset=1, 
                          domain=False, 
                          labelAngle=0
                         )
           ),
    y=alt.Y('score_suitable:N', 
            title='', 
            scale=alt.Scale(reverse=True,), 
            axis=alt.Axis(values=[-1, 0, 1],
                          offset=1, 
                          domain=False,
                          labelAngle=0,
                         )
           ),

     size=alt.Size(
        'count_sqrt:Q',
        scale=alt.Scale(range=[10,1000]),
        legend=alt.Legend(title="Number of Scores",  # Set the legend title
                          titleColor='black',  # Set the title color
                          titleFontSize=13,
                          labelColor='black',  # Set the label color
                          labelFontSize=12,
                          symbolStrokeColor='lightgrey',  # Set the symbol border color
                           symbolFillColor='whitesmoke',  # Set the symbol fill color
                          labelExpr="format(datum.value * datum.value, ',')",  # Square the value to show the original count
                          values=[10**0.5, 100**0.5, 1000**0.5, 10_000**0.5, 100_000**0.5],
        )
    ),
    
    opacity=alt.condition(
        alt.datum.count_sqrt > 1,  # If count_sqrt > 0
        alt.value(0.8),  # Set opacity to 0.5
        alt.value(0)  # Otherwise, set opacity to 0
    ),
    
    color=alt.Color(
        'scores:N',
        sort=sort_order,
        legend=alt.Legend(title='Score',
                          titleFontSize=13,
                          labelFontSize=12,
                          ),
        ),  # Use color to distinguish between score categories

    tooltip=[
        alt.Tooltip('scores:N', title='Score: '),
        alt.Tooltip('score_actual:N', title='Actual Score: '),
        alt.Tooltip('score_suitable:N', title='Suitable Score: '),
        alt.Tooltip('count:Q', title='Number of Scores: ', format=',')
    ],
    
).mark_square(
    # opacity=0.8,
).properties(
    width=180,
    height=180
).facet(
    row=alt.Row(
        'scores:N',
        sort=sort_order,
        header=alt.Header(
            labelFontSize=13,
            labelFontWeight='bold',
            labelAngle=0,  # Keep the labels horizontal
            labelAlign='left',  # Align labels to the left
            labelColor='darkblue',  # Customize label color
            title=None  # Remove the title
        )
    ),
    column=alt.Column(
        'scenario:N',
        sort=facet_column_order,
        header=alt.Header(
            labelFontSize=13,
            labelFontWeight='bold',
            labelAngle=0,  # Keep the labels horizontal
            labelAlign='center',  # Align labels to the center
            labelColor='darkblue',  # Customize label color
            title=None  # Remove the title
        )
    ),
    title=['Distribution of Suitable vs. Actual Crop Scores',""],
    spacing=10  # Adjust spacing between facets
# ).resolve_scale(
#     x='shared',
#     y='shared'
)

facet_chart_with_totals.configure_facet(
    spacing=10  # Adjust spacing between facets
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16,
    anchor='middle',
    color='black'
)#.interactive()

### breakdown of scores by field for actual crops (why do actual crops not match suitable?) 

In [202]:
actual_scores_overall_df.groupby('CSBID')['score'].max()

CSBID
041623001737287   -3.0
041623001737315   -1.0
041623001737326    2.0
041623001737406   -1.0
041623004560244    2.5
                  ... 
491623016053358    1.5
491623016053395    2.0
491623016053447    2.0
491623016053449    2.0
491623016053632    0.0
Name: score, Length: 1200, dtype: float64

In [203]:
# max_actual_scores_overall_df.groupby('CSBID')['score'].idxmax()
max_score_indices = actual_scores_overall_df.groupby(['CSBID','scenarios'])['score'].idxmax()
max_score_indices[max_score_indices.isna()]
print(len(max_score_indices))
max_score_indices = max_score_indices.dropna()
print(len(max_score_indices))

3594
3594


In [204]:
# Subset the DataFrame using the indices of the maximum scores
max_score_breakdown_df = actual_scores_overall_df.loc[max_score_indices]
# don't cast to int; CSBID should be string (object)
# max_score_breakdown_df['CSBID'] = max_score_breakdown_df['CSBID'].astype(int)
max_score_breakdown_df.info()
max_score_breakdown_df

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 3594 entries, 591 to 18523
Data columns (total 26 columns):
 #   Column                                       Non-Null Count  Dtype   
---  ------                                       --------------  -----   
 0   CSBID                                        3594 non-null   object  
 1   CDL2016_2023                                 3594 non-null   object  
 2   EcoCrop2016_2023                             3594 non-null   int64   
 3   geometry                                     3594 non-null   geometry
 4   Longitude                                    3594 non-null   float64 
 5   Latitude                                     3594 non-null   float64 
 6   Elevation                                    3594 non-null   int32   
 7   color                                        3594 non-null   object  
 8   Crop                                         3594 non-null   object  
 9   historical_Crop_Indices                      3594 non-nul

Unnamed: 0,CSBID,CDL2016_2023,EcoCrop2016_2023,geometry,Longitude,Latitude,Elevation,color,Crop,historical_Crop_Indices,...,mid_century_high_carbon_Max_Score,mid_century_high_carbon_N_Crops,Recommended_Crops_Historical,Recommended_Crops_Mid_Century_Medium_Carbon,Recommended_Crops_Mid_Century_High_Carbon,fields,crops,scenarios,score,scores
591,041623001737287,"[61, 61, 74, 74, 152, 61, 61, 74]",434,"MULTIPOLYGON (((-1234020.598 1157021.234, -123...",-109.282459,32.706889,1254,#b6705c,Pecans,"[598, 1187, 1935]",...,3.0,7,Scientific_Name ...,Scientific_Name \ 368 Sor...,Scientific_Name \ 368 Sor...,041623001737287,434.0,historical,-5.0,Overall
593,041623001737287,"[61, 61, 74, 74, 152, 61, 61, 74]",434,"MULTIPOLYGON (((-1234020.598 1157021.234, -123...",-109.282459,32.706889,1254,#b6705c,Pecans,"[598, 1187, 1935]",...,3.0,7,Scientific_Name ...,Scientific_Name \ 368 Sor...,Scientific_Name \ 368 Sor...,041623001737287,434.0,mid_century_high_carbon,-3.0,Overall
592,041623001737287,"[61, 61, 74, 74, 152, 61, 61, 74]",434,"MULTIPOLYGON (((-1234020.598 1157021.234, -123...",-109.282459,32.706889,1254,#b6705c,Pecans,"[598, 1187, 1935]",...,3.0,7,Scientific_Name ...,Scientific_Name \ 368 Sor...,Scientific_Name \ 368 Sor...,041623001737287,434.0,mid_century_medium_carbon,-3.0,Overall
19290,041623001737315,"[61, 61, 74, 152, 152, 152, 152, 152]",434,"MULTIPOLYGON (((-1254751.575 1148858.857, -125...",-109.489526,32.609445,1021,#c7d79e,Shrubland,"[598, 1187, 1935]",...,3.0,7,Scientific_Name ...,Scientific_Name \ 368 Sor...,Scientific_Name \ 368 Sor...,041623001737315,434.0,historical,-1.0,Overall
19292,041623001737315,"[61, 61, 74, 152, 152, 152, 152, 152]",434,"MULTIPOLYGON (((-1254751.575 1148858.857, -125...",-109.489526,32.609445,1021,#c7d79e,Shrubland,"[598, 1187, 1935]",...,3.0,7,Scientific_Name ...,Scientific_Name \ 368 Sor...,Scientific_Name \ 368 Sor...,041623001737315,434.0,mid_century_high_carbon,-3.0,Overall
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10187,491623016053449,"[152, 152, 1, 1, 1, 36, 152, 152]",261,"MULTIPOLYGON (((-1157384.118 1768393.098, -115...",-109.389405,38.212378,1763,#c7d79e,Shrubland,"[643, 647, 651, 786, 1729]",...,3.5,2,Scientific_Name ...,Scientific_Name ...,Scientific_Name ...,491623016053449,261.0,mid_century_high_carbon,0.0,Overall
10186,491623016053449,"[152, 152, 1, 1, 1, 36, 152, 152]",261,"MULTIPOLYGON (((-1157384.118 1768393.098, -115...",-109.389405,38.212378,1763,#c7d79e,Shrubland,"[643, 647, 651, 786, 1729]",...,3.5,2,Scientific_Name ...,Scientific_Name ...,Scientific_Name ...,491623016053449,261.0,mid_century_medium_carbon,0.0,Overall
18522,491623016053632,"[24, 61, 24, 61, 152, 152, 152, 61]",404,"MULTIPOLYGON (((-1183233.163 1724245.743, -118...",-109.612303,37.790033,2106,#bfbf7a,Fallow/Idle Cropland,"[647, 1390]",...,5.0,2,Scientific_Name ...,Scientific_Name ...,Scientific_Name ...,491623016053632,404.0,historical,-2.0,Overall
18524,491623016053632,"[24, 61, 24, 61, 152, 152, 152, 61]",404,"MULTIPOLYGON (((-1183233.163 1724245.743, -118...",-109.612303,37.790033,2106,#bfbf7a,Fallow/Idle Cropland,"[647, 1390]",...,5.0,2,Scientific_Name ...,Scientific_Name ...,Scientific_Name ...,491623016053632,404.0,mid_century_high_carbon,0.0,Overall


In [205]:
max_score_breakdown_df['fields']

591      041623001737287
593      041623001737287
592      041623001737287
19290    041623001737315
19292    041623001737315
              ...       
10187    491623016053449
10186    491623016053449
18522    491623016053632
18524    491623016053632
18523    491623016053632
Name: fields, Length: 3594, dtype: object

In [206]:
actual_scores_overall_df['fields']

0        081623012787392
1        081623012787392
2        081623012787392
3        081623012787392
4        081623012787392
              ...       
36429    081623005639708
36430    081623005639708
36431    081623008142809
36432    081623008142809
36433    081623008142809
Name: fields, Length: 36434, dtype: object

In [207]:
stacked_matrix.shape

(1200, 2568, 6, 3)

### merged score breakdown dataframe

In [208]:
# Check data types before the merge
print("Data types for stacked_df:")
print(stacked_df[['fields', 'crops', 'scenarios']].dtypes)
print("\nData types for max_score_breakdown_df:")
print(max_score_breakdown_df[['CSBID', 'EcoCrop2016_2023', 'scenarios']].dtypes)

# Perform the merge on 'fields' (in stacked_df), 'CSBID' (in max_score_breakdown_df), and 'scenarios'
actual_merged_df = pd.merge(
    stacked_df,
    max_score_breakdown_df[['CSBID', 'EcoCrop2016_2023', 'scenarios']],  # Only select relevant columns for the join
    left_on=['fields', 'crops', 'scenarios'],
    right_on=['CSBID', 'EcoCrop2016_2023', 'scenarios'],
    how='inner'  # Use inner join to only keep matching rows
)

# Check the result of the merge
print("\nData types for actual_merged_df:")
print(actual_merged_df.dtypes)

Data types for stacked_df:
fields       object
crops         int64
scenarios    object
dtype: object

Data types for max_score_breakdown_df:
CSBID               object
EcoCrop2016_2023     int64
scenarios           object
dtype: object

Data types for actual_merged_df:
fields               object
crops                 int64
scores               object
scenarios            object
score               float64
CSBID                object
EcoCrop2016_2023      int64
dtype: object


In [209]:
# Perform the merge on 'fields' (in stacked_df), 'CSBID' (in max_score_breakdown_df), and 'scenarios'
actual_merged_df = pd.merge(
    stacked_df,
    max_score_breakdown_df[['CSBID', 'EcoCrop2016_2023', 'scenarios']],  # Only select relevant columns for the join
    left_on=['fields', 'crops', 'scenarios'],
    right_on=['CSBID', 'EcoCrop2016_2023', 'scenarios'],
    how='inner'  # Use inner join to only keep matching rows
)

actual_merged_df

Unnamed: 0,fields,crops,scores,scenarios,score,CSBID,EcoCrop2016_2023
0,041623001737287,434,Photoperiod,historical,-1.0,041623001737287,434
1,041623001737287,434,Photoperiod,mid_century_medium_carbon,1.0,041623001737287,434
2,041623001737287,434,Photoperiod,mid_century_high_carbon,1.0,041623001737287,434
3,041623001737287,434,Climate_Zone,historical,-1.0,041623001737287,434
4,041623001737287,434,Climate_Zone,mid_century_medium_carbon,-1.0,041623001737287,434
...,...,...,...,...,...,...,...
21559,491623016053632,404,Hardiness,mid_century_medium_carbon,-1.0,491623016053632,404
21560,491623016053632,404,Hardiness,mid_century_high_carbon,-1.0,491623016053632,404
21561,491623016053632,404,pH,historical,0.0,491623016053632,404
21562,491623016053632,404,pH,mid_century_medium_carbon,0.0,491623016053632,404


In [210]:
actual_merged_df.groupby('fields')['score'].max()

fields
041623001737287    1.0
041623001737315    1.0
041623001737326    1.0
041623001737406    1.0
041623004560244    1.0
                  ... 
491623016053358    1.0
491623016053395    1.0
491623016053447    1.0
491623016053449    1.0
491623016053632    1.0
Name: score, Length: 1198, dtype: float64

In [211]:
actual_merged_df.columns

Index(['fields', 'crops', 'scores', 'scenarios', 'score', 'CSBID',
       'EcoCrop2016_2023'],
      dtype='object')

In [212]:
max_score_breakdown_df.columns

Index(['CSBID', 'CDL2016_2023', 'EcoCrop2016_2023', 'geometry', 'Longitude',
       'Latitude', 'Elevation', 'color', 'Crop', 'historical_Crop_Indices',
       'historical_Max_Score', 'historical_N_Crops',
       'mid_century_medium_carbon_Crop_Indices',
       'mid_century_medium_carbon_Max_Score',
       'mid_century_medium_carbon_N_Crops',
       'mid_century_high_carbon_Crop_Indices',
       'mid_century_high_carbon_Max_Score', 'mid_century_high_carbon_N_Crops',
       'Recommended_Crops_Historical',
       'Recommended_Crops_Mid_Century_Medium_Carbon',
       'Recommended_Crops_Mid_Century_High_Carbon', 'fields', 'crops',
       'scenarios', 'score', 'scores'],
      dtype='object')

In [213]:
max_score_breakdown_df['fields']

591      041623001737287
593      041623001737287
592      041623001737287
19290    041623001737315
19292    041623001737315
              ...       
10187    491623016053449
10186    491623016053449
18522    491623016053632
18524    491623016053632
18523    491623016053632
Name: fields, Length: 3594, dtype: object

In [214]:
max_score_breakdown_actual_overall_df_for_concat = max_score_breakdown_df.copy()
max_score_breakdown_actual_overall_df_for_concat = max_score_breakdown_actual_overall_df_for_concat[actual_merged_df.columns]
max_score_breakdown_actual_overall_df_for_concat

Unnamed: 0,fields,crops,scores,scenarios,score,CSBID,EcoCrop2016_2023
591,041623001737287,434.0,Overall,historical,-5.0,041623001737287,434
593,041623001737287,434.0,Overall,mid_century_high_carbon,-3.0,041623001737287,434
592,041623001737287,434.0,Overall,mid_century_medium_carbon,-3.0,041623001737287,434
19290,041623001737315,434.0,Overall,historical,-1.0,041623001737315,434
19292,041623001737315,434.0,Overall,mid_century_high_carbon,-3.0,041623001737315,434
...,...,...,...,...,...,...,...
10187,491623016053449,261.0,Overall,mid_century_high_carbon,0.0,491623016053449,261
10186,491623016053449,261.0,Overall,mid_century_medium_carbon,0.0,491623016053449,261
18522,491623016053632,404.0,Overall,historical,-2.0,491623016053632,404
18524,491623016053632,404.0,Overall,mid_century_high_carbon,0.0,491623016053632,404


In [215]:
# Check for columns that are in one dataframe and not the other
actual_merged_df_cols = set(actual_merged_df.columns)
max_score_breakdown_df_cols = set(max_score_breakdown_df.columns)
max_score_breakdown_actual_overall_df_for_concat_cols = set(max_score_breakdown_actual_overall_df_for_concat.columns)
extra_columns_in_max_score_breakdown_df = max_score_breakdown_df_cols - actual_merged_df_cols
extra_columns_in_max_score_breakdown_df

{'CDL2016_2023',
 'Crop',
 'Elevation',
 'Latitude',
 'Longitude',
 'Recommended_Crops_Historical',
 'Recommended_Crops_Mid_Century_High_Carbon',
 'Recommended_Crops_Mid_Century_Medium_Carbon',
 'color',
 'geometry',
 'historical_Crop_Indices',
 'historical_Max_Score',
 'historical_N_Crops',
 'mid_century_high_carbon_Crop_Indices',
 'mid_century_high_carbon_Max_Score',
 'mid_century_high_carbon_N_Crops',
 'mid_century_medium_carbon_Crop_Indices',
 'mid_century_medium_carbon_Max_Score',
 'mid_century_medium_carbon_N_Crops'}

In [216]:
actual_merged_df

Unnamed: 0,fields,crops,scores,scenarios,score,CSBID,EcoCrop2016_2023
0,041623001737287,434,Photoperiod,historical,-1.0,041623001737287,434
1,041623001737287,434,Photoperiod,mid_century_medium_carbon,1.0,041623001737287,434
2,041623001737287,434,Photoperiod,mid_century_high_carbon,1.0,041623001737287,434
3,041623001737287,434,Climate_Zone,historical,-1.0,041623001737287,434
4,041623001737287,434,Climate_Zone,mid_century_medium_carbon,-1.0,041623001737287,434
...,...,...,...,...,...,...,...
21559,491623016053632,404,Hardiness,mid_century_medium_carbon,-1.0,491623016053632,404
21560,491623016053632,404,Hardiness,mid_century_high_carbon,-1.0,491623016053632,404
21561,491623016053632,404,pH,historical,0.0,491623016053632,404
21562,491623016053632,404,pH,mid_century_medium_carbon,0.0,491623016053632,404


In [217]:
max_score_breakdown_actual_overall_df_for_concat

Unnamed: 0,fields,crops,scores,scenarios,score,CSBID,EcoCrop2016_2023
591,041623001737287,434.0,Overall,historical,-5.0,041623001737287,434
593,041623001737287,434.0,Overall,mid_century_high_carbon,-3.0,041623001737287,434
592,041623001737287,434.0,Overall,mid_century_medium_carbon,-3.0,041623001737287,434
19290,041623001737315,434.0,Overall,historical,-1.0,041623001737315,434
19292,041623001737315,434.0,Overall,mid_century_high_carbon,-3.0,041623001737315,434
...,...,...,...,...,...,...,...
10187,491623016053449,261.0,Overall,mid_century_high_carbon,0.0,491623016053449,261
10186,491623016053449,261.0,Overall,mid_century_medium_carbon,0.0,491623016053449,261
18522,491623016053632,404.0,Overall,historical,-2.0,491623016053632,404
18524,491623016053632,404.0,Overall,mid_century_high_carbon,0.0,491623016053632,404


In [218]:
score_breakdown_actual_crops_df = pd.concat([actual_merged_df, max_score_breakdown_actual_overall_df_for_concat])
score_breakdown_actual_crops_df

Unnamed: 0,fields,crops,scores,scenarios,score,CSBID,EcoCrop2016_2023
0,041623001737287,434.0,Photoperiod,historical,-1.0,041623001737287,434
1,041623001737287,434.0,Photoperiod,mid_century_medium_carbon,1.0,041623001737287,434
2,041623001737287,434.0,Photoperiod,mid_century_high_carbon,1.0,041623001737287,434
3,041623001737287,434.0,Climate_Zone,historical,-1.0,041623001737287,434
4,041623001737287,434.0,Climate_Zone,mid_century_medium_carbon,-1.0,041623001737287,434
...,...,...,...,...,...,...,...
10187,491623016053449,261.0,Overall,mid_century_high_carbon,0.0,491623016053449,261
10186,491623016053449,261.0,Overall,mid_century_medium_carbon,0.0,491623016053449,261
18522,491623016053632,404.0,Overall,historical,-2.0,491623016053632,404
18524,491623016053632,404.0,Overall,mid_century_high_carbon,0.0,491623016053632,404


In [219]:
score_breakdown_actual_crops_pivot = score_breakdown_actual_crops_df.pivot_table(
    index='CSBID',
    columns=['scenarios', 'scores'],
    values='score'
)

In [220]:
score_breakdown_actual_crops_pivot.columns =  [f'Actual_{scenario}_{category}' for scenario, category in score_breakdown_actual_crops_pivot.columns]
score_breakdown_actual_crops_pivot

Unnamed: 0_level_0,Actual_historical_Climate_Zone,Actual_historical_Hardiness,Actual_historical_Overall,Actual_historical_Photoperiod,Actual_historical_Rainfall,Actual_historical_Temperature,Actual_historical_pH,Actual_mid_century_high_carbon_Climate_Zone,Actual_mid_century_high_carbon_Hardiness,Actual_mid_century_high_carbon_Overall,...,Actual_mid_century_high_carbon_Rainfall,Actual_mid_century_high_carbon_Temperature,Actual_mid_century_high_carbon_pH,Actual_mid_century_medium_carbon_Climate_Zone,Actual_mid_century_medium_carbon_Hardiness,Actual_mid_century_medium_carbon_Overall,Actual_mid_century_medium_carbon_Photoperiod,Actual_mid_century_medium_carbon_Rainfall,Actual_mid_century_medium_carbon_Temperature,Actual_mid_century_medium_carbon_pH
CSBID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
041623001737287,-1.0,-1.0,-5.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-3.0,...,0.0,-1.0,-1.0,-1.0,-1.0,-3.0,1.0,0.0,-1.0,-1.0
041623001737315,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,-3.0,...,0.0,-1.0,-1.0,-1.0,-1.0,-3.0,1.0,0.0,-1.0,-1.0
041623001737326,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,2.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0
041623001737406,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,-3.0,...,0.0,-1.0,-1.0,-1.0,-1.0,-3.0,1.0,0.0,-1.0,-1.0
041623004560244,1.0,-1.0,2.5,1.0,0.0,0.5,1.0,1.0,-1.0,1.0,...,0.0,-1.0,1.0,1.0,-1.0,1.0,1.0,0.0,-1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
491623016053358,1.0,-1.0,1.5,1.0,0.0,0.5,0.0,1.0,-1.0,0.0,...,0.0,-1.0,0.0,1.0,-1.0,0.0,1.0,0.0,-1.0,0.0
491623016053395,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,0.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0
491623016053447,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,0.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0
491623016053449,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,0.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0


In [221]:
score_breakdown_actual_crops_pivot.reset_index(inplace=True)

In [222]:
score_breakdown_actual_crops_pivot

Unnamed: 0,CSBID,Actual_historical_Climate_Zone,Actual_historical_Hardiness,Actual_historical_Overall,Actual_historical_Photoperiod,Actual_historical_Rainfall,Actual_historical_Temperature,Actual_historical_pH,Actual_mid_century_high_carbon_Climate_Zone,Actual_mid_century_high_carbon_Hardiness,...,Actual_mid_century_high_carbon_Rainfall,Actual_mid_century_high_carbon_Temperature,Actual_mid_century_high_carbon_pH,Actual_mid_century_medium_carbon_Climate_Zone,Actual_mid_century_medium_carbon_Hardiness,Actual_mid_century_medium_carbon_Overall,Actual_mid_century_medium_carbon_Photoperiod,Actual_mid_century_medium_carbon_Rainfall,Actual_mid_century_medium_carbon_Temperature,Actual_mid_century_medium_carbon_pH
0,041623001737287,-1.0,-1.0,-5.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,...,0.0,-1.0,-1.0,-1.0,-1.0,-3.0,1.0,0.0,-1.0,-1.0
1,041623001737315,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,...,0.0,-1.0,-1.0,-1.0,-1.0,-3.0,1.0,0.0,-1.0,-1.0
2,041623001737326,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0
3,041623001737406,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,...,0.0,-1.0,-1.0,-1.0,-1.0,-3.0,1.0,0.0,-1.0,-1.0
4,041623004560244,1.0,-1.0,2.5,1.0,0.0,0.5,1.0,1.0,-1.0,...,0.0,-1.0,1.0,1.0,-1.0,1.0,1.0,0.0,-1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1193,491623016053358,1.0,-1.0,1.5,1.0,0.0,0.5,0.0,1.0,-1.0,...,0.0,-1.0,0.0,1.0,-1.0,0.0,1.0,0.0,-1.0,0.0
1194,491623016053395,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0
1195,491623016053447,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0
1196,491623016053449,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,0.0


In [223]:
score_breakdown_actual_crops_pivot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1198 entries, 0 to 1197
Data columns (total 22 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   CSBID                                          1198 non-null   object 
 1   Actual_historical_Climate_Zone                 1198 non-null   float64
 2   Actual_historical_Hardiness                    1198 non-null   float64
 3   Actual_historical_Overall                      1198 non-null   float64
 4   Actual_historical_Photoperiod                  1198 non-null   float64
 5   Actual_historical_Rainfall                     1198 non-null   float64
 6   Actual_historical_Temperature                  1198 non-null   float64
 7   Actual_historical_pH                           1198 non-null   float64
 8   Actual_mid_century_high_carbon_Climate_Zone    1198 non-null   float64
 9   Actual_mid_century_high_carbon_Hardiness       1198 

In [224]:
score_breakdown_actual_crops_subset = score_breakdown_actual_crops_df.groupby('CSBID').first()['EcoCrop2016_2023'].reset_index()
# score_breakdown_actual_crops_subset
score_breakdown_actual_crops_pivot_crops = pd.merge(
    score_breakdown_actual_crops_pivot,
    score_breakdown_actual_crops_subset,
    on='CSBID',
    how='left'
)

In [225]:
score_breakdown_actual_crops_pivot_crops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1198 entries, 0 to 1197
Data columns (total 23 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   CSBID                                          1198 non-null   object 
 1   Actual_historical_Climate_Zone                 1198 non-null   float64
 2   Actual_historical_Hardiness                    1198 non-null   float64
 3   Actual_historical_Overall                      1198 non-null   float64
 4   Actual_historical_Photoperiod                  1198 non-null   float64
 5   Actual_historical_Rainfall                     1198 non-null   float64
 6   Actual_historical_Temperature                  1198 non-null   float64
 7   Actual_historical_pH                           1198 non-null   float64
 8   Actual_mid_century_high_carbon_Climate_Zone    1198 non-null   float64
 9   Actual_mid_century_high_carbon_Hardiness       1198 

In [226]:
fields.columns

Index(['Original_Index', 'CSBID', 'CSBYEARS', 'CSBACRES', 'CDL2016', 'CDL2017',
       'CDL2018', 'CDL2019', 'CDL2020', 'CDL2021', 'CDL2022', 'CDL2023',
       'STATEFIPS', 'STATEASD', 'ASD', 'CNTY', 'CNTYFIPS', 'INSIDE_X',
       'INSIDE_Y', 'Shp_Len', 'Shp_Area', 'geometry', 'Longitude', 'Latitude',
       'Elevation', 'color', 'Crop', 'historical_Crop_Indices',
       'historical_Max_Score', 'historical_N_Crops',
       'mid_century_medium_carbon_Crop_Indices',
       'mid_century_medium_carbon_Max_Score',
       'mid_century_medium_carbon_N_Crops',
       'mid_century_high_carbon_Crop_Indices',
       'mid_century_high_carbon_Max_Score', 'mid_century_high_carbon_N_Crops',
       'historical_Crop_Indices_no_rainfall',
       'historical_Max_Score_no_rainfall', 'historical_N_Crops_no_rainfall',
       'mid_century_medium_carbon_Crop_Indices_no_rainfall',
       'mid_century_medium_carbon_Max_Score_no_rainfall',
       'mid_century_medium_carbon_N_Crops_no_rainfall',
       'mid_centu

In [227]:
# Select the necessary columns from the fields dataframe
fields_subset = fields[['CSBID', 'Longitude', 'Latitude', 'Elevation']]

# Merge with score_breakdown_actual_crops_df on 'CSBID'
score_breakdown_actual_crops_df_with_coords = pd.merge(
    # score_breakdown_actual_crops_df,
    score_breakdown_actual_crops_pivot_crops,
    fields_subset,
    how='left',
    on='CSBID'
)

In [228]:
# Check the result
score_breakdown_actual_crops_df_with_coords.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1198 entries, 0 to 1197
Data columns (total 26 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   CSBID                                          1198 non-null   object 
 1   Actual_historical_Climate_Zone                 1198 non-null   float64
 2   Actual_historical_Hardiness                    1198 non-null   float64
 3   Actual_historical_Overall                      1198 non-null   float64
 4   Actual_historical_Photoperiod                  1198 non-null   float64
 5   Actual_historical_Rainfall                     1198 non-null   float64
 6   Actual_historical_Temperature                  1198 non-null   float64
 7   Actual_historical_pH                           1198 non-null   float64
 8   Actual_mid_century_high_carbon_Climate_Zone    1198 non-null   float64
 9   Actual_mid_century_high_carbon_Hardiness       1198 

In [229]:
score_breakdown_actual_crops_df_with_coords.iloc[0]

CSBID                                            041623001737287
Actual_historical_Climate_Zone                              -1.0
Actual_historical_Hardiness                                 -1.0
Actual_historical_Overall                                   -5.0
Actual_historical_Photoperiod                               -1.0
Actual_historical_Rainfall                                   0.0
Actual_historical_Temperature                               -1.0
Actual_historical_pH                                        -1.0
Actual_mid_century_high_carbon_Climate_Zone                 -1.0
Actual_mid_century_high_carbon_Hardiness                    -1.0
Actual_mid_century_high_carbon_Overall                      -3.0
Actual_mid_century_high_carbon_Photoperiod                   1.0
Actual_mid_century_high_carbon_Rainfall                      0.0
Actual_mid_century_high_carbon_Temperature                  -1.0
Actual_mid_century_high_carbon_pH                           -1.0
Actual_mid_century_medium

In [230]:
score_breakdown_actual_crops_df_with_coords['EcoCrop2016_2023'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 1198 entries, 0 to 1197
Series name: EcoCrop2016_2023
Non-Null Count  Dtype
--------------  -----
1198 non-null   int64
dtypes: int64(1)
memory usage: 9.5 KB


In [231]:
crops.iloc[434]

Crop_Code                                                               2231
Scientific_Name                                            Carya illinoensis
Genus                                                                  Carya
Species                                                          illinoensis
Variety                                                                     
                                                 ...                        
USDA_Hardiness_Zone_Max                                                   12
Datasheet_URL              http://ecocrop.fao.org/ecocrop/srv/en/dataShee...
PFAF_URL                   https://pfaf.org/user/Plant.aspx?LatinName=Car...
Common_Name                                                            Pecan
Notes                      BRIEF DESCRIPTION  A tall, handsome, deciduous...
Name: 434, Length: 78, dtype: object

In [232]:
crops.index.dtype

dtype('int64')

In [233]:
score_breakdown_actual_crops_df_with_coords

Unnamed: 0,CSBID,Actual_historical_Climate_Zone,Actual_historical_Hardiness,Actual_historical_Overall,Actual_historical_Photoperiod,Actual_historical_Rainfall,Actual_historical_Temperature,Actual_historical_pH,Actual_mid_century_high_carbon_Climate_Zone,Actual_mid_century_high_carbon_Hardiness,...,Actual_mid_century_medium_carbon_Hardiness,Actual_mid_century_medium_carbon_Overall,Actual_mid_century_medium_carbon_Photoperiod,Actual_mid_century_medium_carbon_Rainfall,Actual_mid_century_medium_carbon_Temperature,Actual_mid_century_medium_carbon_pH,EcoCrop2016_2023,Longitude,Latitude,Elevation
0,041623001737287,-1.0,-1.0,-5.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-3.0,1.0,0.0,-1.0,-1.0,434,-109.282459,32.706889,1254
1,041623001737315,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,...,-1.0,-3.0,1.0,0.0,-1.0,-1.0,434,-109.489526,32.609445,1021
2,041623001737326,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,...,1.0,0.0,1.0,0.0,-1.0,0.0,209,-109.514170,32.443414,1088
3,041623001737406,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,...,-1.0,-3.0,1.0,0.0,-1.0,-1.0,434,-109.454658,32.344597,1128
4,041623004560244,1.0,-1.0,2.5,1.0,0.0,0.5,1.0,1.0,-1.0,...,-1.0,1.0,1.0,0.0,-1.0,1.0,487,-113.123487,33.915456,673
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1193,491623016053358,1.0,-1.0,1.5,1.0,0.0,0.5,0.0,1.0,-1.0,...,-1.0,0.0,1.0,0.0,-1.0,0.0,404,-110.144871,38.805319,1255
1194,491623016053395,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,1.0,0.0,1.0,0.0,-1.0,0.0,261,-110.064416,38.776194,1252
1195,491623016053447,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,1.0,0.0,1.0,0.0,-1.0,0.0,261,-110.689771,38.399610,1341
1196,491623016053449,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,1.0,0.0,1.0,0.0,-1.0,0.0,261,-109.389405,38.212378,1763


In [234]:
# score_breakdown_actual_crops_df_with_coords_pivot = score_breakdown_actual_crops_df_with_coords.pivot_table(
#     index='CSBID',
#     columns=['scenarios', 'scores'],
#     values='score'
# )

In [235]:
# # score_breakdown_actual_crops_df_with_coords_pivot
# score_breakdown_actual_crops_df_with_coords_pivot.columns = [f'Actual_{scenario}_{category}' for scenario, category in pivoted_scores_df.columns]
# score_breakdown_actual_crops_df_with_coords_pivot

In [236]:
# score_breakdown_actual_crops_df_with_coords['crops'] = score_breakdown_actual_crops_df_with_coords['crops'].astype('int64')
crops_for_merge_df = crops[['Scientific_Name', 'Common_Name']].copy()
crops_for_merge_df = crops_for_merge_df.reset_index()
crops_for_merge_df

Unnamed: 0,index,Scientific_Name,Common_Name
0,0,Abelmoschus esculentus,"abelmoskus, america-neri, bakhua mun, bamia, b..."
1,1,Abelmoschus manihot,"neka (Simbo), bele (Fiji), pele (Tonga, Tuvalu..."
2,2,Abelmoschus moschatus,"abelmosk, musk mallow, mushkdan, muskdana, kas..."
3,3,Acacia auriculiformis,"Papuan wattle, auri, ear leaf acacia, tuhkehn ..."
4,4,Acacia farnesiana,"Ellington curse, klu, cassie, espino blanco, a..."
...,...,...,...
2563,2563,Chamaecrista rotundifolia,Black catechu
2564,2564,Acacia polyacantha,Algarobilla
2565,2565,Prosopis affinis,Hairy vetch
2566,2566,Vicia dasycarpa,"camelina, false flax, gold-of-pleasure, gergel..."


In [237]:
# merge the dataframes to include Scientific_Name and Common_Name
score_breakdown_actual_crops_with_coords_and_names_df = pd.merge(
    score_breakdown_actual_crops_df_with_coords,
    crops_for_merge_df[['index', 'Scientific_Name', 'Common_Name']],  # Select relevant columns
    left_on='EcoCrop2016_2023',  # Match with the EcoCrop2016_2023 column in the score dataframe
    right_on='index',  # Use the index of the crops dataframe as the matching key
    how='left'
)

In [238]:
score_breakdown_actual_crops_with_coords_and_names_df

Unnamed: 0,CSBID,Actual_historical_Climate_Zone,Actual_historical_Hardiness,Actual_historical_Overall,Actual_historical_Photoperiod,Actual_historical_Rainfall,Actual_historical_Temperature,Actual_historical_pH,Actual_mid_century_high_carbon_Climate_Zone,Actual_mid_century_high_carbon_Hardiness,...,Actual_mid_century_medium_carbon_Rainfall,Actual_mid_century_medium_carbon_Temperature,Actual_mid_century_medium_carbon_pH,EcoCrop2016_2023,Longitude,Latitude,Elevation,index,Scientific_Name,Common_Name
0,041623001737287,-1.0,-1.0,-5.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,...,0.0,-1.0,-1.0,434,-109.282459,32.706889,1254,434,Carya illinoensis,Pecan
1,041623001737315,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,...,0.0,-1.0,-1.0,434,-109.489526,32.609445,1021,434,Carya illinoensis,Pecan
2,041623001737326,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,...,0.0,-1.0,0.0,209,-109.514170,32.443414,1088,209,Gossypium hirsutum,"cotton, upland cotton, coton, algodón, qutun, ..."
3,041623001737406,-1.0,-1.0,-1.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,...,0.0,-1.0,-1.0,434,-109.454658,32.344597,1128,434,Carya illinoensis,Pecan
4,041623004560244,1.0,-1.0,2.5,1.0,0.0,0.5,1.0,1.0,-1.0,...,0.0,-1.0,1.0,487,-113.123487,33.915456,673,487,Cicer arietinum,"chickpea, Bengal gram, pois chiche, cafe franc..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1193,491623016053358,1.0,-1.0,1.5,1.0,0.0,0.5,0.0,1.0,-1.0,...,0.0,-1.0,0.0,404,-110.144871,38.805319,1255,404,Triticum aestivum,"wheat, bread wheat, blé, alkamh, sinde, xiao m..."
1194,491623016053395,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,0.0,-1.0,0.0,261,-110.064416,38.776194,1252,261,Medicago sativa,"alfalfa, lucerne, common purple lucerne, commo..."
1195,491623016053447,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,0.0,-1.0,0.0,261,-110.689771,38.399610,1341,261,Medicago sativa,"alfalfa, lucerne, common purple lucerne, commo..."
1196,491623016053449,-1.0,1.0,2.0,1.0,0.0,1.0,0.0,-1.0,1.0,...,0.0,-1.0,0.0,261,-109.389405,38.212378,1763,261,Medicago sativa,"alfalfa, lucerne, common purple lucerne, commo..."


### map field locations colored by actual scores

In [239]:
overall_colormap = cm.LinearColormap(["#b22222", "white", "cornflowerblue"], vmin=-6, vmax=6, caption='Overall score')
overall_colormap.width=200
overall_colormap

In [240]:
# Assuming geo_gdf is your GeoDataFrame


# Extract unique crops and their colors
unique_crops = fields[['Crop', 'color']].copy().drop_duplicates()

# Create the HTML for the crop legend
legend_html = """
<div style="
    position: fixed;
    top: 12px;  /* Align from the top of the map */
    left: 50px;
    width: 240px;
    height: auto;
    z-index: 1000;
    background-color: rgba(255, 255, 255, 0.8);  /* White with 80% opacity */
    box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.2);  /* Drop shadow */
    border-radius: 3px;
    padding: 10px;
    font-size:10px;
    font-weight: normal;
">
<h4>Actual Crops 2023</h4>
<ul style="list-style-type:none; padding-left: 0;">
"""

for _, row in unique_crops.iterrows():
    legend_html += f"""
    <li style="display: flex; align-items: center; margin-bottom: 2px;">
        <span style="
            background:{row['color']}; 
            width:10px; 
            height:10px; 
            display:inline-block; 
            margin-right: 5px;"></span>{row['Crop']}
    </li>
    """

# Define colormaps for the overall score and category score
overall_colormap = cm.LinearColormap(
    colors=["#b22222", "white", "cornflowerblue"],
    vmin=-6, vmax=6,
    caption='Overall Score (-6 to 6)'
)
overall_colormap.width = 200

individual_colormap = cm.LinearColormap(
    colors=["#b22222", "white", "cornflowerblue"],
    vmin=-1, vmax=1,
    caption='Category Score (-1 to 1)'
)
individual_colormap.width = 200

# Add the colormaps to the legend
legend_html += "</ul>"
legend_html += "<h4>Scores</h4>"
legend_html += overall_colormap._repr_html_()
legend_html += individual_colormap._repr_html_()
legend_html += "</div>"

# Create the map
m = folium.Map(location=[fields['Latitude'].mean(), fields['Longitude'].mean()], zoom_start=6)

# Add the combined legend to the map
legend = branca.element.Element(legend_html)
m.get_root().html.add_child(legend)

folium.plugins.Fullscreen(
    position="topleft",
    title="Fullscreen",
    title_cancel="Exit Fullscreen",
    force_separate_button=True,
).add_to(m)
# folium.LayerControl().add_to(m)
# Display the map
# m

<folium.plugins.fullscreen.Fullscreen at 0x3699d8980>

#### generate_crop_legend_html (toggle)

In [241]:
def generate_crop_legend_html(unique_crops):

    # Create the HTML for the crop legend with a toggle button
    legend_html = """
    <div id="legend-container" style="
        position: fixed;
        top: 12px;  /* Align from the top of the map */
        left: 50px;
        width: 240px;
        height: auto;
        z-index: 1000;
        background-color: rgba(255, 255, 255, 0.8);  /* White with 80% opacity */
        box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.2);  /* Drop shadow */
        border-radius: 3px;
        padding: 10px;
        font-size:10px;
        font-weight: normal;
    ">
        <button id="legend-toggle" style="
            background-color: #f4f4f4;
            border: none;
            cursor: pointer;
            font-size: 12px;
            text-align: left;
            width: 100%;
            padding: 5px;
            margin-bottom: 5px;
        ">Hide Legend</button>

        <div id="legend-content">
            <h4>Actual Crops 2023</h4>
            <ul style="list-style-type:none; padding-left: 0;">
    """

    for _, row in unique_crops.iterrows():
        legend_html += f"""
            <li style="display: flex; align-items: center; margin-bottom: 2px;">
                <span style="
                    background:{row['color']}; 
                    width:10px; 
                    height:10px; 
                    display:inline-block; 
                    margin-right: 5px;"></span>{row['Crop']}
            </li>
        """
    
    # Define colormaps for the overall score and category score
    overall_colormap = cm.LinearColormap(
        colors=["#b22222", "white", "cornflowerblue"],
        vmin=-6, vmax=6,
        caption='Overall Score (-6 to 6)'
    )
    overall_colormap.width = 200
    
    individual_colormap = cm.LinearColormap(
        colors=["#b22222", "white", "cornflowerblue"],
        vmin=-1, vmax=1,
        caption='Category Score (-1 to 1)'
    )
    individual_colormap.width = 200

    similarity_colormap = cm.LinearColormap([ "#b22222", "white", "cornflowerblue"], vmin=0, vmax=1)
    similarity_colormap.caption = 'Similarity between climate scenarios'
    similarity_colormap.width=200
    
    # Add the colormaps to the legend
    legend_html += "</ul>"
    legend_html += "<h4>Scores</h4>"
    legend_html += similarity_colormap._repr_html_()
    legend_html += overall_colormap._repr_html_()
    legend_html += individual_colormap._repr_html_()
    legend_html += "</div>"  # Close legend-content
    legend_html += "</div>"  # Close legend-container

    # Add the JavaScript for toggling the legend
    legend_html += """
    <script>
        document.getElementById('legend-toggle').onclick = function() {
            var content = document.getElementById('legend-content');
            var toggleButton = document.getElementById('legend-toggle');
            if (content.style.display === 'none') {
                content.style.display = 'block';
                toggleButton.textContent = 'Hide Legend';
            } else {
                content.style.display = 'none';
                toggleButton.textContent = 'Show Legend';
            }
        };
    </script>
    """

    return legend_html, overall_colormap, individual_colormap, similarity_colormap

#### add_scenario_score_layers

In [242]:
def add_scenario_score_layers(map_object, score_df, score_categories, colormap, opacity, scenario):
    """
    Add folium layers for each score category for a specific scenario in the regular DataFrame (score_df).
    """
    # Ensure df has valid geometry
    gdf = gpd.GeoDataFrame(
        score_df,
        geometry=gpd.points_from_xy(score_df['Longitude'], score_df['Latitude']),
        crs="EPSG:4326"
    )
    popup_html = f"""
    <b style='margin-bottom:4px;'>Actual crop with best overall score</b>
    """
    
    gdf['popup_title'] = popup_html
    
    for category in score_categories:
        layer_name = f'{scenario} - {category}'

        # Define the color for each marker based on the score
        folium.GeoJson(
            gdf.to_json(),
            name=layer_name,
            marker=folium.CircleMarker(
                radius=6,
                fill=True,
                fill_opacity=opacity,
                weight=0.2
            ),
            style_function=lambda feature: {
                'fillColor': colormap(feature['properties'][f'Actual_{scenario}_{category}']),
                'fillOpacity': opacity,
                'color': 'grey',
            },
            popup=folium.GeoJsonPopup(
                fields=['popup_title',
                        'CSBID',
                        'Scientific_Name', 
                        'Common_Name', 
                        'Longitude', 
                        'Latitude', 
                        'Elevation', 
                        f'Actual_{scenario}_{category}', 
                        # f'Actual_{scenario}_Overall'
                       ],
                aliases=['2016-2023',
                         'Field ID:',
                         'Scientific Name:', 
                         'Common Name:', 
                         'Longitude:', 
                         'Latitude:', 
                         'Elevation:', 
                         f'{category} Score:',
                         # f'Overall Score:'
                        ],
                parse_html=True,
                lazy=True,  # Enable lazy loading for popups
            ),
            show=False,
        ).add_to(map_object)

#### add_geojson_layers

In [243]:
def add_geojson_layers(m, gdf):
    """
    Adds layers from the original generate_folium_map_with_scenario_comparisons function.
    """
    # Convert geometries to GeoJSON
    geojson_data = gdf.to_json()
    centroids_gdf = gdf.copy()
    centroids_gdf['geometry'] = gpd.points_from_xy(centroids_gdf['Longitude'], centroids_gdf['Latitude'])
    centroids_geojson_data = centroids_gdf.to_json()

    # Add layers similar to the previous function
    folium.GeoJson(
        geojson_data,
        name='Field boundaries',
        style_function=lambda feature: {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        },
        popup=folium.GeoJsonPopup(
            fields=['CSBID',  'Longitude_formatted', 'Latitude_formatted', 'Elevation', 'Crop', 'Recommended_Crops_Historical_html'],
            aliases=['Field ID:',  'Longitude:', 'Latitude:', 'Elevation:', 'Actual Crop (2023)', 'Suitable Crops (Recent Scenario)'],
            parse_html=True,
            max_width="100%",
            lazy=True  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)

    # Add GeoJSON layer for centroids with custom CircleMarkers
    folium.GeoJson(
        centroids_geojson_data,
        name='Field locations',
        # marker=folium.CircleMarker(radius=4, fill_color='grey', fill_opacity=1, color='grey', weight=0.2),
        marker=folium.CircleMarker(
            radius=6, 
            fill=True,
            fill_opacity=0.8,
            weight=0.2
        ),
        # marker=folium.CircleMarker(radius=4),
        style_function=lambda x: {'fillColor': x['properties']['color'], 'color': 'grey'},
        popup=folium.GeoJsonPopup(fields=['CSBID', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation'], 
                                  aliases=['Field ID:', 'Crop 2023:', 'Latitude:', 'Longitude:', 'Elevation:']),
        # popup=folium.GeoJsonPopup(fields=['Recommended_Crops_Historical_html'], aliases=['Recommended Crops (Historical Scenario)'], parse_html=True, max_width="100%"),
        # zoom_on_click=True,
        show=True,
    ).add_to(m)

    folium.GeoJson(
        centroids_geojson_data,
        # name='Do Suitable Crops Match Actual?',
        # name='Do actual crops (2016-2023) match suitable?',
        name='Do actual crops (2016-2023) match suitable (recent scenario)?',
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': 'cornflowerblue' if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else '#b22222',
            'fillOpacity': 0.8 if feature['properties']['cdl_crops_in_historical_recommendations'] == 1 else 0.4,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CSBID', 'Crop', 'Longitude_formatted', 'Latitude_formatted', 'Elevation', 'cdl_crops_in_historical_recommendations'],
            aliases=['Field ID:', 'Crop 2023:', 'Longitude:', 'Latitude:', 'Elevation:', 'Do Suitable Crops Contain the Actual Crop(s)?'],
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    ).add_to(m)


#### generate_combined_folium_map

In [244]:
# List of available tiles with their attributions and layer names (including USGS maps)
tiles_list = [
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery Topo"},
    {"tiles": "OpenStreetMap", "attr": "© OpenStreetMap contributors", "name": "OpenStreetMap"},
    {"tiles": "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Light"},
]

def generate_combined_folium_map(geo_gdf, score_df, zoom=6, max_zoom=12, decimal_places=3):
    """
    Generate a combined folium map with layers from both the GeoDataFrame (geo_gdf)
    and the regular DataFrame (score_df).
    """

    # Check CRS and convert to EPSG:4326 if needed
    if geo_gdf.crs != "EPSG:4326":
        geo_gdf = geo_gdf.to_crs(epsg=4326)

    # Format latitude and longitude to the desired number of decimal places in geo_gdf
    geo_gdf['Longitude_formatted'] = geo_gdf['Longitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    geo_gdf['Latitude_formatted'] = geo_gdf['Latitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')

    # Ensure necessary columns are included in the properties
    geo_gdf = geo_gdf[['CSBID',
                        'geometry', 
                        'CDL2023', 
                        'Longitude', 
                        'Latitude', 
                        'Longitude_formatted', 
                        'Latitude_formatted', 
                        'color', 
                        'Crop', 
                        'Elevation', 
                        'Recommended_Crops_Historical_html', 
                        'cdl_crops_in_historical_recommendations', 
                        'cdl_crops_in_historical_recommendations_no_rainfall', 
                        'cdl_crops_in_mid_century_medium_recommendations_no_rainfall', 
                        'cdl_crops_in_mid_century_high_recommendations_no_rainfall',
                        ]]
    
    # Extract unique crops and their colors
    unique_crops = geo_gdf[['Crop', 'color']].copy().drop_duplicates()
    # legend_html, overall_colormap, individual_colormap = generate_crop_legend_html(unique_crops)
    legend_html, overall_colormap, individual_colormap, similarity_colormap = generate_crop_legend_html(unique_crops)
        
    # Add the legend to the map
    legend = branca.element.Element(legend_html)

    # Initialize map
    minx, miny, maxx, maxy = geo_gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2

    m = folium.Map(
        location=[center_latitude, center_longitude],
        zoom_start=zoom,
        max_zoom=max_zoom,
        tiles=None,
        control_scale=True,
    )

    # Add each base tile layer to the map
    for tile_info in tiles_list:
        tiles = tile_info["tiles"]
        attr = tile_info["attr"]
        layer_name = tile_info["name"]
        folium.TileLayer(tiles=tiles, attr=attr, name=layer_name).add_to(m)

    # Add layers from geo_gdf using the function add_geojson_layers
    # add_geojson_layers(m, geo_gdf, individual_colormap)
    add_geojson_layers(m, geo_gdf)

    # Define the scenarios and score categories
    scenarios = ['historical', 'mid_century_medium_carbon', 'mid_century_high_carbon']
    score_categories = ['pH', 'Photoperiod', 'Climate_Zone', 'Temperature', 'Rainfall', 'Hardiness', 'Overall']

    # Add layers for each scenario and score category from score_df
    for scenario in scenarios:
        add_scenario_score_layers(m, score_df, score_categories[:-1], individual_colormap, 0.16, scenario)
        add_scenario_score_layers(m, score_df, ['Overall'], overall_colormap, 1, scenario)
        # add_scenario_score_layers(m, score_df, score_categories[:-1], 0.16, scenario)
        # add_scenario_score_layers(m, score_df, ['Overall'], 1, scenario)

    # add crop color legend
    m.get_root().html.add_child(legend)

    folium.plugins.Fullscreen(
        position="topleft",
        title="Fullscreen",
        title_cancel="Exit Fullscreen",
        force_separate_button=True,
    ).add_to(m)

    folium.LayerControl().add_to(m)

    return m

In [245]:
# Generate a timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

map_file = f'../datasets/maps/{timestamp}_actual_crop_score_breakdown.html'

generate_combined_folium_map(fields, score_breakdown_actual_crops_with_coords_and_names_df).save(map_file)

In [246]:
# Display the link
# Get the absolute path of the file
map_file_base = os.path.basename(map_file)

print()
HTML(f'<b><a href="http://localhost:8000/{map_file_base}" target="_blank">[ Score breakdown of top scoring actual crops 2016-2023 ]</a></b>')




### map field locations colored by actual scores with Tree Layers

In [247]:
# list(score_breakdown_actual_crops_with_coords_and_names_df.columns)

#### generate_geojson_layers_tree

In [248]:
def generate_geojson_layers_tree(boundaries_geojson, locations_geojson):
    """
    Adds layers from the original generate_folium_map_with_scenario_comparisons function.
    """

    # Add layers similar to the previous function
    boundaries_layer = folium.GeoJson(
        boundaries_geojson,
        name='Field boundaries',
        style_function=lambda feature: {
            'fillColor': feature['properties']['color'],
            'color': feature['properties']['color'],
            'weight': 1,
            'fillOpacity': 0.6
        },
        popup=folium.GeoJsonPopup(
                fields=['CSBID', 'Longitude_formatted', 'Latitude_formatted', 'Elevation', 'Crop', 'suitable_crops_popup_title_recent_html', 'Recommended_Crops_Historical_html'],
                aliases=[
                    '<div style="text-align: right;">Field&nbsp;ID:</div>',  
                    '<div style="text-align: right;">Longitude:</div>', 
                    '<div style="text-align: right;">Latitude:</div>', 
                    '<div style="text-align: right;">Elevation:</div>', 
                    '<div style="text-align: right;">Actual&nbsp;Crop&nbsp;(2023):</div>', 
                    '<div style="text-align: right;">Suitable&nbsp;Crops:</div>',
                    ' ',
                ],
                parse_html=True,
                max_width="100%",
                lazy=True,
        ),

        zoom_on_click=True,
        show=False,
    )#.add_to(m)

    # Add GeoJSON layer for centroids with custom CircleMarkers
    locations_layer = folium.GeoJson(
        locations_geojson,
        name='Field locations',
        marker=folium.CircleMarker(
            radius=6, 
            fill=True,
            fill_opacity=0.8,
            weight=0.2
        ),
        style_function=lambda x: {'fillColor': x['properties']['color'], 'color': 'grey'},
        
        # popup=folium.GeoJsonPopup(fields=['CSBID', 'Crop', 'Latitude_formatted', 'Longitude_formatted', 'Elevation'], 
        #                           aliases=['Field ID:', 'Crop 2023:', 'Latitude:', 'Longitude:', 'Elevation:']),
        popup=folium.GeoJsonPopup(
            fields=['CSBID', 'Longitude_formatted', 'Latitude_formatted', 'Elevation', 'Crop', 'suitable_crops_popup_title_recent_html', 'Recommended_Crops_Historical_html'],
            aliases=[
                '<div style="text-align: right;">Field&nbsp;ID:</div>',  
                '<div style="text-align: right;">Longitude:</div>', 
                '<div style="text-align: right;">Latitude:</div>', 
                '<div style="text-align: right;">Elevation:</div>', 
                '<div style="text-align: right;">Actual&nbsp;Crop&nbsp;(2023):</div>', 
                '<div style="text-align: right;">Suitable&nbsp;Crops:</div>',
                ' ',
            ],
            parse_html=True,
            max_width="100%",
            lazy=True,
        ),
        # zoom_on_click=True,
        show=True,
    )#.add_to(m)
    
    return boundaries_layer, locations_layer


#### generate_score_layers_tree

In [249]:
def generate_score_layers_tree(score_geojson, colormap, opacity, scenario, category):
    """
    Add folium layers for each score category for a specific scenario in the regular DataFrame (score_df).
    """
    layer_name = f'{scenario} - {category}'
    
    # Define the color for each marker based on the score
    layer = folium.GeoJson(
        score_geojson,
        name=layer_name,
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=opacity,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': colormap(feature['properties'][f'Actual_{scenario}_{category}']),
            'fillOpacity': opacity,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['popup_title',
                    'CSBID',
                    'Scientific_Name', 
                    'Common_Name', 
                    'Longitude', 
                    'Latitude', 
                    'Elevation', 
                    f'Actual_{scenario}_{category}', 
                    # f'Actual_{scenario}_Overall'
                   ],
            aliases=['2016-2023',
                     'Field ID:',
                     'Scientific Name:', 
                     'Common Name:', 
                     'Longitude:', 
                     'Latitude:', 
                     'Elevation:', 
                     f'{category} Score:',
                     # f'Overall Score:'
                    ],
            parse_html=True,
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    )
    return layer
    

#### suitable_comparison_layers_tree

In [250]:
def suitable_comparison_layers_tree(locations_geojson, name, column, column_html1, column_html2, title_1, title_2, alias, colormap):
        
    # color locations based on comparison scores
    layer = folium.GeoJson(
        locations_geojson,
        name=name,
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': colormap(feature['properties'][column]),
            'fillOpacity': 0.6,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CSBID', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'Crop', column, title_1, column_html1, title_2, column_html2], 
            aliases=[
                '<div style="text-align: right;">Field ID:</div>',
                '<div style="text-align: right;">Latitude:</div>',
                '<div style="text-align: right;">Longitude:</div>',
                '<div style="text-align: right;">Elevation:</div>',
                '<div style="text-align: right;">Crop&nbsp;2023:</div>',                
                f'<div style="text-align: right;">{alias}</div>',
                '<div style="text-align: right;">Suitable&nbsp;crops:</div>',
                ' ',
                '<div style="text-align: right;">Suitable&nbsp;crops:</div>',
                ' ',
            ],
            parse_html=True,
            max_width="100%",
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    )#.add_to(m)


    return layer

#### actual_match_suitable_comparison_layers_tree

In [251]:
def actual_match_suitable_comparison_layers_tree(locations_geojson, name, column, column_yesno, column_html, alias, title, similarity_colormap):

    # for reference

        
    # color locations based on comparison scores
    layer = folium.GeoJson(
        locations_geojson,
        name=name,
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=0.6,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': similarity_colormap(feature['properties'][column]),
            'fillOpacity': 0.6,
            'color': 'grey',
        },
        popup=folium.GeoJsonPopup(
            fields=['CSBID', 'Latitude_formatted', 'Longitude_formatted', 'Elevation',  column_yesno, 'Crop', title, column_html], 
            aliases=[
                '<div style="text-align: right;">Field ID:</div>',
                '<div style="text-align: right;">Latitude:</div>',
                '<div style="text-align: right;">Longitude:</div>',
                '<div style="text-align: right;">Elevation:</div>',                                
                f'<div style="text-align: right;">{alias}</div>',
                '<div style="text-align: right;">Actual&nbsp;crop:</div>',
                '<div style="text-align: right;">Suitable&nbsp;crops:</div>',
                '',
            ],
            parse_html=True,
            max_width="100%",
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    )#.add_to(m)


    return layer

#### show_suitable_crops_tree

In [252]:
def show_suitable_crops_tree(locations_geojson, name, column, column_html, alias, colormap, color_column, popup, suitable_crops_title, opacity=0.8):
        
    # color locations based on comparison scores
    layer = folium.GeoJson(
        locations_geojson,
        name=name,
        marker=folium.CircleMarker(
            radius=6,
            fill=True,
            fill_opacity=opacity,
            weight=0.2
        ),
        style_function=lambda feature: {
            'fillColor': feature['properties'][color_column],  # Directly use the color from the color column
            'fillOpacity': opacity,
            'color': 'grey',
            'strokeColor': 'grey',
            'weight': 0.2  # Border color and weight
        },
        popup=folium.GeoJsonPopup(
            fields=['CSBID', 'Latitude_formatted', 'Longitude_formatted', 'Elevation', 'Crop', column, suitable_crops_title, column_html, ], 
            aliases=[
                '<div style="text-align: right;">Field&nbsp;ID:</div>',  
                '<div style="text-align: right;">Latitude:</div>',  
                '<div style="text-align: right;">Longitude:</div>',  
                '<div style="text-align: right;">Elevation:</div>',  
                '<div style="text-align: right;">Actual&nbsp;crop&nbsp;2023:</div>',  
                '<div style="text-align: right;">Cropset&nbsp;ID:</div>',  
                '<div style="text-align: right;">Suitable&nbsp;crops:</div>',
                '<div style="text-align: right;"> &nbsp; </div>',   
            ],
            parse_html=True,
            max_width="100%",
            lazy=True,  # Enable lazy loading for popups
        ),
        show=False,
    )#.add_to(m)


    return layer

#### generate_combined_folium_map_tree

In [253]:
# List of available tiles with their attributions and layer names (including USGS maps)
tiles_list = [
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryTopo/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Department of the Interior | U.S. Geological Survey', "name": "USGS Imagery Topo"},
    {"tiles": "https://basemap.nationalmap.gov/arcgis/rest/services/USGSShadedReliefOnly/MapServer/tile/{z}/{y}/{x}", "attr": 'U.S. Geological Survey', "name": "USGS Shaded Relief"},
    {"tiles": "OpenStreetMap", "attr": "© OpenStreetMap contributors", "name": "OpenStreetMap"},
    {"tiles": "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png", "attr": '&copy; <a href="https://carto.com/attributions">CARTO</a>', "name": "CartoDB Light"},
]

def generate_combined_folium_map_tree(gdf, score_df, zoom=6, max_zoom=12, decimal_places=3):
    """
    Generate a combined folium map with layers from both the GeoDataFrame (geo_gdf)
    and the regular DataFrame (score_df).
    """

    # Check CRS and convert to EPSG:4326 if needed
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)

    # Ensure df has valid geometry
    score_gdf = gpd.GeoDataFrame(
        score_df,
        geometry=gpd.points_from_xy(score_df['Longitude'], score_df['Latitude']),
        crs="EPSG:4326"
    )
    popup_html = f"""
    <b style='margin-bottom:4px;'>Actual crop with best overall score</b>
    """
    score_gdf['popup_title'] = popup_html

    suitable_crops_popup_title_recent_html = f"""
    <b style='margin-bottom:4px;'>Recent scenario</b>
    """
    suitable_crops_popup_title_mcm_html = f"""
    <b style='margin-bottom:4px;'>Mid-century medium-carbon scenario</b>
    """
    suitable_crops_popup_title_mch_html = f"""
    <b style='margin-bottom:4px;'>Mid-century high-carbon scenario</b>
    """
    
    gdf['suitable_crops_popup_title_recent_html'] = suitable_crops_popup_title_recent_html
    gdf['suitable_crops_popup_title_mcm_html'] = suitable_crops_popup_title_mcm_html
    gdf['suitable_crops_popup_title_mch_html'] = suitable_crops_popup_title_mch_html
    
        
    # Format latitude and longitude to the desired number of decimal places in geo_gdf
    gdf['Longitude_formatted'] = gdf['Longitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')
    gdf['Latitude_formatted'] = gdf['Latitude'].map(lambda x: f'{x:>{decimal_places+4}.{decimal_places}f}')

    # Ensure necessary columns are included in the properties
    gdf = gdf[[
            'CSBID',
            'geometry', 
            'CDL2023', 
            'Longitude', 
            'Latitude', 
            'Longitude_formatted', 
            'Latitude_formatted', 
            'color', 
            'Crop', 
            'Elevation', 
            'historical_cropset_id',
            'mid_century_medium_carbon_cropset_id',
            'mid_century_high_carbon_cropset_id',
            'historical_cropset_color',
            'mid_century_medium_carbon_cropset_color',
            'mid_century_high_carbon_cropset_color',            
            'Recommended_Crops_Historical_html',        
            'Recommended_Crops_Mid_Century_Medium_Carbon_html',
            'Recommended_Crops_Mid_Century_High_Carbon_html',
            'cdl_crops_in_historical_recommendations',
            'cdl_crops_in_historical_recommendations_yes_no',
            'cdl_crops_in_mid_century_medium_recommendations',
            'cdl_crops_in_mid_century_medium_recommendations_yes_no',
            'cdl_crops_in_mid_century_high_recommendations',
            'cdl_crops_in_mid_century_high_recommendations_yes_no',
            'jaccard_hist_mid_medium', 
            'jaccard_hist_mid_high', 
            'jaccard_mid_medium_mid_high',
            'suitable_crops_popup_title_recent_html',
            'suitable_crops_popup_title_mcm_html',
            'suitable_crops_popup_title_mch_html',
    ]]

    # Create a separate GeoDataFrame for the centroids
    centroids_gdf = gdf.copy()
    centroids_gdf['geometry'] = gpd.points_from_xy(centroids_gdf['Longitude'], centroids_gdf['Latitude'])
    
    # Convert geometries to GeoJSON
    boundaries_geojson = gdf.to_json()
    locations_geojson = centroids_gdf.to_json()
    score_geojson = score_gdf.to_json()
    
    # Extract unique crops and their colors for legend
    unique_crops = gdf[['Crop', 'color']].copy().drop_duplicates()
    # generate the legend and colormaps
    legend_html, overall_colormap, individual_colormap, similarity_colormap = generate_crop_legend_html(unique_crops)
           
    # Add the legend to the map
    legend = branca.element.Element(legend_html)

    # Initialize map
    minx, miny, maxx, maxy = gdf.total_bounds
    center_longitude = (minx + maxx) / 2
    center_latitude = (miny + maxy) / 2

    m = folium.Map(
        location=[center_latitude, center_longitude],
        zoom_start=zoom,
        max_zoom=max_zoom,
        tiles=None,
        control_scale=True,
    )


    #### base map branch ####
    basemap_tree = {"label": "<b>Base Maps </b>",  "children": []}
    # Add each base tile layer to the map
    for tile_info in tiles_list:
        tiles = tile_info["tiles"]
        attr = tile_info["attr"]
        layer_name = tile_info["name"]
        layer = folium.TileLayer(tiles=tiles, attr=attr, name=layer_name).add_to(m)
        basemap_tree['children'].append({"label": f' {layer_name}', "layer": layer})

    #### top of tree ####
    overlay_tree = {"label": "<b>Crop suitability assessment for climate scenarios</b>",  "children": []}

    # field boundaries and field locations layers
    geojson_branch = {"label": "<b>Fields for assessment</b>",  "children": []}
    # # Add layers from geo_gdf using the function add_geojson_layers
    boundaries_layer, locations_layer = generate_geojson_layers_tree(boundaries_geojson, locations_geojson)

    boundaries_layer.add_to(m)
    locations_layer.add_to(m)
    geojson_branch["children"].append({"label": f" Field boundaries (actual crops 2023)", "layer": boundaries_layer})
    geojson_branch["children"].append({"label": f" Field locations (actual crops 2023)", "layer": locations_layer})

    # score branch layers
    score_branch = {"label": "<b>Scores of actual crops for climate scenarios</b>", "collapsed": False, "children": []}
    # Define the scenarios and score categories
    scenarios = ['historical', 'mid_century_medium_carbon', 'mid_century_high_carbon']
    scenario_labels = [' Scenario: Recent', ' Scenario: Mid-century_medium-carbon', ' Scenario: Mid-century_high-carbon']
    score_categories = ['pH', 'Photoperiod', 'Climate_Zone', 'Temperature', 'Rainfall', 'Hardiness', 'Overall']

    # Add layers for each scenario and score category from score_df
    for scenario, scenario_label in list(zip(scenarios, scenario_labels)):
        # scenario_branch = {"label": scenario_label, "children": []}
        scenario_branch = {"label": scenario_label, "collapsed": True, "children": []}
        for category in score_categories:
            if category == 'Overall':
                layer = generate_score_layers_tree(score_geojson=score_geojson, 
                                                   colormap=overall_colormap, 
                                                   opacity=0.8,
                                                   scenario=scenario, 
                                                   category=category
                                                  ).add_to(m)
            else:
                layer = generate_score_layers_tree(score_geojson=score_geojson, 
                                                   colormap=individual_colormap, 
                                                   opacity=0.16, 
                                                   scenario=scenario, 
                                                   category=category
                                                  ).add_to(m)
            # Add layer to the scenario branch
            scenario_branch["children"].append({"label": f" {category}", "layer": layer})

        # pprint.pprint(scenario_branch)
        # print()
        
        # Add scenario branch to the overlay tree
        score_branch["children"].append(scenario_branch)

    ##### comparison branch: suitable crops #####
    comparison_branch = {"label": "<b>Comparison of suitable crops between climate scenarios</b>", "collapsed": True, "children": []}
    # Define comparison layers

    comparisons_suitable = [
        {
            'name': ' Recent compared to mid-century medium-carbon',
            'column': 'jaccard_hist_mid_medium',
            'column_html1':'Recommended_Crops_Historical_html',
            'column_html2':'Recommended_Crops_Mid_Century_Medium_Carbon_html',
            'title_1': 'suitable_crops_popup_title_recent_html',
            'title_2': 'suitable_crops_popup_title_mcm_html',
            'alias': 'Recent_vs_MCM_similarity:'
        },
        {
            'name': ' Recent compared to mid-century high-carbon',
            'column': 'jaccard_hist_mid_high',
            'column_html1':'Recommended_Crops_Historical_html',
            'column_html2':'Recommended_Crops_Mid_Century_High_Carbon_html',
            'title_1': 'suitable_crops_popup_title_recent_html',
            'title_2': 'suitable_crops_popup_title_mcm_html',            
            'alias': 'Recent_vs_MCH_similarity:'
        },
        {
            'name': ' Mid-century medium- compared to high-carbon',
            'column': 'jaccard_mid_medium_mid_high',
            'column_html1':'Recommended_Crops_Mid_Century_Medium_Carbon_html',
            'column_html2':'Recommended_Crops_Mid_Century_High_Carbon_html',
            'title_1': 'suitable_crops_popup_title_mcm_html',
            'title_2': 'suitable_crops_popup_title_mch_html',
            'alias': 'MCM_vs_MCH_similarity:'
        }
    ]
    
    for layer in comparisons_suitable:
        
        name = layer['name']
        column = layer['column']
        column_html1 = layer['column_html1']
        column_html2 = layer['column_html2']
        title_1 = layer['title_1']
        title_2 = layer['title_2']
        alias= layer['alias']
        
        # layer = generate_comparison_layers_tree(locations_geojson, name, column, column_html, alias, similarity_colormap)
        layer = suitable_comparison_layers_tree(locations_geojson, name, column, column_html1, column_html2, title_1, title_2, alias, similarity_colormap)
        layer.add_to(m)
        comparison_branch["children"].append({"label": name, "layer": layer, "radioGroup": "comparison"})

    empty_layer = folium.TileLayer("", name="None", attr="blank").add_to(m)   
    comparison_branch["children"].append({"label": " [ Comparison layers off ]", "layer": empty_layer, "radioGroup": "comparison"})
    
    ###################################

    
    ##### Do actual crops match suitable? #####    
    match_branch = {"label": "<b>Match between actual crops (2016-2023) and suitable crops by climate scenarios</b>", "collapsed": True, "children": []}
    # Define comparison layers
    actual_match_suitable = [
        {
            'name': ' Actual crop(s) matches recent',
            'column': 'cdl_crops_in_historical_recommendations',
            'column_yesno': 'cdl_crops_in_historical_recommendations_yes_no',
            'column_html':'Recommended_Crops_Historical_html',
            'alias': 'Actual&nbsp;crop&nbsp;in&nbsp;suitable&nbsp;(recent)?:',
            'title': 'suitable_crops_popup_title_recent_html',
        },
        {
            'name': ' Actual crop(s) matches mid-century medium-carbon',
            'column': 'cdl_crops_in_mid_century_medium_recommendations',
            'column_yesno': 'cdl_crops_in_mid_century_medium_recommendations_yes_no',
            'column_html':'Recommended_Crops_Mid_Century_Medium_Carbon_html',
            'alias': 'Actual&nbsp;crop&nbsp;in&nbsp;suitable&nbsp;(MCM)?:',
            'title': 'suitable_crops_popup_title_mcm_html',
        },
        {
            'name': ' Actual crop(s) mid-century high-carbon',
            'column': 'cdl_crops_in_mid_century_high_recommendations',
            'column_yesno': 'cdl_crops_in_mid_century_high_recommendations_yes_no',
            'column_html':'Recommended_Crops_Mid_Century_High_Carbon_html',
            'alias': 'Actual&nbsp;crop&nbsp;in&nbsp;suitable&nbsp;(MCH)?:',
            'title': 'suitable_crops_popup_title_mch_html',
        }
    ]
    
    for layer in actual_match_suitable:
        
        name = layer['name']
        column = layer['column']
        column_yesno = layer['column_yesno']
        column_html = layer['column_html']
        alias = layer['alias']
        title = layer['title']
        
        layer = actual_match_suitable_comparison_layers_tree(locations_geojson, name, column, column_yesno, column_html, alias, title, similarity_colormap)
        layer.add_to(m)
        match_branch["children"].append({"label": name, "layer": layer, "radioGroup": "match"})

    empty_layer = folium.TileLayer("", name="None", attr="blank").add_to(m)    
    match_branch["children"].append({"label": " [ Match layers off ]", "layer": empty_layer, "radioGroup": "match"})
    
    #########################

    ##### Suitable crops for each scenario #####    
    suitable_branch = {"label": "<b>Suitable crops fo each climate scenarios</b>", "collapsed": False, "children": []}
    # Define suitable layers
    suitable = [
        {
            'name': ' Scenario: Recent',
            'column': 'historical_cropset_id',
            'column_html':'Recommended_Crops_Historical_html',
            'alias': 'Recent cropset ID: ',
            'color_column': 'historical_cropset_color',
            'popup': ' Recent',
            'suitable_crops_title': 'suitable_crops_popup_title_recent_html',
        },
        {
            'name': ' Scenario: Mid-century_medium-carbon',
            'column': 'cdl_crops_in_mid_century_medium_recommendations',
            'column_html':'Recommended_Crops_Mid_Century_Medium_Carbon_html',
            'alias': 'Mid-century medium-carbon cropset ID: ',
            'color_column': 'mid_century_medium_carbon_cropset_color',
            'popup': '  Mid-century_medium-carbon',
            'suitable_crops_title': 'suitable_crops_popup_title_mcm_html',
        },
        {
            'name': ' Scenario: Mid-century_high-carbon',
            'column': 'cdl_crops_in_mid_century_high_recommendations',
            'column_html':'Recommended_Crops_Mid_Century_High_Carbon_html',
            'alias': 'Mid-century high-carbon cropset ID:',
            'color_column': 'mid_century_high_carbon_cropset_color',
            'popup': '  Mid-century_high-carbon',
            'suitable_crops_title': 'suitable_crops_popup_title_mch_html',
        }
    ]
    
    for layer in suitable:
        
        name = layer['name']
        column = layer['column']
        column_html = layer['column_html']
        alias = layer['alias']
        color_column = layer['color_column']
        popup = layer['popup']
        suitable_crops_title = layer['suitable_crops_title']
        
        layer = show_suitable_crops_tree(locations_geojson, name, column, column_html, alias, colormap, color_column, popup, suitable_crops_title, opacity=0.6)
        layer.add_to(m)
        suitable_branch["children"].append({"label": name, "layer": layer, "radioGroup": "suitable"})

    empty_layer = folium.TileLayer("", name="None", attr="blank").add_to(m)    
    suitable_branch["children"].append({"label": " [ Suitable layers off ]", "layer": empty_layer, "radioGroup": "suitable"})
    #########################
    
    overlay_tree["children"].append(geojson_branch)
    overlay_tree["children"].append(suitable_branch)
    overlay_tree["children"].append(comparison_branch)
    overlay_tree["children"].append(match_branch)
    overlay_tree["children"].append(score_branch)
    
    # add crop color legend
    m.get_root().html.add_child(legend)

    folium.plugins.Fullscreen(
        position="topleft",
        title="Fullscreen",
        title_cancel="Exit Fullscreen",
        force_separate_button=True,
    ).add_to(m)

    # pprint.pprint(overlay_tree)
    # print()

    # Add the TreeLayerControl
    TreeLayerControl(base_tree=basemap_tree, 
                     overlay_tree=overlay_tree,
                     collapse_all=''
                    ).add_to(m)


    return m
    # return overlay_tree

In [254]:
# list(fields.columns)

In [255]:
# list(score_breakdown_actual_crops_with_coords_and_names_df.columns)
# score_breakdown_actual_crops_with_coords_and_names_df

In [256]:
# Generate a timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# map_file = f'../datasets/maps/{timestamp}_FutureFields_Crop_Suitability_Assessment.html'
map_file = '../datasets/maps/FutureFields_Crop_Suitability_Assessment.html'

generate_combined_folium_map_tree(fields, score_breakdown_actual_crops_with_coords_and_names_df).save(map_file)

In [257]:
# Display the link
# Get the absolute path of the file
map_file_base = os.path.basename(map_file)

print()
HTML(f'<b><a href="http://localhost:8000/{map_file_base}" target="_blank">[ Future Fields Crop Suitability Assessment ]</a></b>')




## save final score data 

In [258]:
fields_final = fields.copy()
fields_final = fields_final.drop(columns=['Recommended_Crops_Historical', 'Recommended_Crops_Mid_Century_Medium_Carbon','Recommended_Crops_Mid_Century_High_Carbon'])

fields_final_data_file = '../data/fields/csb_sample_with_final_data.parquet'
fields_final.to_parquet(fields_final_data_file)

In [259]:
actual_crop_scores_file = '../data/fields/csb_sample_with_actual_crop_scores.parquet'
score_breakdown_actual_crops_with_coords_and_names_df.to_parquet(actual_crop_scores_file)