## libraries

In [1]:
import itertools
import random
from datetime import datetime
from operator import itemgetter
from pprint import pprint

import fiona
import geopandas as gpd
import shapely  # shapely 2.0
import pyogrio
import pyarrow
import folium
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

from pyproj import CRS
from mpl_toolkits.basemap import Basemap
from scipy.spatial import KDTree
from shapely.geometry import Point, LineString

from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

import gower

In [2]:
# Set GeoPandas to use pyogrio
# (this is a global GeoPandas option, so it is set once, right after the imports)
gpd.options.io_engine = "pyogrio"

In [3]:
# Print the GeoPandas environment report (system, GEOS/GDAL/PROJ, Python dependencies)
# so the versions this notebook ran against are recorded next to its results.
gpd.show_versions()


SYSTEM INFO
-----------
python     : 3.11.0 | packaged by conda-forge | (main, Oct 25 2022, 06:21:02) [Clang 14.0.4 ]
executable : /Users/jwhite/miniforge3/envs/siads696/bin/python
machine    : macOS-14.5-arm64-arm-64bit

GEOS, GDAL, PROJ INFO
---------------------
GEOS       : 3.11.3
GEOS lib   : None
GDAL       : 3.5.3
GDAL data dir: /Users/jwhite/miniforge3/envs/siads696/share/gdal
PROJ       : 9.1.0
PROJ data dir: /Users/jwhite/miniforge3/envs/siads696/share/proj

PYTHON DEPENDENCIES
-------------------
geopandas  : 0.14.4
numpy      : 1.23.4
pandas     : 1.5.1
pyproj     : 3.4.0
shapely    : 2.0.4
fiona      : 1.8.22
geoalchemy2: None
geopy      : 2.4.1
matplotlib : 3.6.2
mapclassify: 2.4.3
pygeos     : 0.13
pyogrio    : v0.4.2
psycopg2   : None
pyarrow    : 9.0.0
rtree      : 1.0.1


## file locations

In [4]:
# Parquet file holding the field table that is read in the "field data" section.
# NOTE(review): the path is relative to the notebook's working directory — confirm before running elsewhere.
fields_features_df_file = '../../../SIADS_699_Capstone/project/data/public/20240625_1710_csb_weather_elevation_demographic.parquet'

In [15]:
# Pickle file holding the EcoCrop table that is read in the "ecocrop data" section.
# NOTE(review): the path is relative to the notebook's working directory — confirm before running elsewhere.
ecocrop_data_file = '../../../SIADS_699_Capstone/project/data/public/cropbasics_clean_df.pkl'

## field data

In [5]:
# Load the field table (one row per field) with GeoPandas' Parquet reader.
fields_features_df = gpd.read_parquet(fields_features_df_file)

In [6]:
# Preview the loaded field table.
fields_features_df

Unnamed: 0,index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,S1903_C03_031E,S1903_C03_032E,S1903_C03_033E,S1903_C03_034E,S1903_C03_035E,S1903_C03_036E,S1903_C03_037E,S1903_C03_038E,S1903_C03_039E,S1903_C03_040E
0,0,491623000059102,1623,3.661043,61,24,61,61,24,61,...,50776,93194,144038,27540,19813,17344,65489,35023,23611,93659
1,1,491623000059103,1623,4.741851,24,61,24,61,24,61,...,50776,93194,144038,27540,19813,17344,65489,35023,23611,93659
2,2,491623000059104,1623,8.743661,61,24,61,24,61,24,...,50776,93194,144038,27540,19813,17344,65489,35023,23611,93659
3,3,491623000059105,1623,8.071827,36,36,36,36,36,36,...,50776,93194,144038,27540,19813,17344,65489,35023,23611,93659
4,4,491623000059106,1623,7.234577,36,36,36,36,36,36,...,50776,93194,144038,27540,19813,17344,65489,35023,23611,93659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
618689,54921,041623015988832,1623,8.444513,36,36,36,36,152,22,...,67553,97218,138157,39803,36216,31954,60784,45497,37754,69797
618690,54922,041623015988833,1623,5.531071,36,36,36,36,152,152,...,67553,97218,138157,39803,36216,31954,60784,45497,37754,69797
618691,54923,041623015988834,1623,17.123461,36,36,36,23,152,23,...,67553,97218,138157,39803,36216,31954,60784,45497,37754,69797
618692,54924,041623015988835,1623,7.212146,36,36,36,36,152,152,...,67553,97218,138157,39803,36216,31954,60784,45497,37754,69797


In [7]:
# Partition the column names into descriptive attributes and model features.
# Position 0 (the leftover "index" column) is dropped; positions 1-25 are the
# field attributes; everything from position 26 on is treated as a feature,
# except the three county identifier columns, which are moved to the attributes.
df_columns = fields_features_df.columns.tolist()
skip_columns = ['county_fips', 'GEO_ID', 'NAME']
attribute_columns = [*df_columns[1:26], *skip_columns]
feature_columns = [name for name in df_columns[26:] if name not in skip_columns]

# df_columns
# attribute_columns
# len(feature_columns)

In [8]:
# Split the table column-wise into descriptive attributes and model features.
# Both selections are taken from the same frame, so they share its row index.
attributes_df = fields_features_df[attribute_columns]
features_df = fields_features_df[feature_columns]

In [9]:
# Swap the three non-numeric placeholder strings for numeric stand-ins,
# then cast every feature column to float.
features_df = (
    features_df
    .replace('-', '0')
    .replace('250,000+', '250001')
    .replace('2,500-', '2499')
    .astype(float)
)

In [10]:
# Preview the cleaned, all-float feature table.
features_df

Unnamed: 0,202201_ppt,202202_ppt,202203_ppt,202204_ppt,202205_ppt,202206_ppt,202207_ppt,202208_ppt,202209_ppt,202210_ppt,...,S1903_C03_031E,S1903_C03_032E,S1903_C03_033E,S1903_C03_034E,S1903_C03_035E,S1903_C03_036E,S1903_C03_037E,S1903_C03_038E,S1903_C03_039E,S1903_C03_040E
0,6.9541,24.447399,32.368999,5.5834,0.0,20.317600,43.372799,45.463898,32.925499,55.420799,...,50776.0,93194.0,144038.0,27540.0,19813.0,17344.0,65489.0,35023.0,23611.0,93659.0
1,6.9541,24.447399,32.368999,5.5834,0.0,20.317600,43.372799,45.463898,32.925499,55.420799,...,50776.0,93194.0,144038.0,27540.0,19813.0,17344.0,65489.0,35023.0,23611.0,93659.0
2,6.8790,24.103300,32.152798,6.0021,0.0,20.831299,45.273499,46.299297,34.400398,55.966599,...,50776.0,93194.0,144038.0,27540.0,19813.0,17344.0,65489.0,35023.0,23611.0,93659.0
3,4.1532,24.084599,35.753399,4.7964,0.0,20.755899,60.666397,42.887100,33.855198,56.687500,...,50776.0,93194.0,144038.0,27540.0,19813.0,17344.0,65489.0,35023.0,23611.0,93659.0
4,4.1532,24.084599,35.753399,4.7964,0.0,20.755899,60.666397,42.887100,33.855198,56.687500,...,50776.0,93194.0,144038.0,27540.0,19813.0,17344.0,65489.0,35023.0,23611.0,93659.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
618689,10.4719,10.519800,14.965899,0.0000,0.0,35.283100,44.655499,37.593098,59.954800,40.751801,...,67553.0,97218.0,138157.0,39803.0,36216.0,31954.0,60784.0,45497.0,37754.0,69797.0
618690,10.4719,10.519800,14.965899,0.0000,0.0,35.283100,44.655499,37.593098,59.954800,40.751801,...,67553.0,97218.0,138157.0,39803.0,36216.0,31954.0,60784.0,45497.0,37754.0,69797.0
618691,10.4719,10.519800,14.965899,0.0000,0.0,35.283100,44.655499,37.593098,59.954800,40.751801,...,67553.0,97218.0,138157.0,39803.0,36216.0,31954.0,60784.0,45497.0,37754.0,69797.0
618692,10.4719,10.519800,14.965899,0.0000,0.0,35.283100,44.655499,37.593098,59.954800,40.751801,...,67553.0,97218.0,138157.0,39803.0,36216.0,31954.0,60784.0,45497.0,37754.0,69797.0


In [163]:
# list(features_df.columns)

In [161]:
# Draw one random field to inspect. No seed is passed, so a different row
# is drawn each time this cell runs.
sample_row = features_df.sample(1)
sample_row

Unnamed: 0,202201_ppt,202202_ppt,202203_ppt,202204_ppt,202205_ppt,202206_ppt,202207_ppt,202208_ppt,202209_ppt,202210_ppt,...,S1903_C03_031E,S1903_C03_032E,S1903_C03_033E,S1903_C03_034E,S1903_C03_035E,S1903_C03_036E,S1903_C03_037E,S1903_C03_038E,S1903_C03_039E,S1903_C03_040E
210839,5.4735,18.8018,19.726299,9.8646,13.3279,33.5131,68.106102,68.644402,20.1565,23.2108,...,53109.0,65924.0,148438.0,28238.0,22465.0,20174.0,68846.0,29475.0,29245.0,36250.0


In [162]:
# Show the attribute record for the field drawn into `sample_row`.
# The row is looked up by the sampled index label rather than a hard-coded
# position, so it stays in sync with whatever row the random draw produced.
attributes_df.loc[sample_row.index[0]]

CSBID                                            081623005157840
CSBYEARS                                                    1623
CSBACRES                                                 6.57931
CDL2016                                                      176
CDL2017                                                      176
CDL2018                                                       37
CDL2019                                                      176
CDL2020                                                      176
CDL2021                                                      176
CDL2022                                                      176
CDL2023                                                      176
STATEFIPS                                                     08
STATEASD                                                    0880
ASD                                                           80
CNTY                                                    Saguache
CNTYFIPS                 

In [164]:
# sample_row

## generate normalized field vectors

In [11]:
# Work on a random subset of fields to keep the experiment fast.
sample_size = 1000
# Fixed seed so the same fields (and therefore the same downstream
# similarities and recommendations) are produced on every run.
sample_seed = 42
sample_df = features_df.sample(n=sample_size, random_state=sample_seed)

In [12]:
# Normalize the data
# StandardScaler is fitted on the sampled rows only and returns a NumPy array.
scaler = StandardScaler()
normalized_sample = scaler.fit_transform(sample_df)

In [13]:
# Inspect the standardized sample array.
normalized_sample

array([[-0.90073472, -0.54600568, -1.15306662, ..., -1.15403018,
        -0.80408656, -0.11583943],
       [-0.38962691,  0.70232041,  1.58753425, ...,  2.60097419,
         0.73161419,  0.61557624],
       [ 3.69700139,  1.85803002,  1.07527569, ...,  1.80898888,
         1.58323376,  0.82038404],
       ...,
       [ 0.65919573, -0.52775648,  0.00402848, ...,  0.29167172,
         0.75727029, -0.01583976],
       [ 0.97159097, -0.65225798,  1.58671417, ...,  0.13785605,
         0.53483599,  2.88667712],
       [ 0.28614391, -0.32929485,  1.68073186, ..., -1.19661893,
        -0.70838524, -0.68225972]])

In [83]:
# Wrap the standardized array in a DataFrame with the feature column names.
# No index is passed, so rows get a default 0..n-1 index instead of the sampled field labels.
normalized_sample_df = pd.DataFrame(normalized_sample, columns=sample_df.columns)

## ecocrop data

In [16]:
# Load the EcoCrop crop table from a pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
ecocrop_df = pd.read_pickle(ecocrop_data_file)

In [167]:
# ecocrop_df

In [17]:
# EcoCrop columns kept for the crop vectors, in their original order.
# Deliberately left out: crop_code, genus, species, Alt_Opt_Min, Alt_Opt_Max,
# Introduction.risks., Product..system, Cropping.system, Subsystem,
# Companion.species, Level.of.mechanization, Labour.intensity, use.main,
# use.detailed, use.part and datasheet_url.
columns_to_keep = [
    # growth form and classification
    'Life.form', 'Habit', 'Life.span', 'Physiology', 'Category', 'Plant.attributes',
    # temperature
    'temp_opt_min', 'Temp_Opt_Max', 'Temp_Abs_Min', 'Temp_Abs_Max',
    # rainfall
    'Rain_Opt_Min', 'Rain_Opt_Max', 'Rain_Abs_Min', 'Rain_Abs_Max',
    # latitude
    'Lat_Opt_Min', 'Lat_Opt_Max', 'Lat_Abs_Min', 'Lat_Abs_Max',
    # altitude (absolute limits only)
    'Alt_Abs_Min', 'Alt_Abs_Max',
    # soil pH
    'pH_Opt_Min', 'pH_Opt_Max', 'pH_Abs_Min', 'pH_Abs_Max',
    # light
    'Light_Opt_Min', 'Light_Opt_Max', 'Light_Abs_Min', 'Light_Abs_Max',
    # soil depth, texture, fertility
    'Depth_Opt', 'Depth_Abs',
    'Texture_Ops', 'Texture_Abs',
    'Fertility_Ops', 'Fertility_Abs',
    # soil toxicity, salinity, drainage
    'Al_Toxicity_Opt', 'Al_Toxicity_Abs',
    'Salinity_Ops', 'Salinity_Abs',
    'drainage_opt', 'drainage_abs',
    # climate and day length
    'Climate.Zone', 'photoperiod',
    # killing temperatures and abiotic stress
    'Killing.temp..during.rest', 'Killing.temp..early.growth',
    'Abiotic.toler.', 'Abiotic.suscept.',
    # crop cycle length
    'cycle_min', 'cycle_max',
]

In [18]:
# Keep only the crop columns listed in columns_to_keep.
ecocrop_select = ecocrop_df[columns_to_keep]
# ecocrop_select

In [87]:
# Split the selected crop columns by dtype: object columns are handled as
# categorical (one-hot encoded below), numeric columns are standardized below.
categorical_features = ecocrop_select.select_dtypes(include='object')
numeric_features = ecocrop_select.select_dtypes(include='number')
# categorical_features
# numeric_features

In [88]:
# One-hot encode the categorical crop columns into a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the new keyword first and fall back to the old
# one on releases that do not know it yet.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoded_categorical_data = encoder.fit_transform(categorical_features)

# Convert to DataFrame for easier concatenation
encoded_categorical_df = pd.DataFrame(encoded_categorical_data, columns=encoder.get_feature_names_out(categorical_features.columns))
# np.unique(encoded_categorical_df)

In [89]:
# Standardize numeric features
# NOTE: this rebinds `scaler`, replacing the scaler fitted on the field sample above.
scaler = StandardScaler()
scaled_numeric_data = scaler.fit_transform(numeric_features)

# Convert to DataFrame for easier concatenation
# (no index is passed, so rows get a default 0..n-1 index)
scaled_numeric_df = pd.DataFrame(scaled_numeric_data, columns=numeric_features.columns)
# scaled_numeric_df

In [104]:
# Combine encoded categorical and scaled numeric features
# Both inputs were built from arrays without an explicit index, so they share
# the default 0..n-1 index and are joined row by row.
ecocrop_scaled_encoded = pd.concat([encoded_categorical_df, scaled_numeric_df], axis=1)
# ecocrop_scaled_encoded

## scale crop and field vectors

In [103]:
# Pick the inputs for the field/crop comparison that follows.

# Fields: a copy of the sampled feature rows (not yet standardized here).
field_vectors = sample_df.copy()

# crop_vectors = ecocrop_scaled_encoded.copy()
# numeric data only for now
crop_vectors = scaled_numeric_df.copy()
# crop_vectors

In [93]:
# Standardize numeric features in both field and crop datasets
scaler = StandardScaler()
# Standardize numeric features of the crop dataset
# NOTE: crop_vectors is a copy of scaled_numeric_df, which was already standardized,
# and this reassigns `scaled_numeric_data` from the earlier numeric-scaling cell.
scaled_numeric_data = scaler.fit_transform(crop_vectors)
scaled_numeric_data

array([[ 7.77805151e-03, -7.33891479e-04,  5.63155331e-02, ...,
         1.98134551e-18, -6.87059952e-01, -8.94825940e-02],
       [ 5.84852664e-02, -7.33891479e-04,  1.90976799e-01, ...,
         1.98134551e-18,  2.11789255e+00,  1.25725115e+00],
       [ 7.77805151e-03, -7.33891479e-04, -1.10150998e-02, ...,
         1.98134551e-18, -1.13229051e+00, -1.39981813e+00],
       ...,
       [ 3.31316589e-02,  2.25700439e-02,  6.30485963e-02, ...,
         1.98134551e-18, -1.13229051e+00, -1.39981813e+00],
       [-1.69697201e-01, -1.07984958e-01, -1.73220715e-01, ...,
         1.98134551e-18, -1.13229051e+00, -1.39981813e+00],
       [ 6.91727741e-19, -8.13549749e-17,  9.68418838e-18, ...,
         1.98134551e-18, -1.13229051e+00, -1.39981813e+00]])

In [97]:
# Wrap the scaled crop array in a DataFrame with the crop column names (default 0..n-1 index).
scaled_crop_df = pd.DataFrame(scaled_numeric_data, columns=crop_vectors.columns)
# scaled_crop_df

In [99]:
# Standardize numeric features of the field dataset
# NOTE: reuses the `scaler` object from the crop cell; fit_transform refits it on the field data.
scaled_field_data = scaler.fit_transform(field_vectors)
# No index is passed, so the sampled field labels are replaced by a default 0..n-1 index.
scaled_field_df = pd.DataFrame(scaled_field_data, columns=field_vectors.columns)
# scaled_field_df

## align columns and combine data

In [107]:
# Columns that exist on only one side (crop traits vs. field features).
crop_only_columns = [name for name in scaled_crop_df.columns if name not in scaled_field_df.columns]
field_only_columns = [name for name in scaled_field_df.columns if name not in scaled_crop_df.columns]

# Zero-filled placeholders for the columns each frame is missing.
missing_cols_for_fields = pd.DataFrame(0, index=scaled_field_df.index, columns=crop_only_columns)
missing_cols_for_crops = pd.DataFrame(0, index=scaled_crop_df.index, columns=field_only_columns)

# Crops keep their own columns first, followed by the field-only placeholders;
# the field frame is then put into exactly that column order.
aligned_crop_data = pd.concat([scaled_crop_df, missing_cols_for_crops], axis=1)
aligned_field_data = pd.concat([scaled_field_df, missing_cols_for_fields], axis=1)[aligned_crop_data.columns]

In [109]:
# Preview the aligned field frame (crop-only columns are zero-filled).
aligned_field_data

Unnamed: 0,temp_opt_min,Temp_Opt_Max,Temp_Abs_Min,Temp_Abs_Max,Rain_Opt_Min,Rain_Opt_Max,Rain_Abs_Min,Rain_Abs_Max,Lat_Opt_Min,Lat_Opt_Max,...,S1903_C03_031E,S1903_C03_032E,S1903_C03_033E,S1903_C03_034E,S1903_C03_035E,S1903_C03_036E,S1903_C03_037E,S1903_C03_038E,S1903_C03_039E,S1903_C03_040E
0,0,0,0,0,0,0,0,0,0,0,...,-1.108117,-0.211010,-0.501308,-1.014859,-0.777728,-0.742151,0.135348,-1.154030,-0.804087,-0.115839
1,0,0,0,0,0,0,0,0,0,0,...,3.386515,2.950162,2.184038,2.146734,1.906238,2.247085,1.938604,2.600974,0.731614,0.615576
2,0,0,0,0,0,0,0,0,0,0,...,2.255439,2.336488,0.301984,2.366839,2.590573,1.735396,1.100143,1.808989,1.583234,0.820384
3,0,0,0,0,0,0,0,0,0,0,...,-0.792558,0.007132,-0.253105,-0.530548,0.010081,0.260755,-1.272674,-0.999636,-0.871281,0.128720
4,0,0,0,0,0,0,0,0,0,0,...,0.573800,0.195710,0.441630,0.896843,0.562569,0.557048,0.899279,0.973588,0.801497,0.308344
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0,0,0,0,0,0,0,0,0,0,...,-1.027494,-0.837332,-0.711614,-0.295277,-0.558776,-0.615467,0.743670,0.031426,-0.101924,-0.067470
996,0,0,0,0,0,0,0,0,0,0,...,-0.558593,-0.239440,0.632679,-0.979738,-1.234679,-1.278385,0.576147,-0.721692,-1.099172,0.487439
997,0,0,0,0,0,0,0,0,0,0,...,0.527423,-0.036161,0.059643,-0.205079,-0.315321,-0.220226,0.530978,0.291672,0.757270,-0.015840
998,0,0,0,0,0,0,0,0,0,0,...,-0.286708,-0.961482,-0.848530,0.183647,-0.086746,0.242374,0.438832,0.137856,0.534836,2.886677


In [110]:
# Preview the aligned crop frame (field-only columns are zero-filled).
aligned_crop_data

Unnamed: 0,temp_opt_min,Temp_Opt_Max,Temp_Abs_Min,Temp_Abs_Max,Rain_Opt_Min,Rain_Opt_Max,Rain_Abs_Min,Rain_Abs_Max,Lat_Opt_Min,Lat_Opt_Max,...,S1903_C03_031E,S1903_C03_032E,S1903_C03_033E,S1903_C03_034E,S1903_C03_035E,S1903_C03_036E,S1903_C03_037E,S1903_C03_038E,S1903_C03_039E,S1903_C03_040E
0,7.778052e-03,-7.338915e-04,5.631553e-02,-2.800895e-02,-5.860480e-01,-4.557701e-01,-8.055308e-01,7.120431e-02,1.443447e-16,-2.645257e-17,...,0,0,0,0,0,0,0,0,0,0
1,5.848527e-02,-7.338915e-04,1.909768e-01,-1.525617e-02,4.249104e-01,3.948860e-01,9.221193e-01,4.429701e-01,1.443447e-16,-2.645257e-17,...,0,0,0,0,0,0,0,0,0,0
2,7.778052e-03,-7.338915e-04,-1.101510e-02,-2.800895e-02,8.792424e-02,-2.431061e-01,6.753121e-01,-6.723274e-01,1.443447e-16,-2.645257e-17,...,0,0,0,0,0,0,0,0,0,0
3,1.091925e-01,2.839603e-02,-1.101510e-02,3.575494e-02,4.249104e-01,9.265461e-01,-3.119165e-01,1.930034e+00,-5.418152e-01,-4.242703e-01,...,0,0,0,0,0,0,0,0,0,0
4,7.778052e-03,2.839603e-02,-1.120110e-01,6.126050e-02,-9.230341e-01,-2.431061e-01,-1.175742e+00,1.186502e+00,-4.512106e-01,-3.057357e-01,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,7.778052e-03,-7.355869e-02,1.573115e-01,-9.177284e-02,8.792424e-02,-3.044204e-02,-1.885129e-01,-3.005615e-01,1.443447e-16,-2.645257e-17,...,0,0,0,0,0,0,0,0,0,0
2564,1.191179e-02,3.172064e-02,-3.077518e-02,4.185410e-02,-6.634266e-01,-6.935724e-01,-6.429600e-01,-8.194173e-01,-1.413843e-01,-1.765041e-01,...,0,0,0,0,0,0,0,0,0,0
2565,3.313166e-02,2.257004e-02,6.304860e-02,3.575494e-02,-1.057829e+00,-1.051229e+00,-1.116508e+00,-1.146329e+00,-4.253235e-01,-3.017846e-01,...,0,0,0,0,0,0,0,0,0,0
2566,-1.696972e-01,-1.079850e-01,-1.732207e-01,-1.080036e-01,-6.243419e-01,-8.134323e-01,-7.471945e-01,-7.277543e-01,3.289962e-01,1.506223e-01,...,0,0,0,0,0,0,0,0,0,0


In [111]:
# Number of latent "concept" dimensions kept by the SVD.
n_concepts = 50  # Adjust based on your needs

# Stack fields on top of crops so a single decomposition is fitted on both.
combined_data = pd.concat([aligned_field_data, aligned_crop_data], ignore_index=True)

# Fit a truncated SVD on the stacked data. The default solver is randomized,
# so the seed is fixed to keep the concept space (and every similarity derived
# from it) repeatable from run to run.
svd = TruncatedSVD(n_components=n_concepts, random_state=42)
svd.fit(combined_data)

# Project fields and crops separately into the shared concept space.
field_concept_vectors = svd.transform(aligned_field_data)
crop_concept_vectors = svd.transform(aligned_crop_data)

In [112]:
# Compute cosine similarity between fields and crops
# (fields are passed first, so rows correspond to fields and columns to crops)
similarity_matrix = cosine_similarity(field_concept_vectors, crop_concept_vectors)

# Convert to a DataFrame for easier manipulation
# No labels are passed, so rows and columns carry default integer positions.
similarity_df = pd.DataFrame(similarity_matrix)

In [114]:
# similarity_df

In [123]:
# Find the top 3 most similar crops for each field.
# Each row of similarity_df is one field; nlargest(3) yields the column labels
# (crop row positions) of its three highest similarities, best first.
top_similarities = similarity_df.apply(lambda row: row.nlargest(3).index.tolist(), axis=1)

# Create a DataFrame to store the top 3 crop positions for each field
top_crops_indices = pd.DataFrame(top_similarities.tolist(), columns=['Top1', 'Top2', 'Top3'])

# Ensure the crop names are preserved
crop_names = ecocrop_df['genus_species']

# Map positions back to crop names. The similarity columns are row positions of
# the crop table, so the lookup is positional (via the underlying array) and
# does not depend on whatever index labels ecocrop_df happens to carry.
top_crops_df = pd.DataFrame(
    crop_names.to_numpy()[top_crops_indices.to_numpy()],
    index=top_crops_indices.index,
    columns=top_crops_indices.columns,
)

# Save the top crops for each field to a CSV file
# top_crops_df.to_csv('top_crops_per_field.csv', index=False)

# The export above is disabled, so the message only reports the extraction.
print("Top 3 similar crops for each field have been extracted (CSV export is currently disabled)")

Top 3 similar crops for each field have been extracted and saved to 'top_crops_per_field.csv'


In [124]:
# Preview the crop row positions chosen for each field.
top_crops_indices

Unnamed: 0,Top1,Top2,Top3
0,932,614,2075
1,1791,1550,192
2,473,1041,1830
3,473,1041,1065
4,473,1041,2075
...,...,...,...
995,473,1041,1830
996,407,1655,284
997,407,284,1863
998,2381,242,730


In [125]:
# Preview the crop names chosen for each field.
top_crops_df

# np.unique(top_crops_df)

Unnamed: 0,Top1,Top2,Top3
0,Carissa edulis,Acorus calamus,Harpagophytum procumbens
1,Raphanus caudatus,Mentha spicata var. crispa,Syzygium malaccense
2,Sambucus nigra,Copaifera langsdorfii,Sambucus racemosa
3,Sambucus nigra,Copaifera langsdorfii,Cryptotaenia japonica
4,Sambucus nigra,Copaifera langsdorfii,Harpagophytum procumbens
...,...,...,...
995,Sambucus nigra,Copaifera langsdorfii,Sambucus racemosa
996,Urena lobata,Perilla frutescens,Ocimum basilicum
997,Urena lobata,Ocimum basilicum,Setaria italica
998,Citrus aurantium ssp. bergamia,Litchi chinensis,Arachis glabrata


In [128]:
# Index labels of the sampled fields (labels from the full feature table).
sample_df.index

Int64Index([152033, 381389, 266530, 489947,  80191, 448560, 231990, 381989,
             86985, 564380,
            ...
            141217, 216335, 481668, 160023, 237221, 594919,   1774,  54132,
            224840, 181396],
           dtype='int64', length=1000)

In [134]:
# Pull the attribute rows for the sampled fields. sample_df.index holds index
# labels, so the lookup is label-based (.loc) rather than positional (.iloc);
# the two only coincide while the table has a default 0..n-1 index.
attributes_sample_df = attributes_df.loc[sample_df.index]

In [135]:
# Preview with the field labels moved into an "index" column (result is displayed only, not stored).
attributes_sample_df.reset_index()

Unnamed: 0,index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,Shp_Len,Shp_Area,geometry,Longitude,Latitude,color,Crop,county_fips,GEO_ID,NAME
0,152033,351623007696318,1623,5.255025,68,74,74,68,68,68,...,771.807691,21266.416746,"MULTIPOLYGON (((-105.24793 32.90521, -105.2479...",-105.249074,32.904562,#95ce93,Evergreen Forest,35005,0500000US35005,"Chaves County, New Mexico"
1,381389,081623009540670,1623,8.307097,61,61,24,61,24,24,...,1088.952150,33617.762705,"MULTIPOLYGON (((-103.91948 39.46212, -103.9194...",-103.918794,39.461358,#bfbf7a,Fallow/Idle Cropland,08039,0500000US08039,"Elbert County, Colorado"
2,266530,081623007149539,1623,8.107864,176,176,36,176,176,176,...,1282.057768,32811.492795,"MULTIPOLYGON (((-106.96726 40.50763, -106.9666...",-106.966704,40.506718,#e9ffbe,Grass/Pasture,08107,0500000US08107,"Routt County, Colorado"
3,489947,081623013012895,1623,3.932812,1,1,1,1,1,1,...,623.052731,15915.589854,"MULTIPOLYGON (((-102.87996 40.65375, -102.8799...",-102.881267,40.653551,#a87000,Winter Wheat,08075,0500000US08075,"Logan County, Colorado"
4,80191,491623015919207,1623,4.805755,36,21,1,36,36,36,...,538.114935,19448.279852,"MULTIPOLYGON (((-112.08148 41.27419, -112.0821...",-112.082468,41.274256,#ffa8e3,Alfalfa,49057,0500000US49057,"Weber County, Utah"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,594919,041623013567534,1623,9.855910,22,61,37,22,59,71,...,856.554935,39885.610883,"MULTIPOLYGON (((-114.75073 32.64048, -114.7507...",-114.752050,32.641086,#e2007f,Dbl Crop Lettuce/Barley,04027,0500000US04027,"Yuma County, Arizona"
996,1774,491623000062816,1623,2.774609,61,24,61,61,36,36,...,544.493609,11228.489576,"MULTIPOLYGON (((-109.45107 37.61833, -109.4513...",-109.451592,37.617703,#bfbf7a,Fallow/Idle Cropland,49037,0500000US49037,"San Juan County, Utah"
997,54132,491623013945595,1623,2.902467,61,37,37,37,37,37,...,482.787757,11745.913065,"MULTIPOLYGON (((-112.09211 41.87342, -112.0921...",-112.092724,41.872921,#a5f58d,Other Hay/Non Alfalfa,49003,0500000US49003,"Box Elder County, Utah"
998,224840,081623006250293,1623,3.605620,61,61,24,61,24,61,...,499.900366,14591.483531,"MULTIPOLYGON (((-102.43065 39.55827, -102.4303...",-102.431066,39.557590,#a87000,Winter Wheat,08063,0500000US08063,"Kit Carson County, Colorado"


In [136]:
# Preview the sampled attribute rows, still indexed by the original field labels.
attributes_sample_df

Unnamed: 0,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,CDL2022,...,Shp_Len,Shp_Area,geometry,Longitude,Latitude,color,Crop,county_fips,GEO_ID,NAME
152033,351623007696318,1623,5.255025,68,74,74,68,68,68,68,...,771.807691,21266.416746,"MULTIPOLYGON (((-105.24793 32.90521, -105.2479...",-105.249074,32.904562,#95ce93,Evergreen Forest,35005,0500000US35005,"Chaves County, New Mexico"
381389,081623009540670,1623,8.307097,61,61,24,61,24,24,61,...,1088.952150,33617.762705,"MULTIPOLYGON (((-103.91948 39.46212, -103.9194...",-103.918794,39.461358,#bfbf7a,Fallow/Idle Cropland,08039,0500000US08039,"Elbert County, Colorado"
266530,081623007149539,1623,8.107864,176,176,36,176,176,176,176,...,1282.057768,32811.492795,"MULTIPOLYGON (((-106.96726 40.50763, -106.9666...",-106.966704,40.506718,#e9ffbe,Grass/Pasture,08107,0500000US08107,"Routt County, Colorado"
489947,081623013012895,1623,3.932812,1,1,1,1,1,1,61,...,623.052731,15915.589854,"MULTIPOLYGON (((-102.87996 40.65375, -102.8799...",-102.881267,40.653551,#a87000,Winter Wheat,08075,0500000US08075,"Logan County, Colorado"
80191,491623015919207,1623,4.805755,36,21,1,36,36,36,36,...,538.114935,19448.279852,"MULTIPOLYGON (((-112.08148 41.27419, -112.0821...",-112.082468,41.274256,#ffa8e3,Alfalfa,49057,0500000US49057,"Weber County, Utah"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594919,041623013567534,1623,9.855910,22,61,37,22,59,71,230,...,856.554935,39885.610883,"MULTIPOLYGON (((-114.75073 32.64048, -114.7507...",-114.752050,32.641086,#e2007f,Dbl Crop Lettuce/Barley,04027,0500000US04027,"Yuma County, Arizona"
1774,491623000062816,1623,2.774609,61,24,61,61,36,36,24,...,544.493609,11228.489576,"MULTIPOLYGON (((-109.45107 37.61833, -109.4513...",-109.451592,37.617703,#bfbf7a,Fallow/Idle Cropland,49037,0500000US49037,"San Juan County, Utah"
54132,491623013945595,1623,2.902467,61,37,37,37,37,37,37,...,482.787757,11745.913065,"MULTIPOLYGON (((-112.09211 41.87342, -112.0921...",-112.092724,41.872921,#a5f58d,Other Hay/Non Alfalfa,49003,0500000US49003,"Box Elder County, Utah"
224840,081623006250293,1623,3.605620,61,61,24,61,24,61,24,...,499.900366,14591.483531,"MULTIPOLYGON (((-102.43065 39.55827, -102.4303...",-102.431066,39.557590,#a87000,Winter Wheat,08063,0500000US08063,"Kit Carson County, Colorado"


In [138]:
# Start the recommendation table from the sampled attributes. reset_index() moves the
# original field labels into an "index" column and gives the rows a 0..n-1 index.
recommedation_df = attributes_sample_df.copy().reset_index()
recommedation_df

Unnamed: 0,index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,Shp_Len,Shp_Area,geometry,Longitude,Latitude,color,Crop,county_fips,GEO_ID,NAME
0,152033,351623007696318,1623,5.255025,68,74,74,68,68,68,...,771.807691,21266.416746,"MULTIPOLYGON (((-105.24793 32.90521, -105.2479...",-105.249074,32.904562,#95ce93,Evergreen Forest,35005,0500000US35005,"Chaves County, New Mexico"
1,381389,081623009540670,1623,8.307097,61,61,24,61,24,24,...,1088.952150,33617.762705,"MULTIPOLYGON (((-103.91948 39.46212, -103.9194...",-103.918794,39.461358,#bfbf7a,Fallow/Idle Cropland,08039,0500000US08039,"Elbert County, Colorado"
2,266530,081623007149539,1623,8.107864,176,176,36,176,176,176,...,1282.057768,32811.492795,"MULTIPOLYGON (((-106.96726 40.50763, -106.9666...",-106.966704,40.506718,#e9ffbe,Grass/Pasture,08107,0500000US08107,"Routt County, Colorado"
3,489947,081623013012895,1623,3.932812,1,1,1,1,1,1,...,623.052731,15915.589854,"MULTIPOLYGON (((-102.87996 40.65375, -102.8799...",-102.881267,40.653551,#a87000,Winter Wheat,08075,0500000US08075,"Logan County, Colorado"
4,80191,491623015919207,1623,4.805755,36,21,1,36,36,36,...,538.114935,19448.279852,"MULTIPOLYGON (((-112.08148 41.27419, -112.0821...",-112.082468,41.274256,#ffa8e3,Alfalfa,49057,0500000US49057,"Weber County, Utah"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,594919,041623013567534,1623,9.855910,22,61,37,22,59,71,...,856.554935,39885.610883,"MULTIPOLYGON (((-114.75073 32.64048, -114.7507...",-114.752050,32.641086,#e2007f,Dbl Crop Lettuce/Barley,04027,0500000US04027,"Yuma County, Arizona"
996,1774,491623000062816,1623,2.774609,61,24,61,61,36,36,...,544.493609,11228.489576,"MULTIPOLYGON (((-109.45107 37.61833, -109.4513...",-109.451592,37.617703,#bfbf7a,Fallow/Idle Cropland,49037,0500000US49037,"San Juan County, Utah"
997,54132,491623013945595,1623,2.902467,61,37,37,37,37,37,...,482.787757,11745.913065,"MULTIPOLYGON (((-112.09211 41.87342, -112.0921...",-112.092724,41.872921,#a5f58d,Other Hay/Non Alfalfa,49003,0500000US49003,"Box Elder County, Utah"
998,224840,081623006250293,1623,3.605620,61,61,24,61,24,61,...,499.900366,14591.483531,"MULTIPOLYGON (((-102.43065 39.55827, -102.4303...",-102.431066,39.557590,#a87000,Winter Wheat,08063,0500000US08063,"Kit Carson County, Colorado"


In [139]:
# Attach the Top1/Top2/Top3 crop-name columns to the recommendation table.
# Both frames carry a 0..n-1 row index, so row i of each refers to the same sampled field.
recommedation_df[top_crops_df.columns] = top_crops_df[top_crops_df.columns]
recommedation_df

Unnamed: 0,index,CSBID,CSBYEARS,CSBACRES,CDL2016,CDL2017,CDL2018,CDL2019,CDL2020,CDL2021,...,Longitude,Latitude,color,Crop,county_fips,GEO_ID,NAME,Top1,Top2,Top3
0,152033,351623007696318,1623,5.255025,68,74,74,68,68,68,...,-105.249074,32.904562,#95ce93,Evergreen Forest,35005,0500000US35005,"Chaves County, New Mexico",Carissa edulis,Acorus calamus,Harpagophytum procumbens
1,381389,081623009540670,1623,8.307097,61,61,24,61,24,24,...,-103.918794,39.461358,#bfbf7a,Fallow/Idle Cropland,08039,0500000US08039,"Elbert County, Colorado",Raphanus caudatus,Mentha spicata var. crispa,Syzygium malaccense
2,266530,081623007149539,1623,8.107864,176,176,36,176,176,176,...,-106.966704,40.506718,#e9ffbe,Grass/Pasture,08107,0500000US08107,"Routt County, Colorado",Sambucus nigra,Copaifera langsdorfii,Sambucus racemosa
3,489947,081623013012895,1623,3.932812,1,1,1,1,1,1,...,-102.881267,40.653551,#a87000,Winter Wheat,08075,0500000US08075,"Logan County, Colorado",Sambucus nigra,Copaifera langsdorfii,Cryptotaenia japonica
4,80191,491623015919207,1623,4.805755,36,21,1,36,36,36,...,-112.082468,41.274256,#ffa8e3,Alfalfa,49057,0500000US49057,"Weber County, Utah",Sambucus nigra,Copaifera langsdorfii,Harpagophytum procumbens
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,594919,041623013567534,1623,9.855910,22,61,37,22,59,71,...,-114.752050,32.641086,#e2007f,Dbl Crop Lettuce/Barley,04027,0500000US04027,"Yuma County, Arizona",Sambucus nigra,Copaifera langsdorfii,Sambucus racemosa
996,1774,491623000062816,1623,2.774609,61,24,61,61,36,36,...,-109.451592,37.617703,#bfbf7a,Fallow/Idle Cropland,49037,0500000US49037,"San Juan County, Utah",Urena lobata,Perilla frutescens,Ocimum basilicum
997,54132,491623013945595,1623,2.902467,61,37,37,37,37,37,...,-112.092724,41.872921,#a5f58d,Other Hay/Non Alfalfa,49003,0500000US49003,"Box Elder County, Utah",Urena lobata,Ocimum basilicum,Setaria italica
998,224840,081623006250293,1623,3.605620,61,61,24,61,24,61,...,-102.431066,39.557590,#a87000,Winter Wheat,08063,0500000US08063,"Kit Carson County, Colorado",Citrus aurantium ssp. bergamia,Litchi chinensis,Arachis glabrata


In [165]:
# List every column available in the EcoCrop table (reference for columns_to_keep).
ecocrop_df.columns

Index(['crop_code', 'genus_species', 'Life.form', 'Habit', 'Life.span',
       'Physiology', 'Category', 'Plant.attributes', 'temp_opt_min',
       'Temp_Opt_Max', 'Temp_Abs_Min', 'Temp_Abs_Max', 'Rain_Opt_Min',
       'Rain_Opt_Max', 'Rain_Abs_Min', 'Rain_Abs_Max', 'Lat_Opt_Min',
       'Lat_Opt_Max', 'Lat_Abs_Min', 'Lat_Abs_Max', 'Alt_Opt_Min',
       'Alt_Opt_Max', 'Alt_Abs_Min', 'Alt_Abs_Max', 'pH_Opt_Min', 'pH_Opt_Max',
       'pH_Abs_Min', 'pH_Abs_Max', 'Light_Opt_Min', 'Light_Opt_Max',
       'Light_Abs_Min', 'Light_Abs_Max', 'Depth_Opt', 'Depth_Abs',
       'Texture_Ops', 'Texture_Abs', 'Fertility_Ops', 'Fertility_Abs',
       'Al_Toxicity_Opt', 'Al_Toxicity_Abs', 'Salinity_Ops', 'Salinity_Abs',
       'drainage_opt', 'drainage_abs', 'Climate.Zone', 'photoperiod',
       'Killing.temp..during.rest', 'Killing.temp..early.growth',
       'Abiotic.toler.', 'Abiotic.suscept.', 'Introduction.risks.',
       'Product..system', 'Cropping.system', 'Subsystem', 'Companion.species',
    