In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans
import seaborn as sns

In [None]:
# Read the two raw CSV inputs: the edible-plant catalogue and the weather
# observations. Paths are relative to the notebook's working directory.
plants_df, weather_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/dataset_edible_plants.csv',
        '../data/dataset_weather.csv',
    )
)

In [None]:
# Display the weather DataFrame loaded from dataset_weather.csv.
weather_df

In [None]:
# Display the plants DataFrame loaded from dataset_edible_plants.csv.
plants_df

In [None]:
# List the distinct values found in the 'Watering Needs' column.
pd.unique(plants_df['Watering Needs'])

In [None]:
# Convert the numeric plant attributes to integer type.
# NOTE(review): astype(int) drops any fractional part (a weight of 0.4 kg
# would become 0) and raises if a value is missing — confirm that whole-number
# precision is intended for the weight and protein columns.
integer_columns = [
    'Time to Consumable (days)',
    'Weight when Full Grown (kg)',
    'Kcal per 100g',
    'Proteins per 100g (g)',
]
for column in integer_columns:
    plants_df[column] = plants_df[column].astype(int)

# Encode watering needs as an ordinal scale. The guard keeps the cell
# idempotent: once the column is numeric, mapping it again would find none of
# the text labels and overwrite every value with NaN.
watering_levels = {'Low': 0, 'Moderate': 1, 'High': 2}
if not pd.api.types.is_numeric_dtype(plants_df['Watering Needs']):
    plants_df['Watering Needs'] = plants_df['Watering Needs'].map(watering_levels)

# Persist the processed table next to the raw data.
plants_df.to_csv('../data/dataset_edible_plants_processed.csv', index=False)

In [None]:
# Display plants_df again, after the dtype conversion cell above has run.
plants_df

In [None]:
# Collect the distinct Köppen-Geiger codes that occur in the weather data
climate_types = pd.unique(weather_df['LocationKoppenGeigerClassification'])
climate_types

In [None]:
# Köppen-Geiger code -> coarse climate name.
climate_mapping = {
    # Tropical climates
    'Af': 'Tropical',     # Tropical rainforest
    'Am': 'Tropical',     # Tropical monsoon
    'Aw': 'Tropical',     # Tropical savanna
    
    # Arid climates
    'BWh': 'Arid',        # Hot desert
    'BWk': 'Arid',        # Cold desert 
    'BSh': 'Arid',        # Hot semi-arid
    'BSk': 'Arid',        # Cold semi-arid
    
    # Mediterranean
    'Csa': 'Mediterranean', # Mediterranean hot summer
    'Csb': 'Mediterranean', # Mediterranean warm summer
    'Csc': 'Mediterranean', # Mediterranean cold summer
    
    # Temperate
    'Cfa': 'Temperate',   # Humid subtropical
    'Cfb': 'Temperate',   # Oceanic
    'Cfc': 'Temperate',   # Subpolar oceanic
    'Dfa': 'Temperate',   # Humid continental hot summer
    'Dfb': 'Temperate',   # Humid continental warm summer
    'Dfc': 'Temperate',   # Subarctic (could be Polar in some cases)
    'Dfd': 'Temperate',   # Subarctic very cold winter
    
    # Polar
    'ET': 'Polar',        # Tundra
    'EF': 'Polar'         # Ice cap
}

# Create new column with mapped climate names
koppen_codes = weather_df['LocationKoppenGeigerClassification']
weather_df['ClimateType'] = koppen_codes.map(climate_mapping)

# Series.map turns every code that is absent from climate_mapping into NaN.
# Report such codes explicitly so the gap is not carried silently into the
# cells that consume 'ClimateType'.
unmapped_codes = sorted(set(koppen_codes.dropna()) - set(climate_mapping), key=str)
if unmapped_codes:
    print(f"Warning: no climate mapping for Köppen codes: {unmapped_codes}")

# Display unique values to verify mapping (bare expression -> rich table)
weather_df[['Location', 'ClimateType']].drop_duplicates()

In [None]:
# Select the weather rows recorded for the 'Chestnut Tree Café' location
chestnut_cafe = weather_df.loc[weather_df['Location'].eq('Chestnut Tree Café')]
chestnut_cafe

In [None]:
# One row per distinct (Location, ClimateType) pair
locations = weather_df[['Location', 'ClimateType']].drop_duplicates()
# Location -> climate type lookup, taken straight from the indexed column
locations_dict = locations.set_index('Location')['ClimateType'].to_dict()
locations_dict

In [None]:
# Distinct Köppen-Geiger codes in the weather data.
# NOTE(review): this repeats an earlier cell, and the next cell rebinds
# `climate_types` to the plants' growth climates before it is read again.
climate_types = pd.unique(weather_df['LocationKoppenGeigerClassification'])

In [None]:
# Distinct growth climates declared for the plants (rebinds `climate_types`).
climate_types = pd.unique(plants_df['Growth Climate'])
climate_types

In [None]:
# Fit a label encoder on the plants' growth climates, then store the
# resulting integer codes in a new 'climate_code' column.
climate_encoder = LabelEncoder().fit(plants_df['Growth Climate'])
plants_df['climate_code'] = climate_encoder.transform(plants_df['Growth Climate'])

In [None]:
# Compatibility scoring between a plant's growth climate and a location climate.
# The lookup tables are module-level so they are built once, not on every call.

# Köppen-Geiger code -> growth climate available in plants_df
_KOPPEN_TO_GROWTH = {
    # Tropical climates
    'Af': 'Tropical',     # Tropical rainforest
    'Am': 'Tropical',     # Tropical monsoon
    'Aw': 'Tropical',     # Tropical savanna

    # Arid climates
    'BWh': 'Arid',        # Hot desert
    'BWk': 'Arid',        # Cold desert
    'BSh': 'Arid',        # Hot semi-arid
    'BSk': 'Arid',        # Cold semi-arid

    # Mediterranean
    'Csa': 'Mediterranean', # Mediterranean hot summer
    'Csb': 'Mediterranean', # Mediterranean warm summer
    'Csc': 'Mediterranean', # Mediterranean cold summer

    # Temperate
    'Cfa': 'Temperate',   # Humid subtropical
    'Cfb': 'Temperate',   # Oceanic
    'Cfc': 'Temperate',   # Subpolar oceanic
    'Dfa': 'Temperate',   # Humid continental hot summer
    'Dfb': 'Temperate',   # Humid continental warm summer
    'Dfc': 'Temperate',   # Subarctic (could be Polar in some cases)
    'Dfd': 'Temperate',   # Subarctic very cold winter

    # Polar
    'ET': 'Polar',        # Tundra
    'EF': 'Polar'         # Ice cap
}

# Base score, indexed as [plant growth climate][location growth climate]
_CLIMATE_COMPATIBILITY = {
    'Tropical': {'Tropical': 10, 'Mediterranean': 5, 'Temperate': 4, 'Polar': 0, 'Arid': 3},
    'Mediterranean': {'Tropical': 5, 'Mediterranean': 10, 'Temperate': 7, 'Polar': 0, 'Arid': 6},
    'Temperate': {'Tropical': 3, 'Mediterranean': 7, 'Temperate': 10, 'Polar': 2, 'Arid': 4},
    'Polar': {'Tropical': 0, 'Mediterranean': 1, 'Temperate': 3, 'Polar': 10, 'Arid': 0},
    'Arid': {'Tropical': 2, 'Mediterranean': 5, 'Temperate': 3, 'Polar': 0, 'Arid': 10}
}

# Score returned when either climate is missing from the table above
_DEFAULT_COMPATIBILITY = 3


def calculate_compatibility(plant_climate, location_climate):
    """Score how well a plant's growth climate suits a location's climate.

    Parameters
    ----------
    plant_climate : str
        Growth climate of the plant ('Tropical', 'Mediterranean',
        'Temperate', 'Polar' or 'Arid').
    location_climate : str
        Climate of the location, given either as one of the growth climate
        names above or as a raw Köppen-Geiger code (e.g. 'Csa'), which is
        translated to its growth climate first.

    Returns
    -------
    int
        The table score for the pair, plus 1 for a Mediterranean plant in an
        Arid location. Any pair not present in the table (unknown or missing
        climate) yields the default score of 3.
    """
    # Translate Köppen codes; values that are not Köppen codes (growth climate
    # names, NaN, anything unknown) pass through unchanged.
    location_growth_climate = _KOPPEN_TO_GROWTH.get(location_climate, location_climate)

    base_score = _CLIMATE_COMPATIBILITY.get(plant_climate, {}).get(
        location_growth_climate, _DEFAULT_COMPATIBILITY
    )

    # Many Mediterranean plants are drought-tolerant, so they get a bonus in
    # arid locations. (Tropical plants in polar locations need no special
    # case: the table already scores that pair 0.)
    if plant_climate == 'Mediterranean' and location_growth_climate == 'Arid':
        base_score += 1

    return base_score
        
# Create compatibility matrix: {location climate: {plant name: score}}.
# Only the 'Name' and 'Growth Climate' columns are needed, so the two columns
# are zipped directly instead of materialising every row with iterrows().
compatibility_matrix = {
    climate: {
        plant_name: calculate_compatibility(growth_climate, climate)
        for plant_name, growth_climate in zip(plants_df['Name'], plants_df['Growth Climate'])
    }
    for climate in weather_df['ClimateType'].unique()
}

# Convert to DataFrame for easier use (rows: plant names, columns: climates)
compatibility_df = pd.DataFrame(compatibility_matrix)

In [None]:
# Distinct mapped climate types present in the weather data.
pd.unique(weather_df['ClimateType'])

In [None]:
# Number of distinct plant names (a missing name, if any, counts once).
plants_df['Name'].nunique(dropna=False)

In [None]:
# Preview the first five plant rows.
plants_df.iloc[:5]

In [None]:
# Plot a heatmap of the compatibility scores for the first ten plants.
# NOTE(review): 'RdYlGn_r' is the reversed palette, so high scores render red
# and low scores green — confirm this is the intended reading.
ax = sns.heatmap(compatibility_df.head(10), cmap='RdYlGn_r', center=5, annot=True)
# Label the figure so it can be read on its own; the trailing semicolon keeps
# the return value's repr out of the cell output.
ax.set(
    title='Plant / climate compatibility (first 10 plants)',
    xlabel='Location climate type',
    ylabel='Plant',
);

In [None]:
# Display the compatibility table built from compatibility_matrix.
compatibility_df

In [None]:
# Write the compatibility table to disk; the index (to_csv's default) is kept
# so the row labels are written as the first column.
compatibility_df.to_csv('../data/plants_weather_compatibilities.csv')

In [None]:
# Display the raw nested dict behind compatibility_df.
compatibility_matrix