In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import re

### Load Dataset

In [2]:
# Read the combined dataset from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="DSCP Combined Dahlia.csv")

### Reassign NPK Optimals 

In [3]:
# Break the "N:P:K" text into three pieces, then store them as float columns.
npk_parts = df['NPK Ratio'].str.split(':', expand=True)
df[['N', 'P', 'K']] = npk_parts.astype(float)

# Classify an N/P/K triple by its P:N and K:N ratios using overlapping ranges.
def classify_adjusted_npk(n, p, k):
    """Label an NPK reading by how its P and K compare with N.

    Parameters
    ----------
    n, p, k : float
        Nitrogen, phosphorus and potassium components of the ratio.

    Returns
    -------
    str
        'Unknown' when any component is missing (NaN/None) or zero, otherwise
        the first label whose range contains BOTH ``p / n`` and ``k / n``
        (each rounded to one decimal), or 'Unbalanced' when none does.
    """
    # A missing component would otherwise fail every range comparison below
    # and be reported as 'Unbalanced', which hides the fact that data is absent.
    if pd.isna(n) or pd.isna(p) or pd.isna(k):
        return 'Unknown'
    if n == 0 or p == 0 or k == 0:
        return 'Unknown'

    # Simple ratio normalization relative to nitrogen
    p_ratio = round(p / n, 1)
    k_ratio = round(k / n, 1)

    # The ranges overlap, so order matters: the first matching entry wins.
    labelled_ranges = (
        (1.4, 2.6, 'Optimal (Long Bean)'),
        (0.9, 2.1, 'Balanced (Lady Finger)'),
        (0.4, 1.6, 'Balanced (General)'),
        (0.1, 1.1, 'High Nitrogen (Spinach Leaning)'),
    )
    for low, high, label in labelled_ranges:
        if low <= p_ratio <= high and low <= k_ratio <= high:
            return label
    return 'Unbalanced'

def _npk_label(row):
    """Classify one dataframe row from its 'N', 'P' and 'K' columns."""
    return classify_adjusted_npk(row['N'], row['P'], row['K'])

df['NPK - Plant Readiness'] = df.apply(_npk_label, axis=1)

### Reassign Greens and Browns values

In [4]:
# Step 1: parse timestamps, then order readings chronologically within each device
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(['devicename', 'date']).reset_index(drop=True)

# Step 2: keep only rows whose feeding description is present and not blank
has_description = df['feeding_description'].notna()
is_non_blank = df['feeding_description'].str.strip() != ''
df_with_feed = df[has_description & is_non_blank].copy()

# Work with the description as a plain string from here on
df_with_feed['feeding_description'] = df_with_feed['feeding_description'].astype(str)
df_with_feed = df_with_feed.sort_values(['devicename', 'date']).reset_index(drop=True)

# Function to extract Browns
def extract_browns(desc):
    """Return the grams of dried leaves mentioned in a feeding description.

    Parameters
    ----------
    desc : str
        Free-text feeding description; matching is case-insensitive.

    Returns
    -------
    float or int
        The amount written before "g/gram(s) of dried leaves" (whole or
        decimal number), or 0 when no such phrase is present. Descriptions
        that mention 'coffee' or 'mango skins' always yield 0.
    """
    desc = desc.lower()
    # Any mention of coffee or mango skins zeroes the browns for this feed,
    # even if dried leaves are also listed.
    # NOTE(review): presumably such feeds are meant to count as greens-only — confirm.
    if 'coffee' in desc or 'mango skins' in desc:
        return 0
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    match = re.search(r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*of\s*dried leaves', desc)
    return float(match.group(1)) if match else 0

# Function to extract Water
def extract_water(desc):
    """Return the grams of water mentioned in a feeding description.

    Matches "<number> g/gram(s) [of] [distilled] water", case-insensitively.
    The number may be whole or decimal. Returns 0 when nothing matches.
    """
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    match = re.search(
        r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*(?:of\s*)?(?:distilled\s*)?water',
        desc.lower(),
    )
    return float(match.group(1)) if match else 0

# Greens extractors
def extract_coffee_as_green(desc):
    """Return the grams of coffee mentioned in a feeding description.

    Matches "<number> g/gram(s) of [ground] coffee", case-insensitively.
    The number may be whole or decimal. Returns 0 when nothing matches.
    """
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    # A trailing "grounds" needs no separate alternative: matching "coffee"
    # already covers "coffee grounds".
    match = re.search(r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*of\s*(?:ground\s*)?coffee', desc.lower())
    return float(match.group(1)) if match else 0

def extract_dried_mango_skin(desc):
    """Return the grams of dried mango skin(s) mentioned in a feeding description.

    Matching is case-insensitive and the number may be whole or decimal.
    Returns 0 when the phrase is absent.
    """
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    match = re.search(r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*of\s*dried\s*mango\s*skins?', desc.lower())
    return float(match.group(1)) if match else 0

def extract_dried_radish_tops(desc):
    """Return the grams of dried radish top(s) mentioned in a feeding description.

    Matching is case-insensitive and the number may be whole or decimal.
    Returns 0 when the phrase is absent.
    """
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    match = re.search(r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*of\s*dried\s*radish\s*tops?', desc.lower())
    return float(match.group(1)) if match else 0

def extract_crushed_eggshells(desc):
    """Return the grams of crushed eggshell(s) mentioned in a feeding description.

    Matching is case-insensitive and the number may be whole or decimal.
    Returns 0 when the phrase is absent.
    """
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    match = re.search(r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*of\s*crushed\s*eggshells?', desc.lower())
    return float(match.group(1)) if match else 0

def extract_fresh_mint(desc):
    """Return the grams of fresh mint leaves mentioned in a feeding description.

    Matching is case-insensitive and the number may be whole or decimal.
    Returns 0 when the phrase is absent.
    """
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    match = re.search(r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*of\s*fresh\s*mint\s*leaves?', desc.lower())
    return float(match.group(1)) if match else 0

def extract_dried_chye_sim(desc):
    """Return the grams of dried chye sim mentioned in a feeding description.

    Matching is case-insensitive and the number may be whole or decimal.
    Returns 0 when the phrase is absent.
    """
    # Accept decimals: with a bare \d+ "12.5g" would be captured as "5".
    match = re.search(r'(\d+(?:\.\d+)?)\s*g(?:ram)?s?\s*of\s*dried\s*chye\s*sim', desc.lower())
    return float(match.group(1)) if match else 0

# Parse browns and water amounts out of each feeding description
feed_text = df_with_feed['feeding_description']
df_with_feed['Browns (g)'] = feed_text.apply(extract_browns)
df_with_feed['Water (g)'] = feed_text.apply(extract_water)

# Greens are the sum of every individual greens source found in the description.
# The column is created here if absent and overwritten if already present.
greens_extractors = (
    extract_coffee_as_green,
    extract_dried_mango_skin,
    extract_dried_radish_tops,
    extract_crushed_eggshells,
    extract_fresh_mint,
    extract_dried_chye_sim,
)
greens_total = feed_text.apply(greens_extractors[0])
for extractor in greens_extractors[1:]:
    greens_total = greens_total + feed_text.apply(extractor)
df_with_feed['Greens (g)'] = greens_total

In [5]:
# New C:N Status Classification based on plant categories
def classify_cn_status(cn_ratio):
    """Map a C:N ratio to the plant categories whose range contains it.

    Parameters
    ----------
    cn_ratio : str or number
        Either a ratio string such as "15:1" (only the part before the first
        ':' is used) or a bare number. Decimal values are accepted.

    Returns
    -------
    str
        Comma-separated category labels whose inclusive range contains the
        value, "Not Optimal" when it falls in none of them, or "Unknown" when
        the input is missing or cannot be parsed as a number.
    """
    try:
        if isinstance(cn_ratio, str):
            cn_value = float(cn_ratio.split(':')[0])
        else:
            cn_value = float(cn_ratio)
    except (TypeError, ValueError):
        # None, non-numeric text, or an unsupported type.
        return "Unknown"

    # NaN fails every comparison below; report it as missing rather than
    # as a measured-but-not-optimal value.
    if pd.isna(cn_value):
        return "Unknown"

    # (low, high, label) with inclusive bounds; output keeps this order.
    category_ranges = (
        (10, 15, "Leafy Greens Optimal"),
        (15, 20, "Fruit-Bearing Veggies Optimal"),
        (15, 25, "Root Vegetables Optimal"),
        (12, 20, "Herbs Optimal"),
        (15, 25, "Flowering Plants Optimal"),
        (20, 30, "Woody Shrubs/Trees Optimal"),
    )
    status = [label for low, high, label in category_ranges if low <= cn_value <= high]

    if not status:
        return "Not Optimal"
    return ", ".join(status)

# Label every reading with the plant categories its C:N ratio suits
cn_status = df['C_N_Ratio'].map(classify_cn_status)
df['C_N_Status'] = cn_status

In [None]:
# Persist the feed rows (without the index) to a CSV in the working directory.
df_with_feed.to_csv(path_or_buf="df_with_feed_output.csv", index=False)


### Track the feeds and the days taken to reach optimal C:N

In [6]:
# For every reading whose C:N status is in at least one optimal range, record
# the most recent feed on or before that reading and the days elapsed since it.

# Statuses that do NOT represent an optimal range. "Not Optimal" contains the
# substring "Optimal", so a plain substring test would wrongly accept it.
non_optimal_statuses = {"Not Optimal", "Unknown"}

# Declared up front so the frame keeps its columns even when no row qualifies.
transition_columns = [
    'Device', 'date_of_feed', 'CO2', 'Soil Nitrogen', 'Soil EC', 'Soil Moisture',
    'feeding_description', 'Greens (g)', 'Browns (g)', 'Water (g)',
    'date_of_optimal', 'C_N_Ratio', 'C_N_Status', 'Days since last fed',
]

results = []
for device in df['devicename'].unique():
    device_data = df[df['devicename'] == device]
    # Sort once per device; the latest feed on/before a date is then the last match.
    feed_data = df_with_feed[df_with_feed['devicename'] == device].sort_values('date', kind='stable')

    for idx, row in device_data.iterrows():
        status = row['C_N_Status']
        if pd.isna(status) or status in non_optimal_statuses or 'Optimal' not in status:
            continue

        prior_feeds = feed_data[feed_data['date'] <= row['date']]
        if prior_feeds.empty:
            continue

        feed_row = prior_feeds.iloc[-1]
        day_gap = (row['date'] - feed_row['date']).days
        results.append({
            'Device': device,
            'date_of_feed': feed_row['date'],
            'CO2': row['CO2'],
            'Soil Nitrogen': row['Soil Nitrogen'],
            'Soil EC': row['Soil EC'],
            'Soil Moisture': row['Soil Moisture'],
            'feeding_description': feed_row['feeding_description'],
            'Greens (g)': feed_row['Greens (g)'],
            'Browns (g)': feed_row['Browns (g)'],
            'Water (g)': feed_row['Water (g)'],
            'date_of_optimal': row['date'],
            'C_N_Ratio': row['C_N_Ratio'],
            'C_N_Status': row['C_N_Status'],
            'Days since last fed': day_gap
        })

df_transitions = pd.DataFrame(results, columns=transition_columns)

In [7]:
# Keep only transitions that have both a status label and a moisture reading
has_status = df_transitions['C_N_Status'].notna()
has_moisture = df_transitions['Soil Moisture'].notna()
df_transitions = df_transitions[has_status & has_moisture]

plant_categories = [
    "Leafy Greens Optimal",
    "Fruit-Bearing Veggies Optimal",
    "Root Vegetables Optimal",
    "Herbs Optimal",
    "Flowering Plants Optimal",
    "Woody Shrubs/Trees Optimal",
]

# Mean soil moisture (2 d.p.) over the rows whose status mentions each category;
# categories with no rows are left out of the mapping.
moisture_targets = {}
for category in plant_categories:
    in_category = df_transitions['C_N_Status'].str.contains(category, case=False, na=False)
    category_rows = df_transitions[in_category]
    avg_moisture = category_rows['Soil Moisture'].mean()
    if pd.isna(avg_moisture):
        continue
    moisture_targets[category] = round(avg_moisture, 2)

print("Moisture Targets by Plant Category:", moisture_targets)

Moisture Targets by Plant Category: {'Leafy Greens Optimal': 57.75, 'Fruit-Bearing Veggies Optimal': 41.3, 'Root Vegetables Optimal': 40.76, 'Herbs Optimal': 48.28, 'Flowering Plants Optimal': 40.76, 'Woody Shrubs/Trees Optimal': 39.67}


### Recommended Green:Brown ratio of feed to be composted

In [None]:
# Reload the saved feed rows and discard those lacking CO2 or soil nitrogen readings
df = pd.read_csv("df_with_feed_output.csv").dropna(subset=['CO2', 'Soil Nitrogen'])

# Feeds made of greens only (some greens, exactly zero browns)
is_greens_only = (df['Greens (g)'] > 0) & (df['Browns (g)'] == 0)
greens_only_df = df[is_greens_only].dropna(subset=['CO2', 'Soil Nitrogen', 'Greens (g)'])

# Feeds made of browns only (exactly zero greens, some browns)
is_browns_only = (df['Greens (g)'] == 0) & (df['Browns (g)'] > 0)
browns_only_df = df[is_browns_only].dropna(subset=['CO2', 'Soil Nitrogen', 'Browns (g)'])

# Threshold
def get_crop_tags(cn_ratio_str):
    """Return the crop labels whose C:N range contains the given ratio.

    Parameters
    ----------
    cn_ratio_str : str or number
        A ratio such as "15:1" (only the part before the first ':' is used)
        or a bare number. Decimal values are accepted.

    Returns
    -------
    list of str
        Matching labels in the order Lady-Finger, Spinach, Long Bean; an empty
        list when the value is outside every range or cannot be parsed.
    """
    try:
        value = float(str(cn_ratio_str).split(':')[0])
    except (TypeError, ValueError):
        # Unparseable input (e.g. None or free text) matches no crop.
        return []

    # (low, high, label) with inclusive bounds; NaN fails every comparison.
    crop_ranges = (
        (10, 24, "Lady-Finger Optimal"),
        (10, 12, "Spinach Optimal"),
        (10, 25, "Long Bean Optimal"),
    )
    return [label for low, high, label in crop_ranges if low <= value <= high]

In [None]:
# Ensure valid data
df_transitions = df_transitions[df_transitions['C_N_Status'].notna() & df_transitions['Soil Moisture'].notna()]

crop_targets = ["Lady-Finger Optimal", "Spinach Optimal", "Long Bean Optimal"]

# C_N_Status holds the plant-category labels produced by classify_cn_status
# (e.g. "Herbs Optimal"); it never contains these crop labels, so searching it
# would always leave moisture_targets empty. Derive the crop tags from the
# C:N ratio itself instead.
crop_tags_per_row = [get_crop_tags(ratio) for ratio in df_transitions['C_N_Ratio']]

# Compute average moisture for each crop tag
moisture_targets = {}

for crop in crop_targets:
    is_crop_row = pd.Series(
        [crop in tags for tags in crop_tags_per_row],
        index=df_transitions.index,
        dtype=bool,
    )
    crop_rows = df_transitions[is_crop_row]
    avg_moisture = crop_rows['Soil Moisture'].mean()
    if not pd.isna(avg_moisture):
        moisture_targets[crop] = round(avg_moisture, 2)

print("Moisture Targets by Crop:", moisture_targets)

In [None]:
# Recommendation
def recommend_browns_with_water(greens_input_grams, target_crop_input):
    """Suggest a browns and water amount to pair with a given weight of greens.

    Scans every (greens-only feed, browns-only feed, browns multiplier)
    combination and returns the first one whose combined CO2 / Soil Nitrogen
    ratio, rounded to a whole number, is tagged for the requested crop by
    ``get_crop_tags``. Reads the module-level ``moisture_targets``,
    ``greens_only_df`` and ``browns_only_df``.

    Parameters
    ----------
    greens_input_grams : float
        Weight of greens available, in grams. Must be positive.
    target_crop_input : str
        Crop label; case, spaces and hyphens are ignored when matching.

    Returns
    -------
    dict
        On success: "Greens (g)", "Browns (g)", "Water (g)", "C_N_Ratio",
        "Crop Target" and "Moisture Target %". Otherwise a single "message"
        key explaining why no recommendation was produced.
    """
    # Normalize crop input
    crop_input_normalized = target_crop_input.strip().lower().replace(" ", "").replace("-", "")
    crop_options = {
        "ladyfingeroptimal": "Lady-Finger Optimal",
        "spinachoptimal": "Spinach Optimal",
        "longbeanoptimal": "Long Bean Optimal"
    }

    if crop_input_normalized not in crop_options:
        return {"message": f"Invalid crop type: {target_crop_input}. Please choose a valid one."}

    target_crop = crop_options[crop_input_normalized]

    if greens_input_grams <= 0:
        return {"message": "The amount of greens must be greater than 0 grams."}

    # Without a moisture target the water amount cannot be computed, and a
    # target of 100% or more would divide by zero or go negative below.
    target_moisture_pct = moisture_targets.get(target_crop)
    if target_moisture_pct is None or pd.isna(target_moisture_pct) or not 0 <= target_moisture_pct < 100:
        return {"message": f"No usable moisture target available for {target_crop}."}

    moisture_fraction = target_moisture_pct / 100
    total_greens = greens_input_grams
    browns_multipliers = np.linspace(0.1, 3.0, 50)

    for _, green in greens_only_df.iterrows():
        # Scale the reference greens feed to the amount the user has.
        greens_scale = greens_input_grams / green['Greens (g)']

        for _, brown in browns_only_df.iterrows():
            for b_multiplier in browns_multipliers:
                total_co2 = greens_scale * green['CO2'] + b_multiplier * brown['CO2']
                total_n = greens_scale * green['Soil Nitrogen'] + b_multiplier * brown['Soil Nitrogen']

                if pd.isna(total_co2) or pd.isna(total_n) or total_n == 0:
                    continue

                cn_ratio = round(total_co2 / total_n)
                formatted_ratio = f"{cn_ratio}:1"

                if target_crop not in get_crop_tags(formatted_ratio):
                    continue

                total_browns = brown['Browns (g)'] * b_multiplier
                total_feed = total_greens + total_browns
                # Water such that water / (feed + water) equals the target fraction.
                target_water = moisture_fraction * (total_feed / (1 - moisture_fraction))

                return {
                    "Greens (g)": round(total_greens, 1),
                    "Browns (g)": round(total_browns, 1),
                    "Water (g)": round(target_water, 1),
                    "C_N_Ratio": formatted_ratio,
                    "Crop Target": target_crop,
                    "Moisture Target %": target_moisture_pct
                }

    return {"message": f"No optimal match found for {greens_input_grams}g of greens for {target_crop}."}

### User input and output recommendation for feed

In [None]:
# Prompt for a crop and a greens weight, then print the recommendation.
if __name__ == "__main__":
    print("Choose crop type: Lady-Finger Optimal | Spinach Optimal | Long Bean Optimal")
    crop_type = input("Enter your crop type: ")
    try:
        greens_input = float(input("Enter the amount of greens you have (in grams): "))
    except ValueError:
        # float() rejects non-numeric text; report it instead of ending in a traceback.
        print("\nPlease enter the amount of greens as a number (for example 250).")
    else:
        result = recommend_browns_with_water(greens_input, crop_type)
        print("\nRecommendation")
        print(result)