In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import cdist

# Step 1: Load the Algae Dataset and Industry Requirements
algae_data = pd.read_csv('/content/algae_nutrition_environmental_impact_dataset.csv')

# Complete Industry Requirements based on the provided data
industry_requirements = {
    "Food and Beverages": {"Protein (%)": (10, 60), "Fat (%)": (0, 15), "Carbohydrates (%)": (10, 50), "Fiber (%)": (5, 20),
                           "Vitamin A (μg/g)": (100, 500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (1, 2)},
    "Biofuels": {"Protein (%)": (15, 50), "Fat (%)": (20, 40), "Carbohydrates (%)": (20, 40), "Fiber (%)": (10, 30),
                 "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Pharmaceuticals and Medicine": {"Protein (%)": (10, 50), "Fat (%)": (2, 20), "Carbohydrates (%)": (15, 40), "Fiber (%)": (10, 30),
                                     "Vitamin A (μg/g)": (100, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (1, 5)},
    "Cosmetics and Skin Care": {"Protein (%)": (10, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (5, 20), "Fiber (%)": (3, 10),
                                "Vitamin A (μg/g)": (500, 1500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (0.5, 1)},
    "Animal Feed": {"Protein (%)": (20, 60), "Fat (%)": (5, 15), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 15),
                    "Vitamin A (μg/g)": (500, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Aquaculture and Fisheries": {"Protein (%)": (10, 50), "Fat (%)": (5, 15), "Carbohydrates (%)": (15, 40), "Fiber (%)": (10, 30),
                                  "Vitamin A (μg/g)": (1000, 5000), "Calcium (mg/g)": (5, 10), "Iron (mg/g)": (1, 3)},
    "Nutraceuticals and Supplements": {"Protein (%)": (20, 70), "Fat (%)": (5, 20), "Carbohydrates (%)": (10, 40), "Fiber (%)": (10, 20),
                                       "Vitamin A (μg/g)": (100, 2000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (1, 5)},
    "Bioplastics and Biocomposites": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 50), "Fiber (%)": (30, 60),
                                      "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Agriculture": {"Protein (%)": (15, 40), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                    "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Textiles": {"Protein (%)": (10, 40), "Fat (%)": (1, 5), "Carbohydrates (%)": (20, 50), "Fiber (%)": (20, 40),
                 "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Carbon Sequestration": {"Protein (%)": (5, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (10, 40), "Fiber (%)": (10, 30),
                             "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 3), "Iron (mg/g)": (0, 2)},
    "Biofertilizers": {"Protein (%)": (15, 30), "Fat (%)": (1, 5), "Carbohydrates (%)": (20, 50), "Fiber (%)": (10, 30),
                       "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0, 2)},
    "Bioherbicides": {"Protein (%)": (10, 30), "Fat (%)": (0, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 20),
                      "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Bioelectronics": {"Protein (%)": (10, 40), "Fat (%)": (2, 10), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 30),
                       "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 2), "Iron (mg/g)": (0.5, 2)},
    "Bio-based Detergents": {"Protein (%)": (10, 30), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 40), "Fiber (%)": (20, 40),
                             "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Marine Industry": {"Protein (%)": (5, 20), "Fat (%)": (5, 15), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                        "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0.5, 1)},
    "Pulp and Paper": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 60),
                       "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Beverages": {"Protein (%)": (10, 40), "Fat (%)": (2, 8), "Carbohydrates (%)": (20, 40), "Fiber (%)": (3, 10),
                  "Vitamin A (μg/g)": (200, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Wastewater Treatment": {"Protein (%)": (5, 20), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                             "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Building Materials": {"Protein (%)": (10, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 60),
                           "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Packaging": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 50),
                  "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Luxury Products": {"Protein (%)": (15, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (5, 20), "Fiber (%)": (3, 10),
                        "Vitamin A (μg/g)": (500, 1500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (0.5, 1)},
    "Fermentation": {"Protein (%)": (30, 60), "Fat (%)": (2, 8), "Carbohydrates (%)": (20, 50), "Fiber (%)": (10, 30),
                     "Vitamin A (μg/g)": (200, 1000), "Calcium (mg/g)": (1, 3), "Iron (mg/g)": (0.5, 2)},
    "Fire Retardants": {"Protein (%)": (0, 10), "Fat (%)": (0, 2), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 15),
                        "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Water Filtration": {"Protein (%)": (10, 40), "Fat (%)": (2, 10), "Carbohydrates (%)": (15, 50), "Fiber (%)": (10, 30),
                         "Vitamin A (μg/g)": (100, 500), "Calcium (mg/g)": (1, 2), "Iron (mg/g)": (0.5, 1)}
}
# Sustainability Metrics with estimated values
# One scalar per metric (not per species). lca_score_algae_for_industry reads
# this table, min-max normalizes the values against each other, and adds each
# normalized value to every row's "lca_score".
# NOTE(review): the values use different units/scales (tons, years, 1-10
# style scores, MJ/kg, kg CO₂-eq/kg), so normalizing them against each other
# mixes units — confirm this is intended.
sustainability_metrics = {
    "Carbon Sequestration Rate (ton CO₂/ha/yr)": 5,  # Average between 2-9 tons CO₂/ha/yr
    "Carbon Storage Duration (years)": 5,  # Algae in non-stable storage, lasts years; biochar form may last centuries
    "Biodiversity Score": 7,  # Hypothetical score based on ecosystem support for biodiversity
    "Nutrient Cycling Contribution": 8,  # High score based on algae's nutrient absorption capabilities
    "Water Quality Improvement": 9,  # High impact score due to pollutant absorption capacity
    "Resilience to Environmental Change": 7,  # Moderate resilience, species-dependent
    "Cultivation Energy (MJ/kg)": 2.5,  # Average energy requirement range for algae cultivation
    "GHG Emissions (kg CO₂-eq/kg)": 0.75,  # Average emissions range for algae biofuels
    }

# Step 2: Prompt user to select an industry
# Menu numbers are 1-based positions in the dict's insertion order.
industry_names = list(industry_requirements)
print("Please select an industry from the list below:")
for i, industry in enumerate(industry_names, 1):
    print(f"{i}. {industry}")

try:
    industry_index = int(input("\nEnter the number corresponding to the industry of your choice: ")) - 1
    # Reject 0 and negative entries explicitly: Python's negative indexing
    # would otherwise silently select an industry counted from the end of
    # the list instead of reporting an invalid choice.
    if not 0 <= industry_index < len(industry_names):
        raise IndexError(industry_index)
    industry_name = industry_names[industry_index]
except (ValueError, IndexError):
    print("Invalid selection. Please restart and select a valid industry number.")
    # `exit()` is a `site` helper intended for the interactive shell; raising
    # SystemExit stops execution without relying on it.
    raise SystemExit from None
else:
    print(f"\nYou selected: {industry_name}\n")



# Step 3: Implement Life Cycle Assessment (LCA)
def lca_score_algae_for_industry(algae_data, industry_requirements, selected_industry,
                                 sustainability_metrics=None):
    """Score each algae row against an industry's nutrient profile plus sustainability terms.

    For every nutrient required by ``selected_industry`` the absolute distance
    of the row's value from the midpoint of the ``(min, max)`` range is divided
    by the range width. These per-nutrient deviations are summed into
    ``nutrient_deviation_score``. Each sustainability metric is min-max
    normalized against the other metrics in the table and added to
    ``lca_score``, which finally also receives the nutrient deviation sum.

    NOTE(review): the sustainability metrics are scalars, so the same amount is
    added to every row; the ordering by ``lca_score`` is therefore the same as
    the ordering by ``nutrient_deviation_score``. Confirm whether per-species
    sustainability values were intended instead.

    Args:
        algae_data: DataFrame with one column per required nutrient. It is
            copied, never modified.
        industry_requirements: Mapping of industry name to a mapping of
            nutrient column name to ``(min, max)``.
        selected_industry: Key into ``industry_requirements``.
        sustainability_metrics: Optional mapping of metric name to numeric
            value. When omitted, the module-level ``sustainability_metrics``
            table is used, which keeps three-argument calls working.

    Returns:
        A copy of ``algae_data`` with ``lca_score``, one ``<nutrient>_deviation``
        column per nutrient, ``nutrient_deviation_score`` and one
        ``<metric>_impact`` column per metric appended, in that order.

    Raises:
        KeyError: If the industry or a required nutrient column is missing.
        NameError: If no metrics are passed and no module-level
            ``sustainability_metrics`` exists.
    """
    if sustainability_metrics is None:
        # The parameter shadows the global of the same name, so fetch the
        # module-level table explicitly.
        try:
            sustainability_metrics = globals()["sustainability_metrics"]
        except KeyError:
            raise NameError(
                "sustainability_metrics is not defined; pass it as an argument "
                "or define the module-level table first"
            ) from None

    reqs = industry_requirements[selected_industry]

    lca_score_df = algae_data.copy()
    lca_score_df["lca_score"] = 0.0  # float accumulator for the final score

    # Production Process Phase: distance from the ideal (midpoint) nutrient profile
    deviation_columns = []
    for nutrient, (min_val, max_val) in reqs.items():
        ideal_value = (min_val + max_val) / 2
        column = f"{nutrient}_deviation"
        lca_score_df[column] = (lca_score_df[nutrient] - ideal_value).abs() / (max_val - min_val)
        deviation_columns.append(column)

    # Total deviation across all nutrients (lower is better)
    lca_score_df["nutrient_deviation_score"] = lca_score_df[deviation_columns].sum(axis=1)

    if sustainability_metrics:
        # The bounds do not change inside the loop, so compute them once.
        lowest = min(sustainability_metrics.values())
        span = max(sustainability_metrics.values()) - lowest
        for metric, value in sustainability_metrics.items():
            # A single metric (or all-equal values) has zero span; treat it as
            # contributing nothing instead of dividing by zero.
            normalized = (value - lowest) / span if span else 0.0
            lca_score_df[f"{metric}_impact"] = normalized
            lca_score_df["lca_score"] += normalized

    # Combine the sustainability contribution with the nutrient deviation score
    lca_score_df["lca_score"] += lca_score_df["nutrient_deviation_score"]

    return lca_score_df

# Step 4: Score every species for the selected industry, rank, and report.
# Ascending sort: the smallest lca_score ends up in row 0.
lca_scored_algae = (
    lca_score_algae_for_industry(algae_data, industry_requirements, industry_name)
    .sort_values(by="lca_score")
    .reset_index(drop=True)
)

# Report columns: species, score, nutrient profile, then the sustainability
# columns that come from the dataset itself.
lca_report_columns = [
    "Species",
    "lca_score",
    "Protein (%)",
    "Fat (%)",
    "Carbohydrates (%)",
    "Fiber (%)",
    "Vitamin A (μg/g)",
    "Calcium (mg/g)",
    "Iron (mg/g)",
    "Carbon Sequestration Rate (ton CO₂/ha/yr)",
    "Carbon Storage Duration",
    "Biodiversity Score",
    "Nutrient Cycling Contribution",
    "Water Quality Improvement",
    "Resilience to Environmental Change",
    "Cultivation Energy (MJ/kg)",
    "GHG Emissions (kg CO₂-eq/kg)",
]

# Show the three best-ranked rows.
print("Top 3 recommended algae for", industry_name)
print(lca_scored_algae[lca_report_columns].head(3))

Please select an industry from the list below:
1. Food and Beverages
2. Biofuels
3. Pharmaceuticals and Medicine
4. Cosmetics and Skin Care
5. Animal Feed
6. Aquaculture and Fisheries
7. Nutraceuticals and Supplements
8. Bioplastics and Biocomposites
9. Agriculture
10. Textiles
11. Carbon Sequestration
12. Biofertilizers
13. Bioherbicides
14. Bioelectronics
15. Bio-based Detergents
16. Marine Industry
17. Pulp and Paper
18. Beverages
19. Wastewater Treatment
20. Building Materials
21. Packaging
22. Luxury Products
23. Fermentation
24. Fire Retardants
25. Water Filtration

Enter the number corresponding to the industry of your choice: 9

You selected: Agriculture

Top 3 recommended algae for Agriculture
                 Species  lca_score  Protein (%)  Fat (%)  Carbohydrates (%)  \
0   Gracilaria verrucosa  17.736364         20.0      0.8               45.0   
1    Hildenbrandia rubra  18.849697          8.0      0.4               48.0   
2  Eucheuma denticulatum  19.313030         11.0

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import cdist

# Step 1: Load the Algae Dataset and Industry Requirements
algae_data = pd.read_csv('/content/algae_nutrition_environmental_impact_dataset.csv')

# Complete Industry Requirements based on the provided data
industry_requirements = {
    "Food and Beverages": {"Protein (%)": (10, 60), "Fat (%)": (0, 15), "Carbohydrates (%)": (10, 50), "Fiber (%)": (5, 20),
                           "Vitamin A (μg/g)": (100, 500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (1, 2)},
    "Biofuels": {"Protein (%)": (15, 50), "Fat (%)": (20, 40), "Carbohydrates (%)": (20, 40), "Fiber (%)": (10, 30),
                 "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Pharmaceuticals and Medicine": {"Protein (%)": (10, 50), "Fat (%)": (2, 20), "Carbohydrates (%)": (15, 40), "Fiber (%)": (10, 30),
                                     "Vitamin A (μg/g)": (100, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (1, 5)},
    "Cosmetics and Skin Care": {"Protein (%)": (10, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (5, 20), "Fiber (%)": (3, 10),
                                "Vitamin A (μg/g)": (500, 1500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (0.5, 1)},
    "Animal Feed": {"Protein (%)": (20, 60), "Fat (%)": (5, 15), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 15),
                    "Vitamin A (μg/g)": (500, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Aquaculture and Fisheries": {"Protein (%)": (10, 50), "Fat (%)": (5, 15), "Carbohydrates (%)": (15, 40), "Fiber (%)": (10, 30),
                                  "Vitamin A (μg/g)": (1000, 5000), "Calcium (mg/g)": (5, 10), "Iron (mg/g)": (1, 3)},
    "Nutraceuticals and Supplements": {"Protein (%)": (20, 70), "Fat (%)": (5, 20), "Carbohydrates (%)": (10, 40), "Fiber (%)": (10, 20),
                                       "Vitamin A (μg/g)": (100, 2000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (1, 5)},
    "Bioplastics and Biocomposites": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 50), "Fiber (%)": (30, 60),
                                      "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Agriculture": {"Protein (%)": (15, 40), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                    "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Textiles": {"Protein (%)": (10, 40), "Fat (%)": (1, 5), "Carbohydrates (%)": (20, 50), "Fiber (%)": (20, 40),
                 "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Carbon Sequestration": {"Protein (%)": (5, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (10, 40), "Fiber (%)": (10, 30),
                             "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 3), "Iron (mg/g)": (0, 2)},
    "Biofertilizers": {"Protein (%)": (15, 30), "Fat (%)": (1, 5), "Carbohydrates (%)": (20, 50), "Fiber (%)": (10, 30),
                       "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0, 2)},
    "Bioherbicides": {"Protein (%)": (10, 30), "Fat (%)": (0, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 20),
                      "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Bioelectronics": {"Protein (%)": (10, 40), "Fat (%)": (2, 10), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 30),
                       "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 2), "Iron (mg/g)": (0.5, 2)},
    "Bio-based Detergents": {"Protein (%)": (10, 30), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 40), "Fiber (%)": (20, 40),
                             "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Marine Industry": {"Protein (%)": (5, 20), "Fat (%)": (5, 15), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                        "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0.5, 1)},
    "Pulp and Paper": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 60),
                       "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Beverages": {"Protein (%)": (10, 40), "Fat (%)": (2, 8), "Carbohydrates (%)": (20, 40), "Fiber (%)": (3, 10),
                  "Vitamin A (μg/g)": (200, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Wastewater Treatment": {"Protein (%)": (5, 20), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                             "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Building Materials": {"Protein (%)": (10, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 60),
                           "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Packaging": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 50),
                  "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Luxury Products": {"Protein (%)": (15, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (5, 20), "Fiber (%)": (3, 10),
                        "Vitamin A (μg/g)": (500, 1500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (0.5, 1)},
    "Fermentation": {"Protein (%)": (30, 60), "Fat (%)": (2, 8), "Carbohydrates (%)": (20, 50), "Fiber (%)": (10, 30),
                     "Vitamin A (μg/g)": (200, 1000), "Calcium (mg/g)": (1, 3), "Iron (mg/g)": (0.5, 2)},
    "Fire Retardants": {"Protein (%)": (0, 10), "Fat (%)": (0, 2), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 15),
                        "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Water Filtration": {"Protein (%)": (10, 40), "Fat (%)": (2, 10), "Carbohydrates (%)": (15, 50), "Fiber (%)": (10, 30),
                         "Vitamin A (μg/g)": (100, 500), "Calcium (mg/g)": (1, 2), "Iron (mg/g)": (0.5, 1)}
}

# Step 2: Prompt user to select an industry
# Menu numbers are 1-based positions in the dict's insertion order.
industry_names = list(industry_requirements)
print("Please select an industry from the list below:")
for i, industry in enumerate(industry_names, 1):
    print(f"{i}. {industry}")

try:
    industry_index = int(input("\nEnter the number corresponding to the industry of your choice: ")) - 1
    # Reject 0 and negative entries explicitly: Python's negative indexing
    # would otherwise silently select an industry counted from the end of
    # the list instead of reporting an invalid choice.
    if not 0 <= industry_index < len(industry_names):
        raise IndexError(industry_index)
    industry_name = industry_names[industry_index]
except (ValueError, IndexError):
    print("Invalid selection. Please restart and select a valid industry number.")
    # `exit()` is a `site` helper intended for the interactive shell; raising
    # SystemExit stops execution without relying on it.
    raise SystemExit from None
else:
    print(f"\nYou selected: {industry_name}\n")

# Step 3: Calculate Deviation Score for Each Algae
def score_algae_for_industry(algae_data, industry_requirements, selected_industry):
    """Rank-ready deviation scores of each algae row for one industry.

    Each required nutrient contributes ``|value - midpoint| / (max - min)``,
    where the midpoint and width come from that nutrient's ``(min, max)``
    range. The contributions are stored as ``<nutrient>_deviation`` columns
    and their row-wise sum as ``deviation_score`` (lower means closer to the
    middle of every range).

    Args:
        algae_data: DataFrame containing a column for every required nutrient.
        industry_requirements: industry name -> {nutrient column: (min, max)}.
        selected_industry: Key into ``industry_requirements``.

    Returns:
        A copy of ``algae_data`` with ``deviation_score`` followed by the
        per-nutrient deviation columns appended.
    """
    nutrient_ranges = industry_requirements[selected_industry]

    ranked = algae_data.copy()
    # Placeholder so the total column is positioned before the per-nutrient
    # columns; it is overwritten with the real sum below.
    ranked["deviation_score"] = 0

    per_nutrient_columns = []
    for nutrient, bounds in nutrient_ranges.items():
        low, high = bounds
        midpoint = (low + high) / 2
        column_name = f"{nutrient}_deviation"
        # Distance from the midpoint, expressed in units of the range width.
        ranked[column_name] = ranked[nutrient].sub(midpoint).abs().div(high - low)
        per_nutrient_columns.append(column_name)

    ranked["deviation_score"] = ranked[per_nutrient_columns].sum(axis=1)
    return ranked

# Step 4: Score every species, rank by deviation, and show recommendations.
# Ascending sort: the smallest deviation_score (closest match) ends up in row 0.
scored_algae = (
    score_algae_for_industry(algae_data, industry_requirements, industry_name)
    .sort_values(by="deviation_score")
    .reset_index(drop=True)
)
print(scored_algae.columns)

# Report columns: species, score, then the nutrient profile.
deviation_report_columns = [
    "Species",
    "deviation_score",
    "Protein (%)",
    "Fat (%)",
    "Carbohydrates (%)",
    "Fiber (%)",
    "Vitamin A (μg/g)",
    "Calcium (mg/g)",
    "Iron (mg/g)",
]

# Show the three best-ranked rows.
print("Top 3 recommended algae for", industry_name)
print(scored_algae[deviation_report_columns].head(3))

# Step 3: Implement Life Cycle Assessment (LCA)
def lca_score_algae_for_industry(algae_data, industry_requirements, selected_industry,
                                 sustainability_metrics=None):
    """Score each algae row against an industry's nutrient profile plus sustainability terms.

    For every nutrient required by ``selected_industry`` the absolute distance
    of the row's value from the midpoint of the ``(min, max)`` range is divided
    by the range width. These per-nutrient deviations are summed into
    ``nutrient_deviation_score``. Each sustainability metric is min-max
    normalized against the other metrics in the table and added to
    ``lca_score``, which finally also receives the nutrient deviation sum.

    NOTE(review): the sustainability metrics are scalars, so the same amount is
    added to every row; the ordering by ``lca_score`` is therefore the same as
    the ordering by ``nutrient_deviation_score``. Confirm whether per-species
    sustainability values were intended instead.

    Args:
        algae_data: DataFrame with one column per required nutrient. It is
            copied, never modified.
        industry_requirements: Mapping of industry name to a mapping of
            nutrient column name to ``(min, max)``.
        selected_industry: Key into ``industry_requirements``.
        sustainability_metrics: Optional mapping of metric name to numeric
            value. When omitted, a module-level ``sustainability_metrics``
            table is used, which keeps three-argument calls working. This
            cell does not define that table itself, so pass it explicitly
            when running the cell on its own.

    Returns:
        A copy of ``algae_data`` with ``lca_score``, one ``<nutrient>_deviation``
        column per nutrient, ``nutrient_deviation_score`` and one
        ``<metric>_impact`` column per metric appended, in that order.

    Raises:
        KeyError: If the industry or a required nutrient column is missing.
        NameError: If no metrics are passed and no module-level
            ``sustainability_metrics`` exists.
    """
    if sustainability_metrics is None:
        # The parameter shadows the global of the same name, so fetch the
        # module-level table explicitly.
        try:
            sustainability_metrics = globals()["sustainability_metrics"]
        except KeyError:
            raise NameError(
                "sustainability_metrics is not defined; pass it as an argument "
                "or define the module-level table first"
            ) from None

    reqs = industry_requirements[selected_industry]

    lca_score_df = algae_data.copy()
    lca_score_df["lca_score"] = 0.0  # float accumulator for the final score

    # Production Process Phase: distance from the ideal (midpoint) nutrient profile
    deviation_columns = []
    for nutrient, (min_val, max_val) in reqs.items():
        ideal_value = (min_val + max_val) / 2
        column = f"{nutrient}_deviation"
        lca_score_df[column] = (lca_score_df[nutrient] - ideal_value).abs() / (max_val - min_val)
        deviation_columns.append(column)

    # Total deviation across all nutrients (lower is better)
    lca_score_df["nutrient_deviation_score"] = lca_score_df[deviation_columns].sum(axis=1)

    if sustainability_metrics:
        # The bounds do not change inside the loop, so compute them once.
        lowest = min(sustainability_metrics.values())
        span = max(sustainability_metrics.values()) - lowest
        for metric, value in sustainability_metrics.items():
            # A single metric (or all-equal values) has zero span; treat it as
            # contributing nothing instead of dividing by zero.
            normalized = (value - lowest) / span if span else 0.0
            lca_score_df[f"{metric}_impact"] = normalized
            lca_score_df["lca_score"] += normalized

    # Combine the sustainability contribution with the nutrient deviation score
    lca_score_df["lca_score"] += lca_score_df["nutrient_deviation_score"]

    return lca_score_df

# Step 4: Score every species with the LCA function, rank, and report.
# Ascending sort: the smallest lca_score ends up in row 0.
lca_scored_algae = (
    lca_score_algae_for_industry(algae_data, industry_requirements, industry_name)
    .sort_values(by="lca_score")
    .reset_index(drop=True)
)

# Report columns: species, score, then the sustainability columns that come
# from the dataset itself.
lca_report_columns = [
    "Species",
    "lca_score",
    "Carbon Sequestration Rate (ton CO₂/ha/yr)",
    "Carbon Storage Duration",
    "Biodiversity Score",
    "Nutrient Cycling Contribution",
    "Water Quality Improvement",
    "Resilience to Environmental Change",
    "Cultivation Energy (MJ/kg)",
    "GHG Emissions (kg CO₂-eq/kg)",
]

# Show the three best-ranked rows.
print("Top 3 recommended algae for(with sustainability metrics and LCA score)", industry_name)
print(lca_scored_algae[lca_report_columns].head(3))

Please select an industry from the list below:
1. Food and Beverages
2. Biofuels
3. Pharmaceuticals and Medicine
4. Cosmetics and Skin Care
5. Animal Feed
6. Aquaculture and Fisheries
7. Nutraceuticals and Supplements
8. Bioplastics and Biocomposites
9. Agriculture
10. Textiles
11. Carbon Sequestration
12. Biofertilizers
13. Bioherbicides
14. Bioelectronics
15. Bio-based Detergents
16. Marine Industry
17. Pulp and Paper
18. Beverages
19. Wastewater Treatment
20. Building Materials
21. Packaging
22. Luxury Products
23. Fermentation
24. Fire Retardants
25. Water Filtration

Enter the number corresponding to the industry of your choice: 9

You selected: Agriculture

Index(['Species', 'Protein (%)', 'Fat (%)', 'Carbohydrates (%)', 'Fiber (%)',
       'Vitamin A (μg/g)', 'Calcium (mg/g)', 'Iron (mg/g)',
       'Carbon Sequestration Rate (ton CO₂/ha/yr)', 'Carbon Storage Duration',
       'Biodiversity Score', 'Nutrient Cycling Contribution',
       'Water Quality Improvement', 'Resilience t

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

# Step 1: Load the Algae Dataset and Industry Requirements
algae_data = pd.read_csv('/content/algae_nutrition_environmental_impact_dataset.csv')

# Industry requirements dictionary (same as provided)
industry_requirements = {
     "Food and Beverages": {"Protein (%)": (10, 60), "Fat (%)": (0, 15), "Carbohydrates (%)": (10, 50), "Fiber (%)": (5, 20),
                           "Vitamin A (μg/g)": (100, 500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (1, 2)},
    "Biofuels": {"Protein (%)": (15, 50), "Fat (%)": (20, 40), "Carbohydrates (%)": (20, 40), "Fiber (%)": (10, 30),
                 "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Pharmaceuticals and Medicine": {"Protein (%)": (10, 50), "Fat (%)": (2, 20), "Carbohydrates (%)": (15, 40), "Fiber (%)": (10, 30),
                                     "Vitamin A (μg/g)": (100, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (1, 5)},
    "Cosmetics and Skin Care": {"Protein (%)": (10, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (5, 20), "Fiber (%)": (3, 10),
                                "Vitamin A (μg/g)": (500, 1500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (0.5, 1)},
    "Animal Feed": {"Protein (%)": (20, 60), "Fat (%)": (5, 15), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 15),
                    "Vitamin A (μg/g)": (500, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Aquaculture and Fisheries": {"Protein (%)": (10, 50), "Fat (%)": (5, 15), "Carbohydrates (%)": (15, 40), "Fiber (%)": (10, 30),
                                  "Vitamin A (μg/g)": (1000, 5000), "Calcium (mg/g)": (5, 10), "Iron (mg/g)": (1, 3)},
    "Nutraceuticals and Supplements": {"Protein (%)": (20, 70), "Fat (%)": (5, 20), "Carbohydrates (%)": (10, 40), "Fiber (%)": (10, 20),
                                       "Vitamin A (μg/g)": (100, 2000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (1, 5)},
    "Bioplastics and Biocomposites": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 50), "Fiber (%)": (30, 60),
                                      "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Agriculture": {"Protein (%)": (15, 40), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                    "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Textiles": {"Protein (%)": (10, 40), "Fat (%)": (1, 5), "Carbohydrates (%)": (20, 50), "Fiber (%)": (20, 40),
                 "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Carbon Sequestration": {"Protein (%)": (5, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (10, 40), "Fiber (%)": (10, 30),
                             "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 3), "Iron (mg/g)": (0, 2)},
    "Biofertilizers": {"Protein (%)": (15, 30), "Fat (%)": (1, 5), "Carbohydrates (%)": (20, 50), "Fiber (%)": (10, 30),
                       "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0, 2)},
    "Bioherbicides": {"Protein (%)": (10, 30), "Fat (%)": (0, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 20),
                      "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Bioelectronics": {"Protein (%)": (10, 40), "Fat (%)": (2, 10), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 30),
                       "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (1, 2), "Iron (mg/g)": (0.5, 2)},
    "Bio-based Detergents": {"Protein (%)": (10, 30), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 40), "Fiber (%)": (20, 40),
                             "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Marine Industry": {"Protein (%)": (5, 20), "Fat (%)": (5, 15), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                        "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0.5, 1)},
    "Pulp and Paper": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 60),
                       "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Beverages": {"Protein (%)": (10, 40), "Fat (%)": (2, 8), "Carbohydrates (%)": (20, 40), "Fiber (%)": (3, 10),
                  "Vitamin A (μg/g)": (200, 1000), "Calcium (mg/g)": (1, 5), "Iron (mg/g)": (0.5, 2)},
    "Wastewater Treatment": {"Protein (%)": (5, 20), "Fat (%)": (1, 5), "Carbohydrates (%)": (10, 30), "Fiber (%)": (10, 20),
                             "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Building Materials": {"Protein (%)": (10, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 60),
                           "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Packaging": {"Protein (%)": (10, 30), "Fat (%)": (0, 10), "Carbohydrates (%)": (20, 40), "Fiber (%)": (30, 50),
                  "Vitamin A (μg/g)": (0, 10), "Calcium (mg/g)": (0, 2), "Iron (mg/g)": (0, 1)},
    "Luxury Products": {"Protein (%)": (15, 30), "Fat (%)": (5, 15), "Carbohydrates (%)": (5, 20), "Fiber (%)": (3, 10),
                        "Vitamin A (μg/g)": (500, 1500), "Calcium (mg/g)": (0.5, 3), "Iron (mg/g)": (0.5, 1)},
    "Fermentation": {"Protein (%)": (30, 60), "Fat (%)": (2, 8), "Carbohydrates (%)": (20, 50), "Fiber (%)": (10, 30),
                     "Vitamin A (μg/g)": (200, 1000), "Calcium (mg/g)": (1, 3), "Iron (mg/g)": (0.5, 2)},
    "Fire Retardants": {"Protein (%)": (0, 10), "Fat (%)": (0, 2), "Carbohydrates (%)": (10, 30), "Fiber (%)": (5, 15),
                        "Vitamin A (μg/g)": (0, 5), "Calcium (mg/g)": (0, 1), "Iron (mg/g)": (0, 1)},
    "Water Filtration": {"Protein (%)": (10, 40), "Fat (%)": (2, 10), "Carbohydrates (%)": (15, 50), "Fiber (%)": (10, 30),
                         "Vitamin A (μg/g)": (100, 500), "Calcium (mg/g)": (1, 2), "Iron (mg/g)": (0.5, 1)}
}

# Step 2: Prompt user to select an industry
# Menu numbers are 1-based positions in the dict's insertion order.
industry_names = list(industry_requirements)
print("Please select an industry from the list below:")
for i, industry in enumerate(industry_names, 1):
    print(f"{i}. {industry}")

try:
    industry_index = int(input("\nEnter the number corresponding to the industry of your choice: ")) - 1
    # Reject 0 and negative entries explicitly: Python's negative indexing
    # would otherwise silently select an industry counted from the end of
    # the list instead of reporting an invalid choice.
    if not 0 <= industry_index < len(industry_names):
        raise IndexError(industry_index)
    industry_name = industry_names[industry_index]
except (ValueError, IndexError):
    print("Invalid selection. Please restart and select a valid industry number.")
    # `exit()` is a `site` helper intended for the interactive shell; raising
    # SystemExit stops execution without relying on it.
    raise SystemExit from None
else:
    print(f"\nYou selected: {industry_name}\n")

# Step 3: Calculate Deviation Score for Each Algae
def score_algae_for_industry(algae_data, industry_requirements, selected_industry):
    """Compute per-nutrient and total deviation scores for one industry.

    Each required nutrient contributes ``|value - midpoint| / (max - min)``,
    with the midpoint and width taken from that nutrient's ``(min, max)``
    range. The row-wise sum of those contributions is ``deviation_score``
    (lower means closer to the middle of every range).

    Args:
        algae_data: DataFrame containing a column for every required nutrient.
        industry_requirements: industry name -> {nutrient column: (min, max)}.
        selected_industry: Key into ``industry_requirements``.

    Returns:
        A copy of ``algae_data`` with one ``<nutrient>_deviation`` column per
        nutrient followed by ``deviation_score`` appended.
    """
    targets = industry_requirements[selected_industry]
    result = algae_data.copy()

    for nutrient in targets:
        lower, upper = targets[nutrient]
        center = (lower + upper) / 2
        # Distance from the range midpoint, in units of the range width.
        result[f"{nutrient}_deviation"] = result[nutrient].sub(center).abs().div(upper - lower)

    deviation_columns = [f"{nutrient}_deviation" for nutrient in targets]
    result["deviation_score"] = result[deviation_columns].sum(axis=1)
    return result

scored_algae = score_algae_for_industry(algae_data, industry_requirements, industry_name)

# Step 4: Prepare Features and Target Variable
# NOTE(review): the target (deviation_score) is computed directly from these
# same nutrient columns, so the models below learn to reproduce a known formula.
nutrient_columns = [
    "Protein (%)",
    "Fat (%)",
    "Carbohydrates (%)",
    "Fiber (%)",
    "Vitamin A (μg/g)",
    "Calcium (mg/g)",
    "Iron (mg/g)",
]
features = scored_algae[nutrient_columns]
target = scored_algae["deviation_score"]

# Step 5: Hold out 20% of the rows for evaluation (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Step 6: Candidate regressors, keyed by the label used in the printed output
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
}

# Step 7: Fit each model on the training split and report its held-out MAE
for model_name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"{model_name} Mean Absolute Error: {mae}")

    # Store this model's prediction for every row (training rows included)
    scored_algae[f"{model_name} Predicted Score"] = model.predict(features)

# Step 8: For each model, list the three rows with the lowest predicted score
for model_name in models:
    score_column = f"{model_name} Predicted Score"
    recommendations = scored_algae.sort_values(by=score_column).reset_index(drop=True)
    print(f"\nTop 3 recommended algae for {industry_name} using {model_name}:")
    print(recommendations[["Species", score_column, *nutrient_columns]].head(3))

Please select an industry from the list below:
1. Food and Beverages
2. Biofuels
3. Pharmaceuticals and Medicine
4. Cosmetics and Skin Care
5. Animal Feed
6. Aquaculture and Fisheries
7. Nutraceuticals and Supplements
8. Bioplastics and Biocomposites
9. Agriculture
10. Textiles
11. Carbon Sequestration
12. Biofertilizers
13. Bioherbicides
14. Bioelectronics
15. Bio-based Detergents
16. Marine Industry
17. Pulp and Paper
18. Beverages
19. Wastewater Treatment
20. Building Materials
21. Packaging
22. Luxury Products
23. Fermentation
24. Fire Retardants
25. Water Filtration

Enter the number corresponding to the industry of your choice: 23

You selected: Fermentation

Linear Regression Mean Absolute Error: 0.04807692307697637
Decision Tree Mean Absolute Error: 6.039999999999998
Random Forest Mean Absolute Error: 2.5418637820512666
Gradient Boosting Mean Absolute Error: 2.034375550316833

Top 3 recommended algae for Fermentation using Linear Regression:
                    Species  Linear 