In [None]:
!pip install sentence_transformers



In [None]:
!pip install numpy



In [None]:
!pip install pandas



In [None]:
!pip install -U scikit-learn



In [None]:
!pip install spacy



In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
import spacy
#from google.colab import drive
#drive.mount('/content/drive')
# Sentence-embedding model shared by the notebook; compute_similarity_rating calls model.encode on it.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Both CSVs live under /content/drive, so Google Drive must already be mounted there
# (the drive.mount call in the import cell is commented out).
# nouns_df: its column names serve as the noun vocabulary and each column is summed into a count.
nouns_df = pd.read_csv("/content/drive/My Drive/AI Earth Hackathon/AI_Earth_Hackathon_unique_nouns_processed.csv")
# problems_solutions_sorted_df: compute_overall_rating ranks a new problem/solution pair
# against this table using its 'average_rating' column.
problems_solutions_sorted_df = pd.read_csv("/content/drive/My Drive/AI Earth Hackathon/AI_Earth_Hackathon_problem_solution_sorted.csv")

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# spaCy English pipeline; remove_stop_punct uses it to tokenise text and read the
# is_stop / is_punct flags of each token.
# NOTE(review): `pip install spacy` alone does not fetch the en_core_web_sm package —
# confirm it is available in the target environment.
nlp = spacy.load("en_core_web_sm")

def remove_stop_punct(text):
  """Strip stop words and punctuation from ``text``.

  Strings are tokenised with the module-level spaCy pipeline ``nlp``; the text of
  every token that is neither a stop word nor punctuation is kept, and the
  survivors are joined with single spaces. Any value that is not a ``str`` is
  returned unchanged.
  """
  if not isinstance(text, str):
    return text

  kept_tokens = []
  for tok in nlp(text):
    if tok.is_stop or tok.is_punct:
      continue
    kept_tokens.append(tok.text)
  return " ".join(kept_tokens)

# The noun vocabulary is the set of column names of nouns_df; for each noun,
# word_count_mapping stores the total of that noun's column.
columns_nouns_df = nouns_df.columns
word_count_mapping = {noun: sum(nouns_df[noun]) for noun in columns_nouns_df}

def compute_uniqueness_rating(solution):
  """Rate the novelty of ``solution`` by counting the rare vocabulary nouns it contains.

  Every noun in the module-level ``columns_nouns_df`` is tested with a
  case-sensitive substring check against ``solution``. A noun counts as rare
  when its total in ``word_count_mapping`` is at most 50.

  Args:
    solution: Solution text. Non-string values (for example a missing value that
      ``remove_stop_punct`` passed through unchanged) receive the lowest rating,
      matching the other keyword-based raters in this notebook.

  Returns:
    3 when at least 40 rare nouns occur, 2 for 30-39, otherwise 1.
  """
  # Guard first: `word in solution` raises TypeError for floats / None.
  if not isinstance(solution, str):
    return 1

  # A single pass is enough: a noun contributes only if it is present AND rare.
  count = sum(
      1 for word in columns_nouns_df
      if word in solution and word_count_mapping[word] <= 50)

  #rating based on how many rare nouns appear
  if count >= 40:
    return 3
  elif count >= 30:
    return 2
  else:
    return 1

def compute_similarity_rating(problem, solution):
  """Rate how relevant ``solution`` is to ``problem`` from their embedding similarity.

  Both texts are encoded in one call to the module-level ``model`` and the cosine
  similarity between the two resulting vectors is bucketed:
  at least 0.6 gives 3, at least 0.45 gives 2, anything else gives 1.
  """
  vectors = model.encode([problem, solution], convert_to_tensor=False)
  # cosine_similarity yields the pairwise matrix; entry [0, 1] is problem vs. solution.
  score = cosine_similarity(vectors)[0,1]

  # Thresholds are checked from the highest rating downwards.
  for threshold, rating in ((0.6, 3), (0.45, 2)):
    if score >= threshold:
      return rating
  return 1

def compute_scalability_rating(solution):
  """Rate scalability from the number of scalability phrases found in ``solution``.

  Matching is a case-insensitive substring test, one hit per phrase in the list
  (so overlapping phrases such as "scale" and "scale up" each count).
  Non-string input scores zero hits.

  Returns:
    3 for two or more hits, 2 for exactly one, 1 for none.
  """
  indicator_phrases = ("scale", "expand", "increased demand", "scale-up", "scale up", "scalability")

  hits = 0
  if isinstance(solution, str):
    hits = sum(1 for phrase in indicator_phrases if phrase in solution.lower())

  # 0 hits -> 1, 1 hit -> 2, anything above -> 3
  return {0: 1, 1: 2}.get(hits, 3)

def compute_feasibility_rating(solution):
  """Rate feasibility from the number of feasibility keywords found in ``solution``.

  Each keyword is looked up as a case-insensitive substring and contributes at
  most one hit; a value that is not a string contributes none.

  Returns:
    1 when no keyword is present, 2 for a single keyword, 3 for two or more.
  """
  keywords = ("feasible", "feasibility", "viable", "viability","practical", "implement", "implementable", "doable")

  if not isinstance(solution, str):
    matched = []
  else:
    matched = [kw for kw in keywords if kw in solution.lower()]

  if len(matched) >= 2:
    return 3
  return 2 if len(matched) == 1 else 1


def compute_impact_rating(solution):
  """Rate impact from the number of impact phrases found in ``solution``.

  Phrases are matched as case-insensitive substrings, one hit per phrase.
  Input that is not a string is treated as containing no phrases.

  Returns:
    3 for at least two hits, 2 for one hit, 1 otherwise.
  """
  impact_phrases = ("environmental impact", "financial impact", "efficiency", "productivity", "cost savings", "save cost", "save time", "time savings", "flexibility")

  hit_count = 0
  if isinstance(solution, str):
    for phrase in impact_phrases:
      hit_count += int(phrase in solution.lower())

  # Cap the hit count at 2, then shift so the result lands on the 1-3 scale.
  return min(hit_count, 2) + 1

def circular_economy_relevance(solution):
  """Rate adherence to circular-economy principles from keyword hits in ``solution``.

  Each distinct phrase in the list is matched as a case-insensitive substring and
  counts once. Non-string input scores zero hits.

  Args:
    solution: Solution text (any non-string value yields the lowest rating).

  Returns:
    3 for three or more hits, 2 for one or two, 1 for none.
  """
  # Every phrase appears exactly once: "sustainable" used to be listed twice,
  # which made that single word count as two hits.
  circular_economy_words = [
    "regenerate", "regeneration", "recycling", "waste reduction", "waste elimination",
    "pollution reduction", "circulation", "remanufacturing", "increased product lifespan",
    "zero waste", "resource recovery", "material reusability", "reusable", "lifespan", "eco-friendly", "renewable", "sustainable",
    "circular sourcing", "circular supply chain", "green", "environmental friendly",
    "sustainability", "reuse"]
  total = 0

  if isinstance(solution, str):
    text = solution.lower()  # lower-case once instead of once per phrase
    total = sum(1 for word in circular_economy_words if word in text)

  #rating based on how many indicator words appear
  if total >= 3:
    return 3
  elif total >= 1:
    return 2
  else:
    return 1

def compute_market_potential_rating(solution):
  """Rate market potential from the number of market-related indicators in ``solution``.

  Multi-word phrases and ordinary words are matched as case-insensitive
  substrings, one hit each. The acronym "USP" is matched case-insensitively as a
  whole word (optionally plural) so that it is actually detectable and does not
  fire inside words such as "suspend". Non-string input scores zero hits.

  Args:
    solution: Solution text (any non-string value yields the lowest rating).

  Returns:
    3 for two or more hits, 2 for exactly one, 1 for none.
  """
  import re  # local import: the notebook does not import re at module level

  market_potential_words = ["competitive advantage", "target audience", "market demand",
                            "market share","revenue", "sales", "profitability", "competitor",
                            "market expansion", "industry growth", "growth rate",
                            "market trends", "expanding market", "market penetration",
                            "unique selling proposition", "product differentiation",
                            "technological advancements"]
  total = 0
  if isinstance(solution, str):
    text = solution.lower()
    total = sum(1 for word in market_potential_words if word in text)
    # The text is lower-cased, so the upper-case literal "USP" could never match;
    # look for the lower-case acronym as a standalone word instead.
    if re.search(r"\busps?\b", text):
      total += 1

  #rating based on how many indicator words appear
  if total >= 2:
    return 3
  elif total == 1:
    return 2
  else:
    return 1


def compute_overall_rating(problem, solution):
  """Score a problem/solution pair on seven criteria and rank it against the dataset.

  The texts are flattened to a single line, stripped of stop words and
  punctuation, and passed to the individual raters. The seven ratings are
  averaged, and the average is ranked against the ``average_rating`` column of
  the module-level ``problems_solutions_sorted_df``.

  Args:
    problem: Problem statement (str).
    solution: Proposed solution (str).

  Returns:
    A tuple ``(ratings_dict, average, summary)``:
      * ``ratings_dict`` - the flattened texts, the seven individual ratings and
        the unrounded ``average_rating``;
      * ``average`` - the average rounded to two decimals;
      * ``summary`` - a sentence stating the rank of this pair among all stored
        pairs plus this one.
  """
  # Line breaks become spaces so words on adjacent lines are not fused together;
  # the strip removes the padding this leaves when the text starts/ends with a newline.
  problem = problem.replace('\n', ' ').strip()
  solution = solution.replace('\n', ' ').strip()
  problem_processed = remove_stop_punct(problem)
  solution_processed = remove_stop_punct(solution)

  problem_relevance_rating = compute_similarity_rating(problem_processed, solution_processed)
  scalability_rating = compute_scalability_rating(solution_processed)
  feasibility_rating = compute_feasibility_rating(solution_processed)
  impact_rating = compute_impact_rating(solution_processed)
  market_potential_rating = compute_market_potential_rating(solution_processed)
  ce_rating = circular_economy_relevance(solution_processed)
  uniqueness_rating = compute_uniqueness_rating(solution_processed)

  all_ratings = [problem_relevance_rating, scalability_rating, feasibility_rating,
                 impact_rating, market_potential_rating, ce_rating, uniqueness_rating]
  avg_rating = sum(all_ratings)/len(all_ratings)

  ratings_dict = {
      "problem": problem,
      "solution": solution,
      "relevance_to_problem": problem_relevance_rating,
      "scalability": scalability_rating,
      "feasibility": feasibility_rating,
      "impact": impact_rating,
      "market_potential": market_potential_rating,
      "adherence to circular economy principles": ce_rating,
      "novelty": uniqueness_rating,
      "average_rating": avg_rating
  }

  # Rank = 1 + number of stored pairs with a strictly higher average, so ties
  # share the best position. Computing it from the scores avoids re-locating the
  # new row by text equality after a sort, which could pick up a pre-existing
  # row with the same text and depended on the sort's arbitrary tie order.
  existing_averages = problems_solutions_sorted_df['average_rating']
  rank = int((existing_averages > avg_rating).sum()) + 1
  row_count = len(problems_solutions_sorted_df) + 1  # stored pairs plus this one

  summary = f"Your solution for this problem ranked {rank} out of {row_count} problem solution pairs"

  return ratings_dict, round(avg_rating, 2), summary




In [None]:
# Worked example 1: product-as-a-service model for consumer electronics.
# Off by default; define RUN_EXAMPLES = True in an earlier cell to execute it.
# (A flag is used instead of wrapping the cell in ''' because the texts below are
# themselves triple-quoted, which ends an outer ''' string early and leaves a SyntaxError.)
if globals().get("RUN_EXAMPLES", False):
  example_problem_1 = '''
In the electronics industry, the rapid pace of technological advancements has led to a significant increase in electronic waste, with discarded devices contributing to environmental pollution. Traditional linear production and consumption models result in resource depletion, pollution, and the loss of valuable materials.
'''
  example_solution_1 = '''
Introduce a circular economy approach to the electronics industry by implementing a product-as-a-service model. Consumers would subscribe to electronic devices rather than owning them outright, fostering a culture of reuse and recycling. Devices would be designed for easy disassembly, enabling the recovery of valuable materials for use in new products. This circular model aims to reduce electronic waste, conserve resources, and promote sustainable practices in the electronics industry.
'''
  ratings_1, avg_rating_1, rank_1 = compute_overall_rating(example_problem_1, example_solution_1)
  print(ratings_1)
  print(avg_rating_1)
  print(rank_1)

{'problem': 'In the electronics industry, the rapid pace of technological advancements has led to a significant increase in electronic waste, with discarded devices contributing to environmental pollution. Traditional linear production and consumption models result in resource depletion, pollution, and the loss of valuable materials.', 'solution': 'Introduce a circular economy approach to the electronics industry by implementing a product-as-a-service model. Consumers would subscribe to electronic devices rather than owning them outright, fostering a culture of reuse and recycling. Devices would be designed for easy disassembly, enabling the recovery of valuable materials for use in new products. This circular model aims to reduce electronic waste, conserve resources, and promote sustainable practices in the electronics industry.', 'relevance_rating': 3, 'scalability_rating': 1, 'feasibility_rating': 2, 'impact_rating': 1, 'market_potential_rating': 1, 'circular_economy_rating': 3, 'av

In [None]:
# Worked example 2: circular model for fast fashion.
# Off by default; define RUN_EXAMPLES = True in an earlier cell to execute it.
# (A flag is used instead of wrapping the cell in ''' because the texts below are
# themselves triple-quoted, which ends an outer ''' string early and leaves a SyntaxError.)
if globals().get("RUN_EXAMPLES", False):
  example_problem_2 = '''
Fast fashion has led to a culture of disposable clothing, contributing to environmental degradation. The textile industry generates massive amounts of waste, from both production leftovers and discarded garments. This waste often ends up in landfills, leading to pollution and resource depletion.
'''

  example_solution_2 = '''
Implementing a circular fashion model that emphasizes recycling, upcycling, and sustainable production practices. This involves creating a closed-loop system where clothing items are designed for recyclability, and post-consumer garments are collected, processed, and reintroduced into the production cycle. Additionally, encouraging consumers to embrace sustainable fashion choices, such as buying from eco-friendly brands and participating in clothing swap initiatives, helps reduce the overall environmental impact of the fashion industry.
'''

  ratings_2, avg_rating_2, rank_2 = compute_overall_rating(example_problem_2, example_solution_2)
  print(ratings_2)
  print(avg_rating_2)
  print(rank_2)

{'problem': 'Fast fashion has led to a culture of disposable clothing, contributing to environmental degradation. The textile industry generates massive amounts of waste, from both production leftovers and discarded garments. This waste often ends up in landfills, leading to pollution and resource depletion.', 'solution': 'Implementing a circular fashion model that emphasizes recycling, upcycling, and sustainable production practices. This involves creating a closed-loop system where clothing items are designed for recyclability, and post-consumer garments are collected, processed, and reintroduced into the production cycle. Additionally, encouraging consumers to embrace sustainable fashion choices, such as buying from eco-friendly brands and participating in clothing swap initiatives, helps reduce the overall environmental impact of the fashion industry.', 'relevance_rating': 2, 'scalability_rating': 1, 'feasibility_rating': 2, 'impact_rating': 2, 'market_potential_rating': 1, 'circul

In [None]:
# Worked example 3: e-waste recycling programme with modular device design.
# Off by default; define RUN_EXAMPLES = True in an earlier cell to execute it.
# (A flag is used instead of wrapping the cell in ''' because the texts below are
# themselves triple-quoted, which ends an outer ''' string early and leaves a SyntaxError.)
if globals().get("RUN_EXAMPLES", False):
  example_problem_3 = '''
The electronics industry faces a growing challenge of electronic waste (e-waste) as consumers regularly discard outdated devices. E-waste contains hazardous materials and valuable resources, making its improper disposal harmful to the environment and a missed opportunity for resource recovery.
'''

  example_solution_3 = '''
Establishing a comprehensive e-waste recycling program to collect, disassemble, and recycle electronic devices. This involves creating specialized facilities equipped to handle e-waste safely and efficiently. Additionally, designing electronics with modular components and standardized interfaces can facilitate easier repair and upgrading, extending the lifespan of devices. Promoting consumer awareness about responsible e-waste disposal and incentivizing electronics manufacturers to adopt sustainable design practices contribute to a circular economy in the electronics industry.
'''

  ratings_3, avg_rating_3, rank_3 = compute_overall_rating(example_problem_3, example_solution_3)
  print(ratings_3)
  print(avg_rating_3)
  print(rank_3)

{'problem': 'The electronics industry faces a growing challenge of electronic waste (e-waste) as consumers regularly discard outdated devices. E-waste contains hazardous materials and valuable resources, making its improper disposal harmful to the environment and a missed opportunity for resource recovery.', 'solution': 'Establishing a comprehensive e-waste recycling program to collect, disassemble, and recycle electronic devices. This involves creating specialized facilities equipped to handle e-waste safely and efficiently. Additionally, designing electronics with modular components and standardized interfaces can facilitate easier repair and upgrading, extending the lifespan of devices. Promoting consumer awareness about responsible e-waste disposal and incentivizing electronics manufacturers to adopt sustainable design practices contribute to a circular economy in the electronics industry.', 'relevance_rating': 3, 'scalability_rating': 1, 'feasibility_rating': 1, 'impact_rating': 1

In [None]:
#ensemble model
from statistics import mode
#combine three ratings into one: the value at least two of them share, or 2 when all three differ
def average_of_closest_two(values):
    """Combine three ratings into a single consensus rating.

    Args:
        values: An iterable of exactly three ratings.

    Returns:
        The value that occurs at least twice, or 2 when all three differ.

    Raises:
        ValueError: If ``values`` does not contain exactly three items.

    NOTE(review): despite the name, no averaging of the two closest values is
    performed; the decision rule below is kept as written — confirm that a
    majority vote is the intended behaviour.
    """
    # The body used value1..value3 without ever defining them; unpack the argument.
    value1, value2, value3 = values
    if value1 != value2 and value1 != value3 and value2 != value3:
        return 2
    else:
        return mode([value1, value2, value3])
