# Imports

Libraries

In [227]:
# Basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# PyTorch libraries
import torch

#Neural network libraries
import torch.nn as nn
import torch.nn.functional as F

# Optimizers
import torch.optim as optim

# For terminating program (useful for stopping before training is complete and seeing entire result)
import signal
import sys, os

CSV File

In [228]:
# Resolve the dataset path against the notebook's working directory
# (os.path.abspath('') evaluates to the current directory under Jupyter).
script_dir = os.path.abspath('')
file_path = os.path.join(script_dir, "AI EarthHack Dataset.csv")

# header=None labels the columns 0, 1, 2, ... and keeps the CSV's own header
# line in the frame as row 0, so the problem text is addressed as column 1.
dataset = pd.read_csv(filepath_or_buffer=file_path, header=None)
print(dataset.loc[:, 1])

0                                                 problem
1       The construction industry is indubitably one o...
2       I'm sure you, like me, are feeling the heat - ...
3       The massive shift in student learning towards ...
4       The fashion industry is one of the top contrib...
                              ...                        
1296    The linear 'take, make, dispose' model of prod...
1297    The conundrum we face is the improper disposal...
1298               This solution will help the vegetation
1299    Accumulation and improper disposal of single-u...
1300    The excessive and wasteful resource consumptio...
Name: 1, Length: 1301, dtype: object


# Zero-Shot Classification

In [229]:
from transformers import pipeline

# TODO: look into changing the model; bart-large-mnli seems to be the most popular for ZSC.
# Other options are T5, GPT, and RoBERTa.
#
# Models tried so far:
# - facebook/bart-large-mnli
# - sjrhuschlee/flan-t5-base-mnli
# - google/flan-t5-base

# Zero-shot classification pipeline backed by the currently selected checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="sjrhuschlee/flan-t5-base-mnli",
)

# Example class descriptions and input text
# input_text = [["Problem: "+str(dataset[1][i]), "Solution: "+str(dataset[2][i])] for i in range(1,3)]
# Bad solutions: 39, 52, 22
# Good solutions: 3, 117

# Candidate label texts for the zero-shot classifier, one two-element list per
# criterion: index 0 is the positive statement (the scoring loop keeps the score
# of this entry), index 1 is the negative statement.
# NOTE(review): the triple-quoted labels keep their leading/trailing newlines and
# indentation, so that whitespace is part of the text handed to the classifier.

# Relevance: detailed and on-topic vs. sloppy, off-topic or vague submissions.
class_descriptions1=[
    """
    The problem and solution are detailed, specific, and in-depth.
    Bonus points if they are related to circular economy.
    """,
    """
    The problem and/or solution are sloppy, off-topic (i.e., not sustainability related), unsuitable, or vague (such as the over-generic content that prioritizes form over substance, offering generalities instead of specific details).
    In addition, inputs such as, 'Problem: I will research to make solution Solution: Hi' belong in this category
    """
]

# Feasibility: feasible vs. too broad in scope or too resource-hungry to execute.
class_descriptions2=[
    """
    The solution is feasible.
    """,
    """
    The solution is not feasible.
    This can mean that the solution is too wide in scope to be easily implemented, or puts unreasonable expectations on people, businesses, or the world as a whole.
    Any solution that would take a large amount of resources or manpower to execute successfully would also be considered not feasible.
    """
]

# Innovation: new and novel vs. lacking originality.
class_descriptions3=[
    """
    The solution is innovative.
    A solution is innovative if it imagines something completely new and novel.
    """,
    """
    The solution is not innovative.
    A solution is considered not innovative if it does not present any originality.
    """
]

# Scalability: can start small and grow vs. limited to the small scale.
class_descriptions4 = [
    """
    The solution is scalable.
    A scalable solution will often provide a clear path on how the solution can be scaled in the future, if required.
    A scalable solution is one that can start small, and grows to have a larger area of effect over time.
    """,
    """
    The solution is not scalable.
    These solutions are ones that only focus on the small scale, and could not easily be done on a large scale.
    """
]

# Candidate label texts for circular-economy pertinence.
# Index 0 is the positive statement ("is pertinent"), followed by background text
# on what a circular economy is; index 1 is the negative statement.
# The positive label previously also began with "is not pertinent", which made
# both candidate labels assert the same thing.
class_descriptions5 = [
    """
    The solution is pertinent to a circular economy.
    In today’s rapidly evolving world, climate change stands as a formidable problem, deeply influencing our daily lives and the health of our planet. The circular economy, with its focus on reusing and recycling resources to minimize waste, emerges as a crucial strategy in this battle. Innovations like car-sharing platforms significantly reduce the carbon footprint of transportation, while modular designs in various products promote waste reduction by allowing individual components to be upgraded rather than discarding the entire item.
    In the face of climate change's criticality, the urgency to identify and implement high-impact circular economy solutions has never been greater. The challenge we confront today, however, extends beyond coming up with solutions to confront this problem. It lies in the daunting task of effectively evaluating a vast and diverse array of solutions, discerning the most impactful ones amidst a sea of possibilities. This process can be overwhelming, given the complexity and the sheer volume of potential solutions, leading to cognitive overload for human evaluators.
    """,
    "The solution is not pertinent to a circular economy."
]

# Maps each scoring criterion to its [positive, negative] candidate label list.
# "Circ. Econ" must use class_descriptions5 (circular-economy labels); it was
# previously wired to class_descriptions4, which duplicated the scalability labels.
all_descriptions = {
    "Relevance": class_descriptions1,
    "Feasibility": class_descriptions2,
    "Innovation": class_descriptions3,
    "Scalability": class_descriptions4,
    "Circ. Econ": class_descriptions5,
}

# Per-criterion accumulator: each key starts with its own empty list, to which
# the positive-label scores are appended as rows are classified.
scores = {
    criterion: []
    for criterion in (
        "Relevance",
        "Feasibility",
        "Innovation",
        "Scalability",
        "Circ. Econ",
    )
}

# Scalability 40

# Classify a fixed slice of dataset rows (130-159) against one criterion and keep
# the score the classifier assigns to that criterion's positive label (index 0).
# To score every criterion, wrap this in `for category in all_descriptions:`.
category = "Relevance"
candidate_labels = all_descriptions[category]
positive_label = candidate_labels[0]

for i in range(130, 160):
    # Strip runs of four double quotes from the solution text before building the prompt.
    solution = str(dataset[2][i]).replace('""""', "")
    input_text = f"Problem: {dataset[1][i]} Solution: {solution}"

    result = classifier(input_text, candidate_labels)

    # The result pairs labels with scores position by position; pick out the
    # positive label's score.
    for label, score in zip(result["labels"], result["scores"]):
        if label == positive_label:
            scores[category].append(score)
            # Print the score directly: the former global counter `j` only indexed
            # scores[category] correctly while a single category was being scored.
            print(f"{dataset[0][i]}: {score}")
            break

print(scores)

130: 0.9291163682937622
131: 0.8599734306335449
132: 0.9773789048194885
133: 0.8654137849807739
134: 0.8771269917488098
135: 0.8680894374847412
136: 0.9236605167388916
137: 0.9738019108772278
138: 0.9493357539176941
139: 0.9175654649734497
140: 0.9312721490859985
141: 0.4660247564315796
142: 0.9688311815261841
143: 0.9806049466133118
144: 0.8419217467308044
145: 0.9897776246070862
146: 0.9692981839179993
147: 0.8970195651054382
148: 0.9832897186279297
149: 0.793354868888855
150: 0.8537344336509705
151: 0.9407964944839478
152: 0.9167953729629517
153: 0.9715615510940552
154: 0.005652780644595623
155: 0.9821417331695557
156: 0.9849273562431335
157: 0.579616367816925
158: 0.9503622651100159
159: 0.7431895732879639
{'Relevance': [0.9291163682937622, 0.8599734306335449, 0.9773789048194885, 0.8654137849807739, 0.8771269917488098, 0.8680894374847412, 0.9236605167388916, 0.9738019108772278, 0.9493357539176941, 0.9175654649734497, 0.9312721490859985, 0.4660247564315796, 0.9688311815261841, 0.980