# Imports

Libraries

In [46]:
# Basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# PyTorch libraries
import torch

#Neural network libraries
import torch.nn as nn
import torch.nn.functional as F

# Optimizers
import torch.optim as optim

# For terminating program (useful for stopping before training is complete and seeing entire result)
import signal
import sys, os

CSV File

In [47]:
# Load the dataset CSV that sits in the notebook's working directory.
# os.path.abspath('') resolves an empty relative path to an absolute
# directory; it is used here because the notebook has no script path to rely on.
script_dir = os.path.abspath('')
file_path = os.path.join(
    script_dir,
    "AI EarthHack Dataset.csv",
)

# header=None keeps the CSV's own header line as data row 0 and labels the
# columns with integers, so column 1 is addressed as dataset[1] and its first
# entry is the header text rather than a real record.
dataset = pd.read_csv(filepath_or_buffer=file_path, header=None)

# Preview column 1 (the problem statements).
print(dataset[1])

0                                                 problem
1       The construction industry is indubitably one o...
2       I'm sure you, like me, are feeling the heat - ...
3       The massive shift in student learning towards ...
4       The fashion industry is one of the top contrib...
                              ...                        
1296    The linear 'take, make, dispose' model of prod...
1297    The conundrum we face is the improper disposal...
1298               This solution will help the vegetation
1299    Accumulation and improper disposal of single-u...
1300    The excessive and wasteful resource consumptio...
Name: 1, Length: 1301, dtype: object


# Zero-Shot Classification

In [96]:
from transformers import pipeline

# TODO: look into changing the model; bart-large-mnli seems to be the most
# popular choice for zero-shot classification (ZSC).
# Other options are T5, GPT, and RoBERTa.

# Models tried:
# - facebook/bart-large-mnli
# - sjrhuschlee/flan-t5-base-mnli
# - google/flan-t5-base

# Checkpoint currently in use; swap this name to try another model from the list above.
ZSC_MODEL_NAME = "sjrhuschlee/flan-t5-base-mnli"

# Shared zero-shot classification pipeline used by the scoring code below.
classifier = pipeline(task="zero-shot-classification", model=ZSC_MODEL_NAME)

# Sample input text for ad-hoc experiments.
# A list-of-pairs variant that was tried earlier:
# input_text = [["Problem: "+str(dataset[1][i]), "Solution: "+str(dataset[2][i])] for i in range(1,3)]
#
# Row indices worth inspecting by hand:
#   bad solutions:  39, 52, 22
#   good solutions: 3, 117

# Join columns 1 and 2 of row 39 into a single "Problem: ... Solution: ..."
# string; !s applies str() to each cell, exactly like an explicit str() call.
input_text = f"Problem: {dataset[1][39]!s} Solution: {dataset[2][39]!s}"

# Three-way overall rubric (best, middling, worst). It is defined here but is
# not part of the per-criterion table below.
class_descriptions1 = [
    "A problem-solution pair in which the solution is innovative, relevant to the problem, feasible, specific, and actionable",
    "A problem-solution pair in which the solution is relevant to the problem, but not necessarily innovative, feasible, specific, or actionable",
    "A problem-solution pair in which the solution is irrelevant, non-specific, not feasible, or not detailed",
]

# Candidate labels per criterion. In every pair the positive statement comes
# first and its negation second; code that reads these relies on index 0
# being the positive label.
# NOTE(review): two keys end with ':' and two do not. They are left as-is
# because they are runtime strings that show up in the printed scores.
all_descriptions = {
    "Relevance:": [
        "The solution is relevant to the problem",
        "The solution is not relevant to the problem",
    ],
    "Feasibility": [
        "The solution is feasible",
        "The solution is not feasible",
    ],
    "Innovation:": [
        "The solution is innovative",
        "The solution is not innovative",
    ],
    "Scalability": [
        "The solution is scalable",
        "The solution is not scalable",
    ],
}

# Individual names for each label pair; each one is the very same list object
# that is stored in all_descriptions.
class_descriptions2 = all_descriptions["Relevance:"]
class_descriptions3 = all_descriptions["Feasibility"]
class_descriptions4 = all_descriptions["Innovation:"]
class_descriptions5 = all_descriptions["Scalability"]

# One empty, independent score list per criterion, in the same key order.
scores = {criterion: [] for criterion in all_descriptions}

# Score the selected dataset rows (currently only row 7) against every
# criterion and record the score the classifier gives to the positive label.
# NOTE(review): the candidate labels are passed without a hypothesis_template,
# so the pipeline's default template wraps each full-sentence label; consider
# experimenting with a custom template - confirm against the pipeline docs.
for i in range(7, 8):
    # Same "Problem: ... Solution: ..." layout as the sample text; !s applies str().
    input_text = f"Problem: {dataset[1][i]!s} Solution: {dataset[2][i]!s}"

    for category, candidate_labels in all_descriptions.items():
        result = classifier(input_text, candidate_labels)

        # The positive statement is the first label of each pair; keep only
        # the score paired with that label in the classifier's result.
        positive_label = candidate_labels[0]
        scores[category].extend(
            score
            for label, score in zip(result["labels"], result["scores"])
            if label == positive_label
        )

print(scores)

{'Relevance:': [0.9870656728744507], 'Feasibility': [0.8657248616218567], 'Innovation:': [0.7322619557380676], 'Scalability': [0.5536961555480957]}
