In [15]:
from data_common.notebook import *

# catalog.py incorporates this into the main flow

In [92]:
import os
import openai
import json
from tqdm import tqdm

# Pull the API key from the environment so the secret never lives in the notebook.
# The lookup yields None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")


class ResultsCache:
    """
    Dict-like store of classification results, persisted as JSON on disk.

    Keys are the petition texts that were classified; values are the result
    dicts stored for them. An existing cache file is loaded on construction,
    and every write is flushed back to ``filename``.
    """

    # location of the on-disk cache, relative to the working directory
    filename = Path("data", "interim", "openai_cache.json")

    def __init__(self):
        """Load the cache file if it exists, otherwise start with an empty cache."""
        if self.filename.exists():
            with open(self.filename, "r") as f:
                self.cache = json.load(f)
        else:
            self.cache = {}

    def __contains__(self, key: str) -> bool:
        """Return True if a result is stored for ``key``."""
        return key in self.cache

    def __getitem__(self, key: str):
        """Return the stored result for ``key``; raises KeyError if absent."""
        return self.cache[key]

    def __setitem__(self, key: str, value: dict):
        """Store a single result and write the cache to disk immediately."""
        self.cache[key] = value
        self.save()

    def save(self):
        """Write the whole cache to ``filename`` as indented JSON."""
        # the target directory may not exist yet, so create it before writing
        self.filename.parent.mkdir(parents=True, exist_ok=True)
        with open(self.filename, "w") as f:
            json.dump(self.cache, f, indent=4)

    def stash_pairs(self, petitions: list[str], results: list[dict]):
        """
        Store ``results[i]`` under ``petitions[i]`` for every pair, then save once.

        Raises ValueError if the two lists differ in length, because pairing
        them up would otherwise silently drop or mis-assign results.
        """
        if len(petitions) != len(results):
            raise ValueError(
                f"Cannot cache {len(results)} results for {len(petitions)} petitions"
            )
        if not petitions:
            # nothing new to persist
            return
        # update in memory and write the file a single time, rather than
        # rewriting the whole file once per pair
        self.cache.update(zip(petitions, results))
        self.save()


def is_environmental_only_ten(
    list_of_petitions: list[str], recursion: int = 0
) -> list[dict[str, str]]:
    """
    Ask the OpenAI API whether each petition in a small batch is environmental in nature.

    Parameters:
        list_of_petitions: petition texts to classify; at most 10 items.
        recursion: number of attempts already used. Callers normally leave this
            at 0; it is kept so existing call sites continue to work.

    Returns:
        The JSON list parsed from the model's reply, with the same length as
        ``list_of_petitions``. An empty input returns an empty list without
        calling the API.

    Raises:
        ValueError: if more than 10 petitions are passed, or if the API keeps
            returning invalid JSON or a list of the wrong length after the
            retry budget is used up.
    """

    if len(list_of_petitions) > 10:
        raise ValueError("List of petitions must be at most 10 items")

    if not list_of_petitions:
        # nothing to classify, so do not spend an API call on it
        return []

    base_prompt = """Evaulate based on the names of a list of petitions made to the UK Parliament if they are enviromental in nature
    Environmental petitions are those related to climate change, net zero, carbon emissions, air pollution, water pollution, wildlife, ecology, forests, hunting, active travel, cycling, footpaths, etc.
    The original list is a Json encoded list of strings.
    Return a json list as the input list with the structure ["result": bool, "explanation": str, "stub": str (first ten characters of petition)].
    JSON bools are true and false, not True and False.
    The input and the output must have the same length.

    Petition names:
    """

    prompt = base_prompt + json.dumps(list_of_petitions) + "\n\nOutput:\n"

    # a retry is still allowed while the attempt counter is <= max_retries
    max_retries = 5
    attempt = recursion

    while True:
        response = openai.Completion.create(
            model="text-davinci-003",
            prompt=prompt,
            temperature=0.7,
            max_tokens=256 * 4,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        raw_text = response.choices[0].text  # type: ignore

        try:
            data = json.loads(raw_text)
        except json.JSONDecodeError as err:
            # sampling is non-deterministic (temperature 0.7), so a malformed
            # reply is retried in the same way as a wrong-length one
            message, payload, cause = "OpenAI API returned invalid JSON", raw_text, err
        else:
            # only accept a JSON list with exactly one entry per petition
            if isinstance(data, list) and len(data) == len(list_of_petitions):
                return data
            message = "OpenAI API is not returning the full list of results"
            payload, cause = data, None

        if attempt > max_retries:
            # show the last bad reply to help debugging before giving up
            print(payload)
            raise ValueError(message) from cause
        attempt += 1


def is_environmental(
    list_of_petitions: list[str], ignore_cache: bool = False
) -> list[dict[str, str]]:
    """
    Classify petitions as environmental or not, using cached results where available.

    Parameters:
        list_of_petitions: petition texts to classify; any length, duplicates allowed.
        ignore_cache: when True, every distinct petition is sent to the API
            again and its cached result is overwritten.

    Returns:
        One result dict per input petition, in input order.
    """

    cache = ResultsCache()

    # drop duplicate texts (keeping first-seen order) so each distinct
    # petition is sent to the API at most once
    unique_petitions = list(dict.fromkeys(list_of_petitions))

    if ignore_cache:
        not_in_cache = unique_petitions
    else:
        not_in_cache = [
            petition for petition in unique_petitions if petition not in cache.cache
        ]

    if not_in_cache:
        # send 10 at a time
        for i in tqdm(range(0, len(not_in_cache), 10)):
            batch = not_in_cache[i : i + 10]
            # stash each batch as soon as it returns, so a failure in a later
            # batch does not throw away results already obtained
            cache.stash_pairs(batch, is_environmental_only_ten(batch))

    return [cache[petition] for petition in list_of_petitions]

In [93]:
# Load the petitions, keep those with more than 1000 signatures (largest first),
# and take an explicit copy so the new columns below are added to an independent
# frame rather than to a slice of the loaded data.
df = (
    pd.read_csv(Path("data", "interim", "petitions.csv"))
    .sort_values(by="signature_count", ascending=False)
    .loc[lambda frame: frame["signature_count"] > 1000]
    .copy()
)

# Join the action and background columns into the text that gets classified.
# Missing values are replaced with "" first: string + NaN yields NaN, which
# would otherwise be passed to the classifier as a float instead of text.
full_text = df["action"].fillna("") + " " + df["background"].fillna("")

# run environment check on full_text
results = is_environmental(full_text.tolist())

df["is_environmental"] = [result["result"] for result in results]
df["openai_explanation"] = [result["explanation"] for result in results]

df

100%|██████████| 1/1 [00:02<00:00,  2.94s/it]


Unnamed: 0,id,url,state,action,background,additional_details,signature_count,date_created,date_responded,date_debated,is_environmental,openai_explanation
1228,619781,https://petition.parliament.uk/petitions/61978...,open,Call an immediate general election to end the ...,Call an immediate general election so that the...,The chaos engulfing the UK government is unpre...,901912,2022-07-07T10:13:40.162Z,2022-09-20,2022-10-17,False,This petition does not relate to the environment.
423,590282,https://petition.parliament.uk/petitions/59028...,closed,Give the UK a Bank Holiday on Monday July 12th...,England may be playing a European Championship...,Sunday 8pm is a difficult time for families to...,364821,2021-06-30T09:01:23.069Z,2021-07-22,,False,This petition does not relate to the environment.
3479,319891,https://petition.parliament.uk/petitions/31989...,closed,Limit the Sale and Use of Fireworks to Organis...,Current legislation allows for public use of f...,Restrictions on the sale & use of fireworks ha...,301611,2020-05-08T13:59:43.344Z,2020-08-13,2021-11-08,True,This petition relates to air pollution and wil...
2516,326066,https://petition.parliament.uk/petitions/32606...,closed,Cancel GCSEs and A Levels in 2021,The Government should cancel GCSEs and A Level...,Students in the UK have already missed hundred...,211974,2020-06-09T21:32:29.897Z,2020-10-20,2020-12-07,False,This petition does not relate to the environment.
424,619334,https://petition.parliament.uk/petitions/61933...,open,Include abortion rights in the Bill of Rights,"As Parliament considers the Bill of Rights, th...",In the wake of the United States Supreme Court...,167481,2022-06-29T19:18:39.164Z,2022-09-20,2022-11-28,False,This petition does not relate to the environment.
...,...,...,...,...,...,...,...,...,...,...,...,...
3489,588774,https://petition.parliament.uk/petitions/58877...,closed,Exempt vehicles that use sustainable fuels fro...,The Government should exempt new car/van/motor...,Sustainable fuels could use existing pumps/inf...,1027,2021-06-14T09:32:34.352Z,,,True,Related to the environment
32,549072,https://petition.parliament.uk/petitions/54907...,closed,UK Government to build UK Aid/Disaster Relief ...,To regenerate the UK shipbuilding Industry by ...,The UK has no means of directly addressing dis...,1027,2020-08-20T07:40:24.363Z,,,True,Related to the environment
1117,600138,https://petition.parliament.uk/petitions/60013...,closed,Require Water Companies to Refund Customers Wh...,It’s a consumer rights and pollution issue. We...,Water companies have dumped sewage into rivers...,1021,2021-10-30T11:53:09.432Z,,,True,related to water pollution and consumer rights
3490,563118,https://petition.parliament.uk/petitions/56311...,closed,Exempt CO2 neutral Efuel & other cleaner fuel ...,Government should allow the sale & future deve...,"Alternative fuels use current infrastructure, ...",1019,2020-12-07T11:50:01.855Z,,,True,related to carbon emissions and cleaner fuels
