In [None]:
import os
import pandas as pd
import google.generativeai as genai

# Read the API key from the environment so it is never stored in the notebook;
# a missing GEMINI_API_KEY variable raises KeyError on this line.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
# Register the key with the SDK before the model handle is created.
genai.configure(api_key=GEMINI_API_KEY)
# Shared model handle; the cells below call model.generate_content on it.
model = genai.GenerativeModel('gemini-pro')

In [None]:
# Load the input table. Later cells read its "feature" column as the news text
# that is interpolated into the prompt. The path is relative to the working directory.
data_df = pd.read_parquet("../dataset/temp_reuter_filtered.parquet")

In [None]:
# Smoke test: send a single prompt built from the first row and inspect the raw
# response before labelling the whole dataset.

# First value of the "feature" column, used as the sample news text.
sentence = data_df["feature"].iloc[0]

# NOTE(review): this prompt offers a -1 "not ESG related" option and a "news:"
# prefix, while the prompt inside generate_gpt_label has neither -- confirm
# which wording is intended, since this test does not exercise the other prompt.
template = f"""Given the following news, output -1 if the news is not related to ESG (environmental, social, and governance) and won't have any ESG impact. Output 0 if the ESG impact duration is below 2 years, 1 if the ESG impact duration is between 2 and 5 year and 2 if the ESG impact duration is more than 5 years. You only need to output the number, and do not need any further explanation.

news: {sentence}"""
    
# Show the exact prompt that will be sent.
print(template)

response = model.generate_content(template)
    
# Prints the whole response object, not just its text.
print(response)

In [None]:
# run this if the test can produce valid result

def generate_gpt_label(sentence):
    """Ask the Gemini model for the ESG impact-duration label of one news text.

    Args:
        sentence: News text that is appended to the instruction prompt.

    Returns:
        The integer parsed from the model reply (the prompt asks for 0, 1 or
        2), or None when the request fails, the reply text cannot be read, or
        the reply does not contain exactly one unambiguous integer.
    """
    import re  # local import: only needed to salvage a label from chatty replies

    # Same prompt text as before; the whitespace between the instruction and
    # the news text is preserved exactly so the model input does not change.
    template = f"Given the following news, output 0 if the ESG impact duration is below 2 years, 1 if the ESG impact duration is between 2 and 5 year and 2 if the ESG impact duration is more than 5 years. You only need to output the number, and do not need any further explanation.\n    \n    {sentence}"

    try:
        response = model.generate_content(template)
        message = response.text
    except Exception:
        # Best-effort labelling: an API error or an unreadable reply leaves
        # the row unlabeled. Catching Exception (not a bare except) lets
        # KeyboardInterrupt through so a long run can still be stopped.
        return None

    if not isinstance(message, str):
        return None

    # Fast path: the reply is just the number (surrounding whitespace is fine).
    try:
        return int(message)
    except ValueError:
        pass

    # Fallback for replies such as "Label: 1" or "1.": accept the reply only
    # when every integer in it is the same value, otherwise it is ambiguous.
    candidates = set(re.findall(r"-?\d+", message))
    if len(candidates) == 1:
        return int(candidates.pop())
    return None

# Label every row: generate_gpt_label is called once per "feature" value, i.e.
# one sequential model request per row. Rows for which it returns None end up
# as missing values in the new "gemini_label" column.
data_df["gemini_label"] = data_df["feature"].apply(generate_gpt_label)

In [None]:
# Persist the frame to a separate file, so the input parquet that was loaded
# earlier is not overwritten.
data_df.to_parquet("../dataset/temp_reuter_filtered_gemini.parquet")