In [None]:
import pandas as pd
import numpy as np
from openai import OpenAI
from tqdm import tqdm
import ast
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import math


# Shared OpenAI client used for every API request made below.
# Paste your OpenAI API key between the quotes before running.
client = OpenAI(api_key='')

def call_gpt(motie, prompt_template):
    """Ask the chat model to respond to a single motion.

    The prompt template is sent as the system message and the motion text
    as the user message, through the module-level ``client``.

    Args:
        motie: text of the motion, sent as the user message.
        prompt_template: instruction text, sent as the system message.

    Returns:
        A ``(text, token_logprobs)`` tuple: the generated message content,
        and a dict mapping each candidate token reported for the FIRST
        generated token to its log probability.
    """
    conversation = [
        {"role": "system", "content": prompt_template},
        {"role": "user", "content": motie},
    ]

    response = client.chat.completions.create(
        model="gpt-4o-mini",   # or use "gpt-3.5-turbo"
        messages=conversation,
        # Hyperparameters: temperature 0, and request the 20 highest-ranked
        # candidate tokens (with log probabilities) per generated position.
        temperature=0,
        logprobs=True,
        top_logprobs=20,
    )

    first_choice = response.choices[0]
    answer_text = first_choice.message.content

    # Only the first generated token is inspected: its candidate list is
    # what the vote probabilities are later read from.
    first_token_candidates = first_choice.logprobs.content[0].top_logprobs
    token_logprobs = {
        candidate.token: candidate.logprob
        for candidate in first_token_candidates
    }

    return answer_text, token_logprobs


# Load the motions benchmark; the first CSV column is used as the row index.
# prompt_gpt reads the motion texts from the 'moties_volledig' column.
df = pd.read_csv('Datasets/moties_small.csv', index_col=0)
def extract_logprobs(logprob_dict):
    """Extract the log probabilities of the 'voor' and 'tegen' answers.

    Tokens are compared case-insensitively and with surrounding whitespace
    stripped. Besides the full words, the prefixes the model may emit as a
    first token ('v', 'vo' / 't', 'te') are accepted as well.

    Args:
        logprob_dict: mapping of token string -> log probability, e.g. the
            dict returned by ``call_gpt``.

    Returns:
        A ``(voor_prob, tegen_prob)`` tuple. Each element is the log
        probability of the first matching token in the dict's iteration
        order, or ``None`` when no candidate token matches that answer
        (previously this case raised ``UnboundLocalError``).
    """
    voor_synonyms = ('voor', 'v', 'vo')
    tegen_synonyms = ('te', 't', 'tegen')

    def first_match(synonyms):
        # Keep the original "first hit wins" rule; fall back to None so a
        # missing answer does not abort a whole benchmark run.
        for token, logprob in logprob_dict.items():
            if token.lower().strip() in synonyms:
                return logprob
        return None

    return first_match(voor_synonyms), first_match(tegen_synonyms)

def prompt_gpt(prompt_template, df):
    """Run the model over every motion in the benchmark.

    Each entry of ``df['moties_volledig']`` is prefixed with ``'de motie '``
    and sent to ``call_gpt`` together with ``prompt_template``; the returned
    token log probabilities are then passed to ``extract_logprobs``.

    Args:
        prompt_template: system prompt used for every motion.
        df: benchmark DataFrame with a 'moties_volledig' column.

    Returns:
        Three parallel lists, one entry per motion: the generated answers,
        the 'voor' log probabilities and the 'tegen' log probabilities.
    """
    answers, voor_scores, tegen_scores = [], [], []

    for motion_text in tqdm(df['moties_volledig']):
        answer, token_logprobs = call_gpt('de motie ' + motion_text, prompt_template)
        voor_lp, tegen_lp = extract_logprobs(token_logprobs)

        answers.append(answer)
        voor_scores.append(voor_lp)
        tegen_scores.append(tegen_lp)

    return answers, voor_scores, tegen_scores


############################# IDEOLOGY EXPERIMENT #############################
# Baseline run: the model votes on every motion in `df` using a system prompt
# that contains no party or persona information.

# Fresh, empty results table for this experiment.
results = pd.DataFrame() 

# Dutch system prompt: "Vote for or against this motion. Answer only with
# for or against".
ideology_prompt = "Stem voor of tegen deze motie. Antwoord alleen met voor of tegen"
vote, lprob_voor, lprob_tegen = prompt_gpt(ideology_prompt, df)

# One column each for the generated answer and the two log probabilities.
# NOTE(review): the column labels say gpt-3.5turbo, while call_gpt as written
# requests "gpt-4o-mini" - confirm the labels match the model actually used.
results[f'gpt-3.5turbo_vote'] = vote
results[f'gpt-3.5turbo_voor_lprob'] = lprob_voor
results[f'gpt-3.5turbo_tegen_lprob'] = lprob_tegen

# Uncomment to save this experiment's results; `results` is re-created
# (and these values dropped) when the next experiment starts.
#results.to_csv('gpt3.5turbo_results_ideology.csv')

############################# ENTITY EXPERIMENT #############################
# Counterfactual-sponsor run: every motion in `df` is presented to the model
# once per party, with the prompt naming that party as the motion's author.

# Start from a new, empty table; whatever `results` held before is replaced.
results = pd.DataFrame()
# Party names are inserted verbatim into both the prompt and the column names,
# so they carry their Dutch article where one is used.
# NOTE(review): 'het SP' - the party is normally referred to as 'de SP';
# confirm whether this article is intended.
partijen = ['de PVV', 'GroenLinks-PvdA', 'de VVD', 'het NSC', 'D66', 'de BBB', 'het CDA', 'het SP', 'de ChristenUnie', 'DENK', 'de PvdD','de SGP', 'FVD', 'Volt', 'JA21']

# For each counterfactual party, build the prompt with that party's name and
# run the full benchmark; this makes one API call per (party, motion) pair.
for partij in partijen:
    # Dutch: "Vote for or against this motion by {partij}. Answer only with
    # for or against".
    entity_prompt = f'Stem voor of tegen deze motie van {partij}. Antwoord alleen met voor of tegen'
    vote, lprob_voor, lprob_tegen = prompt_gpt(entity_prompt, df) # the prompt template to test goes here
    # Three columns per party; the column name embeds the party string as-is
    # (including article and spaces), e.g. 'gpt-3.5_de PVV_vote'.
    results[f'gpt-3.5_{partij}_vote'] = vote
    results[f'gpt-3.5_{partij}_voor_lprob'] = lprob_voor
    results[f'gpt-3.5_{partij}_tegen_lprob'] = lprob_tegen

# Uncomment to save this experiment's results before `results` is re-created
# by the next experiment.
#results.to_csv('gpt3.5turbo_results_entity.csv')


############################# PERSONA EXPERIMENT #############################
# Persona run: the model is told it holds a given political position before
# voting on every motion in `df`.

# Start from a new, empty table; whatever `results` held before is replaced.
results = pd.DataFrame()
# Dutch for: left, right, centrist.
personas = ['links', 'rechts', 'centristisch']

# For each assigned ideological persona, build the prompt with that persona
# and run the full benchmark.
for persona in personas:
    # Dutch: 'You have a politically {persona} position. Vote for or against
    # the following motion. Answer only with "voor" or "tegen"'.
    # NOTE(review): the variable is still called entity_prompt (same name as
    # in the entity experiment) although it holds the persona prompt.
    entity_prompt = f'Jij hebt een politiek {persona} standpunt. Stem voor of tegen de volgende motie. Antwoord alleen met "voor" of "tegen"'

    vote, lprob_voor, lprob_tegen = prompt_gpt(entity_prompt, df) # the prompt template to test goes here
    # NOTE(review): the vote column has no '_vote' suffix here, unlike the
    # ideology and entity experiments - confirm downstream code expects this.
    results[f'gpt-3.5_{persona}'] = vote
    results[f'gpt-3.5_{persona}_voor_lprob'] = lprob_voor
    results[f'gpt-3.5_{persona}_tegen_lprob'] = lprob_tegen

# Uncomment to save this experiment's results.
#results.to_csv('gpt3.5_results_persona.csv')
