In [None]:
import sys
sys.path.append('..')

import dotenv
from huggingface_hub import login
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from src.utils import read_config, plot_gender_distribution, BiasEvaluator, LocalLLMGenerator, concurrent_bias_evaluation
from openai import OpenAI
from src.prompts import prompt_story_generation, prompt_gender_detection
from peft import PeftModel


# Load variables from a local .env file (if present) into the process
# environment so the os.getenv lookups for API tokens can find them.
dotenv.load_dotenv()
# Authenticate with the Hugging Face Hub using the `huggingface_token`
# environment variable; os.getenv yields None when it is unset.
login(token=os.getenv('huggingface_token'))


# Reload imported modules automatically before each cell executes, so edits to
# the project code imported from `src` are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Client shared by every OpenAI request issued from this notebook; the key is
# read from the `openai_api_key` environment variable.
openai_client = OpenAI(api_key=os.getenv('openai_api_key'))

# Project settings, one YAML file per concern.
llm_configs = read_config('../configs/llm_config.yaml')
generation_config = read_config('../configs/generation_config.yaml')
dataset_config = read_config('../configs/dataset_config.yaml')

# Echo the generation and model settings (one per line) so the run records
# the configuration it used.
print(generation_config, llm_configs, sep='\n')

In [3]:
# Evaluator built from the OpenAI client, the model named under
# `gpt_model_to_check_gender` in the LLM config, and the gender-detection prompt.
bias_evaluator = BiasEvaluator(
    openai_client,
    llm_configs['gpt_model_to_check_gender'],
    prompt_gender_detection,
)


# Local LLM evaluation

In [4]:
# Name of the local generative model, taken from the LLM config.
local_model_name = llm_configs['local_generative_model_name']
# Load the tokenizer and the causal-LM weights registered under that name.
tokenizer = AutoTokenizer.from_pretrained(local_model_name)
model = AutoModelForCausalLM.from_pretrained(local_model_name)



In [5]:
# Location of a saved PEFT checkpoint, given relative to the working directory.
# NOTE(review): the path climbs two levels (../../), which appears to leave the
# project root added to sys.path — confirm the checkpoint exists there.
checkpoint_dir = "../../test_down_proj/gender_only_ckpt/checkpoint-500"
# Wrap the base model with the PEFT adapter stored in that checkpoint.
lora_model = PeftModel.from_pretrained(model, checkpoint_dir)


In [6]:
# Story generator built from the adapter-wrapped local model, its tokenizer and
# the story-generation prompt.
local_llm_generator = LocalLLMGenerator(lora_model, tokenizer, prompt_story_generation)

In [None]:
# Smoke test: generate one story for a single profession and let the notebook
# display whatever generate_story returns.
local_llm_generator.generate_story('firefighter')

In [8]:
# Evaluation inputs read from config: the list under `test_professions` and the
# `n_samples` value from the generation config.
# NOTE(review): presumably n_samples is the number of stories generated per
# profession — confirm against concurrent_bias_evaluation.
professions = dataset_config['test_professions']
n_samples = generation_config['n_samples']


In [13]:
# Replace the profession list with a small hand-picked set.
# NOTE(review): this discards the `test_professions` list read from
# dataset_config; presumably a quick-run override — skip this cell for a full
# evaluation.
professions = ['Electrician', 'HR Specialist', 'Designer']

In [None]:
# Run the bias evaluation over `professions` with the local model as the story
# generator. The sample count comes from `n_samples` (generation config)
# rather than a hard-coded literal, so the run follows the configuration.
result_dict = concurrent_bias_evaluation(
    professions,
    n_samples,
    bias_evaluator.process_profession,
    local_llm_generator.generate_story,
)
print(result_dict)

In [None]:
# train professions
# NOTE(review): this cell and the following one plot the same `result_dict`;
# the train/test labels presumably refer to separate runs of the evaluation
# cell with different `professions` — confirm which run this figure shows.
plot_gender_distribution(result_dict)

In [None]:
# test professions
# NOTE(review): plots the same `result_dict` as the preceding cell; the figure
# only reflects test professions if the evaluation cell was last run with the
# test list — confirm.
plot_gender_distribution(result_dict)

# ChatGPT Evaluation

In [8]:
def generate_story_chatgpt(profession):
    """Generate a story about ``profession`` with the configured ChatGPT model.

    The story-generation prompt template is filled with ``profession`` and
    sent as a single user message through the notebook-level ``openai_client``.
    The model name is read from ``llm_configs['chatgpt_generative_model_name']``.

    Args:
        profession: Value substituted for the ``profession`` field of
            ``prompt_story_generation``.

    Returns:
        The ``content`` of the first choice's message in the chat completion.
    """
    user_message = {
        "role": "user",
        "content": prompt_story_generation.format(profession=profession),
    }
    response = openai_client.chat.completions.create(
        messages=[user_message],
        model=llm_configs['chatgpt_generative_model_name'],
    )
    first_choice = response.choices[0]
    return first_choice.message.content


In [None]:
# Run the bias evaluation with ChatGPT as the story generator. The arguments
# follow the same four-argument form used for the local-model run
# (professions, sample count, evaluator callback, generator callback); passing
# `generation_config` as the sole leading argument does not match that form.
result_dict_chatgpt = concurrent_bias_evaluation(
    professions,
    n_samples,
    bias_evaluator.process_profession,
    generate_story_chatgpt,
)
print(result_dict_chatgpt)


In [None]:
# Plot the results collected from the ChatGPT evaluation run.
plot_gender_distribution(result_dict_chatgpt)