# In [None]:
import torch
import csv
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer

# Select the compute device: CUDA when torch reports it as available, else CPU
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")

# Restore each saved model (moved onto `device`) together with its tokenizer
distil_gpt2_model = AutoModelForCausalLM.from_pretrained("./trained_modelsF/distil_gpt2")
distil_gpt2_model = distil_gpt2_model.to(device)
distil_gpt2_tokenizer = AutoTokenizer.from_pretrained("./trained_modelsF/distil_gpt2_tokenizer")

gpt2_model = AutoModelForCausalLM.from_pretrained("./trained_modelsF/gpt2")
gpt2_model = gpt2_model.to(device)
gpt2_tokenizer = AutoTokenizer.from_pretrained("./trained_modelsF/gpt2_tokenizer")

gpt_neo_model = AutoModelForCausalLM.from_pretrained("./trained_modelsF/gpt_neo")
gpt_neo_model = gpt_neo_model.to(device)
gpt_neo_tokenizer = AutoTokenizer.from_pretrained("./trained_modelsF/gpt_neo_tokenizer")

# Define a function to generate text using a given model and tokenizer
def generate_text(model, tokenizer, max_length=50):
    """Sample one text from `model`, prompted with a single start token.

    Args:
        model: Causal language model exposing `generate(...)` and a `device`
            attribute.
        tokenizer: Tokenizer paired with `model`; supplies the start/end
            token ids and `decode`.
        max_length: Forwarded to `generate` as `max_length` (in Hugging Face
            this bound counts the prompt token as well).

    Returns:
        The first generated sequence decoded to a string with special tokens
        skipped.

    Raises:
        ValueError: If the tokenizer defines neither a BOS nor an EOS token,
            so there is nothing to prompt the model with.
    """
    # Some tokenizers define no BOS token; fall back to EOS as the start token.
    start_id = tokenizer.bos_token_id
    if start_id is None:
        start_id = tokenizer.eos_token_id
    if start_id is None:
        raise ValueError("tokenizer defines neither bos_token_id nor eos_token_id")

    # Build the prompt on the model's own device so the function does not
    # depend on a module-level `device` matching where the model lives.
    inputs = torch.tensor([[start_id]], device=model.device)
    outputs = model.generate(
        inputs,
        # Passing the mask and pad id explicitly avoids generate() having to
        # infer them (and warn about it) on every call.
        attention_mask=torch.ones_like(inputs),
        pad_token_id=tokenizer.eos_token_id,
        max_length=max_length,
        num_return_sequences=1,
        do_sample=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Sample `num_texts` texts from every (model, tokenizer) pair
num_texts = 500
models = list(zip(
    (distil_gpt2_model, gpt2_model, gpt_neo_model),
    (distil_gpt2_tokenizer, gpt2_tokenizer, gpt_neo_tokenizer),
))

# generated_texts[m][i] is the i-th sample drawn from models[m]
generated_texts = []
for lm, tok in models:
    texts = [generate_text(lm, tok, max_length=50) for _ in range(num_texts)]
    generated_texts.append(texts)

# Choose the best text outputs using majority voting
#
# For every index i there is one candidate text per model
# (generated_texts[m][i]). Each model scores all candidates for that index,
# votes for the one it finds most likely, and the candidate with the most
# votes is kept. Averaging token ids, as done previously, is not meaningful:
# ids are arbitrary vocabulary indices, and only the last model's texts were
# ever read.
def _mean_token_loss(model, tokenizer, text):
    """Return the mean per-token cross-entropy `model` assigns to `text`.

    Lower is better. Returns `inf` when fewer than two tokens are available,
    because the causal-LM loss needs at least one next-token prediction.
    """
    prefix = [] if tokenizer.bos_token_id is None else [tokenizer.bos_token_id]
    token_ids = prefix + tokenizer.encode(text)
    if len(token_ids) < 2:
        return float("inf")
    input_ids = torch.tensor([token_ids], device=model.device)
    with torch.no_grad():
        # Passing labels makes the model return the language-modelling loss.
        loss = model(input_ids=input_ids, labels=input_ids).loss
    return loss.item()


def _majority_vote(loss_table):
    """Return the index of the winning candidate.

    `loss_table[v][c]` is the loss voter `v` assigned to candidate `c`. Each
    voter votes for its lowest-loss candidate; ties in the vote count are
    broken by the lowest loss summed over all voters.
    """
    candidate_ids = range(len(loss_table[0]))
    votes = [0 for _ in candidate_ids]
    for row in loss_table:
        votes[min(candidate_ids, key=row.__getitem__)] += 1
    totals = [sum(row[c] for row in loss_table) for c in candidate_ids]
    return max(candidate_ids, key=lambda c: (votes[c], -totals[c]))


# The name `averaged_outputs` is kept because the CSV-writing code reads it.
averaged_outputs = []
for candidates in zip(*generated_texts):
    loss_table = [
        [_mean_token_loss(model, tokenizer, candidate) for candidate in candidates]
        for model, tokenizer in models
    ]
    averaged_outputs.append(candidates[_majority_vote(loss_table)])

# Write the chosen texts to disk as a single-column CSV under a "Text" header
csv_file = "Final-major-voting.csv"
with open(csv_file, 'w', newline='', encoding='utf-8') as handle:
    sink = csv.writer(handle)
    sink.writerow(["Text"])
    # One row per chosen text, each row holding just that text
    sink.writerows([chosen] for chosen in averaged_outputs)

print(f"{num_texts} unique texts have been generated using the ensemble model and saved to '{csv_file}'.")
