## FactScore Results Replication

### Environment Setup

In [None]:
import pandas as pd
import numpy as np
import json
import os
import nltk
# Fetch the NLTK 'punkt_tab' resource before anything else runs.
# NOTE(review): presumably required for sentence tokenization inside
# FactScorer -- confirm against the llments implementation.
nltk.download('punkt_tab')

from llments.eval.factscore.factscorer import FactScorer

In [None]:
!python -m spacy download en_core_web_s

In [None]:
# Install the rank_bm25 package.
# NOTE(review): presumably needed by the retrieval part of the
# "retrieval+ChatGPT" scorer configured below -- confirm.
!pip install rank_bm25

In [None]:
# Install torch, upgrading any already-installed version (-U).
!pip install torch -U

In [None]:
# Run the llments FactScore data download with /factscore_data as the target
# directory. The same directory is passed to FactScorer as data_dir, model_dir
# and cache_dir, and the labeled JSONL files are read from beneath it.
!python -m llments.eval.factscore.download_data --data_dir /factscore_data

### FactScore Calculation

In [None]:
def extract_data(input_filename):
    """Load topics and model outputs from a JSONL file.

    Every line is parsed as one JSON object. Its ``input``, ``output`` and
    ``topic`` values are read (a missing key defaults to ``''``) and stripped
    of surrounding whitespace. A leading ``"Question:"`` prefix is removed
    from ``input`` to obtain the question; questions are only counted in the
    printed summary and are not returned.

    A line that cannot be decoded or processed is reported on stdout and
    skipped as a whole, so the returned lists stay aligned index-for-index.

    Args:
        input_filename: Path to a UTF-8 encoded JSONL file.

    Returns:
        A ``(topics, outputs)`` tuple of two equal-length lists of strings,
        in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    questions = []
    outputs = []
    topics = []
    question_prefix = "Question:"

    with open(input_filename, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, 1):
            # Only parsing and field extraction can fail; keep them together
            # so a bad record never leaves the three lists out of step.
            try:
                data = json.loads(line)
                input_field = data.get('input', '').strip()
                output_field = data.get('output', '').strip()
                topic_field = data.get('topic', '').strip()
            except json.JSONDecodeError as e:
                print(f"JSON decoding error on line {line_number}: {e}")
                continue
            except Exception as e:
                # Best-effort loading: e.g. a non-object record or a
                # non-string field. Report it and move on.
                print(f"Unexpected error on line {line_number}: {e}")
                continue

            if input_field.startswith(question_prefix):
                question = input_field[len(question_prefix):].strip()
            else:
                question = input_field

            questions.append(question)
            outputs.append(output_field)
            topics.append(topic_field)

    print(f"Total Questions Extracted: {len(questions)}")
    print(f"Total Outputs Extracted: {len(outputs)}")
    print(f"Total Topics Extracted: {len(topics)}")

    # Callers unpack exactly two values: the topics and the responses.
    return topics, outputs

In [None]:
# Data, model files and cache all live under the same root directory.
factscore_root = "/factscore_data"

# Scorer shared by the replication cells below.
# NOTE(review): "key.txt" is presumably a path to a file holding the OpenAI
# API key -- confirm against FactScorer.
fs6 = FactScorer(
    model_name="retrieval+ChatGPT",
    data_dir=factscore_root,
    model_dir=factscore_root,
    cache_dir=factscore_root,
    openai_key="key.txt",
    cost_estimate="consider_cache",
    abstain_detection_type=None,
    batch_size=256,
)

In [None]:
# Replicate the FactScore result for the labeled InstructGPT generations.

input_filename = "/factscore_data/data/labeled/InstructGPT.jsonl"
instructgpt_topics, instructgpt_responses = extract_data(input_filename)

instructgpt_outputs = fs6.get_score(instructgpt_topics, instructgpt_responses)

# FActScore, scaled by 100.
instructgpt_factscore = instructgpt_outputs["score"] * 100
print(instructgpt_factscore)

# Average number of atomic facts per response.
instructgpt_facts_per_response = instructgpt_outputs["num_facts_per_response"]
print(instructgpt_facts_per_response)

In [None]:
# Replicate the FactScore result for the labeled ChatGPT generations.

input_filename = "/factscore_data/data/labeled/ChatGPT.jsonl"
chatgpt_topics, chatgpt_responses = extract_data(input_filename)

chatgpt_outputs = fs6.get_score(chatgpt_topics, chatgpt_responses)

# FActScore, scaled by 100.
chatgpt_factscore = chatgpt_outputs["score"] * 100
print(chatgpt_factscore)

# Average number of atomic facts per response.
chatgpt_facts_per_response = chatgpt_outputs["num_facts_per_response"]
print(chatgpt_facts_per_response)