**HotpotQA**

In [1]:
import os
import sys

# Put the parent of the current working directory on the import path so that
# project packages can be imported. Adjust if the notebook lives elsewhere.
project_root = os.path.abspath(os.pardir)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [4]:
from rag.llm.openai_atomicfact_generator import OpenAIAtomicFactGenerator
from rag.scorer.wikitexts_embedding import WikitextsDocumentScorer

# End-to-end smoke test on the small magazine corpus: index the file, retrieve
# context for one question, generate an answer, and split it into atomic facts.
file_path = "../index_store/magazine/title_text_map.txt"
wiki_embedding = WikitextsDocumentScorer()
gen = OpenAIAtomicFactGenerator()

# Per the recorded cell output, this embeds the file's texts and adds them to
# the FAISS index.
wiki_embedding.create_embedding(file_path)
query = "Which magazine was started first Arthur\'s Magazine or First for Women?"
# Search the index for the query with top_k=10.
retrieved_docs = wiki_embedding.faiss_manager.search_faiss_index(query, top_k=10)
print(retrieved_docs)
# Generate an answer from the query and the retrieved documents.
response = wiki_embedding.faiss_manager.generate_response_from_context(query, retrieved_docs)
print(response)
# Entries look like "<fact>:<label>" in the recorded output of the next cell.
atomicFacts = gen.get_facts_from_text(response)




Loaded texts from file: ../index_store/magazine/title_text_map_texts.json
Associated texts saved to file: ../index_store/magazine/title_text_map_texts.json
1/11 embeddings done
2/11 embeddings done
3/11 embeddings done
4/11 embeddings done
5/11 embeddings done
6/11 embeddings done
7/11 embeddings done
8/11 embeddings done
9/11 embeddings done
10/11 embeddings done
11/11 embeddings done
Embeddings from file '../index_store/magazine/title_text_map.txt' added to FAISS index between indice 50797 to 50808.
["File manager not found for '../index_store/conformalhotpot500/title_text_map_hotpotqa_500.txt' score=0.5978", "page_content='First for Women is a woman's magazine published by Bauer Media Group in the USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011 the circulation of the magazine was 1,310,696 copies.\n' metadata={'source': 'First for Women'} indice=1 score=0.5967", "File manager not found for '../index_store/conformalhotpot500/title_text_map_

In [3]:
# Inspect the extracted facts; in the recorded output each entry is "<fact>:<label>".
print(atomicFacts)

["Arthur's Magazine was started first:supported", " Arthur's Magazine was published from 1844 to 1846:supported", ' First for Women was started in 1989:supported']


In [4]:
# Inspect the raw retrieval results (a list of strings in the recorded output).
print(retrieved_docs)

["page_content='First for Women is a woman's magazine published by Bauer Media Group in the USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011 the circulation of the magazine was 1,310,696 copies.\n' metadata={'source': 'First for Women'} indice=1 score=0.5966", 'page_content=\'Arthur\'s Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846 it was merged into "Godey\'s Lady\'s Book".\n\' metadata={\'source\': "Arthur\'s Magazine"} indice=5 score=0.5791', 'page_content=\'A few years later Arthur would launch a new publication entitled "Arthur\'s Home Magazine".\n\' metadata={\'source\': "Arthur\'s Magazine"} indice=6 score=0.5322']


In [3]:
# Parse each retrieved entry with parse_result; per the recorded output this
# yields a dict with 'page_content', 'metadata', 'indice' and 'score' keys.
for result in retrieved_docs:
    parsed_result = wiki_embedding.faiss_manager.parse_result(result)
    print(parsed_result)


{'page_content': "First for Women is a woman's magazine published by Bauer Media Group in the USA. The magazine was started in 1989. It is based in Englewood Cliffs, New Jersey. In 2011 the circulation of the magazine was 1,310,696 copies.", 'metadata': {'source': 'First for Women'}, 'indice': 1, 'score': 0.5966}
{'page_content': 'Arthur\'s Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others. In May 1846 it was merged into "Godey\'s Lady\'s Book".', 'metadata': {'source': "Arthur's Magazine"}, 'indice': 5, 'score': 0.5791}
{'page_content': 'A few years later Arthur would launch a new publication entitled "Arthur\'s Home Magazine".', 'metadata': {'source': "Arthur's Magazine"}, 'indice': 6, 'score': 0.5322}


In [None]:
# Score each atomic fact against the retrieved documents.
for fact in atomicFacts:
    # Facts look like "<text>:<label>"; drop the trailing label before scoring.
    # rpartition() returns an empty head when there is no ':', so fall back to
    # the whole fact (same guard as scoreWikiAnswers) instead of scoring ''.
    fact_text = fact.rpartition(':')[0] if ':' in fact else fact
    score = wiki_embedding.score(fact_text, retrieved_docs)
    print(f'{fact} has a score of {score}')

Arthur's Magazine was started first:supported has a score of [[0.31389496]]
 Arthur's Magazine was published from 1844 to 1846:supported has a score of [[0.29862073]]
 First for Women was started in 1989:supported has a score of [[0.20763773]]


In [6]:
# Score the bare title string against the same retrieved documents and show it.
realscore = wiki_embedding.score('Arthur\'s Magazine', retrieved_docs)
print(realscore)

[[0.32663262]]


In [12]:
import json
import os
from rag.llm.openai_atomicfact_generator import OpenAIAtomicFactGenerator
from rag.scorer.wikitexts_embedding import WikitextsDocumentScorer

def _score_to_float(score):
    """Convert a scorer result to a plain Python float.

    The scorer's recorded results are single-element nested arrays such as
    ``[[0.31]]``. Calling ``float()`` directly on an array with ndim > 0 is
    deprecated in recent NumPy releases, so use the object's ``item()`` when
    it has one and fall back to ``float()`` otherwise.
    """
    item = getattr(score, "item", None)
    return float(item()) if callable(item) else float(score)


def scoreWikiAnswers(input_file_path, output_file_path, data_file_path="", skiplines = 0):
    """Score generated answers for every question in a JSONL file.

    For each record the function retrieves documents for ``record['input']``,
    generates a response from them, splits the response into atomic facts and
    scores each fact against the retrieved documents. It also scores the
    question concatenated with the reference answer
    (``record['output'][0]['answer']``) as ``calibrate_score``.

    Args:
        input_file_path: JSONL file with one question record per line.
        output_file_path: JSONL file that results are appended to, one line
            per processed record with the keys ``query``, ``answer``,
            ``calibrate_score``, ``response`` and ``subclaims_score`` (a list
            of ``[subclaim, score]`` pairs sorted by descending score).
        data_file_path: Optional text file passed to ``create_embedding``
            before scoring. Ignored when empty; a warning is printed when it
            is given but does not exist.
        skiplines: Number of leading records to skip, e.g. to resume a run.

    Returns:
        None. Results are written to ``output_file_path``.
    """
    wiki_embedding = WikitextsDocumentScorer()
    if data_file_path:
        if os.path.exists(data_file_path):
            wiki_embedding.create_embedding(data_file_path)
        else:
            # Previously skipped silently, which hid typos in the path.
            print(f"Warning: data file '{data_file_path}' not found; scoring against the existing index only")
    gen = OpenAIAtomicFactGenerator()

    with open(input_file_path, 'r', encoding='utf-8') as f:
        # Ignore blank lines (e.g. a trailing newline) instead of failing in json.loads.
        qa_data = [json.loads(line) for line in f if line.strip()]

    for i, qa in enumerate(qa_data):
        if i < skiplines:
            continue
        query = qa['input']
        retrieved_docs = wiki_embedding.faiss_manager.search_faiss_index(query, top_k=10, threshold=0.3)
        response = wiki_embedding.faiss_manager.generate_response_from_context(query, retrieved_docs)
        atomicFacts = gen.get_facts_from_text(response)

        subclaims_score = {}
        for fact in atomicFacts:
            # Facts look like "<text>:<label>"; keep only the text part.
            purefact = fact.rpartition(':')[0] if ':' in fact else fact
            score = wiki_embedding.score(purefact, retrieved_docs)
            subclaims_score[purefact] = _score_to_float(score)
        # Sort subclaims by score, highest first.
        subclaims_score = sorted(subclaims_score.items(), key=lambda x: x[1], reverse=True)

        answer = qa['output'][0]['answer']
        calibrate_score = _score_to_float(wiki_embedding.score(query + ' ' + answer, retrieved_docs))

        # Append per record so partial progress survives an interruption and
        # a later call with `skiplines` can continue the same file.
        with open(output_file_path, 'a', encoding='utf-8') as f:
            f.write(json.dumps({'query': query, 'answer': answer, 'calibrate_score': calibrate_score, 'response': response, 'subclaims_score': subclaims_score}) + '\n')
        print(f'calibrate data {i} / {len(qa_data)} done')


In [13]:
# Build the HotpotQA calibration scores: score every question in the 500-line
# KILT file and append the results to hotpot_calibrate_score_500.jsonl.
file_path = "../index_store/conformalhotpot500/hotpotqa-train-kilt-500.jsonl"
scoreWikiAnswers(file_path, '../index_store/conformalhotpot500/hotpot_calibrate_score_500.jsonl', '../index_store/conformalhotpot500/title_text_map_hotpotqa_500.txt')

Loaded texts from file: ../index_store/conformalhotpot500/title_text_map_hotpotqa_500_texts.json
Loaded texts from file: ../index_store/magazine/title_text_map_texts.json
Loaded texts from file: ../index_store/conformalpopqa500/title_text_map_popqa_500_texts.json
Loaded texts from file: /Users/naihefeng/Documents/2025/study/layer6/rag_conformal_pred/index_store/conformalpopqa1000/title_text_map_popqa_1000_1500_texts.json
Loaded texts from file: ../index_store/conformalhotpot500/title_text_map_hotpotqa_500_texts.json
File '../index_store/conformalhotpot500/title_text_map_hotpotqa_500.txt' already exists in the FAISS index.
calibrate data 0 / 500 done
calibrate data 1 / 500 done
calibrate data 2 / 500 done
calibrate data 3 / 500 done
calibrate data 4 / 500 done
calibrate data 5 / 500 done
calibrate data 6 / 500 done
calibrate data 7 / 500 done
calibrate data 8 / 500 done
calibrate data 9 / 500 done
calibrate data 10 / 500 done
calibrate data 11 / 500 done
calibrate data 12 / 500 done
cal

In [14]:
import json
from llm.openai_claim_verification import OpenAIClaimVerification

# Module-level state read by annotate_subclaims: the claim verifier and the
# path of the file that receives the raw LLM responses.
verifier = OpenAIClaimVerification()
llm_responses_file_path = 'llm_responses_500.jsonl'
# Function to transform subclaims_score into the desired format
def annotate_subclaims(data):
    """Annotate every scored sub-claim of one record with the claim verifier.

    Args:
        data: One record with the keys ``query``, ``answer``, ``response`` and
            ``subclaims_score`` (an iterable of ``(subclaim, score)`` pairs).

    Returns:
        A dict with ``prompt`` (the query), ``original-output`` (the response)
        and ``claims``, a list of dicts with the keys ``subclaim``,
        ``similarity-score`` and ``annotation``.

    Side effects:
        Appends one JSON line per sub-claim (including the raw LLM response)
        to ``llm_responses_file_path``.
    """
    transformed_claims = []
    query = data['query']
    answer = data['answer']
    # Append rather than truncate: with mode "w" every call wiped the log, so
    # only the last record's responses survived a full run. Delete the file
    # before starting a fresh run to avoid mixing runs.
    # UTF-8 is explicit because the lines are written with ensure_ascii=False.
    with open(llm_responses_file_path, "a", encoding="utf-8") as llm_responses_file:
        for subclaim, score in data["subclaims_score"]:
            res = verifier.openAI_response(query, answer, subclaim)
            annotation = verifier.detect_label(res)
            transformed_claims.append({
                "subclaim": subclaim,
                "similarity-score": score,
                # Label derived from the verifier's response by detect_label.
                "annotation": annotation
            })
            llm_responses_file.write(json.dumps({'subclaim': subclaim, 'score' : score, 'annotation': annotation, 'llmResponse': res},
                                                ensure_ascii=False) + '\n')
    return {
        "prompt": data["query"],
        "original-output": data["response"],
        "claims": transformed_claims,
    }



In [None]:
conformal_format_file_path = 'conformal_format_500.jsonl'
# Input and output file paths
input_file = '../index_store/conformalhotpot500/hotpot_calibrate_score_500.jsonl'
output_file = "similarity_annotations.jsonl"
idx = 0
# Annotate every scored record and write them as one JSON document of the
# form {"data": [record, record, ...]}.
with open(input_file, "r", encoding="utf-8") as infile, open(output_file, "w", encoding="utf-8") as outfile:
    outfile.write('{"data": [\n')
    for line in infile:
        line = line.strip()
        if not line:
            # Skip blank lines instead of failing in json.loads.
            continue
        print(f"Processing line {idx}")
        data = json.loads(line)
        transformed_data = annotate_subclaims(data)
        # Write the separator before every record except the first; a comma
        # after the last record made the file invalid JSON.
        if idx > 0:
            outfile.write(",\n")
        outfile.write(json.dumps(transformed_data, ensure_ascii=False))
        idx += 1
    outfile.write('\n]}')
print(f"Transformed data written to {output_file}.")

Processing line 0
Processing line 1
Processing line 2
Processing line 3
Processing line 4
Processing line 5
Processing line 6
Processing line 7
Processing line 8
Processing line 9
Processing line 10
Processing line 11
Processing line 12
Processing line 13
Processing line 14
Processing line 15
Processing line 16
Processing line 17
Processing line 18
Processing line 19
Processing line 20
Processing line 21
Processing line 22
Processing line 23
Processing line 24
Processing line 25
Processing line 26
Processing line 27
Processing line 28
Processing line 29
Processing line 30
Processing line 31
Processing line 32
Processing line 33
Processing line 34
Processing line 35
Processing line 36
Processing line 37
Processing line 38
Processing line 39
Processing line 40
Processing line 41
Processing line 42
Processing line 43
Processing line 44
Processing line 45
Processing line 46
Processing line 47
Processing line 48
Processing line 49
Processing line 50
Processing line 51
Processing line 52
Pro

In [5]:
# Show the most recently annotated record (left over from the loop above).
print(transformed_data)

{'prompt': 'Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?', 'original-output': 'The character Milhouse from "The Simpsons" was named by Matt Groening after U.S. President Richard Nixon, whose middle name was Milhous.', 'claims': [{'subclaim': 'The character Milhouse from "The Simpsons" was named by Matt Groening after U.S. President Richard Nixon', 'similarity-score': 0.29849618673324585, 'annotation': 'S'}, {'subclaim': " Richard Nixon's middle name was Milhous", 'similarity-score': 0.14918304979801178, 'annotation': 'F'}]}


**PopQA**

## Create Embedding Only

In [7]:
import os

# Derive the data file location from the relative project layout used by the
# other cells instead of hard-coding a user-specific absolute path.
# abspath() keeps the value absolute, as the original literal was; with the
# working directory recorded in this notebook it resolves to the same string.
# NOTE(review): the index appears to track source files by path string, so the
# path form matters. Confirm before changing it further.
data_path = os.path.abspath('../index_store/conformalpopqa1000/title_text_map_popqa_1000_1500.txt')
wiki_embedding = WikitextsDocumentScorer()
wiki_embedding.create_embedding(data_path)

Loaded texts from file: ../index_store/conformalhotpot500/title_text_map_hotpotqa_500_texts.json
Loaded texts from file: ../index_store/magazine/title_text_map_texts.json
Loaded texts from file: ../index_store/conformalpopqa500/title_text_map_popqa_500_texts.json
Associated texts saved to file: /Users/naihefeng/Documents/2025/study/layer6/rag_conformal_pred/index_store/conformalpopqa1000/title_text_map_popqa_1000_1500_texts.json
1/15390 embeddings done
2/15390 embeddings done
3/15390 embeddings done
4/15390 embeddings done
5/15390 embeddings done
6/15390 embeddings done
7/15390 embeddings done
8/15390 embeddings done
9/15390 embeddings done
10/15390 embeddings done
11/15390 embeddings done
12/15390 embeddings done
13/15390 embeddings done
14/15390 embeddings done
15/15390 embeddings done
16/15390 embeddings done
17/15390 embeddings done
18/15390 embeddings done
19/15390 embeddings done
20/15390 embeddings done
21/15390 embeddings done
22/15390 embeddings done
23/15390 embeddings done
2

## Create Embedding + Score ##

In [6]:
# Build the PopQA calibration scores with the same pipeline as HotpotQA;
# results are appended to popqa_calibrate_score_500.jsonl.
file_path = "../index_store/conformalpopqa500/popqa-train-kilt-500.jsonl"
scoreWikiAnswers(file_path, '../index_store/conformalpopqa500/popqa_calibrate_score_500.jsonl', '../index_store/conformalpopqa500/title_text_map_popqa_500.txt')

Loaded texts from file: ../index_store/conformalhotpot500/title_text_map_hotpotqa_500_texts.json
Loaded texts from file: ../index_store/magazine/title_text_map_texts.json
Loaded texts from file: ../index_store/conformalpopqa500/title_text_map_popqa_500_texts.json
Loaded texts from file: ../index_store/conformalpopqa500/title_text_map_popqa_500_texts.json
File '../index_store/conformalpopqa500/title_text_map_popqa_500.txt' already exists in the FAISS index.
calibrate data 0 / 505 done
calibrate data 1 / 505 done
calibrate data 2 / 505 done
calibrate data 3 / 505 done
calibrate data 4 / 505 done
calibrate data 5 / 505 done
calibrate data 6 / 505 done
calibrate data 7 / 505 done
calibrate data 8 / 505 done
calibrate data 9 / 505 done
calibrate data 10 / 505 done
calibrate data 11 / 505 done
calibrate data 12 / 505 done
calibrate data 13 / 505 done
calibrate data 14 / 505 done
calibrate data 15 / 505 done
calibrate data 16 / 505 done
calibrate data 17 / 505 done
calibrate data 18 / 505 don

In [5]:
from rag.faiss_manager import FAISSIndexManager

# Sanity check on the FAISS index; the recorded run returned True.
# NOTE(review): presumably verifies that stored indices line up with the
# stored texts; confirm against FAISSIndexManager.is_indice_align.
fm = FAISSIndexManager()
fm.is_indice_align()

True

In [3]:
# Input and output file paths
# NOTE: output_file is the same path the HotpotQA annotation cell writes to,
# so running both cells overwrites the earlier result.
input_file = '../index_store/conformalpopqa500/popqa_calibrate_score_500.jsonl'
output_file = "similarity_annotations.jsonl"
idx = 0
# Annotate every scored record and write them as one JSON document of the
# form {"data": [record, record, ...]}.
with open(input_file, "r", encoding="utf-8") as infile, open(output_file, "w", encoding="utf-8") as outfile:
    outfile.write('{"data": [\n')
    for line in infile:
        line = line.strip()
        if not line:
            # Skip blank lines instead of failing in json.loads.
            continue
        print(f"Processing line {idx}")
        data = json.loads(line)
        transformed_data = annotate_subclaims(data)
        # Write the separator before every record except the first; a comma
        # after the last record made the file invalid JSON.
        if idx > 0:
            outfile.write(",\n")
        outfile.write(json.dumps(transformed_data, ensure_ascii=False))
        idx += 1
    outfile.write('\n]}')
print(f"Transformed data written to {output_file}.")

Processing line 0
Processing line 1
Processing line 2
Processing line 3
Processing line 4
Processing line 5
Processing line 6
Processing line 7
Processing line 8
Processing line 9
Processing line 10
Processing line 11
Processing line 12
Processing line 13
Processing line 14
Processing line 15
Processing line 16
Processing line 17
Processing line 18
Processing line 19
Processing line 20
Processing line 21
Processing line 22
Processing line 23
Processing line 24
Processing line 25
Processing line 26
Processing line 27
Processing line 28
Processing line 29
Processing line 30
Processing line 31
Processing line 32
Processing line 33
Processing line 34
Processing line 35
Processing line 36
Processing line 37
Processing line 38
Processing line 39
Processing line 40
Processing line 41
Processing line 42
Processing line 43
Processing line 44
Processing line 45
Processing line 46
Processing line 47
Processing line 48
Processing line 49
Processing line 50
Processing line 51
Processing line 52
Pro

In [13]:
# Show the kernel's working directory. The notebook uses relative
# '../index_store/...' paths, which presumably resolve against it.
import os
os.getcwd()

'/Users/naihefeng/Documents/2025/study/layer6/rag_conformal_pred/rag'

In [4]:
import os

from rag.scorer.document_scorer import DocumentScorer

def conformal_pred(prompt, scorer: "DocumentScorer", model='gpt-4', threshold=0.40):
    """Run the scorer's ``say_less`` on a prompt and print the results.

    Args:
        prompt: Question passed to ``scorer.say_less``.
        scorer: Object providing ``say_less(prompt, threshold, model)`` that
            returns ``(output, merged_output, all_subclaims, accepted_subclaims)``.
        model: Model name forwarded to ``say_less``.
        threshold: Score threshold forwarded to ``say_less``. Defaults to
            0.40, the value that was previously hard-coded.

    Returns:
        None. The original output, the modified output, the accepted
        sub-claims and all sub-claims are printed.
    """
    (output, merged_output, all_subclaims, accepted_subclaims) = scorer.say_less(prompt, threshold, model)
    print("Original output: ")
    print(output)
    print("\n\n\n\n\nModified output: ")
    print(merged_output)
    print("\n\n\n\nAccepted sub-claims: ")
    print(accepted_subclaims)
    print("\n\n\n\nAll sub-claims: ")
    print(all_subclaims)


In [11]:
from rag.scorer.wikitexts_embedding import WikitextsDocumentScorer
# Run conformal_pred for a single PopQA-style question with a fresh scorer.
wiki_embedding = WikitextsDocumentScorer()
prompt = "What is George Rankin's occupation?"
conformal_pred(prompt, wiki_embedding)

Loaded texts from file: ../index_store/conformalhotpot500/title_text_map_hotpotqa_500_texts.json
Loaded texts from file: ../index_store/magazine/title_text_map_texts.json
Loaded texts from file: ../index_store/conformalpopqa500/title_text_map_popqa_500_texts.json
Loaded texts from file: /Users/naihefeng/Documents/2025/study/layer6/rag_conformal_pred/index_store/conformalpopqa1000/title_text_map_popqa_1000_1500_texts.json
Original output: 
George Rankin was both an Australian soldier and a politician. He served in the military, achieving the rank of major general, and was also a member of the House of Representatives and the Senate, representing the Country Party of Australia.





Modified output: 
George Rankin's occupation was a soldier.




Accepted sub-claims: 
[('George Rankin was an Australian soldier', 0.45235167679190635)]




All sub-claims: 
[('George Rankin was an Australian soldier', 0.45235167679190635), (' George Rankin was a politician', 0.39989348105192185), (' George R