#### Load LLM and device

In [1]:
from langchain_google_vertexai import ChatVertexAI

# Chat model handle shared by every cell below that takes an `llm` argument
# (the LongFormUQ constructors and ResponseDecomposer).
llm = ChatVertexAI(model="gemini-2.5-flash")

In [2]:
import torch

# Pick the torch device in priority order: CUDA (NVIDIA GPU) first,
# then MPS (macOS), and finally plain CPU. The MPS check is only evaluated
# when CUDA is unavailable.
device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device.type} device")

Using cuda device


#### LongForm UQ

In [3]:
from uqlm import LongFormUQ

In [4]:
# Prompts reused by every LongFormUQ example below; each asks for a
# paragraph-length answer.
prompts = [
    "write a paragraph about Paul McCartney",
    "write a paragraph about John Lennon",
]

In [5]:
# Sentence-Response UQ: aggregated response-level scoring
sent_lfuq = LongFormUQ(llm=llm, granularity="sentence", mode="unit_response", aggregation_method="mean", device=device)
result = await sent_lfuq.generate_and_score(prompts=prompts, num_responses=2)
result.to_df()

Output()

Output()

Unnamed: 0,response,sampled_responses,prompt,sentence_set,entailment,noncontradiction,contrasted_entailment
0,Sir Paul McCartney stands as one of the most i...,[Paul McCartney stands as one of the most icon...,write a paragraph about Paul McCartney,[Sir Paul McCartney stands as one of the most ...,0.462897,0.998953,0.9927
1,John Lennon remains one of the most iconic and...,[John Lennon remains an indelible figure in mu...,write a paragraph about John Lennon,[John Lennon remains one of the most iconic an...,0.52669,0.997217,0.989133


In [6]:
# Sentence-Response UQ: sentence-level scoring
sent_lfuq = LongFormUQ(llm=llm, granularity="sentence", mode="unit_response", aggregation_method=None, device=device)
result = await sent_lfuq.generate_and_score(prompts=prompts, num_responses=2)
result.to_df()

Output()

Output()

Unnamed: 0,response,sampled_responses,prompt,sentence_set,entailment,noncontradiction,contrasted_entailment
0,Sir Paul McCartney stands as one of the most i...,[Sir Paul McCartney stands as one of the most ...,write a paragraph about Paul McCartney,[Sir Paul McCartney stands as one of the most ...,"[0.9603798389434814, 0.35361579060554504, 0.83...","[0.9993108212947845, 0.9985859990119934, 0.999...","[0.9992883503437042, 0.9945769309997559, 0.999..."
1,John Lennon remains one of the most iconic and...,[John Lennon was a pivotal figure in 20th-cent...,write a paragraph about John Lennon,[John Lennon remains one of the most iconic an...,"[0.3790244683623314, 0.32654259726405144]","[0.9929681718349457, 0.998896449804306]","[0.9831673800945282, 0.9907431304454803]"


In [7]:
# Claim-Response UQ:  claim-level scoring
claim_lfuq = LongFormUQ(llm=llm, granularity="claim", mode="unit_response", aggregation_method=None, device=device)
result = await claim_lfuq.generate_and_score(prompts=prompts, num_responses=2)
result.to_df()

Output()

Output()

Unnamed: 0,response,sampled_responses,prompt,claim_set,entailment,noncontradiction,contrasted_entailment
0,Sir Paul McCartney stands as an undeniable tit...,[Sir Paul McCartney stands as one of the most ...,write a paragraph about Paul McCartney,[Sir Paul McCartney stands as a titan of music...,"[0.9855940639972687, 0.9688562452793121, 0.112...","[0.9996415674686432, 0.9994828701019287, 0.974...","[0.9996362328529358, 0.9994658827781677, 0.723..."
1,"John Lennon, an indelible figure in 20th-centu...",[John Lennon remains one of the most iconic an...,write a paragraph about John Lennon,"[John Lennon was an indelible figure., John Le...","[0.9055130183696747, 0.927878350019455, 0.9042...","[0.9991505146026611, 0.9993012547492981, 0.999...","[0.999048501253128, 0.9992361068725586, 0.9990..."


In [8]:
# matched sentence UQ: sentence-level scoring
matched_sent_lfuq = LongFormUQ(llm=llm, granularity="sentence", mode="matched_unit", aggregation_method=None, device=device)
result = await matched_sent_lfuq.generate_and_score(prompts=prompts, num_responses=2)
result.to_df()

Output()

Output()

Unnamed: 0,response,sampled_responses,prompt,sentence_set,entailment,noncontradiction,contrasted_entailment,cosine_sim,bert_score
0,Sir Paul McCartney is one of the most iconic a...,[Sir Paul McCartney stands as one of the most ...,write a paragraph about Paul McCartney,[Sir Paul McCartney is one of the most iconic ...,"[0.4279695302248001, 0.2414419180713594, 0.058...","[0.9994330704212189, 0.9995861351490021, 0.999...","[0.9957833886146545, 0.976411372423172, 0.9844...","[0.9231644421815872, 0.8462588638067245, 0.911...","[0.930171549320221, 0.8999151587486267, 0.8983..."
1,John Lennon remains one of the most iconic and...,[John Lennon remains one of the most iconic an...,write a paragraph about John Lennon,[John Lennon remains one of the most iconic an...,"[0.4112534672021866, 0.20153965055942535, 0.38...","[0.9993768334388733, 0.999542236328125, 0.9992...","[0.9962629675865173, 0.9955682456493378, 0.993...","[0.9497462511062622, 0.8846983760595322, 0.887...","[0.9456483721733093, 0.923531711101532, 0.8986..."


In [9]:
# matched claim UQ: claim-level scoring
matched_claim_lfuq = LongFormUQ(llm=llm, granularity="claim", mode="matched_unit", aggregation_method=None, device=device)
result = await matched_claim_lfuq.generate_and_score(prompts=prompts, num_responses=2)
result.to_df()

Output()

Output()

Unnamed: 0,response,sampled_responses,prompt,claim_set,entailment,noncontradiction,contrasted_entailment,cosine_sim,bert_score
0,Sir Paul McCartney stands as one of the most i...,"[Paul McCartney, a towering figure in popular ...",write a paragraph about Paul McCartney,"[Sir Paul McCartney stands as a figure., Sir P...","[0.8556037843227386, 0.509904257953167, 0.5171...","[0.9990790486335754, 0.999342292547226, 0.9994...","[0.9988324046134949, 0.9955945611000061, 0.992...","[0.8817749470472336, 0.9349848031997681, 0.960...","[0.9203675985336304, 0.9631945490837097, 0.966..."
1,"John Lennon, an iconic English singer, songwri...",[John Lennon remains one of the most iconic an...,write a paragraph about John Lennon,"[John Lennon was an iconic English singer., Jo...","[0.2559227691963315, 0.2893851175904274, 0.309...","[0.9991565048694611, 0.9993019998073578, 0.999...","[0.9504534900188446, 0.9786828458309174, 0.988...","[0.9312989860773087, 0.9325282126665115, 0.934...","[0.9544923901557922, 0.9515130519866943, 0.955..."


#### Decomposition

In [13]:
from uqlm.longform import ResponseDecomposer

In [14]:
# Decomposer used by the cells below to split responses into sentences and
# into claims; it is handed the same `llm` created at the top of the notebook.
rd = ResponseDecomposer(llm)

In [15]:
# Hand-written fixtures for the decomposition and scorer examples.
# The texts are dense with dotted abbreviations (Dr., B.S., Ph.D., D.C.,
# Prof., Esq.) -- presumably to exercise sentence splitting; confirm intent.
# NOTE(review): "beutiful" and lowercase "white house" are part of the
# original text and appear in the recorded outputs, so they are kept verbatim.

# One original response per example.
responses = [
    "Hello there! Today I visited Dr. Chauhan at his home. He was hanging his framed B.S., M.S., and Ph.D. degrees. It was fun!",
    "One time I went on a class trip to D.C. to visit the white house. While there, I felt in awe of the beutiful art and important people with titles such as Prof., Esq., and others.",
]

# sampled_responses[i] holds the two paraphrased samples for responses[i].
sampled_responses = [
    [
        "Hi! I went to see Dr. Chauhan at his house today. He was busy putting up his framed B.S., M.S., and Ph.D. diplomas. It was quite enjoyable!",
        "Greetings! Today, I stopped by Dr. Chauhan's home. He was in the process of displaying his framed degrees: B.S., M.S., and Ph.D. It was a delightful experience!",
    ],
    [
        "Once, I took a school trip to Washington, D.C. to see the White House. While I was there, I was amazed by the stunning artwork and the notable individuals with titles like Professor, Esquire, and more.",
        "During a class excursion to Washington, D.C., I had the opportunity to visit the White House. I was struck by the beautiful art and the distinguished people holding titles such as Prof., Esq., and others.",
    ],
]

In [16]:
# Decompose each response into its sentences (one list per response).
# In the recorded output, dotted abbreviations such as "Dr.", "B.S." and
# "D.C." do not trigger a sentence break.
sentence_sets = rd.decompose_sentences(responses=responses)
sentence_sets

[['Hello there!',
  'Today I visited Dr. Chauhan at his home.',
  'He was hanging his framed B.S., M.S., and Ph.D. degrees.',
  'It was fun!'],
 ['One time I went on a class trip to D.C. to visit the white house.',
  'While there, I felt in awe of the beutiful art and important people with titles such as Prof., Esq., and others.']]

In [17]:
# Decompose every sampled response into sentences. The result keeps the
# nesting of `sampled_responses`: response -> sample -> list of sentences.
# NOTE(review): in the recorded output the second sample of the first response
# keeps "... and Ph.D. It was a delightful experience!" as a single sentence --
# confirm this split is intended.
sampled_sentence_sets = rd.decompose_candidate_sentences(
    sampled_responses=sampled_responses,
)
sampled_sentence_sets

[[['Hi!',
   'I went to see Dr. Chauhan at his house today.',
   'He was busy putting up his framed B.S., M.S., and Ph.D. diplomas.',
   'It was quite enjoyable!'],
  ['Greetings!',
   "Today, I stopped by Dr. Chauhan's home.",
   'He was in the process of displaying his framed degrees: B.S., M.S., and Ph.D. It was a delightful experience!']],
 [['Once, I took a school trip to Washington, D.C. to see the White House.',
   'While I was there, I was amazed by the stunning artwork and the notable individuals with titles like Professor, Esquire, and more.'],
  ['During a class excursion to Washington, D.C., I had the opportunity to visit the White House.',
   'I was struck by the beautiful art and the distinguished people holding titles such as Prof., Esq., and others.']]]

In [18]:
# Decompose each response into claims (one list per response).
# This call is awaited, so it relies on the notebook's top-level `await`.
# NOTE(review): the recorded output shows an empty claim list for the first
# response -- confirm whether that is expected for this text.
claim_sets = await rd.decompose_claims(responses=responses)
claim_sets

[[],
 ['The speaker went on a trip.',
  'The trip was a class trip.',
  'The speaker went to D.C.',
  'The speaker visited the white house.',
  'The speaker was in D.C.',
  'The speaker felt in awe.',
  'The speaker felt in awe of the art.',
  'The art was beautiful.',
  'The speaker felt in awe of the people.',
  'The people were important.',
  'The people had titles.',
  'Prof. was a title.',
  'Esq. was a title.',
  'Other titles were present.']]

In [19]:
# Decompose sampled responses into claims
sampled_claim_sets = await rd.decompose_candidate_claims(sampled_responses=sampled_responses)
sampled_claim_sets

[[[], []],
 [['I took a school trip.',
   'The school trip went to Washington, D.C.',
   'I went to Washington, D.C. to see the White House.',
   'I was in Washington, D.C.',
   'I was amazed in Washington, D.C.',
   'Stunning artwork caused my amazement.',
   'Notable individuals caused my amazement.',
   'The notable individuals had titles.',
   'Professor was a title of notable individuals.',
   'Esquire was a title of notable individuals.',
   'Some notable individuals had more titles.'],
  ['A class excursion took place.',
   'The class excursion was to Washington, D.C.',
   'The narrator had an opportunity.',
   'The opportunity was during the class excursion.',
   'The opportunity was to visit the White House.',
   'The narrator was struck by art.',
   'The art was beautiful.',
   'The narrator was struck by people.',
   'The people were distinguished.',
   'The people held titles.',
   'Prof. was a title.',
   'Esq. was a title.',
   'The people held other titles.']]]

In [21]:
from uqlm.longform.black_box import UnitResponseScorer, MatchedUnitScorer

In [24]:
# Score each unit of the original responses against the full sampled responses.
# The keyword is `claim_sets`, but sentence sets are passed here.
# NOTE(review): the scorer is built with defaults, so the `device` chosen at
# the top of the notebook is not passed here (unlike the LongFormUQ cells) --
# confirm whether that is intentional.
urs = UnitResponseScorer()

urs_result = urs.evaluate(
    claim_sets=sentence_sets,
    sampled_responses=sampled_responses,
)
# return_all=True: the recorded output is a dict of nested score lists keyed by
# 'entailment', 'noncontradiction' and 'contrasted_entailment'.
urs_result.to_dict(return_all=True)

{'entailment': [[[0.9227377772331238, 0.9302507638931274],
   [0.9965391159057617, 0.9963691830635071],
   [0.989100992679596, 0.9731588363647461],
   [0.9052041172981262, 0.7679730653762817]],
  [[0.9978324174880981, 0.997478723526001],
   [0.9964278340339661, 0.968819260597229]]],
 'noncontradiction': [[[0.9921353459358215, 0.9936415553092957],
   [0.9997926950454712, 0.9997541904449463],
   [0.9995163083076477, 0.9992474913597107],
   [0.9986988306045532, 0.9979174137115479]],
  [[0.99982750415802, 0.999841034412384],
   [0.999812126159668, 0.999585747718811]]],
 'contrasted_entailment': [[[0.9915488362312317, 0.9932112097740173],
   [0.9997920393943787, 0.9997533559799194],
   [0.9995112419128418, 0.9992273449897766],
   [0.9985646605491638, 0.9972955584526062]],
  [[0.9998271465301514, 0.9998406767845154],
   [0.9998114705085754, 0.9995725750923157]]]}

In [25]:
# Score each unit of the original responses against the decomposed units of
# the sampled responses. Both keyword names say "claim", but sentence sets are
# passed here.
# NOTE(review): the scorer is built with defaults, so the `device` chosen at
# the top of the notebook is not passed here (unlike the LongFormUQ cells) --
# confirm whether that is intentional.
mus = MatchedUnitScorer()

mus_result = mus.evaluate(
    claim_sets=sentence_sets,
    sampled_claim_sets=sampled_sentence_sets,
)
# return_all=True: the recorded output is a dict of nested score lists; besides
# the NLI-based keys it also contains 'cosine_sim'.
mus_result.to_dict(return_all=True)

{'entailment': [[[0.9210313558578491, 0.9471302032470703],
   [0.99812251329422, 0.9957913160324097],
   [0.9956673979759216, 0.9840846061706543],
   [0.9889243841171265, 0.8758251070976257]],
  [[0.9977548122406006, 0.996763288974762],
   [0.9969518780708313, 0.907191276550293]]],
 'noncontradiction': [[[0.9994022250175476, 0.9995139837265015],
   [0.9998176693916321, 0.9998164772987366],
   [0.9996823072433472, 0.9994068145751953],
   [0.9997873306274414, 0.9986640214920044]],
  [[0.9998548030853271, 0.9998593926429749],
   [0.9998741745948792, 0.9996825456619263]]],
 'contrasted_entailment': [[[0.9993513822555542, 0.9994871020317078],
   [0.9998173713684082, 0.9998157620429993],
   [0.9996810555458069, 0.9993975758552551],
   [0.9997850060462952, 0.9983170628547668]],
  [[0.9998545050621033, 0.9998589754104614],
   [0.9998738169670105, 0.9996501803398132]]],
 'cosine_sim': [[[0.8490213751792908, 0.8225279450416565],
   [0.9674820899963379, 0.9155158400535583],
   [0.899648904800415,