### Import datasets

In [1]:
import pandas as pd

billsum_section_chunks = pd.read_parquet("billsum_section_chunks.parquet")
govreport_chunks = pd.read_parquet("govreport_chunks.parquet")

govreport_chunks['section_id'] = -1
govreport_chunks['section_title'] = 'FULL TEXT'
govreport_chunks['section_chunk_id'] = -1

billsum_section_chunks.shape[1] == govreport_chunks.shape[1]

all_chunks = pd.read_parquet("all_section_chunks.parquet")
all_chunks.shape

(395014, 13)

In [2]:
def standardize_billsum(ds, split_name, jurisdiction):
    df = ds.to_pandas()
    df = df.rename(columns={
        "text": "source_text",
        "summary": "target_summary",
        "title": "title"
    })
    df = df.reset_index(drop=True)
    df['doc_id'] = 'billsum_' + split_name + '_' + df.index.astype(str)
    df['jurisdiction'] = jurisdiction
    df['split'] = split_name
    df['dataset'] = 'billsum'

    return df
from datasets import load_dataset

billsum = load_dataset("billsum")

billsum_train = billsum["train"]
billsum_test_us = billsum["test"]
billsum_test_ca = billsum["ca_test"]

bs_train_df = standardize_billsum(billsum_train, 'train', 'US')
bs_test_us_df = standardize_billsum(billsum_test_us, 'test_us', 'US')
bs_test_ca_df = standardize_billsum(billsum_test_ca, 'test_ca', 'CA')

billsum_docs = pd.concat([bs_train_df, bs_test_us_df, bs_test_ca_df], ignore_index=True)

billsum_mask = dict(zip(billsum_docs['doc_id'], billsum_docs['source_text']))

  from .autonotebook import tqdm as notebook_tqdm


#### Import Models and Tokenizers

In [3]:
import transformers

In [4]:
# retriever models

# from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModel, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM
from transformers import pipeline
import torch

# st_model = SentenceTransformer('all-mpnet-base-v2', device='cuda')
emb_model_name = 'sentence-transformers/all-mpnet-base-v2'
emb_tokenizer = AutoTokenizer.from_pretrained(emb_model_name)
emb_model = AutoModel.from_pretrained(emb_model_name).to('cuda')

qa_model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad").to('cuda')
qa_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")

qa_pipeline = pipeline(
    "question-answering",
    model=qa_model,
    tokenizer=qa_tokenizer,
    device=0
)

# generator model

tokenizer = AutoTokenizer.from_pretrained("AtharvaKirk/legal-summarizer-distilbart")
model = AutoModelForSeq2SeqLM.from_pretrained("AtharvaKirk/legal-summarizer-distilbart")
model = model.to('cuda')
model.eval()

Device set to use cuda:0


BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        

#### Importing embeddings for ST

In [5]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

billsum_embeddings = torch.load("billsum_embeddings.pt")
govreport_embeddings = torch.load("govreport_embeddings.pt")
all_embs = torch.load("all_embeddings.pt")

print(all_embs.shape)

torch.Size([395014, 768])


#### Adding functions required

In [6]:
import torch.nn.functional as F

def mean_pool(last_hidden_state, attention_mask):
    mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
    masked = last_hidden_state * mask
    summed = torch.sum(masked, dim=1)
    counts = torch.clamp(mask.sum(dim=1), min=1e-9)
    return summed / counts

@torch.no_grad()
def encode_query(query):
    enc = emb_tokenizer(
        query,
        padding=True,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    ).to('cuda')
    outputs = emb_model(**enc)
    q_emb = mean_pool(outputs.last_hidden_state, enc['attention_mask'])[0]

    return q_emb

@torch.no_grad()
def retrieve_all(query, top_k=5):
    q_emb = encode_query(query)
    # all_embs_cpu = all_embs.cpu()
    sims = F.cosine_similarity(q_emb.unsqueeze(0), all_embs, dim=1)

    top_k = min(top_k, len(sims))
    top_scores, top_idx = torch.topk(sims, k=top_k)

    top_idx = top_idx.cpu().numpy()
    top_scores = top_scores.cpu().numpy()

    return all_chunks.iloc[top_idx].assign(score=top_scores)


def get_neighbor_chunk(doc_id, chunk_id, offset):
    neighbor_id = chunk_id + offset
    neighbor = all_chunks[
        (all_chunks['doc_id'] == doc_id) &
        (all_chunks['chunk_id'] == neighbor_id)
    ]
    if len(neighbor) == 0:
        return None
    return neighbor.iloc[0]

def answer_qa_window(query, top_k=5, window_size=300):
    retrieved = retrieve_all(query, top_k=top_k)
    
    if retrieved.empty:
        return None
    
    best_result = None
    best_score = float('-inf')
    best_row = None

    for _, row in retrieved.iterrows():
        context = row['chunk_text']
        result = qa_pipeline(question=query, context=context)

        if result['score'] > best_score:
            best_result = result
            best_score = result['score']
            best_row = row
    
    if best_result is None or best_row is None:
        return None
    
    dataset = best_row.get('dataset')

    if (dataset == 'billsum') and pd.notna(best_row.get('section_id')) and best_row.get('section_title') != 'FULL TEXT':
        # skip window expansion for sectioned billsum chunks for now
        full_section = all_chunks[
            (all_chunks['doc_id'] == best_row['doc_id']) &
            (all_chunks['section_id'] == best_row['section_id'])
        ].sort_values(['section_chunk_id', 'chunk_id'])
        min_start = full_section['start_token'].min()
        max_end = full_section['end_token'].max()
        full_text = billsum_mask.get(best_row['doc_id'], "")
        print(full_text)
        min_start = max(0, min_start)
        max_end = min(len(full_text), max_end)

        section_ids = full_text[min_start:max_end]
        # full_section_text = t5_tokenizer.decode(section_ids, skip_special_tokens=True)
        clause_window = full_text[min_start:max_end]
        return {
            'question': query,
            'answer_span': best_result['answer'],
            'score': best_result['score'],
            'start': best_result['start'],
            'end': best_result['end'],
            'clause_window': clause_window,
            'full_chunk_text': best_row['chunk_text'],
            'dataset': best_row.get('dataset'),
            'doc_id': best_row.get('doc_id'),
            'chunk_id': best_row.get('chunk_id'),
            'section_id': best_row.get('section_id', None),
            'section_chunk_id': best_row.get('section_chunk_id', None),
        }
    chunk_text = best_row['chunk_text']
    start = best_result['start']
    end = best_result['end']

    start_window = max(0, start - window_size)
    end_window = min(len(chunk_text), end + window_size)

    left_text = chunk_text[start_window:end_window]
    total_len = len(left_text)

    if start_window == 0:
        prev_row = get_neighbor_chunk(best_row['doc_id'], best_row['chunk_id'], -1)
        if prev_row is not None:
            prev_text = prev_row['chunk_text']
            need = window_size - (start)
            if need > 0:
                prefix = prev_text[max(0, len(prev_text)-need):]
                left_text = prefix + left_text

    if end_window == len(chunk_text):
        next_row = get_neighbor_chunk(best_row['doc_id'], best_row['chunk_id'], 1)
        if next_row is not None:
            next_text = next_row['chunk_text']
            need = window_size - (len(chunk_text) - start_window - (end - start))
            if need > 0:
                suffix = next_text[:max(0, need)]
                left_text = left_text + suffix

            

    clause_window = left_text

    return {
        'question': query,
        'answer_span': best_result['answer'],
        'score': best_result['score'],
        'start': start,
        'end': end,
        'clause_window': clause_window,
        'full_chunk_text': chunk_text,
        'dataset': best_row.get('dataset'),
        'doc_id': best_row.get('doc_id'),
        'chunk_id': best_row.get('chunk_id'),
        'section_id': best_row.get('section_id', None),
        'section_chunk_id': best_row.get('section_chunk_id', None),
    }

def answer_question_final(query, top_k=5, window_size=600, max_input_tokens=1024, max_output_tokens=256):
    res = answer_qa_window(
        query,
        top_k=top_k,
        window_size=window_size
    )

    # print(res)
    # prompt = ''
    # # prompt = 'Assume the role of a legal assistant to rewrite the following legal clause in clear, plain English. Preserve all legal conditions and do not add or remove any requirements.\n\n'
    # prompt += res['question'] + '\n\n'
    # prompt += res['clause_window'] + '\n\nPlain English version:\n'
    # print(prompt)
    # print('Prompt for summarization:')
    # print(res['clause_window'])
    # print('--- End of Prompt ---')
    prompt = res['clause_window']
    answer = get_abstractive_summary(
        prompt,
        max_input_tokens=max_input_tokens,
        max_output_tokens=max_output_tokens,
    )
    # print(answer)
    return answer

def get_abstractive_summary(text, max_input_tokens=1024, max_output_tokens=1024):
    inputs = tokenizer(
        text, 
        return_tensors='pt',
        truncation=True,
        max_length=max_input_tokens,
    ).to('cuda')
    # with torch.no_grad():
    outputs = model.generate(
        inputs['input_ids'],
        attention_mask=inputs.get('attention_mask', None),
        max_length=max_output_tokens,
        min_length=100,
        num_beams=6,
        early_stopping=True,
        length_penalty=1.2,
        no_repeat_ngram_size=3,
    )

    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary




#### Testing functions

In [7]:
test_queries = [
    "liability of business entities providing facilities to nonprofits",
    "income derived from private sector business activity",
]

for q in test_queries:
    results = retrieve_all(q, top_k=3)
    print(f"Results for query: {q}")
    print(results[['dataset','doc_id', 'chunk_id', 'section_id', 'section_chunk_id', 'score', 'chunk_text']])

Results for query: liability of business entities providing facilities to nonprofits
       dataset              doc_id  chunk_id  section_id  section_chunk_id  \
8795   billsum  billsum_train_1123         0           0                 0   
0      billsum     billsum_train_0         0           0                 0   
46860  billsum  billsum_train_6083         1           0                 1   

          score                                         chunk_text  
8795   0.865064  SECTION 1. LIABILITY OF BUSINESS ENTITIES PROV...  
0      0.865064  SECTION 1. LIABILITY OF BUSINESS ENTITIES PROV...  
46860  0.719623  public benefit and operated primarily for char...  
Results for query: income derived from private sector business activity
          dataset                 doc_id  chunk_id  section_id  \
329599  govreport  govreport_train_13623         1          -1   
196073  govreport   govreport_train_1551        11          -1   
358314  govreport  govreport_train_16153         3      

In [8]:
res = answer_qa_window(
    "What are the regulations regarding liability of business entities providing facilities to nonprofits?",
    top_k=5,
    window_size=600
)

print('Answer Span:', res['answer_span'])
print('\nWindow\n', res['clause_window'])
print('\nFrom:', res['dataset'], res['doc_id'], 'chunk', res['chunk_id'])

SECTION 1. LIABILITY OF BUSINESS ENTITIES PROVIDING USE OF FACILITIES 
              TO NONPROFIT ORGANIZATIONS.

    (a) Definitions.--In this section:
            (1) Business entity.--The term ``business entity'' means a 
        firm, corporation, association, partnership, consortium, joint 
        venture, or other form of enterprise.
            (2) Facility.--The term ``facility'' means any real 
        property, including any building, improvement, or appurtenance.
            (3) Gross negligence.--The term ``gross negligence'' means 
        voluntary and conscious conduct by a person with knowledge (at 
        the time of the conduct) that the conduct is likely to be 
        harmful to the health or well-being of another person.
            (4) Intentional misconduct.--The term ``intentional 
        misconduct'' means conduct by a person with knowledge (at the 
        time of the conduct) that the conduct is harmful to the health 
        or well-being of another perso

#### Final Testing

In [9]:
question = 'What are the regulations regarding liability of business entities providing facilities to nonprofits?'



final_answer = answer_question_final(
    question,
    top_k=5,
    window_size=600,
    max_input_tokens=1024,
    max_output_tokens=1024,
)

SECTION 1. LIABILITY OF BUSINESS ENTITIES PROVIDING USE OF FACILITIES 
              TO NONPROFIT ORGANIZATIONS.

    (a) Definitions.--In this section:
            (1) Business entity.--The term ``business entity'' means a 
        firm, corporation, association, partnership, consortium, joint 
        venture, or other form of enterprise.
            (2) Facility.--The term ``facility'' means any real 
        property, including any building, improvement, or appurtenance.
            (3) Gross negligence.--The term ``gross negligence'' means 
        voluntary and conscious conduct by a person with knowledge (at 
        the time of the conduct) that the conduct is likely to be 
        harmful to the health or well-being of another person.
            (4) Intentional misconduct.--The term ``intentional 
        misconduct'' means conduct by a person with knowledge (at the 
        time of the conduct) that the conduct is harmful to the health 
        or well-being of another perso



In [10]:
final_answer

'Directs a business entity to: (1) provide liability for the liability of businesses providing the use of certain real property or real property to nonprofit organizations; (2) authorize the disclosure of liability for gross negligence to a person with knowledge that the conduct is likely to be harmful to the health or well-being of another person; (3) establish a facility that is contiguous to any real or physical building, improvement, or appurtenance; and (4) prohibit intentional misconduct'

#### Random Testing

In [66]:
# the summarizer handles 1024 max input tokens. legal text is much, much longer.
#trying out something new here

def chunk_for_summarization(text, tokenizer, max_input_tokens=1024, overlap=128):
    enc = tokenizer(
        text,
        return_tensors='pt',
        truncation=False,
        padding=False,
        add_special_tokens=False,
    )

    input_ids = enc['input_ids'][0]

    chunks = []
    start = 0
    n = len(input_ids)
    print(f'Total tokens in text: {n}')
    while start < n:
        end = min(start + max_input_tokens, n)
        chunk_ids = input_ids[start:end]
        chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
        chunks.append(chunk_text)

        if end >= n:
            break

        start = end - overlap
    
    return chunks

def summarize_windows(windows, question=None, max_output_tokens=256, max_input_tokens=1024):
    partial_summaries = []

    for w in windows:
        if question:
            prompt = '"{}"\n\nText:\n{}'.format(question, w)
        else:
            prompt = '{}'.format(w)

        summary = get_abstractive_summary(
            prompt,
            max_input_tokens=max_input_tokens,
            max_output_tokens=max_output_tokens,
        )
        partial_summaries.append(summary)
    
    return partial_summaries
    
def combine_summaries(partials, question=None, max_input_tokens=1024, max_output_tokens=256):
    joined = "\n\n".join(f'- {p}'  for p in partials)

    if question:
        prompt = '"{}"\n\nSummaries:\n{}'.format(question, joined)
    else:
        prompt = '{}'.format(joined)
    
    final_summary = get_abstractive_summary(
        prompt,
        max_input_tokens=max_input_tokens,
        max_output_tokens=max_output_tokens,
    )

    return final_summary

def summarize_large_text(clause_text, question=None):
    windows = chunk_for_summarization(
        clause_text,
        tokenizer,
        max_input_tokens=1024,
        overlap=128,
    )
    
    if len(windows) == 1:
        if question:
            prompt = '{}'.format(windows[0])
        else:
            prompt = '{}'.format(windows[0])
        
        return get_abstractive_summary(
            prompt,
            max_input_tokens=1024,
            max_output_tokens=512,
        )
    
    partials = summarize_windows(
        windows,
        question=question,
        max_input_tokens=1024,
        max_output_tokens=256,
    )

    

    final_answer = combine_summaries(
        partials,
        question=question,
        max_input_tokens=1024,
        max_output_tokens=512,
    )

    return final_answer, partials

def answer_question_final_v2(query, top_k=5, window_size=600):
    res = answer_qa_window(
        query,
        top_k=top_k,
        window_size=window_size
    )

    if res is None:
        return {'question': query, 'answer': None, 'reason': 'No relevant chunks found.'}
    
    clause = res['clause_window']

    final_answer, partials = summarize_large_text(
        clause,
        question=query
    )
        
    
    
    return {
        'question': query,
        'answer': final_answer,
        'raw_clause': clause,
        'qa_span': res['answer_span'],
        'qa_score': res['score'],
        'dataset': res['dataset'],
        'doc_id': res['doc_id'],
        'chunk_id': res['chunk_id'],
        'section_id': res.get('section_id', None),
        'section_chunk_id': res.get('section_chunk_id', None),
    }

In [64]:
question = 'What are the regulations regarding liability of business entities providing facilities to nonprofits?'

res = answer_qa_window(
    question,
    top_k=5,
    window_size=600
)

print('Clause Window', res['clause_window'])

SECTION 1. LIABILITY OF BUSINESS ENTITIES PROVIDING USE OF FACILITIES 
              TO NONPROFIT ORGANIZATIONS.

    (a) Definitions.--In this section:
            (1) Business entity.--The term ``business entity'' means a 
        firm, corporation, association, partnership, consortium, joint 
        venture, or other form of enterprise.
            (2) Facility.--The term ``facility'' means any real 
        property, including any building, improvement, or appurtenance.
            (3) Gross negligence.--The term ``gross negligence'' means 
        voluntary and conscious conduct by a person with knowledge (at 
        the time of the conduct) that the conduct is likely to be 
        harmful to the health or well-being of another person.
            (4) Intentional misconduct.--The term ``intentional 
        misconduct'' means conduct by a person with knowledge (at the 
        time of the conduct) that the conduct is harmful to the health 
        or well-being of another perso

#### Testing other statements

In [11]:
question2 = 'What are the tax implications for nonprofits engaging in private sector business activities?'

final_answer2 = answer_question_final(
    question2,
    top_k=5,
    window_size=600
)

print(final_answer2)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


The recent economic downturn increased the demand for many of the goods and services provided by charitable organizations, while simultaneously placing the same organizations under increased financial constraints. The treatment of nonprofits in health care reform has also been a major issue, as the tax code cannot provide the same incentives to nonprofits (nonprofit health care providers and nonprofit employers providing insurance) that are available to for-profit organizations. To assist policymakers in evaluating reforms that will potentially affect nonprofit and charitable groups, this report provides a broad overview


In [None]:
import gradio as gr

def answer_question_gradio(query, top_k=5, window_size=600):
    if not query or query.strip() == "":
        return "Please enter a valid question."
    
    result = answer_qa_window(
        query,
        top_k=top_k,
        window_size=window_size
    )

    if result is None:
        return "No answer found."

    clause = result['clause_window']
    meta_dataset = result.get('dataset', '')
    meta_doc_id = result.get('doc_id', '')
    meta_chunk_id = result.get('chunk_id', '')
    meta_score = result.get('score', 0)

    try:
        answer = get_abstractive_summary(
            clause,
            max_input_tokens=1024,
            max_output_tokens=512,
        )
    except Exception as e:
        return f"Error during summarization: {str(e)}"
    
    return answer, clause, meta_dataset, meta_doc_id, f"Chunk ID: {meta_chunk_id}, Score: {meta_score}"

demo = gr.Interface(
    fn = answer_question_gradio,
    inputs = [
        gr.Textbox(lines=2, label="Enter your legal question here"),
        gr.Slider(1, 10, value=5, step=1, label="Number of Retrieved Chunks (top_k)"),
        gr.Slider(100, 1000, value=600, step=50, label="Context Window Size (in tokens)"),
    ],
    outputs = [
        gr.Textbox(lines=10, label="Generated Answer"),
        gr.Textbox(lines=10, label="Relevant Clause Window"),
        gr.Textbox(lines=1, label="Dataset"),
        gr.Textbox(lines=1, label="Document ID"),
        gr.Textbox(lines=1, label="Chunk ID and Score"),
    ],
    title="Legal Question Answering System",
    description="Ask legal questions and get answers based on a corpus of legal documents.",
)

if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


SECTION 1. LIABILITY OF BUSINESS ENTITIES PROVIDING USE OF FACILITIES 
              TO NONPROFIT ORGANIZATIONS.

    (a) Definitions.--In this section:
            (1) Business entity.--The term ``business entity'' means a 
        firm, corporation, association, partnership, consortium, joint 
        venture, or other form of enterprise.
            (2) Facility.--The term ``facility'' means any real 
        property, including any building, improvement, or appurtenance.
            (3) Gross negligence.--The term ``gross negligence'' means 
        voluntary and conscious conduct by a person with knowledge (at 
        the time of the conduct) that the conduct is likely to be 
        harmful to the health or well-being of another person.
            (4) Intentional misconduct.--The term ``intentional 
        misconduct'' means conduct by a person with knowledge (at the 
        time of the conduct) that the conduct is harmful to the health 
        or well-being of another perso

In [15]:
text ='''Subtitle A—Nutrition
SEC. 10101. RE-EVALUATION OF THRIFTY FOOD PLAN.
(a) In General.—Section 3 of the Food and Nutrition Act of 2008 (7 U.S.C. 2012) is amended by striking subsection (u) and inserting the following:
“(u) Thrifty Food Plan.—
“(1) 
Definition.

In general.—The term ‘thrifty food plan’ means the diet required to feed a family of 4 persons consisting of a man and a woman ages 20 through 50, a child ages 6 through 8, and a child ages 9 through 11 using the items and quantities of food described in the report of the Department of Agriculture entitled ‘Thrifty Food Plan, 2021’, and each successor report updated pursuant to this subsection, subject to the conditions that—
“(A) the relevant market baskets of the thrifty food plan shall only be changed pursuant to paragraph (4);
“(B) the cost of the thrifty food plan shall be the basis for uniform allotments for all households, regardless of the actual composition of the household; and
“(C) the cost of the thrifty food plan may only be adjusted in accordance with this subsection.
“(2) Household adjustments.—The Secretary shall make household adjustments using the following ratios of household size as a percentage of the maximum 4-person allotment:
“(A) For a 1-person household, 30 percent.
“(B) For a 2-person household, 55 percent.
“(C) For a 3-person household, 79 percent.
“(D) For a 4-person household, 100 percent.
“(E) For a 5-person household, 119 percent.
“(F) For a 6-person household, 143 percent.
“(G) For a 7-person household, 158 percent.
“(H) For an 8-person household, 180 percent.
139 STAT. 81
“(I) For a household of 9 persons or more, an additional 22 percent per person, which additional percentage shall not total more than 200 percent.
“(3) Allowable cost adjustments.—The Secretary shall—
“(A) 
Hawaii.

Alaska.

make cost adjustments in the thrifty food plan for Hawaii and the urban and rural parts of Alaska to reflect the cost of food in Hawaii and urban and rural Alaska;
“(B) 
Guam.

Virgin Islands.

make cost adjustments in the separate thrifty food plans for Guam and the Virgin Islands of the United States to reflect the cost of food in those States, but not to exceed the cost of food in the 50 States and the District of Columbia; and
“(C) 
Effective dates.

Time period.

Expiration date.

on October 1, 2025, and on each October 1 thereafter, adjust the cost of the thrifty food plan to reflect changes in the Consumer Price Index for All Urban Consumers, published by the Bureau of Labor Statistics of the Department of Labor, for the most recent 12-month period ending in June.
“(4) Re-evaluation of market baskets.—
“(A) 
Deadline.

Re-evaluation.—Not earlier than October 1, 2027, the Secretary may re-evaluate the market baskets of the thrifty food plan based on current food prices, food composition data, consumption patterns, and dietary guidance.
“(B) Cost neutrality.—The Secretary shall not increase the cost of the thrifty food plan based on a re-evaluation under this paragraph.”
.
(b) Conforming Amendments.—
(1) Section 16(c)(1)(A)(ii)(II) of the Food and Nutrition Act of 2008 (7 U.S.C. 2025(c)(1)(A)(ii)(II)) is amended by striking “section 3(u)(4)” and inserting “section 3(u)(3)”.
(2) Section 19(a)(2)(A)(ii) of the Food and Nutrition Act of 2008 (7 U.S.C. 2028(a)(2)(A)(ii)) is amended by striking “section 3(u)(4)” and inserting “section 3(u)(3)”.
(3) Section 27(a)(2) of the Food and Nutrition Act of 2008 (7 U.S.C. 2036(a)(2))) is amended by striking “section 3(u)(4)” each place it appears and inserting “section 3(u)(3)”.
SEC. 10102. MODIFICATIONS TO SNAP WORK REQUIREMENTS FOR ABLE-BODIED ADULTS.
(a) Exceptions.—Section 6(o) of the Food and Nutrition Act of 2008 (7 U.S.C. 2015(o)) is amended by striking paragraph (3) and inserting the following:
“(3) Exceptions.—Paragraph (2) shall not apply to an individual if the individual is—
“(A) under 18, or over 65, years of age;
“(B) medically certified as physically or mentally unfit for employment;
“(C) a parent or other member of a household with responsibility for a dependent child under 14 years of age;
“(D) otherwise exempt under subsection (d)(2);
“(E) a pregnant woman;
“(F) an Indian or an Urban Indian (as such terms are defined in paragraphs (13) and (28) of section 4 of the Indian Health Care Improvement Act); or
139 STAT. 82
“(G) a California Indian described in section 809(a) of the Indian Health Care Improvement Act.”
.
(b) Standardizing Enforcement.—Section 6(o)(4) of the Food and Nutrition Act of 2008 (7 U.S.C. 2015(o)(4)) is amended—
(1) in subparagraph (A), by striking clause (ii) and inserting the following:
“(ii) is in a noncontiguous State and has an unemployment rate that is at or above 1.5 times the national unemployment rate.”
; and
(2) by adding at the end the following:
“(C) Definition of noncontiguous state.—
“(i) In general.—In this paragraph, the term ‘noncontiguous State’ means a State that is not 1 of the contiguous 48 States or the District of Columbia.
“(ii) Exclusions.—The term ‘noncontiguous State’ does not include Guam or the Virgin Islands of the United States.”
.
(c) Waiver for Noncontiguous States.—Section 6(o) of the Food and Nutrition Act of 2008 (7 U.S.C. 2015(o)) is amended—
(1) by redesignating paragraph (7) as paragraph (8); and
(2) by inserting after paragraph (6) the following:“(7) Exemption for noncontiguous states.—
“(A) Definition of noncontiguous state.—
“(i) In general.—In this paragraph, the term ‘noncontiguous State’ means a State that is not 1 of the contiguous 48 States or the District of Columbia.
“(ii) Exclusions.—In this paragraph, the term ‘noncontiguous State’ does not include Guam or the Virgin Islands of the United States.
“(B) Exemption.—Subject to subparagraph (D), the Secretary may exempt individuals in a noncontiguous State from compliance with the requirements of paragraph (2) if—
“(i) the State agency submits to the Secretary a request for that exemption, made in such form and at such time as the Secretary may require, and including the information described in subparagraph (C); and
“(ii) 
Determination.

Compliance.

the Secretary determines that based on that request, the State agency is demonstrating a good faith effort to comply with the requirements of paragraph (2).
“(C) Good faith effort determination.—In determining whether a State agency is demonstrating a good faith effort for purposes of subparagraph (B)(ii), the Secretary shall consider—
“(i) any actions taken by the State agency toward compliance with the requirements of paragraph (2);
“(ii) any significant barriers to or challenges in meeting those requirements, including barriers or challenges relating to funding, design, development, procurement, or installation of necessary systems or resources;
“(iii) the detailed plan and timeline of the State agency for achieving full compliance with those requirements, including any milestones (as defined by the Secretary); and
139 STAT. 83
“(iv) any other criteria determined appropriate by the Secretary.
“(D) Duration of exemption.—
“(i) 
Deadline.

In general.—An exemption granted under subparagraph (B) shall expire not later than December 31, 2028, and may not be renewed beyond that date.
“(ii) 
Determination.

Early termination.—The Secretary may terminate an exemption granted under subparagraph (B) prior to the expiration date of that exemption if the Secretary determines that the State agency—
“(I) 
Compliance.

has failed to comply with the reporting requirements described in subparagraph (E); or
“(II) based on the information provided pursuant to subparagraph (E), failed to make continued good faith efforts toward compliance with the requirements of this subsection.
“(E) Reporting requirements.—A State agency granted an exemption under subparagraph (B) shall submit to the Secretary—
“(i) quarterly progress reports on the status of the State agency in achieving the milestones toward full compliance described in subparagraph (C)(iii); and
“(ii) 
Plan.

information on specific risks or newly identified barriers or challenges to full compliance, including the plan of the State agency to mitigate those risks, barriers, or challenges.”
.
SEC. 10103. AVAILABILITY OF STANDARD UTILITY ALLOWANCES BASED ON RECEIPT OF ENERGY ASSISTANCE.
(a) Standard Utility Allowance.—Section 5(e)(6)(C)(iv)(I) of the Food and Nutrition Act of 2008 (7 U.S.C. 2014(e)(6)(C)(iv)(I)) is amended by inserting “with an elderly or disabled member” after “households”.
(b) Third-party Energy Assistance Payments.—Section 5(k)(4) of the Food and Nutrition Act of 2008 (7 U.S.C. 2014(k)(4)) is amended—
(1) in subparagraph (A), by inserting “without an elderly or disabled member” before “shall be”; and
(2) in subparagraph (B), by inserting “with an elderly or disabled member” before “under a State law”.
SEC. 10104. RESTRICTIONS ON INTERNET EXPENSES.
  Section 5(e)(6) of the Food and Nutrition Act of 2008 (7 U.S.C. 2014(e)(6)) is amended by adding at the end the following:“(E) Restrictions on internet expenses.—Any service fee associated with internet connection shall not be used in computing the excess shelter expense deduction under this paragraph.”.
SEC. 10105. MATCHING FUNDS REQUIREMENTS.
(a) In General.—Section 4(a) of the Food and Nutrition Act of 2008 (7 U.S.C. 2013(a)) is amended—
(1) by striking “(a) Subject to” and inserting the following:
“(a) Program.—
“(1) Establishment.—Subject to”
; and
(2) by adding at the end the following:“(2) State quality control incentive.—
139 STAT. 84
“(A) Definition of payment error rate.—In this paragraph, the term ‘payment error rate’ has the meaning given the term in section 16(c)(2).
“(B) 
Time periods.

State cost share.—
“(i) 
Effective date.

In general.—Subject to clause (iii), beginning in fiscal year 2028, if the payment error rate of a State as determined under clause (ii) is—
“(I) less than 6 percent, the Federal share of the cost of the allotment described in paragraph (1) for that State in a fiscal year shall be 100 percent, and the State share shall be 0 percent;
“(II) equal to or greater than 6 percent but less than 8 percent, the Federal share of the cost of the allotment described in paragraph (1) for that State in a fiscal year shall be 95 percent, and the State share shall be 5 percent;
“(III) equal to or greater than 8 percent but less than 10 percent, the Federal share of the cost of the allotment described in paragraph (1) for that State in a fiscal year shall be 90 percent, and the State share shall be 10 percent; and
“(IV) equal to or greater than 10 percent, the Federal share of the cost of the allotment described in paragraph (1) for that State in a fiscal year shall be 85 percent, and the State share shall be 15 percent.
“(ii) Elections.—
“(I) Fiscal year 2028.—For fiscal year 2028, to calculate the applicable State share under clause (i), a State may elect to use the payment error rate of the State from fiscal year 2025 or 2026.
“(II) Fiscal year 2029 and thereafter.—For fiscal year 2029 and each fiscal year thereafter, to calculate the applicable State share under clause (i), the Secretary shall use the payment error rate of the State for the third fiscal year preceding the fiscal year for which the State share is being calculated.
“(iii) Delayed implementation.—
“(I) Fiscal year 2025.—If, for fiscal year 2025, the payment error rate of a State multiplied by 1.5 is equal to or above 20 percent, the implementation date under clause (i) for that State shall be fiscal year 2029.
“(II) Fiscal year 2026.—If, for fiscal year 2026, the payment error rate of a State multiplied by 1.5 is equal to or above 20 percent, the implementation date under clause (i) for that State shall be fiscal year 2030.
“(3) Maximum federal payment.—The Secretary may not pay towards the cost of an allotment described in paragraph (1) an amount that is greater than the applicable Federal share under paragraph (2).”.
(b) Limitation on Authority.—Section 13(a)(1) of the Food and Nutrition Act of 2008 (7 U.S.C. 2022(a)(1)) is amended in
139 STAT. 85
the first sentence by inserting “or the payment or disposition of a State share under section 4(a)(2)” after “16(c)(1)(D)(i)(II)”.
SEC. 10106. ADMINISTRATIVE COST SHARING.
  Section 16(a) of the Food and Nutrition Act of 2008 (7 U.S.C. 2025(a)) is amended in the matter preceding paragraph (1) by striking “agency an amount equal to 50 per centum” and inserting “agency, through fiscal year 2026, 50 percent, and for fiscal year 2027 and each fiscal year thereafter, 25 percent,”.
SEC. 10107. NATIONAL EDUCATION AND OBESITY PREVENTION GRANT PROGRAM.
  Section 28(d)(1)(F) of the Food and Nutrition Act of 2008 (7 U.S.C. 2036a(d)(1)(F)) is amended by striking “for fiscal year 2016 and each subsequent fiscal year” and inserting “for each of fiscal years 2016 through 2025”.
SEC. 10108. ALIEN SNAP ELIGIBILITY.
  Section 6(f) of the Food and Nutrition Act of 2008 (7 U.S.C. 2015(f)) is amended to read as follows:
“(f) No individual who is a member of a household otherwise eligible to participate in the supplemental nutrition assistance program under this section shall be eligible to participate in the supplemental nutrition assistance program as a member of that or any other household unless he or she is—
“(1) a resident of the United States; and
“(2) either—
“(A) a citizen or national of the United States;
“(B) an alien lawfully admitted for permanent residence as an immigrant as defined by sections 101(a)(15) and 101(a)(20) of the Immigration and Nationality Act, excluding, among others, alien visitors, tourists, diplomats, and students who enter the United States temporarily with no intention of abandoning their residence in a foreign country;
“(C) an alien who has been granted the status of Cuban and Haitian entrant, as defined in section 501(e) of the Refugee Education Assistance Act of 1980 (Public Law 96–422); or
“(D) an individual who lawfully resides in the United States in accordance with a Compact of Free Association referred to in section 402(b)(2)(G) of the Personal Responsibility and Work Opportunity Reconciliation Act of 1996.
The income (less, at State option, a pro rata share) and financial resources of the individual rendered ineligible to participate in the supplemental nutrition assistance program under this subsection shall be considered in determining the eligibility and the value of the allotment of the household of which such individual is a member.”'''

In [16]:
summary = get_abstractive_summary(text, max_input_tokens=1024, max_output_tokens=512)

summary

'Amends the Food and Nutrition Act of 2008 to require the Secretary of Agriculture to: (1) revise the thrifty food plan; (2) adjust the cost of the plan to reflect changes in the Consumer Price Index for All Urban Consumers (CPI) for the most recent 12-month period ending in June; and (3) make necessary cost adjustments for Guam and the Virgin Islands. Requires the Secretary to re-evaluate the market baskets of the Thrifty Food Plan based on current'