In [None]:
import ast
import json
import re

import ollama

client = ollama.Client()

def _strip_think(raw):
    """Return ``raw`` without any ``<think>...</think>`` spans, trimmed of outer whitespace."""
    return re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()


def _parse_vote_mapping(reply):
    """Parse the first ``{...}`` object found in ``reply`` into a dict.

    Raises:
        ValueError: if ``reply`` contains no ``{...}`` span, if the span cannot
            be parsed, or if it parses to something other than a dict.
    """
    match = re.search(r"\{.*?\}", reply, re.DOTALL)
    if match is None:
        raise ValueError(f"no JSON object found in model reply: {reply!r}")

    compact = re.sub(r"\s+", "", match.group(0))
    try:
        # Tried first because it tolerates the trailing comma that the prompt's
        # own example contains, which strict JSON parsing rejects.
        parsed = ast.literal_eval(compact)
    except (ValueError, SyntaxError):
        try:
            # Fallback for JSON-only literals (null / true / false).
            parsed = json.loads(compact)
        except ValueError as err:
            raise ValueError(
                f"could not parse model reply as a mapping: {match.group(0)!r}"
            ) from err

    if not isinstance(parsed, dict):
        raise ValueError(f"model reply did not parse to a dict: {match.group(0)!r}")
    return parsed


def query_deepseekr1(text, policy_area):
    """Ask the ``deepseek-r1`` model how Yea/Nay votes on a bill map onto two scales.

    Two requests are sent through the module-level ``client``: the first asks
    for a short summary of the bill, the second feeds that summary back and
    asks for a JSON object of scores from -5 to 5.

    Args:
        text: Bill text; must be a string.
        policy_area: Policy-area label placed in the first prompt. Any
            non-string value (e.g. ``None`` or a NaN from a missing CSV cell)
            is sent as ``"Unknown"``.

    Returns:
        dict: The parsed object from the second reply. The prompt requests the
        keys ``Yea_Social_Scale``, ``Yea_Economic_Scale``, ``Nay_Social_Scale``
        and ``Nay_Economic_Scale``; the values are whatever the model produced
        and are not validated or converted here.

    Raises:
        TypeError: if ``text`` is not a string.
        ValueError: if the second reply contains no parseable ``{...}`` object.
    """
    if not isinstance(text, str):
        raise TypeError(f"bill text must be a string, got {type(text).__name__}")
    if not isinstance(policy_area, str):
        policy_area = "Unknown"

    system_prompt = """
    You are a helpful and concise assistant that will help me answer questions about United States Congress bills."
    """

    user_prompt = """
    Please provide a short summary of this bill with the key points and topics it would influence.
    """

    user_prompt += "\nPOLICY AREA: " + policy_area + "\n" + text

    response = client.generate(
        model="deepseek-r1",
        system=system_prompt,
        prompt=user_prompt
    )

    outside_think_text = _strip_think(response.response)

    system_prompt = """
    You are a helpful and concise assistant that will help me answer questions about United States Congress bills. Use brief responses and ONLY return the information that is requested in JSON format. Do not provide additional information. Do not respond in markdown format. ONLY respond in valid JSON format."
    """

    # NOTE(review): the example object below ends with a trailing comma, which is
    # not valid JSON even though the prompt demands valid JSON; left unchanged so
    # model inputs stay identical.
    # NOTE(review): this prompt pairs "social" with left/right and "economic" with
    # authoritarian/libertarian; the opposite pairing may have been intended.
    # Confirm before changing, since it alters what the scores mean.
    user_prompt = """
    How would a Yea and Nay vote on this bill correspond on the social scale (left or right) and the economic scale (authoritarian or libertarian).
    
    Respond with a number from -5 to 5. 
    
    For the social scale, -5 is all the way left, 5 is all the way right.
    For the economic scale, -5 is all the way authoritarian, 5 is all the way libertarian.
    
    Return your response in a JSON in the following format:
    {
        "Yea_Social_Scale" : "<ONLY respond with a number -5 to 5>",
        "Yea_Economic_Scale" : "<ONLY respond with a number -5 to 5>",
        "Nay_Social_Scale" : "<ONLY respond with a number -5 to 5>",
        "Nay_Economic_Scale" : "<ONLY respond with a number -5 to 5>",
    }
    \n
    """

    user_prompt += outside_think_text

    response = client.generate(
        model="deepseek-r1",
        system=system_prompt,
        prompt=user_prompt
    )

    remove_think = _strip_think(response.response)
    print(remove_think)

    return _parse_vote_mapping(remove_think)

In [None]:
# Folder the per-congress, per-bill-type input CSVs are read from.
VOTES_TO_TEXTS_DIR = "votes_to_texts/"

# 115th through 119th Congress, newest first (the 115th convened in January 2017).
congresses = list(range(119, 114, -1))

# Bill-type codes: House bill, Senate bill, House joint resolution, Senate joint resolution.
bill_types = ["hr", "s", "hjres", "sjres"]

In [None]:
import pandas as pd
import os

# For every (congress, bill type) pair that has an input CSV, query the model
# once per row and save the table with a new "VoteMapping" column.

# Create the output folder up front so a missing directory cannot make
# to_csv fail after all the model calls for a file have already run.
os.makedirs("summaries_and_vote_mapping", exist_ok=True)

for c in congresses:
    for b_t in bill_types:
        file = f"{c}_{b_t}.csv"
        file_name = VOTES_TO_TEXTS_DIR + file
        if not os.path.exists(file_name):
            continue

        print(f"{file_name} exists")
        df = pd.read_csv(file_name)

        # One entry per row, in row order; rows whose query fails keep an empty dict.
        vote_mappings = []
        for index, row in df.iterrows():
            try:
                vote_mappings.append(query_deepseekr1(row["Raw Text"], row["Policy Area"]))
            except Exception as err:
                # Best-effort: log and move on so one bad row does not abort the
                # file. Catching Exception (not a bare except) keeps Ctrl-C working.
                print("error at index", index, file, repr(err))
                vote_mappings.append({})
        df["VoteMapping"] = vote_mappings

        # drop() returns a new frame; the result must be kept for the column
        # to actually be absent from the saved CSV.
        df = df.drop(columns=["Raw Text"])
        df.to_csv(f"summaries_and_vote_mapping/{c}_{b_t}.csv", index=False)