# Classifying subjective topics behind public discourse

### Import libraries and helper functions

In [37]:
import ollama
import pandas as pd
from OllamaCached import check_and_download_model

### Load data

In [38]:
import json

# Open the JSON file and load its content
with open("./test_claims_quantemp.json") as f:
    data = json.load(f)
claims = []  # Initialize the list to hold extracted claims

# Iterate over each item in the JSON object
for value in data:
    claims.append(value['claim'])

# Print the complete list of claims after finishing the loop



In [39]:
claims[0]

"Great Western Railway is running a promotion offering all UK residents a gift card for a year's free travel for £1.69."

### Define helper functions to log results
- **generate_filename** function generates a file in a results directory (make sure that you create a results directory)
- **save_prints** will save the responses to the prompts as well as the text it is assigned to in a file

In [40]:
import datetime
import os
import json

"""
    Generate file in results directory

    Args:
        base_name (str): Name of the file
    Returns:

"""
def generate_filename(base_name):
    current_time = datetime.datetime.now()
    time_stamp = current_time.strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"{base_name}_{time_stamp}.txt"
    full_path = os.path.join("results", filename)

    # Open and create the file immediately
    with open(full_path, 'w') as file:
        pass  # Just open and close to create an empty file

    return full_path

def save_prints(file_name, text, content):
    try:
        with open(file_name, 'a', encoding='utf-8') as f:
            data = {
                "text": text,
                "questions": content
            }
            json.dump(data, f, ensure_ascii=False)
            f.write('\n')
    except Exception as e:
        print(f"An error occurred while saving prints: {e}")


### Training data for few-shot training approach

In [41]:
def few_shot_training_data():
    messages = []
    messages.append({"role": "system", "content": "Suppose you are a fact-checker, generate several yes or no quesons to help me answer if this claim is true or false."})

    messages.append({"role": "user", "content": "Claim: Viral image stated on June 8, 2020 in post on Facebook: Cops in Norway: require 3 years of training, 4 people killed since 2002. Cops in Finland: require 2 years of training, 7 people killed since 2000. Cops in Iceland: require 2 years of training, 1 person killed since ever. Cops in the U.S.: require 21 weeks of training, 8,000+ people killed since 2001."})

    messages.append({"role": "assistant", "content": "Questions: \n Does Norway require 3 years of training for cops? \n Have Norwegian cops killed 4 people since the early 2000's?\n Does Finland require only 2 years of training for police?\n Have Finnish police killed 7 people since 2000?\n Does Iceland only require 2 years of training for cops?\n Have Iceland cops only killed 1 person ever?\n Does the U.S. require only 21 weeks of training for cops?\n Have U.S. cops killed more than 8,000 people since 2001?\n Do experts associate only training me with police-related shoong fatalies?"})
    #
    messages.append({"role": "user", "content": "Claim: Barry DuVal stated on September 25, 2015 in an interview: We're the only major oil-producing naon in the world with a self-imposed ban on exporng our crude oil to other naons."})
    messages.append({"role": "assistant", "content": "Questions: Is the U.S. the only major oil-producing naon to ban exports of crude oil? \n Is the self-imposed ban on crude oil export of U.S a complete ban?"})

    messages.append({"role": "user", "content": "Claim: William Barr stated on September 2, 2020 in a CNN interview: We indicted someone in Texas, 1,700 ballots collected from people who could vote, he made them out and voted for the person he wanted to."})
    messages.append({"role": "assistant", "content": "Questions:\n Were 1700 mail-in ballots invesgated for fraud in Texas during the 2020 electon?\n Did the Justice Department indict someone in Texas for voter fraud? \nDid widespread mail-in order fraud happen in Texas during the 2020 electon?\nDid voter disenfranchisement happen in Texas during the 2020 electon?"})
    return messages


### Defining the prompt for training methods

In [42]:
def return_system():
    prompt = """
    Suppose you are a fact-checker, generate 2 yes or no questions to help me answer if this claim is true or false.
    """

    return prompt


def return_user(text):
    text = "Text: " + text
    return text + "\n"

def return_prompt_format():
    format = "The response should have the following format: {'Questions': Question1, Question2}"
    return format


In [43]:
model_name = 'llama2'
check_and_download_model(model_name)

Model Uncached - Downloading...
pulling manifest


pulling 8934d96d3f08: 100%|██████████| 3.83G/3.83G [00:00<00:00, 3.81TB/s]
pulling 8c17c2ebb0ea: 100%|██████████| 7.02k/7.02k [00:00<?, ?B/s]
pulling 7c23fb36d801: 100%|██████████| 4.77k/4.77k [00:00<?, ?B/s]
pulling 2e0493f67d0c: 100%|██████████| 59.0/59.0 [00:00<00:00, 59.1kB/s]
pulling fa304d675061: 100%|██████████| 91.0/91.0 [00:00<?, ?B/s]
pulling 42ba7f8a01dd: 100%|██████████| 557/557 [00:00<00:00, 554kB/s]


verifying sha256 digest
writing manifest
removing any unused layers
success
Download Complete.


### Few-shot prompting
- For few-shot prompting we don't only provide the structure to the model, but also 2 or more examples.
- Few-shot prompting can be used as a technique to enable in-context learning where we provide demonstrations in the prompt to steer the model to better performance.
- We start by getting the training data that is going to be used as examples introduced in the prompt. The number of examples is defined in the **number_of_samples** variable. The **additional_numbered_list** introduced extra information to each example (a binary list of labels for each data item).

In [44]:
messages = few_shot_training_data()

In [45]:
import json
from ollama import RequestError

def save_prints(file_name, text, content):
    try:
        with open(file_name, 'a', encoding='utf-8') as f:
            data = {
                "text": text,
                "questions": content
            }
            json.dump(data, f, ensure_ascii=False)
            f.write('\n')
    except Exception as e:
        print(f"An error occurred while saving prints: {e}")

number_iterations = 1

for i in range(number_iterations):
    # Generate file to save results
    file_name = generate_filename("Few_shot")
    print(file_name)

    for text in claims:
        print("Text: " + text)
        user_message = return_user(text) + return_prompt_format()

        # Ensure the message is not empty before appending
        if user_message:
            messages.append({"role": "user", "content": user_message})
        
        print(messages)

        try:
            response = ollama.chat(
                model=model_name,
                messages=messages,
                stream=False,
                options={
                    'temperature': 0.7
                }
            )

            if "message" in response and "content" in response["message"]:
                save_prints(file_name, text, response["message"]["content"])
                print(text)

        except RequestError as e:
            print(f"An error occurred: {e}")

        # Remove the specific user message if needed
        messages = [msg for msg in messages if not (msg["role"] == "user" and msg["content"] == user_message)]

        print("\n")
print("done")

results\Few_shot_2024-06-10_17-59-02.txt
Text: Great Western Railway is running a promotion offering all UK residents a gift card for a year's free travel for £1.69.
[{'role': 'system', 'content': 'Suppose you are a fact-checker, generate several yes or no quesons to help me answer if this claim is true or false.'}, {'role': 'user', 'content': 'Claim: Viral image stated on June 8, 2020 in post on Facebook: Cops in Norway: require 3 years of training, 4 people killed since 2002. Cops in Finland: require 2 years of training, 7 people killed since 2000. Cops in Iceland: require 2 years of training, 1 person killed since ever. Cops in the U.S.: require 21 weeks of training, 8,000+ people killed since 2001.'}, {'role': 'assistant', 'content': "Questions: \n Does Norway require 3 years of training for cops? \n Have Norwegian cops killed 4 people since the early 2000's?\n Does Finland require only 2 years of training for police?\n Have Finnish police killed 7 people since 2000?\n Does Iceland

In [26]:
import json
def read_and_parse_json(file_path):
    entries = []  # List to hold all parsed JSON objects

    # Open and read the file line by line
    with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                if line.strip():  # Ensure the line is not empty
                    # Parse each JSON object from the line and add it to the list
                    json_object = json.loads(line.strip())
                    entries.append(json_object)

    # Convert the list of JSON objects to a single JSON array
    json_data = json.dumps(entries, indent=4)
    with open('./train_claim_decomp.json', 'w') as f:
        json.dump(json_data, f)

    return json_data

In [27]:
json_output = read_and_parse_json('./results/train_claim_decomp.txt')

In [47]:
def append_comma_to_each_line(input_file_path, output_file_path):
    try:
        # Open the input file to read and the output file to write
        with open(input_file_path, 'r', encoding='utf-8') as input_file, \
             open(output_file_path, 'w', encoding='utf-8') as output_file:
            for line in input_file:
                # Strip newline characters from the end of the line, add ', ', and write to output file
                output_file.write(line.rstrip('\n') + ', ' + '\n')

    except FileNotFoundError:
        print("The specified file was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Usage
input_file_path = './results/test_ramaining.txt'  # Replace with the path to your input file
output_file_path = './results/test_remaining_json.txt'  # Replace with the path to your desired output file

append_comma_to_each_line(input_file_path, output_file_path)
print("done")

done
