In [17]:
# Script to run the model

import fire

from llama import Llama
from typing import List

def main(
    ckpt_dir: str,
    tokenizer_path: str,
    validation_set: List[dict],
    temperature: float = 0.6,
    top_p: float = 0.9,
    max_seq_len: int = 128,
    max_gen_len: int = 64,
    max_batch_size: int = 4
):
    """
    Entry point of the program for generating text using a pretrained model.

    Args:
        ckpt_dir (str): The directory containing checkpoint files for the pretrained model.
        tokenizer_path (str): The path to the tokenizer model used for text encoding/decoding.
        temperature (float, optional): The temperature value for controlling randomness in generation.
            Defaults to 0.6.
        top_p (float, optional): The top-p sampling parameter for controlling diversity in generation.
            Defaults to 0.9.
        max_seq_len (int, optional): The maximum sequence length for input prompts. Defaults to 128.
        max_gen_len (int, optional): The maximum length of generated sequences. Defaults to 64.
        max_batch_size (int, optional): The maximum batch size for generating sequences. Defaults to 4.
        validation_set (List[dict]): The validation set containing propositions.
    """
    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=max_seq_len,
        max_batch_size=max_batch_size,
    )
    
    results = []
    
    for comment in validation_set:
        propositions = comment['propositions']
        for prop in propositions:
            prop_text = prop['text']
            prompt = f'Predict the type of proposition: "{prop_text}"\n'
            result = generator.text_completion(
                [prompt],
                max_gen_len=max_gen_len,
                temperature=temperature,
                top_p=top_p,
            )
            predicted_type = result[0]['generation'].strip()
            results.append(predicted_type)

        return results

if __name__ == "__main__":
    fire.Fire(main)
        
        

[1m[31mERROR: [0mThe function received no value for the required argument: ckpt_dir
Usage: ipykernel_launcher.py CKPT_DIR TOKENIZER_PATH VALIDATION_SET <flags>
  optional flags:        --temperature | --top_p | --max_seq_len |
                         --max_gen_len | --max_batch_size

For detailed information on this command, run:
  ipykernel_launcher.py --help


FireExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [18]:
# Format the dataset so that the json file is readable

import json
import random

def pprint_comment(comment):
    # Format the propositions
    propositions = []
    for prop in comment['propositions']:
        prop_info = {
            "id": prop['id'],
            "text": prop['text'],
            "type": prop['type'],
            "reasons": prop['reasons'],
            "evidence": prop['evidence']
        }
        propositions.append(prop_info)
    
    formatted_comment = {
        "commentID": comment['commentID'],
        "propositions": propositions
    }
    return formatted_comment    


In [19]:
# Create a validation set

formatted_comments = []

# Open the JSON list file
with open('cdcp_type_edge_annot.jsonlist', 'r') as file:
    for line in file:
        comment = json.loads(line)
        formatted_comment = pprint_comment(comment)
        formatted_comments.append(formatted_comment)
#         print(json.dumps(formatted_comment, indent=4))

# Select about 100 comments randomly for the validation set
validation_set = random.sample(formatted_comments, k=100)

# # Print the selected comments
# for comment in validation_set:
#     print(json.dumps(comment, indent=4))

In [20]:
validation_set[0]

{'commentID': 524,
 'propositions': [{'id': 0,
   'text': 'I think that both an English and Spanish letter should be sent.',
   'type': 'policy',
   'reasons': ['1'],
   'evidence': None},
  {'id': 1,
   'text': 'This already happens with most legal and other important things (like voting and letters from school) in my State.',
   'type': 'fact',
   'reasons': None,
   'evidence': None},
  {'id': 2,
   'text': 'But this should be a fed requirement to always include Spanish.',
   'type': 'policy',
   'reasons': None,
   'evidence': None}]}

In [21]:
# Create a dataframe to keep track of commentId, propositionId, actualType, predictedType

import pandas as pd

# A list of records in the format [comment_id, prop_id, actual_type]
records = []

for comment in validation_set:
    comment_id = comment['commentID']
    propositions = comment['propositions']
    for prop in propositions:
        prop_id = prop['id']
        actual_type = prop['type']
        records.append([comment_id, prop_id, actual_type])

df = pd.DataFrame(records, columns=['commentID', 'propositionID', 'actualType'])


In [22]:
# List of predicted types
predicted_types = main('/home/parklab/models/llama2/llama-2-70b-chat', '/home/parklab/models/llama2/tokenizer.model', validation_set=validation_set)

# Add the predicted types to the DataFrame
df['predictedType'] = predicted_types

ValueError: Error initializing torch.distributed using env:// rendezvous: environment variable RANK expected, but not set

In [16]:
from sklearn.metrics import precision_score, recall_score, f1_score

# DataFrame contains actual and predicted types
actual_types = df['actualType']
predicted_types = df['predictedType']

# Convert types to a list for sklearn metrics
actual_types_list = actual_types.tolist()
predicted_types_list = predicted_types.tolist()

# Calculate precision, recall, and F1 scores
precision = precision_score(actual_types_list, predicted_types_list, average='weighted')
recall = recall_score(actual_types_list, predicted_types_list, average='weighted')
f1 = f1_score(actual_types_list, predicted_types_list, average='weighted')

# Print results
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')


ModuleNotFoundError: No module named 'sklearn'