In [2]:
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
df = pd.read_csv("hf://datasets/darrow-ai/LegalLensNLI-SharedTask/NLI.csv")

In [4]:
df

Unnamed: 0,premise,hypothesis,legal_act,label,Unnamed: 4
0,DEFENDANT has reached a settlement in a class ...,Had to visit DEFENDANT a while back for some r...,privacy,Neutral,
1,A class action lawsuit has been certified agai...,"So, at 22, I was into this whole ""collect-and-...",consumer_protection,Entailed,
2,"DEFENDANT, an auto parts supplier, has agreed ...",As an employee of the aforementioned auto part...,consumer_protection,Contradict,
3,"DEFENDANT has agreed to pay $400,000 to settle...","Hey, got a call from DEFENDANT a while back, s...",privacy,Contradict,
4,DEFENDANT and other health benefit companies h...,"Just checked my mail, got a letter from DEFEND...",privacy,Neutral,
...,...,...,...,...,...
307,DEFENDANT has reached a settlement in a breach...,Feeling a bit perplexed today. I've been a loy...,consumer_protection,Entailed,
308,"DEFENDANT, a seafood restaurant operator in Ca...",Had a fantastic seafood dinner at this place l...,privacy,Neutral,
309,Consumers who received promotional text messag...,Hardly ever use my phone for anything other th...,tcpa,Neutral,
310,"DEFENDANT, a restaurant point-of-sale provider...","Upon my daily visits to the local diner, I fre...",privacy,Entailed,


In [5]:
# Remove unnecessary columns
df = df[["premise", "hypothesis", "label"]]

In [6]:
from sklearn.model_selection import train_test_split

train_df, eval_df = train_test_split(df, test_size=0.3, random_state=42)

In [9]:
import json
# Open a file in write mode
with open('train_data.jsonl', 'w') as outfile:
    for index, row in train_df.iterrows():
        premise = row['premise']
        hypothesis = row['hypothesis']
        label = row['label']

            
        # Create the JSONL entry
        jsonl_entry = {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant that given a pair of premise and hypothesis, you will predict the natural language inference between them: neutral, contradition, entailment"},
                {"role": "user", "content":  f"premise: {premise}\nhypothesis:{hypothesis}"},
                {"role": "assistant", "content": label}
                ]
        }
        
        # Write the JSONL entry to the file
        json.dump(jsonl_entry, outfile)
        outfile.write('\n')

In [10]:
# Open a file in write mode
with open('eval_data.jsonl', 'w') as outfile:
    for index, row in train_df.iterrows():
        premise = row['premise']
        hypothesis = row['hypothesis']
        label = row['label']

            
        # Create the JSONL entry
        jsonl_entry = {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant that given a pair of premise and hypothesis, you will predict the natural language inference between them: neutral, contradition, entailment"},
                {"role": "user", "content":  f"premise: {premise}\nhypothesis:{hypothesis}"},
                {"role": "assistant", "content": label}
                ]
        }
        # Write the JSONL entry to the file
        json.dump(jsonl_entry, outfile)
        outfile.write('\n')

In [11]:
load_dotenv()

True

In [12]:
import os
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [13]:
client.files.create(
  file=open("train_data.jsonl", "rb"),
  purpose="fine-tune"
)

FileObject(id='file-46RRxDkbHkbsBxNNq9GmOtLi', bytes=324915, created_at=1724986965, filename='train_data.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [14]:
client.files.create(
  file=open("eval_data.jsonl", "rb"),
  purpose="fine-tune"
)

FileObject(id='file-jsKvrU9sPNMDSz8YaDw2cuzQ', bytes=324915, created_at=1724986969, filename='eval_data.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [15]:
client.fine_tuning.jobs.create(
  training_file="file-46RRxDkbHkbsBxNNq9GmOtLi", 
  model="gpt-4o-2024-08-06",
  validation_file="file-jsKvrU9sPNMDSz8YaDw2cuzQ"
)

FineTuningJob(id='ftjob-qIHaGhJRB9bvWDWDxWPLJt6E', created_at=1724986980, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-2024-08-06', object='fine_tuning.job', organization_id='org-Qxm4Gb8DM4gPh1hlxL8eCrsO', result_files=[], seed=19663938, status='validating_files', trained_tokens=None, training_file='file-46RRxDkbHkbsBxNNq9GmOtLi', validation_file='file-jsKvrU9sPNMDSz8YaDw2cuzQ', estimated_finish=None, integrations=[], user_provided_suffix=None)

In [26]:
fine_tune_job = client.fine_tuning.jobs.retrieve("ftjob-qIHaGhJRB9bvWDWDxWPLJt6E")
print(fine_tune_job)

FineTuningJob(id='ftjob-qIHaGhJRB9bvWDWDxWPLJt6E', created_at=1724986980, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-2024-08-06:georgian::A1mtDgxg', finished_at=1724989622, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='gpt-4o-2024-08-06', object='fine_tuning.job', organization_id='org-Qxm4Gb8DM4gPh1hlxL8eCrsO', result_files=['file-2SAYbt0iKfMP5N8xwzIuWhug'], seed=19663938, status='succeeded', trained_tokens=188877, training_file='file-46RRxDkbHkbsBxNNq9GmOtLi', validation_file='file-jsKvrU9sPNMDSz8YaDw2cuzQ', estimated_finish=None, integrations=[], user_provided_suffix=None)


In [23]:
inference_df = pd.read_csv("/Users/royalsequiera/Projects/Darrow-LegalLens-NER/NLI_Task/testset_NLI_LegalLens.csv")

In [27]:
def model_inference(premise, hypothesis) -> list[str]:
    output = client.chat.completions.create(
        model="ft:gpt-4o-2024-08-06:georgian::A1mtDgxg",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that given a pair of premise and hypothesis, you will predict the natural language inference between them: neutral, contradition, entailment"},
            {"role": "user", "content":  f"premise: {premise}\nhypothesis:{hypothesis}"},
        ]
    ) 
    
    output_str = output.choices[0].message.content
    
    return output_str

In [30]:
eval_df["predictions"] = df.apply(lambda x: model_inference(x["premise"], x["hypothesis"]), axis=1)

In [31]:
eval_df.head()

Unnamed: 0,premise,hypothesis,label,predictions
228,DEFENDANT has agreed to a $5.25 million settle...,As a regular visitor to a certain company's fa...,Entailed,
9,The DEFENDANT Text Message Class Action Settle...,Been receiving way too many texts from DEFENDA...,Entailed,Entailed
57,DEFENDANT has agreed to pay $7.5 million to se...,Stumbled upon my former employer in the news t...,Contradict,Entailed
60,"DEFENDANT, a hospital in Dixon, Illinois, has ...","So, there's this hospital in Dixon I went to a...",Neutral,Neutral
25,"DEFENDANT, a company that provides ambulance a...",Recently started using the handprint clock-in ...,Neutral,Neutral


In [41]:
eval_df["predictions"] = eval_df["predictions"].fillna("unknown")

from sklearn.metrics import classification_report
print(classification_report(eval_df["label"], eval_df["predictions"]))

              precision    recall  f1-score   support

  Contradict       0.67      0.12      0.20        34
    Entailed       0.29      0.07      0.11        29
     Neutral       0.40      0.19      0.26        31
     unknown       0.00      0.00      0.00         0

    accuracy                           0.13        94
   macro avg       0.34      0.10      0.14        94
weighted avg       0.46      0.13      0.19        94



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [32]:
inference_df["predictions"] = inference_df.apply(lambda x: model_inference(x["premise"], x["hypothesis"]), axis=1)

In [34]:
inference_df["predictions"]

0     Contradict
1     Contradict
2     Contradict
3     Contradict
4       Entailed
         ...    
79       Neutral
80      Entailed
81       Neutral
82    Contradict
83      Entailed
Name: predictions, Length: 84, dtype: object

In [35]:
llama3 = pd.read_csv("/Users/royalsequiera/Projects/Darrow-LegalLens-NER/NLI_Task/predictionsNLILens.csv")

In [37]:
llama3.head()[""]

Unnamed: 0.1,Unnamed: 0,id,premise,hypothesis,label
0,0,11468879,DEFENDANT agreed to a settlement in a class ac...,"I've been with DEFENDANT for a while now, and...",Neutral
1,1,12135604,"DEFENDANT has agreed to a $865,000 class actio...",Despite the data breach at DEFENDANT in Septe...,Neutral
2,2,12332938,A settlement has been reached in a class actio...,I've used the point of sale systems in Illino...,Entailed
3,3,13798813,DEFENDANT has agreed to a $21.875M settlement ...,"Never got any robocalls, DEFENDANT's settleme...",Neutral
4,4,1576896,DEFENDANT has agreed to pay $2 million to sett...,I've been ordering my meals from DEFENDANT's ...,Entailed


In [82]:
df.to_csv("NER_test_set_results_gpt4o_finetuned.csv")

In [105]:
df = pd.read_csv("/Users/royalsequiera/Projects/Darrow-LegalLens-NER/NER_Task_Submission/predictions_NERLens.csv")

In [106]:
df.head()

Unnamed: 0,id,tokens,predicted_tags
0,14243437,"[""a"",""class"",""action"",""lawsuit"",""has"",""been"",""...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
1,18871551,"[""a"",""media"",""company"",""recently"",""came"",""unde...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
2,51514749,"[""a"",""national"",""bank"",""was"",""recently"",""held""...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
3,99676183,"[""a"",""recent"",""case"",""has"",""come"",""to"",""light""...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."
4,14188948,"[""a"",""recent"",""incident"",""has"",""come"",""to"",""li...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."


In [118]:
import ast

def format_token_tags_from_df(df):
    formatted_data = []

    # Iterate through each row in the DataFrame
    for _, row in df.iterrows():
        # Safely evaluate strings that look like lists
        tokens = row['tokens']
        predicted_tags = row['predicted_tags']
        
        if isinstance(tokens, str):
            tokens = ast.literal_eval(tokens)
        if isinstance(predicted_tags, str):
            predicted_tags = ast.literal_eval(predicted_tags)
        
        token_tag_pairs = [f"{token}/{tag}" for token, tag in zip(tokens, predicted_tags)]
        formatted_string = ', '.join(token_tag_pairs)
        formatted_data.append({'id': row['id'], 'formatted': formatted_string})

    return formatted_data

In [124]:
format_token_tags_from_df(df.sample(2))

[{'id': 40410727,
  'formatted': 'the/O, lead/O, plaintiff/O, alleges/O, that/O, the/O, app/O, has/O, been/O, issuing/B-VIOLATION, false/I-VIOLATION, and/I-VIOLATION, misleading/I-VIOLATION, financial/I-VIOLATION, statements,/I-VIOLATION, thereby/I-VIOLATION, artificially/I-VIOLATION, inflating/I-VIOLATION, the/I-VIOLATION, price/I-VIOLATION, of/I-VIOLATION, its/I-VIOLATION, securities/I-VIOLATION, ./O, this/O, unethical/O, behavior/O, was/O, orchestrated/O, by/O, the/O, apps/O, lead/O, developer/O, and/O, has/O, raised/O, serious/O, doubts/O, about/O, the/O, apps/O, reliability/O, ./O'},
 {'id': 61979338,
  'formatted': 'a/O, restaurant/O, was/O, found/O, guilty/O, of/O, violating/O, the/O, fair/B-LAW, labor/I-LAW, standards/I-LAW, act/I-LAW, ./O, the/O, court/O, found/O, that/O, the/B-VIOLATED BY, restaurant/I-VIOLATED BY, management/I-VIOLATED BY, had/O, failed/O, to/O, comply/O, with/O, the/O, strict/O, notification/I-LAW, requirements/I-LAW, for/O, claiming/O, a/O, tip/O, credit,/

In [128]:
import pandas as pd

def check_ner_format(predictions_file_path, test_file_path):
    """
    Check the format of the NER prediction file.
    The file should be in CSV format with columns: id, tokens, ner_tags
    """
    try:
        df = pd.read_csv(predictions_file_path)

    except Exception as e:
        return False, f"Error reading predictions CSV file: {e}"
    
    try:
        test_df = pd.read_csv(test_file_path)
    except Exception as e:
        return False, f"Error reading test CSV file: {e}"
    
    # Check expected columns
    expected_columns = ['id', 'tokens', 'ner_tags']
    pred_columns = list(df.columns)
    for expected_col in expected_columns:
        if expected_col not in pred_columns:
            return False, f"Incorrect columns. Expected: {expected_columns}, Found: {pred_columns}"
    
    # Check number of rows
    expected_ner_num_rows = len(test_df)
    predictions_ner_num_rows = len(df)
    if predictions_ner_num_rows != expected_ner_num_rows:
        return False, f"Incorrect number of predictions. Expected: {expected_ner_num_rows}, Found: {predictions_ner_num_rows}"

    return True, "NER prediction file format is correct."

def check_nli_format(predictions_file_path, test_file_path):
    """
    Check the format of the NLI prediction file.
    The file should be in CSV format with columns: Premise, hypothesis, label
    """
    try:
        df = pd.read_csv(predictions_file_path)
    except Exception as e:
        return False, f"Error reading predictions CSV file: {e}"
    
    try:
        test_df = pd.read_csv(test_file_path)
    except Exception as e:
        return False, f"Error reading test CSV file: {e}"
    
    # Check expected columns
    expected_columns = ['premise', 'hypothesis', 'label']
    pred_columns = list(df.columns)
    for expected_col in expected_columns:
        if expected_col not in pred_columns:
            return False, f"Incorrect columns. Expected: {expected_columns}, Found: {pred_columns}"
    
    # Check number of rows
    expected_nli_num_rows = len(test_df)
    predictions_nli_num_rows = len(df)
    if predictions_nli_num_rows != expected_nli_num_rows:
        return False, f"Incorrect number of predictions. Expected: {expected_nli_num_rows}, Found: {predictions_nli_num_rows}"
    
    return True, "NLI prediction file format is correct."

# Check NER prediction file
ner_predictions_file_path = '/Users/royalsequiera/Projects/Darrow-LegalLens-NER/NER_Task_Submission/predictions_NERLens.csv' # replace with file path
ner_test_file_path = '/Users/royalsequiera/Projects/Darrow-LegalLens-NER/NER_Task_Submission/test_NERLens.csv' # replace with file path
is_valid, message = check_ner_format(ner_predictions_file_path, ner_test_file_path)
print(f"NER File Check: {message}")

# # Check NLI prediction file
# nli_predictions_file_path = 'predictions_NLILens.csv' # replace with file path
# nli_test_file_path = 'predictions_NLILens.csv' # replace with file path
# is_valid, message = check_nli_format(nli_predictions_file_path, nli_test_file_path)
# print(f"NLI File Check: {message}")

NER File Check: NER prediction file format is correct.


In [126]:
import pandas as pd

# Path to the Excel file
excel_file_path = '/Users/royalsequiera/Projects/Darrow-LegalLens-NER/NER_Task_Submission/NER_test_set.xlsx'

# Read the Excel file into a DataFrame
df = pd.read_excel(excel_file_path)

# Path to the CSV file
csv_file_path = 'output_file.csv'

# Save the DataFrame as a CSV file
df.to_csv(csv_file_path, index=False)
