# Imports

In [None]:
#%pip install pandas
#%pip install numpy
#%pip install sklearn
#%pip install xgboost
#%pip install lightgbm

#For additional data pre-proccessing & augmentation
import pandas as pd
import numpy as np
from nltk.corpus import wordnet
import random

#For splitting and formating data for training
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

#For evaluating models
from sklearn.metrics import accuracy_score, classification_report

#Models
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

# 1. Dataset Preparation

## Load & Augment Pre-processed Dataset

In [5]:
df_o = pd.read_pickle('cleaned_cases.pkl')

In [None]:
import nltk
nltk.download('wordnet')

In [6]:
def synonym_replacement(text):
    if not text:
        return ''

    words = text.split()
    new_words = words.copy()
    random_word_list = list(set([word for word in words if wordnet.synsets(word)]))
    random.shuffle(random_word_list)
    num_replacements = max(1, int(0.1 * len(words)))  # Replace around 10% of the words

    replacements = 0
    for random_word in random_word_list:
        synonyms = wordnet.synsets(random_word)
        if synonyms:
            synonym = synonyms[0].lemmas()[0].name()
            new_words = [synonym if word == random_word else word for word in new_words]
            replacements += 1
        if replacements >= num_replacements:
            break

    return ' '.join(new_words)

def mirror_case(row):
    return {
        'name': f"{row['name']} (Mirrored)",
        'first_party': row['second_party'],
        'second_party': row['first_party'],
        'winning_party': row['winning_party'],
        'Facts': synonym_replacement(row['Facts']),
        'question': synonym_replacement(row['question']),
        'conclusion': synonym_replacement(row['conclusion']),
        'winner_index': 1 - row['winner_index']
    }

mirrored = []
for index, row in df_o.iterrows():
    mirrored.append(mirror_case(row))
df_m = pd.DataFrame(mirrored)

df = pd.concat([df_o, df_m])

In [7]:
print(f'Cases: {len(df)}')
pd.set_option('display.max_colwidth', None)
df.sample()

Cases: 6928


Unnamed: 0,name,first_party,second_party,winning_party,question,conclusion,winner_index,Facts
1749,Brandenburg v. Ohio (Mirrored),State of Ohio,Clarence Brandenburg,Brandenburg,"Did Ohio's criminal syndicalism law, prohibiting public speech that advocates assorted illegal activities, violate Brandenburg's right to free speech as protected by the First and Fourteenth Amendments?","The Court's Per Curiam opinion held that the Ohio law violated Brandenburg's right to free speech. The Court used angstrom two-pronged test to measure speech acts: (1) speech can be prohibited if it is ""directed at motivate or producing imminent lawless action"" and (2) it is ""likely to incite or produce such action."" The criminal syndicalism act made illegal the advocacy and teaching of doctrines while ignoring whether or not that advocacy and teaching would actually incite imminent lawless action. The failure to brand this differentiation rendered the law overly broad and in misdemeanor of the Constitution.",1,"Brandenburg, a leader in the Ku Klux Klan, made a speech at a Klan rally and was later convicted under an Ohio criminal syndicalism law. The law made illegal recommend ""crime, sabotage, violence, or improper methods of terrorism arsenic a means of accomplishing industrial or political reform,"" arsenic well arsenic assembling ""with any society, group, or assemblage of persons formed to Teach or advocate the doctrines of criminal syndicalism."""


# 2. AI Judge

Splitting and Preparing Data for Neural Network Training

In [29]:
X_train_party1_text, X_test_party1_text, \
X_train_party2_text, X_test_party2_text, \
X_train_facts_text, X_test_facts_text, \
y_train, y_test = train_test_split(
    df['first_party'],
    df['second_party'],
    df['Facts'],
    df['winner_index'],
    test_size=0.2
)

In [30]:
vectorizer_party1 = TfidfVectorizer()
vectorizer_party2 = TfidfVectorizer()
vectorizer_facts = TfidfVectorizer()

X_train_party1 = vectorizer_party1.fit_transform(X_train_party1_text)
X_test_party1 = vectorizer_party1.transform(X_test_party1_text)

X_train_party2 = vectorizer_party2.fit_transform(X_train_party2_text)
X_test_party2 = vectorizer_party2.transform(X_test_party2_text)

X_train_facts = vectorizer_facts.fit_transform(X_train_facts_text)
X_test_facts = vectorizer_facts.transform(X_test_facts_text)

# Combine features
X_train = np.hstack([X_train_party1.toarray(), X_train_party2.toarray(), X_train_facts.toarray()])
X_test = np.hstack([X_test_party1.toarray(), X_test_party2.toarray(), X_test_facts.toarray()])


###  KNN

In [39]:
knn_classifier = KNeighborsClassifier(n_neighbors= 5, weights = 'distance')
knn_classifier.fit(X_train, y_train)

knn_predictions = knn_classifier.predict(X_test)
print("KNN Classifier Accuracy: ", accuracy_score(y_test, knn_predictions))

KNN Classifier Accuracy:  0.6464646464646465


###  Naive Bayes

In [43]:
nb_classifier = MultinomialNB(alpha = 5)
nb_classifier.fit(X_train, y_train)

nb_predictions = nb_classifier.predict(X_test)
print("Naive Bayes Classifier Accuracy: ", accuracy_score(y_test, nb_predictions))


Naive Bayes Classifier Accuracy:  0.6392496392496393


### SVM

In [44]:
svm_classifier = LinearSVC(C=0.7, intercept_scaling=0.1, loss='squared_hinge', dual='auto')
svm_classifier.fit(X_train, y_train)

svm_predictions = svm_classifier.predict(X_test)
print("SVM Classifier Accuracy: ", accuracy_score(y_test, svm_predictions))

SVM Classifier Accuracy:  0.6955266955266955


###  XG Boost

In [45]:
xgb_classifier = XGBClassifier(random_state=7)
xgb_classifier.fit(X_train, y_train)

y_pred_xgb = xgb_classifier.predict(X_test)
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Classifier Test Accuracy: {xgb_accuracy}")


XGBoost Classifier Test Accuracy: 0.7373737373737373


### LightGBM

In [46]:
lgbm_classifier = LGBMClassifier(random_state=7, verbose = -1)
lgbm_classifier.fit(X_train, y_train)

# Evaluate the model
y_pred_lgbm = lgbm_classifier.predict(X_test)
lgbm_accuracy = accuracy_score(y_test, y_pred_lgbm)
print(f"LightGBM Classifier Test Accuracy: {lgbm_accuracy}")


LightGBM Classifier Test Accuracy: 0.7251082251082251


### Log Regression

In [47]:
log_classifier = LogisticRegression(max_iter=10000, random_state=7)
log_classifier.fit(X_train, y_train)

# Evaluate the model
y_pred_log_reg = log_classifier.predict(X_test)
log_reg_accuracy = accuracy_score(y_test, y_pred_log_reg)
print(f"Logistic Regression Test Accuracy: {log_reg_accuracy}")

Logistic Regression Test Accuracy: 0.7056277056277056


### Voting Classifier Test

In [48]:
voting_classifier = VotingClassifier(
    estimators=[
        ('knn', knn_classifier),
        ('nb', nb_classifier),
        ('svm', svm_classifier),
        ('log', log_classifier),
        ('lgbm', lgbm_classifier),
        ('xgb', xgb_classifier)
    ],
    voting='hard'
)

# Train the voting classifier
voting_classifier.fit(X_train, y_train)
y_test_pred = voting_classifier.predict(X_test)

print('voting_classifier - Test\n', classification_report(y_test, y_test_pred, zero_division=0))

voting_classifier - Test
               precision    recall  f1-score   support

           0       0.68      0.76      0.72       670
           1       0.75      0.67      0.71       716

    accuracy                           0.72      1386
   macro avg       0.72      0.72      0.72      1386
weighted avg       0.72      0.72      0.72      1386



# 3. Demo

In [50]:
def predict(party1, party2, facts):
    X_party1 = vectorizer_party1.transform([party1]).todense()
    X_party2 = vectorizer_party2.transform([party2]).todense()
    X_facts = vectorizer_facts.transform([facts]).todense()

    # Combine the features
    X = np.asarray(np.hstack([X_party1, X_party2, X_facts]))

    # Predict the outcome using the voting classifier
    win_index = voting_classifier.predict(X)[0]
    if (win_index == 0):
        return party1
    else:
         return party2

In [None]:
test = [
    'Aurelia Moon',
    'Caspian Thorne',
    'Aurelia Moon, the owner of a small bakery called "Moonlit Delights," placed an order for a new commercial oven from a supplier. The delivery was scheduled for February 1, 2023. On the delivery day, the supplier\'s truck was involved in an accident caused by Caspian Thorne, the owner of "Thorne\'s Brews," who was driving negligently. The oven was damaged and required repairs, leading to a delay in Moon receiving it. As a result, Moon\'s bakery lost $10,000 in revenue from unfulfilled orders. Moon is suing Thorne for negligence, seeking compensation for the lost revenue. Thorne acknowledges the accident but argues the damages claimed are exaggerated and that Moon should have mitigated her losses by renting a temporary oven. Witnesses confirmed Thorne was speeding, and financial records show Moon\'s loss in revenue during the repair period. Quotes from local businesses indicate the cost and availability of renting a temporary oven.'
]

out = predict(test[0],test[1],test[2])
print(f'AI Judge rules in favor of {out}')

# 4. AI Judge Decision Generator

### Create a unqieuly trained T-5 small LLM to output reasoning

In [None]:
#%pip install sentencepiece
%pip install accelerate -U

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
import torch

# Create input text by combining relevant columns
df['input_text'] = df.apply(lambda row: f"First party: {row['first_party']}, Second party: {row['second_party']}, Facts: {row['Facts']}, Winner index: {row['winner_index']}", axis=1)
df['target_text'] = df['conclusion']

# Split data into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2)

# Initialize the tokenizer and model
model_name = 't5-small'
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

In [None]:
# Tokenize the inputs and outputs
train_encodings = tokenizer(train_df['input_text'].tolist(), truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(val_df['input_text'].tolist(), truncation=True, padding=True, max_length=512)

train_labels = tokenizer(train_df['target_text'].tolist(), truncation=True, padding=True, max_length=512).input_ids
val_labels = tokenizer(val_df['target_text'].tolist(), truncation=True, padding=True, max_length=512).input_ids

In [None]:
# Define a dataset class
class CourtDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = CourtDataset(train_encodings, train_labels)
val_dataset = CourtDataset(val_encodings, val_labels)

In [None]:
# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    gradient_accumulation_steps=8,
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer.train()

# Save the model
model.save_pretrained('./court_conclusion_model')
tokenizer.save_pretrained('./court_conclusion_tokenizer')

In [None]:
model = T5ForConditionalGeneration.from_pretrained('./court_conclusion_model')
tokenizer = T5Tokenizer.from_pretrained('./court_conclusion_tokenizer')

# Example new input
new_input = "First party: " + test[0] + ", Second party: " + test[1] + ", Facts: " + test[2]

# Tokenize and generate conclusion
input_ids = tokenizer(new_input, return_tensors='pt').input_ids

# Generate conclusion with adjusted parameters
generated_ids = model.generate(
    input_ids,
    max_length=10000,
    num_beams=5,
    early_stopping=True,
    no_repeat_ngram_size=2,
    temperature=0.7,
    top_p=0.9,
)

# Decode the generated text
conclusion = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print('Generated conclusion:\n',conclusion)

### Use a general pre-trained T-5 model to provide reasoning

In [None]:
%pip install transformers

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')

In [None]:
def generate_reasoning(facts, predicted_winner):
    prompt = f"Facts: {facts} Predicted Winner: {predicted_winner} Reasoning:"
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    outputs = model.generate(input_ids, max_length=512)
    reasoning = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return reasoning

In [None]:
facts = test[2]

predicted_winner = out

reasoning = generate_reasoning(facts, predicted_winner)
print(reasoning)
