# Evaluating Legal Models

LegalEase: Nick, Ben, Lisa, Devin

# Setup
### Install and import necessary packages:


In [19]:
'''# Installs
!pip install google-generativeai
!pip install --upgrade transformers
!pip install torch
!pip install peft
!pip install bitsandbytes
!pip install langchain-community
!pip install langchain-Chroma
!pip install accelerate
!pip install optimum
!pip install gptqmodel
!pip install auto-gptq
!pip install gpt4all
!pip install huggingface_hub
!pip install scikit-learn
!pip install matplotlib'''
!pip install genai

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting genai
  Downloading genai-2.1.0-py3-none-any.whl.metadata (6.5 kB)
Collecting openai<0.28.0,>=0.27.0 (from genai)
  Downloading openai-0.27.10-py3-none-any.whl.metadata (13 kB)
Collecting tabulate<0.10.0,>=0.9.0 (from genai)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting tiktoken<0.4.0,>=0.3.2 (from genai)
  Downloading tiktoken-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading genai-2.1.0-py3-none-any.whl (16 kB)
Downloading openai-0.27.10-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Downloading tiktoken-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m50.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tabulate, tiktoken, op

In [None]:
# Imports
import pandas as pd
import time
import re
import numpy as np
import genai
import json
import random
import os
import warnings
import time
warnings.simplefilter('ignore')
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn import metrics
import transformers
import torch
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from huggingface_hub import login

ImportError: cannot import name 'GenerativeModel' from 'genai' (/media/volume/LegalEaseMaxim/CPSC5830-Team1/LegalEaseMk1/venv/lib/python3.11/site-packages/genai/__init__.py)

In [3]:
# Our modules
from model import generate_response, init_model
from model_llama2_7b import generate_response, init_model
import rag
import chat_history

CUDA extension not installed.
CUDA extension not installed.


Model Response: What is the importance of contract law in business?

Contract law is crucial in business as it provides a legal framework for creating and enforcing agreements between parties. It helps establish clear terms, obligations, and expectations, reducing the risk of disputes and misunderstandings. Contracts can cover various aspects, such as sales, partnerships, employment, and intellectual property, ensuring a solid foundation for business relationships.


# Load Gemini

SOTA model that will be our benchmark to evaluate general response performace against. 

In [4]:
# this is used if you want to integrate gemini
def prepare_google_env():
# Gathering Google resources
    with open('secrets.json', 'r') as file:
        secrets = json.load(file)

    import google.generativeai as genai  
    GOOGLE_API_KEY = secrets['GOOGLE_API_KEY']
    genai.configure(api_key = GOOGLE_API_KEY)
prepare_google_env()

# Example Q_A and Converse

In [5]:
# Only uncomment if you want to see how the Q_A script currently runs.
#import Q_A

In [6]:
#import converse

In [7]:
def query_gemini(user_input, use_rag, prompt):
    """
    user_input is the input to the llm.
    Rag is a boolean if you want RAG to be used to enhance query or not
    returns a response text
    """
    rag_context = ''
    if use_rag:
        rag_context, sources = rag.query_rag(user_input)
    chat_history = ''
    user_input_with_context = prompt.format(user_input=user_input, rag_context=rag_context, chat_history=chat_history)
    
    model = genai.GenerativeModel('gemini-1.5-flash',system_instruction=user_input_with_context)
    chat = model.start_chat()
    response = chat.send_message(user_input)
    return response.text.strip()

# Load Our Prompts and Models to Test


In [8]:
def get_prompt(file_name):
    # pulls form the /prompts folder
    folder = 'prompts/'
    with open(folder + file_name, "r") as file:
        prompt = file.read()
    return prompt

#get summary_prompt from summary_prompt.txt
summary_prompt = get_prompt('summary_prompt.txt')
#get class_prompt form class_prompt.txt
class_prompt = get_prompt('class_prompt.txt')
prompt = get_prompt('prompt.txt')

model, tokenizer = init_model()

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.23s/it]


In [9]:
def query_summarizer(model, user_input, prompt):
    """
    query is the input to the llm.
    Summarizes a users business ideas
    """
    start_time = time.time()
    with open('summary.txt', "r") as file:
        summary = file.read()
    complete_query = summary_prompt.format(summary = summary, user_input=user_input)
    llm_output = generate_response(complete_query, model, tokenizer)
    end_time = time.time()
    print('Took', end_time-start_time, 'seconds')
    print("Query sent to LLM\n", complete_query)
    print('/n#########END INPUT####################/n')
    print('################FROM LLM############')
    return llm_output

def query_classifier(model, summary, prompt):
    """
    query is the input to the llm.
    Rag is a boolean if you want RAG to be used to enhance query or not
    returns a response text
    """
    start_time = time.time()
    rag_context = ''
    #rag_context, sources = rag.query_rag(user_input)
    complete_query = class_prompt.format(summary = summary, rag_context=rag_context)
    class_output = generate_response(complete_query, model, tokenizer)
    end_time = time.time()
    print("Query sent to LLM\n", complete_query)
    print('Took', end_time-start_time, 'seconds')
    print('/n#########END SUMMARY####################/n')
    return class_output

def qa_questions(model_type, use_rag, prompt):
    # Runs given model through 100 questions from the Q/A Pairs set
    # returns the average cosine similarity and Gemini's Evaluation:
    scores = []
    content_sim = []
    # Select first 100 pairs
    qa_split_100 = qa_split[:100] 

    for pair in qa_split_100:
        query = pair[0]
        answer = pair[1]
        response = ''
        
        if model_type == 'g':
            response = query_gemini(query, use_rag, prompt)
            # We will be querying twice in this script, and cannot exceed 15 a minute.
            time.sleep(8.01)
        else:
            response = query_model(query, use_rag, prompt)
            r = response.split('**Response**')
            if len(r) > 1:
                response = r[1]
        
        # Vectorize the query and response
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform([query, response])

        # Compute the cosine similarity between query and response
        similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
        
        # Append the cosine similarity score to scores
        scores.append(similarity[0][0])
        
        # use Geval to determine if answer/response are similar in content
        model = genai.GenerativeModel('gemini-1.5-flash',system_instruction = user_input_with_context)
        chat = model.start_chat()
        directions="You are an evaluator. You need to determine if the following paragraphs are similar in content."
        directions += 'Paragraph 1:' + answer
        directions += 'Paragraph 2:' + response
        directions = 'Return 1 if they are similar, or 0 if they are not. Only return a 1 or 0.'
        res = chat.send_message(user_input)
        try:
            res = int(res)
        except:
            res = 0
        content_sim.append(res)
    # Calculate the average similarity
    avg_similarity = sum(scores) / len(scores) if scores else 0
    avg_similarity_content = sum(content_sim) / len(content_sim) if content_sim else 0
    return avg_similarity, avg_similarity_content



# Summarize Ideas Generated by ChatGPT
Idea1 : I’m really excited about this idea I’ve been working on—a subscription service for sustainable packaging aimed at small businesses and e-commerce stores. With the growing shift toward sustainability, more and more companies are realizing they need to reduce their environmental impact, but finding affordable eco-friendly packaging solutions can be a challenge. That’s where my business comes in!

Idea 2: The idea is simple but powerful—create a community where people can access personalized, one-on-one learning experiences without the financial barrier. Whether it’s learning a new language, picking up a hobby, or gaining professional skills, SkillSwap makes learning more accessible and affordable.

How it works: Users create profiles listing the skills they can teach and the skills they want to learn. Then, they can search for matches and negotiate trades, using the platform to message, schedule, and even track progress. The platform could also have a rating system to ensure quality and reliability, building trust among users.

What excites me about SkillSwap is how it fosters community and knowledge sharing in a way that doesn’t require money. It’s all about people connecting, learning, and growing together—helping each other out in an organic, collaborative way. Plus, it could eventually scale into a larger network where users can find professional services they need in exchange for skills, creating a new kind of “skills economy.

In [10]:
idea1 = 'I’m really excited about this idea I’ve been working on—a subscription service for sustainable packaging aimed at small businesses and e-commerce stores. With the growing shift toward sustainability, more and more companies are realizing they need to reduce their environmental impact, but finding affordable eco-friendly packaging solutions can be a challenge. That’s where my business comes in!'
sum1 = query_summarizer(model, idea1, summary_prompt)
print(sum1)

Took 0.2508981227874756 seconds
Query sent to LLM
 You are an expert business consultant specializing in Washington State. Your task is to summarize the business idea provided below, focusing solely on the information given, without embellishing or adding any additional details.

**Business Idea to Summarize:**

I’m really excited about this idea I’ve been working on—a subscription service for sustainable packaging aimed at small businesses and e-commerce stores. With the growing shift toward sustainability, more and more companies are realizing they need to reduce their environmental impact, but finding affordable eco-friendly packaging solutions can be a challenge. That’s where my business comes in!

Your summary should only include the key points, without adding extra interpretation. Please keep your response concise and focused on the provided content.



/n#########END INPUT####################/n
################FROM LLM############
You are an expert business consultant specializi

In [11]:
idea2 = 'The business idea is simple but powerful—create a community where people can access personalized, one-on-one learning experiences without the financial barrier. Whether it’s learning a new language, picking up a hobby, or gaining professional skills, SkillSwap makes learning more accessible and affordable.'
sum2 = query_summarizer(model, idea2, summary_prompt)
print(sum2)

Took 6.900076150894165 seconds
Query sent to LLM
 You are an expert business consultant specializing in Washington State. Your task is to summarize the business idea provided below, focusing solely on the information given, without embellishing or adding any additional details.

**Business Idea to Summarize:**

The business idea is simple but powerful—create a community where people can access personalized, one-on-one learning experiences without the financial barrier. Whether it’s learning a new language, picking up a hobby, or gaining professional skills, SkillSwap makes learning more accessible and affordable.

Your summary should only include the key points, without adding extra interpretation. Please keep your response concise and focused on the provided content.



/n#########END INPUT####################/n
################FROM LLM############
You are an expert business consultant specializing in Washington State. Your task is to summarize the business idea provided below, focusi

In [12]:
idea3 = 'I want to make a restauraunt that is run by myself and a staff of volunteers.'
sum3 = query_summarizer(model, idea2, summary_prompt)
print(sum3)

Took 4.596269369125366 seconds
Query sent to LLM
 You are an expert business consultant specializing in Washington State. Your task is to summarize the business idea provided below, focusing solely on the information given, without embellishing or adding any additional details.

**Business Idea to Summarize:**

The business idea is simple but powerful—create a community where people can access personalized, one-on-one learning experiences without the financial barrier. Whether it’s learning a new language, picking up a hobby, or gaining professional skills, SkillSwap makes learning more accessible and affordable.

Your summary should only include the key points, without adding extra interpretation. Please keep your response concise and focused on the provided content.



/n#########END INPUT####################/n
################FROM LLM############
You are an expert business consultant specializing in Washington State. Your task is to summarize the business idea provided below, focusi

# Classify Summaries:


In [13]:
class1 = query_classifier(model, sum1, summary_prompt)
print(class1)

Query sent to LLM
 You are legal  assistant tasked with classifying summarized business ideas into one of the following categories: LLC, Non-Profit, Corporation, or Other. 
If the business idea seems unfeasible, return "BAD_IDEA". 
If the business idea would be better suited to a business structure not covered by the three given categories, such as Sole Proprietorship, return "OTHER".


Classify the Business Type:
After reviewing the summary, classify the business into one of the following categories:

LLC (Limited Liability Company): Choose this if the business is small to medium in size, has owners with personal liability protection, and is more focused on flexibility and less on raising large amounts of capital.
Non-Profit: Choose this if the business idea has a clear social or charitable mission with no intention of distributing profits to owners or shareholders.
CORP (Corporation): Choose this if the business aims to raise capital, has shareholders, and is looking to expand at a l

In [14]:
class2 = query_classifier(model, sum2, summary_prompt)
print(class2)

Query sent to LLM
 You are legal  assistant tasked with classifying summarized business ideas into one of the following categories: LLC, Non-Profit, Corporation, or Other. 
If the business idea seems unfeasible, return "BAD_IDEA". 
If the business idea would be better suited to a business structure not covered by the three given categories, such as Sole Proprietorship, return "OTHER".


Classify the Business Type:
After reviewing the summary, classify the business into one of the following categories:

LLC (Limited Liability Company): Choose this if the business is small to medium in size, has owners with personal liability protection, and is more focused on flexibility and less on raising large amounts of capital.
Non-Profit: Choose this if the business idea has a clear social or charitable mission with no intention of distributing profits to owners or shareholders.
CORP (Corporation): Choose this if the business aims to raise capital, has shareholders, and is looking to expand at a l

In [15]:
class3 = query_classifier(model, sum1, summary_prompt)
print(class1)

Query sent to LLM
 You are legal  assistant tasked with classifying summarized business ideas into one of the following categories: LLC, Non-Profit, Corporation, or Other. 
If the business idea seems unfeasible, return "BAD_IDEA". 
If the business idea would be better suited to a business structure not covered by the three given categories, such as Sole Proprietorship, return "OTHER".


Classify the Business Type:
After reviewing the summary, classify the business into one of the following categories:

LLC (Limited Liability Company): Choose this if the business is small to medium in size, has owners with personal liability protection, and is more focused on flexibility and less on raising large amounts of capital.
Non-Profit: Choose this if the business idea has a clear social or charitable mission with no intention of distributing profits to owners or shareholders.
CORP (Corporation): Choose this if the business aims to raise capital, has shareholders, and is looking to expand at a l

In [16]:
def extract_ideas():
    ideas_path ='datasets/business_ideas.txt'
    with open(ideas_path, 'r') as file:
        content = file.read()

    pattern = r"'Idea:'\s*(.*?)\s*'Summary:'\s*(.*?)\s*(?='Idea:|$)"
    pairs = re.findall(pattern, content, re.DOTALL)

    ideas = []
    summaries = []

    for idea, summary in pairs:
        ideas.append(idea.strip())
        summaries.append(summary.strip())

    df = pd.DataFrame({'Idea': ideas, 'Summary': summaries})

    print(df.shape)
    return df

def query_gemini(query):
    model = genai.GenerativeModel('gemini-1.5-flash',system_instruction=query)
    chat = model.start_chat()
    response = chat.send_message(query)
    return response.text.strip()

def classify_summary(summa):
    prompt = f"Classify the following business type based on the summary: {summa}\nThe options are: Non-Profit, LLC, Corporation, Other."
    
    classification = query_gemini(prompt)
    
    return classification




In [21]:
df = extract_ideas()

df['Classification'] = df['Summary'].apply(classify_summary)

(71, 2)


AttributeError: module 'genai' has no attribute 'GenerativeModel'