# Risk Analysis with Gemini 2.0 Flash
This notebook implements risk analysis for contract clauses using Gemini 2.0 Flash.

In [2]:
import os

import google.generativeai as genai
import pandas as pd
from datasets import load_dataset

# Set up Gemini API key
# The key is read from the GEMINI_API_KEY environment variable so the secret is
# never stored in the notebook source, its saved outputs, or version control.
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', '')
if not GEMINI_API_KEY:
    # Configure anyway (as before) but tell the reader why later requests are likely to fail.
    print("Warning: GEMINI_API_KEY is not set; export it before running the Gemini cells.")
genai.configure(api_key=GEMINI_API_KEY)

## Load and Preprocess Dataset

In [8]:
# Sampling configuration
NO_SAMPLE_RATIO = 0.2  # 'no' clauses kept per task, as a fraction of that task's 'yes' count
RANDOM_SEED = 42       # fixed seed so the 'no' sample is reproducible
NO_CLASS_LABEL = 3     # single shared label for every sampled 'no' clause


def _rows_with_answer(df, answer):
    """Return the rows of `df` whose 'answer' column equals `answer`, ignoring case."""
    return df[df['answer'].str.lower() == answer]


def _yes_rows(df, class_label):
    """Return a new frame with the 'yes' rows of `df`, tagged with `class_label`."""
    return _rows_with_answer(df, 'yes').assign(class_label=class_label)


def _sampled_no_rows(df, n_yes):
    """Return a reproducible sample of the 'no' rows of `df`, tagged with NO_CLASS_LABEL.

    The sample size is int(NO_SAMPLE_RATIO * n_yes), capped at the number of
    available 'no' rows so that `sample` cannot raise when a task has too few.
    """
    no_rows = _rows_with_answer(df, 'no')
    n_target = min(int(NO_SAMPLE_RATIO * n_yes), len(no_rows))
    return no_rows.sample(n=n_target, random_state=RANDOM_SEED).assign(class_label=NO_CLASS_LABEL)


# Load datasets
# NOTE(review): trust_remote_code=True presumably lets the dataset repository's
# loading script run locally -- keep it only for repositories you trust.
dataset_cap = load_dataset("nguha/legalbench", "cuad_cap_on_liability", trust_remote_code=True)
dataset_audit = load_dataset("nguha/legalbench", "cuad_audit_rights", trust_remote_code=True)
dataset_insurance = load_dataset("nguha/legalbench", "cuad_insurance", trust_remote_code=True)

# Convert the 'test' split of each task to a DataFrame
df_cap = pd.DataFrame(dataset_cap['test'])
df_audit = pd.DataFrame(dataset_audit['test'])
df_insurance = pd.DataFrame(dataset_insurance['test'])

# 'yes' examples: one class label per task
df_cap_yes = _yes_rows(df_cap, 0)              # cap_on_liability
df_audit_yes = _yes_rows(df_audit, 1)          # audit_rights
df_insurance_yes = _yes_rows(df_insurance, 2)  # insurance

# 'no' examples: per task, keep a sample sized at 20% of that task's 'yes' count
df_cap_no = _sampled_no_rows(df_cap, len(df_cap_yes))
df_audit_no = _sampled_no_rows(df_audit, len(df_audit_yes))
df_insurance_no = _sampled_no_rows(df_insurance, len(df_insurance_yes))

# Combine all
df_combined = pd.concat(
    [df_cap_yes, df_audit_yes, df_insurance_yes, df_cap_no, df_audit_no, df_insurance_no],
    ignore_index=True,
)

# Deduplicate on the raw text (first occurrence wins, so 'yes' rows take priority
# over sampled 'no' rows because of the concat order above), then normalise.
print(f"Total clauses before deduplication: {len(df_combined)}")
df_combined = df_combined.drop_duplicates(subset=['text'], keep='first')
print(f"Total clauses after deduplication: {len(df_combined)}")
# .str accessors leave missing text as NaN instead of raising like x.strip() would
df_combined = df_combined.assign(cleaned_text=df_combined['text'].str.strip().str.lower())

Total clauses before deduplication: 2094
Total clauses after deduplication: 2052


## Risk Analysis Function and Test

In [5]:
# Risk analysis function with Gemini
def run_risk_analysis_gemini(clause, model_name='gemini-2.0-flash-thinking-exp-01-21'):
    """Ask a Gemini model to identify potential risks in a contract clause.

    Args:
        clause: Text of the contract clause to analyze.
        model_name: Model identifier passed to `genai.GenerativeModel`.
            Defaults to 'gemini-2.0-flash-thinking-exp-01-21'
            ('gemma-3-27b-it' was noted as an alternative).

    Returns:
        The response text from the model. If the clause is empty or not a
        string, or if the request raises, a string starting with
        "Error analyzing clause: " is returned instead of raising.
    """
    # Do not spend an API call on input that cannot yield a meaningful analysis
    # (e.g. NaN from a DataFrame column or a whitespace-only string).
    if not isinstance(clause, str) or not clause.strip():
        return "Error analyzing clause: clause is empty or not a string"
    try:
        model = genai.GenerativeModel(model_name)
        prompt = f"You are a legal advisor. Identify any potential risks in this contract clause: '{clause}'"
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Best-effort by design: report the failure as text so a loop over many
        # clauses keeps going instead of stopping at the first API error.
        return f"Error analyzing clause: {str(e)}"

In [7]:
# Test on one clause per class
# Take the first cleaned clause of every class, keeping the class_label index so
# the printed label is the real class id. (Enumerating the list would print a
# positional index, which is wrong whenever a class is missing from df_combined.)
first_clause_by_class = df_combined.groupby('class_label')['cleaned_text'].apply(lambda x: x.iloc[0])
sample_clauses = first_clause_by_class.tolist()
for label, clause in first_clause_by_class.items():
    risk_analysis = run_risk_analysis_gemini(clause)
    print(f'Class {label}: {clause[:100]}...\nRisk Analysis: {risk_analysis}\n')

Class 0: notwithstanding anything contained in this agreement to the contrary, neither party shall be liable ...
Risk Analysis: This clause is a **limitation of liability clause**, and while common, it has several potential risks and points to consider. Let's break them down:

**1. Overly Broad Exclusion of Damages:**

* **Risk:** The clause attempts to exclude liability for "special, consequential, incidental or punitive damages, however caused, based on any theory of liability." This is a very broad exclusion encompassing a wide range of potential damages.
* **Explanation:**
    * **Consequential Damages:** These are indirect losses that arise as a consequence of a breach (e.g., lost profits, business interruption). They can be substantial.
    * **Incidental Damages:** These are expenses incurred by the non-breaching party to avoid further loss after a breach (e.g., costs of cover, storage fees).
    * **Special Damages:**  These are damages unique to the specific circumstances of t