In [1]:
# [1]
!pip install datasets
!pip install scikit-learn
!pip install ibm-watson-machine-learning==1.0.312

Collecting ibm-watson-machine-learning==1.0.312
  Downloading ibm_watson_machine_learning-1.0.312-py3-none-any.whl.metadata (8.9 kB)
Collecting pandas<1.6.0,>=0.24.2 (from ibm-watson-machine-learning==1.0.312)
  Downloading pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading ibm_watson_machine_learning-1.0.312-py3-none-any.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 28.5 MB/s eta 0:00:00
Downloading pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.0 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.0/12.0 MB 92.6 MB/s eta 0:00:00
Installing collected packages: pandas, ibm-watson-machine-learning
  Attempting uninstall: pandas
    Found existing installation: pandas 2.1.4
    Uninstalling pandas-2.1.4:
      Successfully uninstalled pandas-2.1.4
  Attempting un

In [2]:
# [2]
import os, getpass
from pandas import read_csv

In [3]:
# [3] Watsonx API connection
# Connection settings for the service: the endpoint URL is fixed, while the API key
# is requested interactively through getpass so it is not echoed or stored in the
# notebook source.
credentials = dict(
    url="https://us-south.ml.cloud.ibm.com",
    apikey=getpass.getpass("Please enter your WML api key (hit enter): "),
)

Please enter your WML api key (hit enter):  ········


In [5]:
# [4] Defining the project id
# Use the PROJECT_ID environment variable when it is defined; otherwise fall back
# to asking the user for the id interactively.
if "PROJECT_ID" in os.environ:
    project_id = os.environ["PROJECT_ID"]
else:
    project_id = input("Please enter your project_id (hit enter): ")


In [6]:
# [8]
# Echo the resolved project id so the reader can confirm which project the model
# cell below will be bound to.
# NOTE(review): the value is persisted in the saved cell output; clear the output
# before sharing the notebook if the id should not be disclosed.
project_id

'6b34ba33-226c-4442-856c-498cfc8a05cb'

In [7]:
# [9] Creating Legal Document Dataset
import pandas as pd
from sklearn.model_selection import train_test_split

# Toy corpus: each entry pairs the opening sentence of a legal document with its
# document type, so a text and its label cannot drift out of alignment.
_legal_samples = [
    ("This Non-Disclosure Agreement is made between Company A and Company B...", "NDA"),
    ("This Service Agreement outlines the responsibilities of the provider and the client...", "Service Agreement"),
    ("This Employment Contract sets forth the terms of employment between the employer and employee...", "Employment Contract"),
    ("This Lease Agreement is entered into between the landlord and tenant...", "Lease Agreement"),
    ("This Consulting Agreement is made effective as of the date between the consultant and the client...", "Consulting Agreement"),
    ("This Purchase Order confirms the agreement to purchase goods from the vendor...", "Purchase Order"),
    ("This Licensing Agreement grants the licensee certain rights to use intellectual property...", "Licensing Agreement"),
    ("This Partnership Agreement details the terms of collaboration between the two firms...", "Partnership Agreement"),
    ("This Memorandum of Understanding outlines the preliminary terms agreed by both parties...", "MOU"),
    ("This Loan Agreement specifies the obligations of the borrower and the lender...", "Loan Agreement"),
]

# Column-oriented view of the same samples (text column first, label column second).
data = {
    "Document_Text": [text for text, kind in _legal_samples],
    "Document_Type": [kind for text, kind in _legal_samples],
}

df = pd.DataFrame(data)

# 70/30 split with a fixed seed so the same rows land in each split on every run.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.3)

# Persist both splits without the index column; later cells reload them from disk.
train_df.to_csv("legal_train.csv", index=False)
test_df.to_csv("legal_test.csv", index=False)

In [8]:
# [10] Load Training Data
# Read the training split back from the CSV written by the dataset cell.
train_data = pd.read_csv("legal_train.csv")

# Preview the first five rows (head() defaults to 5).
train_data.head()

Unnamed: 0,Document_Text,Document_Type
0,This Non-Disclosure Agreement is made between ...,NDA
1,This Partnership Agreement details the terms o...,Partnership Agreement
2,This Employment Contract sets forth the terms ...,Employment Contract
3,This Loan Agreement specifies the obligations ...,Loan Agreement
4,This Consulting Agreement is made effective as...,Consulting Agreement


In [9]:
# [11] Load Test Data
# Read the held-out split back from the CSV written by the dataset cell.
test_data = pd.read_csv("legal_test.csv")

# Preview the first five rows (head() defaults to 5).
test_data.head()

Unnamed: 0,Document_Text,Document_Type
0,This Memorandum of Understanding outlines the ...,MOU
1,This Service Agreement outlines the responsibi...,Service Agreement
2,This Purchase Order confirms the agreement to ...,Purchase Order


In [10]:
# [12]
# Show the (rows, columns) shape of the training frame read from legal_train.csv.
train_data.shape

(7, 2)

In [11]:
# [13]
# Show the (rows, columns) shape of the test frame read from legal_test.csv.
test_data.shape

(3, 2)

In [12]:
# [14] Import FLAN model
# ModelTypes comes from the library's `enums` module; presumably an enumeration of
# the supported foundation-model identifiers — confirm against the library docs.
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
# Select the FLAN_UL2 entry; this value is passed as `model_id` when the Model
# object is constructed in a later cell.
model_id = ModelTypes.FLAN_UL2

In [13]:
# [15] Prompt for Classification
# Instruction text sent ahead of every document: it lists the ten allowed labels
# and gives one worked example in "Document: ... / Type: ..." form.
# The string is not a raw string, so the "\n\n" on the example's last line is
# interpreted by Python as two newline characters; together with the line break
# before the closing quotes the text ends in three newlines.
classification_instruction = """
Classify the legal document into one of the following types:
'NDA', 'Service Agreement', 'Employment Contract', 'Lease Agreement', 'Consulting Agreement',
'Purchase Order', 'Licensing Agreement', 'Partnership Agreement', 'MOU', 'Loan Agreement'.

Document: This Non-Disclosure Agreement is made between Company A and Company B...
Type: NDA\n\n
"""

In [14]:
# [16]
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams

# Generation parameters handed to the model: MAX_NEW_TOKENS is set to 10.
parameters = {}
parameters[GenParams.MAX_NEW_TOKENS] = 10

In [15]:
# [17]
from ibm_watson_machine_learning.foundation_models import Model

# Build the model client from the pieces prepared in earlier cells: the selected
# model id, the service credentials, the target project and the generation
# parameters. All arguments are passed by keyword.
model = Model(
    model_id=model_id,
    credentials=credentials,
    project_id=project_id,
    params=parameters,
)

In [16]:
# [18] Run Classification
def _build_classification_prompt(instruction, document):
    """Return the full prompt for classifying one document.

    The query is formatted exactly like the worked example inside the
    instruction ("Document: ..." followed by "Type:"), and the prompt ends on
    the "Type:" cue so the model completes the label. Previously the raw
    document text was glued to the instruction with a single space, which left
    a stray leading blank and gave the model no cue that a label was expected.

    Args:
        instruction: The task description plus worked example(s).
        document: The text of the document to classify.

    Returns:
        The prompt string to send to the model.
    """
    # Normalise trailing whitespace so exactly one blank line separates the
    # worked example from the query, whatever the instruction ends with.
    return f"{instruction.rstrip()}\n\nDocument: {document}\nType:"


# Classify every test document, one request per document, keeping the answers in
# the same order as `documents`.
results = []
documents = list(test_data.Document_Text)

for doc in documents:
    results.append(
        model.generate_text(
            prompt=_build_classification_prompt(classification_instruction, doc)
        )
    )


In [17]:
# [19]
# Display the test documents that were sent to the model, in request order.
documents

['This Memorandum of Understanding outlines the preliminary terms agreed by both parties...',
 'This Service Agreement outlines the responsibilities of the provider and the client...',
 'This Purchase Order confirms the agreement to purchase goods from the vendor...']

In [18]:
# [20]
# Display the model's answers; entry i corresponds to documents[i].
results

['MOU', 'Service Agreement', 'Purchase Order']