In [1]:
import pandas as pd
df= pd.read_csv("/content/DATASET2.csv", names=["Text", "Labels"],header=None)
print(df.shape)
df.head(3)

(12, 2)


Unnamed: 0,Text,Labels
0,"BEFORE THE MOTOR ACCIDENTS CLAIMS TRIBUNAL, KA...",TITLE
1,PETITIONER/S:\n\n1.\tAmbika w/o Late Chandraka...,PETITIONER
2,"RESPONDENT/S\n\n1 \tKapil s/o Shamu Chavan, Ag...",RESPONDENT INFORMATION


In [1]:
!pip install transformers



In [None]:
df["Text"][0]

'BEFORE THE MOTOR ACCIDENTS CLAIMS TRIBUNAL, KALABURAGI\nIN THE COURT OF THE   DIST  JUDGE  AT KALABURAGI.\n\nM.V.C. No.\t\t/ 2023\n'

In [None]:
import re

def Preprocess(text):
    # Remove extra white spaces, tabs, and line breaks
    text = re.sub('\s+', ' ', text)

    # Remove special characters and punctuation
    text = re.sub(r'[^\w\s]', '', text)

    return text

In [None]:
df["Text"] = df["Text"].map(Preprocess)

In [None]:
df["Text"].head()

0    BEFORE THE MOTOR ACCIDENTS CLAIMS TRIBUNAL KAL...
1                                         MVC No  2023
2    PETITIONERS 1 Ambika wo Late Chandrakant Ratho...
3    RESPONDENTS 1 Kapil so Shamu Chavan Age major ...
4    Under Sec 166 of the Motor Vehicles Act 1989 t...
Name: Text, dtype: object

In [None]:
df["Labels"] = df["Labels"].map(Preprocess)

In [None]:
df["Labels"].head()

0    CASE INFORMATION
1    CASE INFORMATION
2    CASE INFORMATION
3    CASE INFORMATION
4    CASE INFORMATION
Name: Labels, dtype: object

In [None]:
import torch
import transformers
from transformers import BertTokenizer, BertForSequenceClassification, BertForMaskedLM
from torch.utils.data import DataLoader, Dataset, random_split

# Define your dataset class
class LegalDocumentDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(
            text, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': label
        }

# Load and preprocess your dataset
texts = df["Text"]  # List of legal document text
labels = df["Labels"]  # List of corresponding labels
j;
# Split your dataset into training and validation sets
split_ratio = 0.8  # 80% for training, 20% for validation
total_samples = len(texts)
train_size = int(split_ratio * total_samples)
val_size = total_samples - train_size
train_texts = texts[:train_size]
train_labels = labels[:train_size]
val_texts = texts[train_size:]
val_labels = labels[train_size:]

# Initialize the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')

# Create data loaders for training and validation
train_dataset = LegalDocumentDataset(train_texts, train_labels, tokenizer, max_length=512)
val_dataset = LegalDocumentDataset(val_texts, val_labels, tokenizer, max_length=512)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

# Define training parameters
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
epochs = 3

# Training loop
for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
        loss = outputs.loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Save the fine-tuned model
model.save_pretrained('fine_tuned_legal_model')

# Use the fine-tuned model to generate legal documents
model = BertForMaskedLM.from_pretrained('fine_tuned_legal_model')

# Generate a legal document
input_text = "In the Court of the DIST JUDGE AT KALABURAGI, M.V.C. No. /2023 ..."
input_ids = tokenizer(input_text, return_tensors='pt')['input_ids']
output = model.generate(input_ids, max_length=512, num_return_sequences=1)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from torch.utils.data import DataLoader, Dataset

# Define your dataset class
class LegalDocumentDataset(Dataset):
    def __init__(self, text_to_generate, tokenizer, max_length):
        self.text_to_generate = text_to_generate
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.text_to_generate)

    def __getitem__(self, idx):
        text = self.text_to_generate[idx]
        encoding = self.tokenizer(
            text, padding='max_length', truncation=True, max_length=self.max_length, return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }

# Initialize the GPT-2 tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2')

# Create data loaders for generating legal documents
texts_to_generate = df['Text']

generate_dataset = LegalDocumentDataset(texts_to_generate, tokenizer, max_length=512)
generate_loader = DataLoader(generate_dataset, batch_size=1)

# Generate legal documents
generated_documents = []

model.eval()
for batch in generate_loader:
    input_ids = batch['input_ids']
    attention_mask = batch['attention_mask']
    output = model.generate(input_ids, max_length=512, num_return_sequences=1, no_repeat_ngram_size=2)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    generated_documents.append(generated_text)

# Print generated legal documents
for i, document in enumerate(generated_documents):
    print(f"Generated Document {i + 1}:\n")
    print(document)
    print("\n")

# Save the generated documents to files
for i, document in enumerate(generated_documents):
    with open(f"generated_document_{i + 1}.txt", "w") as file:
        file.write(document)


NameError: ignored

In [None]:
import pandas as pd
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments

# Load the pre-trained model and tokenizer
model_name = "gpt2"  # You can use a different model based on your requirements
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Define the structure and sections of your documents
document_structure = {
    "Title/Header Information": "[TITLE]",
    "Petitioner Information": "[PETITIONER]",
    "Respondent Information": "[RESPONDENT]",
    "Motor Accident Claim": "[CLAIM]",
    "Accident Details": "[ACCIDENT]",
    "Injury and Medical Information": "[INJURY]",
    "Loss and Compensation Claim": "[CLAIM]",
    "Jurisdiction and Prayer": "[JURISDICTION]",
    "Declaration": "[DECLARATION]",
    "List of Documents": "[DOCUMENTS]",
    "Application for Permission to Engage Counsel": "[PERMISSION]",
    "Memo": "[MEMO]"
}

# Read data from a CSV file
#df = pd.read_csv("/content/DATASET2.csv")  # Replace with the path to your CSV file

# Prepare data for fine-tuning
text_data = df["Text"]  # Assuming the text is in a column named "Text"
# You might need to adapt the code to match your specific CSV format

# Tokenize and preprocess the text data
input_ids = []
for text in text_data:
    input_ids.append(tokenizer.encode(text, add_special_tokens=True, max_length=128, truncation=True))  #, padding=True
print("\n",input_ids)

"""
new_list = [str(current_integer) for current_integer in input_ids]
string_value = "".join(new_list)
number = int(string_value)
print(number)
"""

with open("input_ids.txt","w") as f:
    for ids in input_ids:
        f.write(" ".join(map(str, ids)) + "\n")

# Create a TextDataset
dataset = TextDataset(
    tokenizer=tokenizer,
    file_path= "input_ids.txt",  # Pass None as we're using text_data directly
    block_size=128,
    #document_structure=document_structure
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./output",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    save_steps=10_000,
    save_total_limit=2,
)

# Create a Trainer instance and start fine-tuning
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

trainer.train()

# Save the fine-tuned model
trainer.save_model()

# You can now use the fine-tuned model for generating legal accident documents



 [[12473, 30818, 3336, 42982, 1581, 15859, 2389, 15365, 47666, 3955, 50, 37679, 33, 4944, 1847, 11, 509, 1847, 6242, 45570, 18878, 198, 1268, 3336, 46627, 3963, 3336, 220, 220, 360, 8808, 220, 449, 8322, 8264, 220, 5161, 509, 1847, 6242, 45570, 18878, 13, 198, 198, 44, 13, 53, 13, 34, 13, 1400, 13, 197, 197, 14, 1160, 1954, 198], [47731, 17941, 1137, 14, 50, 25, 198, 198, 16, 13, 197, 35649, 9232, 266, 14, 78, 18319, 46295, 74, 415, 26494, 375, 11, 7129, 546, 1058, 3439, 812, 11, 1609, 25, 37306, 11, 220, 198, 198, 17, 13, 197, 24095, 3972, 360, 14, 78, 46295, 74, 415, 26494, 375, 11, 7129, 546, 1058, 1433, 331, 3808, 1609, 1058, 3710, 198, 198, 18, 13, 197, 6719, 2611, 360, 14, 78, 46295, 74, 415, 26494, 375, 11, 7129, 546, 1058, 1415, 331, 3808, 1609, 1058, 13613, 11, 220, 220, 198, 198, 19, 13, 197, 2025, 73, 7344, 360, 14, 78, 46295, 74, 415, 26494, 375, 11, 7129, 546, 1058, 1485, 331, 3808, 1609, 1058, 3710, 198, 198, 20, 13, 197, 45, 45429, 73, 311, 14, 78, 46295, 74, 415, 26494

ImportError: ignored

In [75]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments


model_name = "/content/output_1"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)


dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="/content/1.json",
    block_size=128,
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False,
)

training_args = TrainingArguments(
    output_dir="./output_2",
    overwrite_output_dir=True,
    num_train_epochs=2,
    per_device_train_batch_size=8,
    save_steps=10_000,
    save_total_limit=2,
)


trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

trainer.train()

trainer.save_model()
# Save the tokenizer to the output directory
tokenizer.save_pretrained('/content/output_1')




Step,Training Loss


('/content/output_1/tokenizer_config.json',
 '/content/output_1/special_tokens_map.json',
 '/content/output_1/vocab.json',
 '/content/output_1/merges.txt',
 '/content/output_1/added_tokens.json')

In [4]:
import tensorflow as tf
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, TrainingArguments, TFTrainer


model_name = "gpt2"
model = TFGPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="/content/data_nlp.txt",
    block_size=128,
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False,
)


training_args = TrainingArguments(
    output_dir="./output_1",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    save_steps=10_000,
    save_total_limit=2,
)


trainer = TFTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator,
)


trainer.train()


trainer.save_model()



All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [22]:
!pip uninstall accelerate
!pip cache purge
!pip install accelerate==0.20.3

[0mFiles removed: 56
Collecting accelerate==0.20.3
  Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.6/227.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.20.3


In [76]:
!zip -r trainer.zip /content/output_2

  adding: content/output_2/ (stored 0%)
  adding: content/output_2/training_args.bin (deflated 51%)
  adding: content/output_2/vocab.json (deflated 68%)
  adding: content/output_2/config.json (deflated 50%)
  adding: content/output_2/generation_config.json (deflated 24%)
  adding: content/output_2/merges.txt (deflated 53%)
  adding: content/output_2/tokenizer_config.json (deflated 54%)
  adding: content/output_2/special_tokens_map.json (deflated 74%)
  adding: content/output_2/model.safetensors (deflated 7%)
  adding: content/output_2/runs/ (stored 0%)
  adding: content/output_2/runs/Nov03_22-52-42_dd74f5a69515/ (stored 0%)
  adding: content/output_2/runs/Nov03_22-52-42_dd74f5a69515/events.out.tfevents.1699051963.dd74f5a69515.11510.5 (deflated 60%)
  adding: content/output_2/runs/Nov03_22-50-54_dd74f5a69515/ (stored 0%)
  adding: content/output_2/runs/Nov03_22-50-54_dd74f5a69515/events.out.tfevents.1699051854.dd74f5a69515.11510.4 (deflated 60%)
  adding: content/output_2/runs/Nov03_23-

In [8]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer


model_name = trainer
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)


prompt = "In the case of an accident, the injured party should"
input_ids = tokenizer.encode(prompt, return_tensors="pt")


output = model.generate(input_ids, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50)


generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)


In [10]:
fine_tuned_model = GPT2LMHeadModel.from_pretrained("/content/output_1")  # Replace with your fine-tuned model directory


fine_tuned_model.eval()


prompt =  """
* Date of the accident:
* Time of the accident:
* Location of the accident:
* Parties involved in the accident:
* Injuries sustained in the accident:
* Damages incurred in the accident:
"""


generated_text = fine_tuned_model.generate(input_ids=tokenizer.encode(prompt, return_tensors='pt'), max_length=1000, num_return_sequences=1, no_repeat_ngram_size=2)


generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True)
print(generated_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



* Date of the accident:
* Time of the accident:
* Location of the accident:
* Parties involved in the accident:
* Injuries sustained in the accident:
* Damages incurred in the accident:
The accident was reported to the police on the following day: October 1, 2004.
On the same day, the driver of a car was killed in a road accident in Kolkata. The driver was a resident of Kalyanagar, Kailash, and was driving a white car. He was travelling at a speed of about 50 km/h. His vehicle was stopped at the intersection of Bhatkal Road and Kalkalagar Road. A police officer was present at his place of employment. On the morning of October 2, a police car stopped the vehicle and the car went into a ditch. It was found that the body of one of its occupants was lying on a pile of debris. There was no body on it. No one was injured. Police had taken the deceased to a hospital. After the autopsy, they found the remains of two of his legs and a large amount of blood on his body. They also found a small 

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments


model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)


dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="/content/ambika_dataset.json",
    block_size=128,
)


data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)


training_args = TrainingArguments(
    output_dir="./output_legal",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    save_steps=10_000,
    save_total_limit=2,
)


trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

trainer.train()


trainer.save_model()


In [10]:
import bard

from bardapi import Bard

response = bard.generate(prompt + user_response, max_tokens=10000)


ModuleNotFoundError: ignored

In [15]:
import bardapi
from bardapi import Bard

# Set the Bard API key
api_key = "cgjVQwGYLkc60XgsiGIBGwOMadvlFnyIEh18GlgrDZb7junAWVxuOe0muyHYnyI5M9-HhA."

# Define the prompt
prompt = """
Please provide the following information about the motor vehicle accident:

* Date of the accident:
* Time of the accident:
* Location of the accident:
* Parties involved in the accident:
* Injuries sustained in the accident:
* Damages incurred in the accident:

Once you have provided this information, I will generate a legal accident document on your behalf.
"""

# Get the user's response to the prompt
user_response = input(prompt)

# Create a Bard instance with your API key
bard = Bard(token='cgjVQwGYLkc60XgsiGIBGwOMadvlFnyIEh18GlgrDZb7junAWVxuOe0muyHYnyI5M9-HhA.')


# Generate the legal document using Bard
response = bard.generate(prompt + user_response, max_tokens=10000)
# Get the generated document
generated_document = response["generated_text"]

# Print or save the generated document
print(generated_document)



Please provide the following information about the motor vehicle accident:

* Date of the accident:
* Time of the accident:
* Location of the accident:
* Parties involved in the accident:
* Injuries sustained in the accident:
* Damages incurred in the accident:

Once you have provided this information, I will generate a legal accident document on your behalf.
dfghjkl


Exception: ignored

In [7]:
!pip install accelerate -U



In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "tiiuae/falcon-7b"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Example text generation pipeline
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
text = text_generator("Generate text based on this prompt: ", max_length=5, num_return_sequences=1)

print(text)


Using Gradio to wrap a text to text interface around GPT-J-6B

In [4]:
!pip install --q gradio
!pip install -q git+https://github.com/huggingface/transformers.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone


In [16]:
import gradio as gr
import tensorflow as tf
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer

In [17]:
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = TFGPT2LMHeadModel.from_pretrained(model_name)

All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [18]:
def generate_text(inp):
  input_ids = tokenizer.encode(inp, return_tensors='tf')
  beam_output = model.generate(input_ids,max_length=100,num_beams=5,no_repeat_ngram_size=2,early_stopping=True)
  output = tokenizer.decode(beam_output[0], skip_special_token=True,clean_up_tokenization_spaces=True)
  return ".".join(output.split(".")[:-1]) + "."

In [19]:
output_text = gr.outputs.Textbox()
gr.Interface(generate_text,"text_box",output_text,title="GPT-2",
             description="Hello GPT2.").launch()

AttributeError: ignored

In [None]:
import gradio as gr

def generate_text(inp):
  input_ids = tokenizer.encode(inp, return_tensors='tf')
  beam_output = model.generate(input_ids,max_length=100,num_beams=5,no_repeat_ngram_size=2,early_stopping=True)
  output = tokenizer.decode(beam_output[0], skip_special_token=True,clean_up_tokenization_spaces=True)
  return ".".join(output.split(".")[:-1]) + "."

# Create a Gradio interface with text input and text output
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="GPT-2",
    description="Hello GPT2."
)

# Launch the Gradio interface
iface.launch()

In [None]:
import gradio as gr

def generate_text(inp):
  input_ids = tokenizer.encode(inp, return_tensors='tf')
  beam_output = model.generate(input_ids,max_length=100,num_beams=5,no_repeat_ngram_size=2,early_stopping=True)
  output = tokenizer.decode(beam_output[0], skip_special_token=True,clean_up_tokenization_spaces=True)
  return ".".join(output.split(".")[:-1]) + "."

# Create a Gradio interface with text input and text output
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="GPT-2",
    description="Hello GPT2."
)

# Launch the Gradio interface

iface.launch()

In [28]:
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the GPT-2 model and tokenizer
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a list of prompts
prompts = [
"Please provide the date of the accident:",
"Please provide the time of the accident:",
"Please provide the location of the accident:",
"Please provide the parties involved in the accident:",
"Please provide information about injuries sustained in the accident:",
"Please provide accidental details:"
]

# Create input components for each prompt
input_components = [gr.Textbox(label="Date of Accident"),
                    gr.Textbox(label="Time of Accident"),
                    gr.Textbox(label="Location of Accident"),
                    gr.Textbox(label="Parties involved in the Accident"),
                    gr.Textbox(label="Injuries sustained in the Accident"),
                    gr.Textbox(label="Damages incurred in the Accident")]

def generate_text(*inputs):
  # Combine user input with the predefined prompts
  full_prompt = "\n".join([f"{prompt} {user_input}" for prompt, user_input in zip(prompts, inputs)])
  input_ids = tokenizer.encode(full_prompt, return_tensors='pt')
  output = model.generate(input_ids, max_length=200, num_return_sequences=1, no_repeat_ngram_size=2, early_stopping=True)
  generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
  return generated_text

# Create a Gradio interface
iface = gr.Interface(
  fn=generate_text,
  inputs=input_components,
  outputs="text",
  title="Legal Document Generator",
  description="Generate a legal accident document based on user input."
)

# Launch the Gradio interface
iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://2ef10db7e0ed260239.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the GPT-2 model and tokenizer
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define the prompt
prompt = """
Please provide the following information about the motor vehicle accident:

* Date of the accident:
* Time of the accident:
* Location of the accident:
* Parties involved in the accident:
* Injuries sustained in the accident:
* Damages incurred in the accident:
"""

# Create input components for each prompt
input_components = [gr.Textbox() for prompt in prompts]

# Define a function to generate the legal document
def generate_legal_document(full_prompt):
  """Generates a legal accident document.

  Args:
    full_prompt: The full prompt, including the predefined prompt and the user's response.

  Returns:
    The generated legal accident document.
  """

  # Generate the legal document using the GPT-2 model.
  input_ids = tokenizer.encode(full_prompt, return_tensors='pt')
  output = model.generate(input_ids, max_length=200, num_return_sequences=1, no_repeat_ngram_size=2, early_stopping=True)
  generated_document = tokenizer.decode(output[0], skip_special_tokens=True)

  return generated_document

# Define the Gradio interface
iface = gr.Interface(
  fn=generate_legal_document,
  inputs=input_components,
  outputs="text",
  title="Legal Document Generator",
  description="Generate a legal accident document based on user input."
)

# Launch the Gradio interface
iface.launch()


In [37]:
import langchain
from langchain.client import Client
# Initialize the LangChain client
client = langchain.Client()

# Load the GPT-2 model
model = langchain.Model("gpt2")

# Define the prompt
prompt = """
Please provide the following information about the motor vehicle accident:

* Date of the accident:
* Time of the accident:
* Location of the accident:
* Parties involved in the accident:
* Injuries sustained in the accident:
* Damages incurred in the accident:
"""

# Get the user's response to the prompt
user_response = input(prompt)

# Generate the legal document
generated_document = client.generate(model, prompt + user_response, max_length=200)

# Print the generated document
print(generated_document)




In [40]:
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the GPT-2 model and tokenizer
model_name = "/content/output_1"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a list of prompts
prompts = [
"Please provide the date of the accident:",
"Please provide the time of the accident:",
"Please provide the location of the accident:",
"Please provide the parties involved in the accident:",
"Please provide information about injuries sustained in the accident:",
"Please provide accidental details:"
]

# Create input components for each prompt
input_components = [gr.Textbox(label="Date of Accident"),
                    gr.Textbox(label="Time of Accident"),
                    gr.Textbox(label="Location of Accident"),
                    gr.Textbox(label="Parties involved in the Accident"),
                    gr.Textbox(label="Injuries sustained in the Accident"),
                    gr.Textbox(label="Damages incurred in the Accident")]

def generate_text(*inputs):
  # Combine user input with the predefined prompts
  full_prompt = "\n".join([f"{prompt} {user_input}" for prompt, user_input in zip(prompts, inputs)])
  input_ids = tokenizer.encode(full_prompt, return_tensors='pt')
  output = model.generate(input_ids, max_length=200, num_return_sequences=1, no_repeat_ngram_size=2, early_stopping=True)
  generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
  return generated_text





In [None]:
import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained('./output_1')
tokenizer = GPT2Tokenizer.from_pretrained('./output_1')

# Create a state variable to store the previous data
previous_data = []

def chatbot(input_text):
  """Generates a response to the given input text."""

  # Update the state variable with the new input
  previous_data.append(input_text)

  # Generate the next response based on the previous data
  input_ids = tokenizer.encode(input_text, return_tensors='pt')
  response_ids = model.generate(input_ids, max_length=50, num_return_sequences=1, no_repeat_ngram_size=2, early_stopping=True)
  response_text = tokenizer.decode(response_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

  return response_text

# Create a Gradio interface for the chatbot
iface = gr.Interface(
  fn=chatbot,
  inputs=[gr.Textbox()],
  outputs="text",
  title="Law ",
  description="Conversion.",
)

# Launch the Gradio interface
iface.launch()


In [60]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned GPT-2 language model
model = AutoModelForCausalLM.from_pretrained("/content/output_1")

# Load the GPT-2 tokenizer
tokenizer = AutoTokenizer.from_pretrained("/content/output_1")

# Define the list of questions to ask the user
questions = ["What is the name of the victim?", "What is the vehicle number?"]

# Define the function to ask the user a question
def ask_question(question):
  print(question)
  response = input()
  return response

# Define the function to generate text using the GPT-2 language model
def generate_text(prompt):
  generated_text = model.generate(input_ids=tokenizer.encode(prompt+Data_set, return_tensors='pt'), max_length=1000, num_return_sequences=1, no_repeat_ngram_size=2)
  generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True)
  return generated_text

# Initialize the conversation state
conversation_state = {}

# Iterate over the list of questions and ask the user each question
for question in questions:
  response = ask_question(question)
  conversation_state[question] = response

Data_set = " ".join(conversation_state)

# Generate text using the GPT-2 language model, based on the conversation state
generated_text = generate_text(f"Based on the information you have provided, the following is an enhanced version of the case details:")

print(generated_text)

What is the name of the victim?
Sadanand
What is the vehicle number?
KA32 Q 657


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Based on the information you have provided, the following is an enhanced version of the case details:What is the name of the victim? What is the vehicle number?What are the names of all the witnesses?Who is responsible for the crime? Who is in charge of investigating the incident?Where is this information?How did the police arrive at the scene?Why did they not arrive?When did police come to the spot?Was there any physical contact between the two men?Did they have any contact with the victims?Were there no witnesses to this incident or any other incident that occurred in the vicinity of this place?If so, what was the reason for this?Is there a witness to any of these incidents?Are there witnesses who have been present at this spot for some time?Do you know any witnesses that have come forward to tell the truth about the matter? If so what are they?

The police have asked the public to come out and come and see the accused.
.


In [64]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned GPT-2 language model
model = AutoModelForCausalLM.from_pretrained("/content/output_1")

# Load the GPT-2 tokenizer
tokenizer = AutoTokenizer.from_pretrained("/content/output_1")

# Define the list of questions to ask the user
questions = ["What is the petitioner's name?", "What is the respondent's name?", "What motor vehicle act was violated?", "Can you provide more details about the accident?"]

# Define the function to ask the user a question
def ask_question(question):
  print(question)
  response = input()
  return response

# Define the function to generate text using the GPT-2 language model
def generate_text(prompt):
  generated_text = model.generate(input_ids=tokenizer.encode(prompt, return_tensors='pt'), max_length=1000, num_return_sequences=1, no_repeat_ngram_size=2)
  generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True,clean_up_tokenization_spaces=True)
  return generated_text


# Initialize the conversation state
conversation_state = {}

# Iterate over the list of questions and ask the user each question
for question in questions:
  response = ask_question(question)
  conversation_state[question] = response

Data_set = " ".join(conversation_state)

generated_text = generate_text(f"""*Case Details*
*Petitioner Name:* {conversation_state['petitioner name']}
*Respondent Name:* {conversation_state['respondent name']}

*Motor Vehicle Act Violated:* {conversation_state['motor vehicle act']}

*Accident Details:* {conversation_state['accident details']}""")

print(generated_text)

What is the petitioner's name?
Sadanand
What is the respondent's name?
Vaibhav
What motor vehicle act was violated?
Under  Sec. 166 of the Motor Vehicles Act. 1989
Can you provide more details about the accident?
That  on 6.5.2023, the deceased Sadanand was proceeding on Motor cycle No.KA.32. EJ 5374 from his village to Sannur cross. He was riding the  Motor cycle slowly with due care and precautions and at about 9.30 Pm, when he was near  Mugal Nagaon cross Sannur Shahabad road, Tq. Shahabad, Dist. Kalaburagi  i.e. 1 Km from Peth Siroor at that time the driver of the Mahindra Pick up van bearing No. KA.32. AA 5003  came  driving his vehicle from opposite side in high speed and in rash and negligent manner, endangering human lives and dashed to the Motor cycle  of the petitioner due to which the deceased Chandrakant fell down and sustained grievous injuries over ie.. grievous fracture over both legs, knee,  abdominal head,  back  and also grievous injuries over other parts of the body.

In [65]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned GPT-2 language model
model = AutoModelForCausalLM.from_pretrained("/content/output_1")

# Load the GPT-2 tokenizer
tokenizer = AutoTokenizer.from_pretrained("/content/output_1")

# Define the list of questions to ask the user
questions = [
    "What is the petitioner's name?",
    "What is the respondent's name?",
    "What motor vehicle act was violated?",
    "Can you provide more details about the accident?"
]

# Define the function to ask the user a question
def ask_question(question):
    print(question)
    response = input()
    return response

# Define the function to generate text using the GPT-2 language model
def generate_text(prompt):
    generated_text = model.generate(input_ids=tokenizer.encode(prompt, return_tensors='pt'), max_length=1000, num_return_sequences=1, no_repeat_ngram_size=2)
    generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return generated_text

# Initialize the conversation state
conversation_state = {}

# Iterate over the list of questions and ask the user each question
for question in questions:
    response = ask_question(question)
    conversation_state[question] = response

# Use the correct keys to access responses in the conversation_state
generated_text = generate_text(f"""*Case Details*

*Petitioner Name:* {conversation_state[questions[0]]}
*Respondent Name:* {conversation_state[questions[1]]}
*Motor Vehicle Act Violated:* {conversation_state[questions[2]]}
*Accident Details:* {conversation_state[questions[3]]}""")

print(generated_text)


What is the petitioner's name?
Sadanand
What is the respondent's name?
Vaibhav
What motor vehicle act was violated?
under the act 166 of motor vehicle 
Can you provide more details about the accident?
That  on 6.5.2023, the deceased Sadanand was proceeding on Motor cycle No.KA.32. EJ 5374 from his village to Sannur cross. He was riding the  Motor cycle slowly with due care and precautions and at about 9.30 Pm, when he was near  Mugal Nagaon cross Sannur Shahabad road, Tq. Shahabad, Dist. Kalaburagi  i.e. 1 Km from Peth Siroor at that time the driver of the Mahindra Pick up van bearing No. KA.32. AA 5003  came  driving his vehicle from opposite side in high speed and in rash and negligent manner, endangering human lives and dashed to the Motor cycle  of the petitioner due to which the deceased Chandrakant fell down and sustained grievous injuries over ie.. grievous fracture over both legs, knee,  abdominal head,  back  and also grievous injuries over other parts of the body.  Immediatel

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


*Case Details*

*Petitioner Name:* Sadanand
*Respondent Name:* Vaibhav
*Motor Vehicle Act Violated:* under the act 166 of motor vehicle 
*Accident Details:* That  on 6.5.2023, the deceased Sadanand was proceeding on Motor cycle No.KA.32. EJ 5374 from his village to Sannur cross. He was riding the  Motor cycle slowly with due care and precautions and at about 9.30 Pm, when he was near  Mugal Nagaon cross Sannur Shahabad road, Tq. Shahabad, Dist. Kalaburagi  i.e. 1 Km from Peth Siroor at that time the driver of the Mahindra Pick up van bearing No. KA.32. AA 5003  came  driving his vehicle from opposite side in high speed and in rash and negligent manner, endangering human lives and dashed to the Motor cycle  of the petitioner due to which the deceased Chandrakant fell down and sustained grievous injuries over ie.. grievous fracture over both legs, knee,  abdominal head,  back  and also grievous injuries over other parts of the body.  Immediately the deceased was brought to Subedar Hospit

In [66]:
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("/content/output_1")

tokenizer = AutoTokenizer.from_pretrained("/content/output_1")

# Define the list of questions to ask the user
questions = ["What is the petitioner's name?", "What is the respondent's name?", "What motor vehicle act was violated?", "Can you provide more details about the accident?"]

# Define the function to ask the user a question
def ask_question(question):
  print(question)
  response = input()
  return response

# Define the function to generate text using the GPT-2 language model
def generate_text(prompt, conversation_state):
  """Generates text using the GPT-2 language model, based on the given prompt and conversation state.

  Args:
    prompt: The prompt to use for generating the text.
    conversation_state: A dictionary containing the conversation state.

  Returns:
    A string containing the generated text.
  """

  # Encode the prompt and conversation state using the tokenizer.
  encoded_prompt = tokenizer.encode(prompt, return_tensors='pt')
  encoded_conversation_state = tokenizer.encode(conversation_state, return_tensors='pt')

  # Concatenate the encoded prompt and conversation state.
  encoded_input = torch.cat([encoded_prompt, encoded_conversation_state], dim=1)

  # Generate text using the GPT-2 language model.
  generated_text = model.generate(input_ids=encoded_input, max_length=1000, num_return_sequences=1, no_repeat_ngram_size=2)

  # Decode the generated text.
  decoded_text = tokenizer.decode(generated_text[0], skip_special_tokens=True)

  return decoded_text

# Start the conversation
conversation_state = {}

for question in questions:
  response = ask_question(question)
  conversation_state[question] = response

# Generate text using the GPT-2 language model, based on the conversation state
generated_text = generate_text(prompt="*Case Details*", conversation_state=conversation_state)

# Enhance the generated text
enhanced_text = """Based on the information you have provided, the following is an enhanced version of the case details:

*Motor Vehicle Act Violated:* Section {conversation_state['motor vehicle act']} of the Motor Vehicles Act, 1988

*Accident Details:* On {date of accident}, at around {time of accident}, the petitioner, {conversation_state['petitioner name']}, was driving a {vehicle type} bearing registration number {vehicle number} on {road name} in {city name}. The respondent, {conversation_state['respondent name']}, was driving a {vehicle type} bearing registration number {respondent vehicle number} in the opposite direction. At the intersection of {road name} and {cross street name}, the respondent's vehicle collided with the petitioner's vehicle. As a result of the collision, the petitioner sustained serious injuries and was taken to the hospital for treatment.

The petitioner has filed a petition under Section {section number} of the Motor Vehicles Act, 1988, seeking compensation for the injuries sustained in the accident. The petitioner has also sought punitive damages from the respondent for reckless driving.

The case is currently pending before the {court name}."""

# Print the generated text to the user
print(enhanced_text)

What is the petitioner's name?
Sadanand
What is the respondent's name?
Vaibhav
What motor vehicle act was violated?
section 166
Can you provide more details about the accident?
That  on 6.5.2023, the deceased Sadanand was proceeding on Motor cycle No.KA.32. EJ 5374 from his village to Sannur cross. He was riding the  Motor cycle slowly with due care and precautions and at about 9.30 Pm, when he was near  Mugal Nagaon cross Sannur Shahabad road, Tq. Shahabad, Dist. Kalaburagi  i.e. 1 Km from Peth Siroor at that time the driver of the Mahindra Pick up van bearing No. KA.32. AA 5003  came  driving his vehicle from opposite side in high speed and in rash and negligent manner, endangering human lives and dashed to the Motor cycle  of the petitioner due to which the deceased Chandrakant fell down and sustained grievous injuries over ie.. grievous fracture over both legs, knee,  abdominal head,  back  and also grievous injuries over other parts of the body.  Immediately the deceased was broug

In [69]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned GPT-2 language model
model = AutoModelForCausalLM.from_pretrained("/content/output_1")

# Load the GPT-2 tokenizer
tokenizer = AutoTokenizer.from_pretrained("/content/output_1")

# Define the list of questions to ask the user
questions = [
    "What is the petitioner's name?",
    "What is the respondent's name?",
    "What motor vehicle act was violated?",
    "Can you provide more details about the accident?"
]

# Define the function to ask the user a question
def ask_question(question):
    print(question)
    response = input()
    return response

# Define the function to generate text using the GPT-2 language model
def generate_text(prompt):
    generated_text = model.generate(input_ids=tokenizer.encode(prompt, return_tensors='pt'), max_length=1000, num_return_sequences=1, no_repeat_ngram_size=2)
    generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return generated_text

# Initialize the conversation state
conversation_state = {}

# Iterate over the list of questions and ask the user each question
for question in questions:
    response = ask_question(question)
    conversation_state[question] = response

# Concatenate user responses into a single string
user_responses = "\n".join(f"{question} {response}" for question, response in conversation_state.items())

# Generate the legal document based on user responses
generated_text = generate_text(user_responses)

# Print the generated legal document
print(generated_text)


What is the petitioner's name?
Sadanand
What is the respondent's name?
abc
What motor vehicle act was violated?
section 166
Can you provide more details about the accident?
That  on 6.5.2023, the deceased Sadanand was proceeding on Motor cycle No.KA.32. EJ 5374 from his village to Sannur cross. He was riding the  Motor cycle slowly with due care and precautions and at about 9.30 Pm, when he was near  Mugal Nagaon cross Sannur Shahabad road, Tq. Shahabad, Dist. Kalaburagi  i.e. 1 Km from Peth Siroor at that time the driver of the Mahindra Pick up van bearing No. KA.32. AA 5003  came  driving his vehicle from opposite side in high speed and in rash and negligent manner, endangering human lives and dashed to the Motor cycle  of the petitioner due to which the deceased Chandrakant fell down and sustained grievous injuries over ie.. grievous fracture over both legs, knee,  abdominal head,  back  and also grievous injuries over other parts of the body.  Immediately the deceased was brought t

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


What is the petitioner's name? Sadanand
What is the respondent's name? abc
What motor vehicle act was violated? section 166
Can you provide more details about the accident? That  on 6.5.2023, the deceased Sadanand was proceeding on Motor cycle No.KA.32. EJ 5374 from his village to Sannur cross. He was riding the  Motor cycle slowly with due care and precautions and at about 9.30 Pm, when he was near  Mugal Nagaon cross Sannur Shahabad road, Tq. Shahabad, Dist. Kalaburagi  i.e. 1 Km from Peth Siroor at that time the driver of the Mahindra Pick up van bearing No. KA.32. AA 5003  came  driving his vehicle from opposite side in high speed and in rash and negligent manner, endangering human lives and dashed to the Motor cycle  of the petitioner due to which the deceased Chandrakant fell down and sustained grievous injuries over ie.. grievous fracture over both legs, knee,  abdominal head,  back  and also grievous injuries over other parts of the body.  Immediately the deceased was brought t

In [81]:
import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the trained GPT-2 model and tokenizer
model_name = "./output_1"  # Path to the directory where your trained model is saved
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

def generate_legal_document(title_information, petitioner_information, respondent_information,
                            motor_accident_claim, victim_information, employment_information,
                            income_details, place_date_and_time_of_accident, travel_details,
                            medical_information, vehical_information, applicants_information,
                            relation_information, property_information, accident_detail,
                            description_of_deceased_and_accident_impact, description_of_accident_and_liability,
                            loss_and_compensation_claim, prayer, declaration, application_for_permission_to_engage_counsel, memo):
    document = {
        "Title_Information": title_information,
        "Petitioner_Information": petitioner_information,
        "Respondent_Information": respondent_information,
        "Motor_accident_claim": motor_accident_claim,
        "Victim_information": victim_information,
        "Employment_information": employment_information,
        "Income_details": income_details,
        "Place_date_and_time_of_accident": place_date_and_time_of_accident,
        "Travel_details": travel_details,
        "Medical_information": medical_information,
        "Vehical_information": vehical_information,
        "Applicants_information": applicants_information,
        "Relation_information": relation_information,
        "Property_information": property_information,
        "Accident_detail": accident_detail,
        "Description_of_deceased_and_accident_impact": description_of_deceased_and_accident_impact,
        "Description_of_accident_and_liability": description_of_accident_and_liability,
        "Loss_and_compensation_claim": loss_and_compensation_claim,
        "Prayer": prayer,
        "Declaration": declaration,
        "Application_for_permission_to_engage_counsel": application_for_permission_to_engage_counsel,
        "Memo": memo
    }

    # Generate content for each section using the model
    for section, prompt in document.items():
        if prompt:
            generated_text = model.generate(
                input_ids=tokenizer.encode(prompt, return_tensors='pt'),
                max_length=500,  # Adjust the length as needed
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                top_k=50,
                top_p=0.95,
            )
            generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
            document[section] = generated_text

    # Combine sections into the final document
    final_document = "\n\n".join([f"{section}:\n{content}" for section, content in document.items() if content])
    return final_document

# Define the Gradio interface
iface = gr.Interface(
    fn=generate_legal_document,
    inputs=[
        gr.inputs.Textbox(label="Title/Information"),
        gr.inputs.Textbox(label="Petitioner Information"),
        gr.inputs.Textbox(label="Respondent Information"),
        gr.inputs.Textbox(label="Motor Accident Claim"),
        gr.inputs.Textbox(label="Victim Information"),
        gr.inputs.Textbox(label="Employment Information"),
        gr.inputs.Textbox(label="Income Details"),
        gr.inputs.Textbox(label="Place, Date, and Time of Accident"),
        gr.inputs.Textbox(label="Travel Details"),
        gr.inputs.Textbox(label="Medical Information"),
        gr.inputs.Textbox(label="Vehicle Information"),
        gr.inputs.Textbox(label="Applicants Information"),
        gr.inputs.Textbox(label="Relation Information"),
        gr.inputs.Textbox(label="Property Information"),
        gr.inputs.Textbox(label="Accident Detail"),
        gr.inputs.Textbox(label="Description of Deceased and Accident Impact"),
        gr.inputs.Textbox(label="Description of Accident and Liability"),
        gr.inputs.Textbox(label="Loss and Compensation Claim"),
        gr.inputs.Textbox(label="Prayer"),
        gr.inputs.Textbox(label="Declaration"),
        gr.inputs.Textbox(label="Application for Permission to Engage Counsel"),
        gr.inputs.Textbox(label="Memo"),
    ],
    outputs=gr.outputs.Textbox(label="Generated Legal Document"),
    title="Legal Document Generator",
)

# Launch the Gradio interface
iface.launch()


In [84]:
import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the trained GPT-2 model and tokenizer
model_name = "./output_1"  # Path to the directory where your trained model is saved
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

def generate_legal_document(title_information, petitioner_information, respondent_information,
                             motor_accident_claim, victim_information, employment_information,
                             income_details, place_date_and_time_of_accident, travel_details,
                             medical_information, vehical_information, applicants_information,
                             relation_information, property_information, accident_detail,
                             description_of_deceased_and_accident_impact, description_of_accident_and_liability,
                             loss_and_compensation_claim, prayer, declaration, application_for_permission_to_engage_counsel, memo):
    document = {
        "Title_Information": title_information,
        "Petitioner_Information": petitioner_information,
        "Respondent_Information": respondent_information,
        "Motor_accident_claim": motor_accident_claim,
        "Victim_information": victim_information,
        "Employment_information": employment_information,
        "Income_details": income_details,
        "Place_date_and_time_of_accident": place_date_and_time_of_accident,
        "Travel_details": travel_details,
        "Medical_information": medical_information,
        "Vehical_information": vehical_information,
        "Applicants_information": applicants_information,
        "Relation_information": relation_information,
        "Property_information": property_information,
        "Accident_detail": accident_detail,
        "Description_of_deceased_and_accident_impact": description_of_deceased_and_accident_impact,
        "Description_of_accident_and_liability": description_of_accident_and_liability,
        "Loss_and_compensation_claim": loss_and_compensation_claim,
        "Prayer": prayer,
        "Declaration": declaration,
        "Application_for_permission_to_engage_counsel": application_for_permission_to_engage_counsel,
        "Memo": memo
    }

    # Generate content for each section using the model
    for section, prompt in document.items():
        if prompt:
            generated_text = model.generate(
                input_ids=tokenizer.encode(prompt, return_tensors='pt'),
                max_length=500,  # Adjust the length as needed
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                top_k=50,
                top_p=0.95,
            )
            generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
            document[section] = generated_text

    # Combine sections into the final document
    final_document = "\n\n".join([f"{section}:\n{content}" for section, content in document.items() if content])
    return final_document

# Define the Gradio interface
iface = gr.Interface(
    fn=generate_legal_document,
    inputs=[
        gr.inputs.Dropdown(label="Title/Information", choices=["Mr.", "Ms.", "Dr.", "Prof."]),
        gr.inputs.Textbox(label="Petitioner Information"),
        gr.inputs.Textbox(label="Respondent Information"),
        gr.inputs.Textbox(label="Motor Accident Claim"),
        gr.inputs.Checkbox(label="Victim Information"),
        gr.inputs.Checkbox(label="Employment Information"),
        gr.inputs.Checkbox(label="Income Details"),
        gr.inputs.Checkbox(label="Place, Date, and Time of Accident"),
        gr.inputs.Checkbox(label="Travel Details"),
        gr.inputs.Checkbox(label="Medical Information"),
        gr.inputs.Checkbox(label="Vehicle Information"),
        gr.inputs.Checkbox(label="Applicants Information"),
        gr.inputs.Checkbox(label="Relation Information"),
        gr.inputs.Checkbox(label="Property Information"),
        gr.inputs.Checkbox(label="Accident Detail"),
        gr.inputs.Textbox(label="Description of Deceased and Accident Impact"),
        gr.inputs.Textbox(label="Description of Accident and Liability"),
        gr.inputs.Textbox(label="Loss and Compensation Claim"),
        gr.inputs.Checkbox(label="Prayer"),
        gr.inputs.Checkbox(label="Declaration"),
        gr.inputs.Checkbox(label="Application for Permission to Engage Counsel"),
        gr.inputs.Checkbox(label="Memo"),
    ],
    outputs=gr.outputs.Textbox(label="Generated Legal Document"),
    title="Legal Document Generator",
)

# Launch the Gradio interface
iface.launch()


In [86]:
import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the trained GPT-2 model and tokenizer
model_name = "./output_1"  # Path to the directory where your trained model is saved
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

def generate_legal_document(
    title_information, petitioner_information, respondent_information,
    motor_accident_claim, victim_information, employment_information,
    income_details, place_date_and_time_of_accident, travel_details,
    medical_information, vehical_information, applicants_information,
    relation_information, property_information, accident_detail,
    description_of_deceased_and_accident_impact, description_of_accident_and_liability,
    loss_and_compensation_claim, prayer, declaration, application_for_permission_to_engage_counsel, memo
)
    # Your generation code remains the same

# Define the Gradio interface
iface = gr.Interface(
    fn=generate_legal_document,
    inputs=[
        gr.inputs.Textbox(label="Title/Information"),
        gr.inputs.Textbox(label="Petitioner Information"),
        gr.inputs.Textbox(label="Respondent Information"),
        gr.inputs.Textbox(label="Motor Accident Claim"),
        gr.inputs.Textbox(label="Victim Information"),
        gr.inputs.Textbox(label="Employment Information"),
        gr.inputs.Textbox(label="Income Details"),
        gr.inputs.Textbox(label="Place, Date, and Time of Accident"),
        gr.inputs.Textbox(label="Travel Details"),
        gr.inputs.Textbox(label="Medical Information"),
        gr.inputs.Textbox(label="Vehicle Information"),
        gr.inputs.Textbox(label="Applicants Information"),
        gr.inputs.Textbox(label="Relation Information"),
        gr.inputs.Textbox(label="Property Information"),
        gr.inputs.Textbox(label="Accident Detail"),
        gr.inputs.Textbox(label="Description of Deceased and Accident Impact"),
        gr.inputs.Textbox(label="Description of Accident and Liability"),
        gr.inputs.Textbox(label="Loss and Compensation Claim"),
        gr.inputs.Textbox(label="Prayer"),
        gr.inputs.Textbox(label="Declaration"),
        gr.inputs.Textbox(label="Application for Permission to Engage Counsel"),
        gr.inputs.Textbox(label="Memo"),
    ],
    outputs=gr.outputs.Textbox(label="Generated Legal Document"),
    title="Legal Document Generator",
)

# Launch the Gradio interface
iface.launch()


In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model_name = "/content/output_2"  # Path to the directory where your trained model is saved
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

def generate_legal_document():
    document = {}  # Initialize an empty dictionary to store document sections

    # Collect information from the user for different sections
    document['Title_Information'] = input("Enter title_information: ")
    document['Petitioner_Information'] = input("Enter petitioner_information: ")
    document['Respondent_Information'] = input("respondent_information: ")
    document['Accident_detail'] = input("Enter accident_detai: ")
    document['Description_of_deceased_and_accident_impact'] = input("Enter Description_of_deceased_and_accident_impact: ")
    document['Loss_and_compensation_claim'] = input("Enter Loss_and_compensation_claim: ")


    # Repeat the above line for other sections (Petitioner_Information, Respondent_Information, etc.)

    # Generate content for each section using the model
    for section, prompt in document.items():
        if prompt:
            generated_text = model.generate(
                input_ids=tokenizer.encode(prompt, return_tensors='pt'),
                max_length=500,  # Adjust the length as needed
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                top_k=50,
                top_p=0.95,
            )
            generated_text = tokenizer.decode(generated_text[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
            document[section] = generated_text

    # Combine sections into the final document
    final_document = "\n\n".join([f"{section}:\n{content}" for section, content in document.items() if content])
    return final_document

# Generate a legal document based on user input
generated_document = generate_legal_document()
print(generated_document)


Enter title_information: 2
Enter petitioner_information: w
respondent_information: w
Enter accident_detai: wd
Enter Description_of_deceased_and_accident_impact: 
Enter Loss_and_compensation_claim: dd


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [None]:

s

wd
ds