In [2]:
from LLM import LLM
from BusinessRuleExtractor import BusinessRuleExtractor
from PyPDF2 import PdfReader

def read_pdf(file_path):
    """Return the text of every page of a PDF as one string.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        str: The extracted text of each page in document order, with a
        newline appended after every page. Empty string when the document
        has no pages.
    """
    reader = PdfReader(file_path)
    # A page without extractable text may give back an empty or missing
    # value; treat both as "" so a single blank page cannot abort the read.
    # Joining once also avoids rebuilding the string on every iteration.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Example usage: extract business rules from a PDF using LLM.OLLAMA.
# Replace this with the path to your PDF document
file_path = "resources/business_rules_document.pdf"

# Read the content of the PDF document
legal_text = read_pdf(file_path)

# Create a BusinessRuleExtractor configured with LLM.OLLAMA
ruleExtractor = BusinessRuleExtractor(LLM.OLLAMA)

# Run the extraction over the full document text.
# NOTE(review): the recorded output is a list of strings, presumably one
# model reply per text segment — confirm against BusinessRuleExtractor.
rules = ruleExtractor.extract_business_rules_from_document(legal_text)

# Print the extraction result exactly as returned
print(rules)

["I'm happy to help, but you haven't provided a text segment for me to extract the rules from. Could you please provide the text, and I'll do my best to convert it into Prolog format?", 'Here are the extracted rules in Prolog format:\n\n```prolog\n% Rule 1: Customer must provide valid identification\nvalidate_identity(X) :- identity(X).\nidentity(X).\n\n% Rule 2: Customer must agree to terms and conditions\nagree_to_terms(X) :- accept_terms(X).\naccept_terms(X).\n```\n\nNote that these Prolog rules are very simple and do not include any actual validation or verification logic. In a real-world implementation, the `validate_identity/1` and `agree_to_terms/1` predicates would contain more complex definitions to enforce the actual requirements of the rules.\n\nAlso, please note that in Prolog, it is conventional to use lowercase letters for variable names and uppercase letters for predicate names. I have followed this convention in the above code snippet.', 'Unfortunately, I couldn\'t find

In [6]:
from LLM import LLM
from BusinessRuleExtractor import BusinessRuleExtractor
from PyPDF2 import PdfReader

def read_pdf(file_path):
    """Return the text of every page of a PDF as one string.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        str: The extracted text of each page in document order, with a
        newline appended after every page. Empty string when the document
        has no pages.
    """
    reader = PdfReader(file_path)
    # A page without extractable text may give back an empty or missing
    # value; treat both as "" so a single blank page cannot abort the read.
    # Joining once also avoids rebuilding the string on every iteration.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Example usage: extract business rules from a PDF using LLM.MISTRAL.
# Replace this with the path to your PDF document
file_path = "resources/business_rules_document.pdf"

# Read the content of the PDF document
legal_text = read_pdf(file_path)

# Create a BusinessRuleExtractor configured with LLM.MISTRAL
# NOTE(review): the recorded run of this cell ended in
# "ConnectError: [Errno 61] Connection refused" — presumably the model
# endpoint was not reachable; confirm the backend is up before re-running.
ruleExtractor = BusinessRuleExtractor(LLM.MISTRAL)

# Run the extraction over the full document text
rules = ruleExtractor.extract_business_rules_from_document(legal_text)

# Print the extraction result exactly as returned
print(rules)

ConnectError: [Errno 61] Connection refused

In [4]:
from LLM import LLM
from BusinessRuleExtractor import BusinessRuleExtractor
from PyPDF2 import PdfReader

def read_pdf(file_path):
    """Return the text of every page of a PDF as one string.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        str: The extracted text of each page in document order, with a
        newline appended after every page. Empty string when the document
        has no pages.
    """
    reader = PdfReader(file_path)
    # A page without extractable text may give back an empty or missing
    # value; treat both as "" so a single blank page cannot abort the read.
    # Joining once also avoids rebuilding the string on every iteration.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Example usage: extract business rules from a PDF using LLM.GPT.
# Replace this with the path to your PDF document
file_path = "resources/business_rules_document.pdf"

# Read the content of the PDF document
legal_text = read_pdf(file_path)

# Create a BusinessRuleExtractor configured with LLM.GPT
ruleExtractor = BusinessRuleExtractor(LLM.GPT)

# Run the extraction over the full document text.
# NOTE(review): the recorded output is a list of strings, most of them
# fenced Prolog snippets — presumably one model reply per text segment.
rules = ruleExtractor.extract_business_rules_from_document(legal_text)

# Print the extraction result exactly as returned
print(rules)

['Sure, please provide the text from the Business Rules Document for analysis and extraction of the Prolog rules.', '```prolog\n% 1. Customers must provide valid identification before opening an account.\n%    A customer can open an account only if they have a valid ID.\ncan_open_account(CustomerID) :- \n    customer(CustomerID), \n    valid_id(CustomerID).\n```', '```prolog\n% Transactions Reporting to Compliance\n\n% For a given transaction, check amount and determine if it needs reporting.\nreport_to_compliance(TransactionID) :-\n    transaction(TransactionID, Amount),\n    Amount > 10000.\n```', '```prolog\n% Loan Applications and Credit Scores\n\n% Rule: Loan applications can only be processed if the applicant’s credit score >= 650.\ncanProcessLoan(ApplicantID) :- \n    loan_applicant(ApplicantID),\n    credit_score(ApplicantID, Score),\n    Score >= 650.\n```', '```prolog\n% Employees are not allowed to approve their own expense reports.\n% An employee can approve an expense repo

## Structured Output with GPT & Instructor
*Using OpenAI's GPT with Pydantic models for structured data extraction*


In [4]:
from LLM import LLM
from BusinessRuleExtractor import BusinessRuleExtractor
from PyPDF2 import PdfReader
import structured_output
import json


def read_pdf(file_path):
    """Return the text of every page of a PDF as one string.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        str: The extracted text of each page in document order, with a
        newline appended after every page. Empty string when the document
        has no pages.
    """
    reader = PdfReader(file_path)
    # A page without extractable text may give back an empty or missing
    # value; treat both as "" so a single blank page cannot abort the read.
    # Joining once also avoids rebuilding the string on every iteration.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Example usage: extract business rules with LLM.GPT, then structure them.
# Replace this with the path to your PDF document
file_path = "resources/business_rules_document.pdf"

# Read the content of the PDF document
legal_text = read_pdf(file_path)

# Create a BusinessRuleExtractor configured with LLM.GPT
ruleExtractor = BusinessRuleExtractor(LLM.GPT)

# Run the extraction over the full document text
rules = ruleExtractor.extract_business_rules_from_document(legal_text)

# Convert the extracted rules into a structured object.
# NOTE(review): the result exposes model_dump(), so it is presumably a
# Pydantic model — confirm in structured_output.
structured_rules = structured_output.extract_rules(rules)
print("\n=== Rules in JSON format ===")
# Dump the structured rules to a plain data structure and pretty-print as JSON
print(json.dumps(structured_rules.model_dump(), indent=2))


=== Rules in JSON format ===
{
  "rules": [
    {
      "rule_id": "R1",
      "description": "Customers must provide valid identification before opening an account. A customer can open an account only if they have a valid ID.",
      "head_predicate": "can_open_account",
      "head_arguments": [
        "CustomerID"
      ],
      "conditions": [
        {
          "predicate": "customer",
          "arguments": [
            "CustomerID"
          ],
          "comparison": null,
          "comparison_value": null
        },
        {
          "predicate": "valid_id",
          "arguments": [
            "CustomerID"
          ],
          "comparison": null,
          "comparison_value": null
        }
      ]
    },
    {
      "rule_id": "R2",
      "description": "All transactions above $10,000 must be reported to the compliance department.",
      "head_predicate": "report_to_compliance",
      "head_arguments": [
        "TransactionID"
      ],
      "conditions": [
      

## LangChain Integration with GPT
This section uses LangChain to run the extraction as a chain of steps. The same functionality could be achieved with direct API calls, so the LangChain abstraction layer is superfluous here.


In [5]:
from pypdf import PdfReader  
from LangChainModel import BusinessRulesProcessor

def read_pdf(file_path):
    """Return the text of every page of a PDF as one string.

    Args:
        file_path: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        str: The extracted text of each page in document order, with a
        newline appended after every page. Empty string when the document
        has no pages.
    """
    reader = PdfReader(file_path)
    # A page without extractable text may give back an empty or missing
    # value; treat both as "" so a single blank page cannot abort the read.
    # Joining once also avoids rebuilding the string on every iteration.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)


# Path of the PDF to process in this example
file_path = "resources/Geschaeftsregeln_Dokument.pdf"

# Read the content of the PDF document
legal_text = read_pdf(file_path)

# Initialize the processor and process the document
# NOTE(review): the recorded run prints progress lines for translating,
# structuring and extracting — presumably emitted inside process_document.
processor = BusinessRulesProcessor()
result = processor.process_document(legal_text)

# Print the processing result under a heading
print("\nExtracted Prolog Rules:")
print(result)


Checking language and translating if needed...
Structuring text...
Extracting Prolog rules...

Extracted Prolog Rules:
```prolog
% Customer Account Rules
% Customers must present a valid ID before opening an account.
can_open_account(CustomerID) :-
    customer(CustomerID),
    valid_id(CustomerID).

% Transaction Reporting
% All transactions over 10,000 USD must be reported to the Compliance Department.
report_to_compliance(TransactionID) :-
    transaction(TransactionID, Amount),
    Amount > 10000.

% Credit Application Process
% Credit applications can only be processed if the applicant has a credit score of 650 or higher.
can_process_loan(ApplicantID) :-
    loan_applicant(ApplicantID),
    credit_score(ApplicantID, Score),
    Score >= 650.

% Employee Expenses
% Employees are not allowed to approve their own expense reports.
can_approve_expense(EmployeeID, ReportID) :-
    expense_report(ReportID, Submitter),
    EmployeeID \= Submitter.

% Refund Policy
% Refunds will only be g