In [None]:
!pip install openai
!pip install --upgrade pip
!pip install python-dotenv

In [8]:
import os
import openai
import tiktoken
from dotenv import load_dotenv, find_dotenv

# Load variables from a local .env file; the return value is bound to `_`
# because it is not used.
_ = load_dotenv(find_dotenv())  # read local .env file

# Indexing os.environ raises KeyError right here if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [16]:
import tiktoken
import fitz
import shutil, random, os

In [21]:
# Module-level OpenAI client; get_completion() below sends its request through it.
client = openai.OpenAI()


def get_completion(prompt, model="gpt-3.5-turbo-1106", temperature=0):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the ``content`` of the one user message.
        model: Name of the chat model passed to the API.
        temperature: Sampling temperature forwarded to the API. The default
            of 0 is the value that used to be hard-coded here.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model, messages=messages, temperature=temperature
    )
    return response.choices[0].message.content

In [15]:
# Sample passage to summarize. Each trailing backslash must be the very last
# character on its line so Python treats it as a line continuation; a space
# after it makes "\ " an invalid escape sequence and leaves literal
# backslashes and newlines in the prompt.
text = """
You should express what you want a model to do by \
providing instructions that are as clear and \
specific as you can possibly make them. \
This will guide the model towards the desired output, \
and reduce the chances of receiving irrelevant \
or incorrect responses. Don't confuse writing a \
clear prompt with writing a short prompt. \
In many cases, longer prompts provide more clarity \
and context for the model, which can lead to \
more detailed and relevant outputs.
"""
# The passage is wrapped in triple backticks inside the prompt.
prompt = f"""
Summarize the text delimited by triple backticks \
into a single sentence.
```{text}```
"""
response = get_completion(prompt)
print(response)

To guide a model towards the desired output and reduce irrelevant or incorrect responses, it is important to provide clear and specific instructions, which can be achieved through longer prompts that offer more clarity and context.


  Building code skeleton and prompt experimentation

In [18]:
# convert letter ruling context PDF to text
# Open the document in a `with` block so it is closed once the text has been
# extracted, and join the per-page text in one pass instead of repeated `+=`.
with fitz.open("/Users/st414/Documents/PLR/plr_literature.pdf") as pdf_to_convert:
    letterRuling_context = "".join(page.get_text() for page in pdf_to_convert)

##### PDF 1

In [19]:
# convert PLR PDF to text
# The `with` block closes the document after extraction; the text of all pages
# is concatenated in page order.
with fitz.open("/Users/st414/Documents/PLR/test_plrs/PLR 9428031.pdf") as pdf_to_convert:
    plr_9428031 = "".join(page.get_text() for page in pdf_to_convert)

In [52]:
# Prompt variant "edit 1": classify PLR 9428031 and ask for JSON whose keys are
# Adverse / Non-Adverse. The literature is wrapped in $$$ and the ruling in
# triple backticks.
prompt = f"""
Your task is to classify letter rulings as adverse or non-adverse by using
knowledge and context from the literature provided to you below, delimited
by triple dollar signs.

Literature: $$${letterRuling_context}$$$

Below is the letter ruling, delimited by triple backticks, which has to be classified as Adverse or Non Adverse.

Letter Ruling: ```{plr_9428031}```

Provide your output in json format with the keys: Adverse or Non-Adverse.
"""

# Send the prompt with get_completion's default model and show the raw reply.
response = get_completion(prompt)
print(response)

{
  "Adverse": false
}


In [53]:
# Prompt variant "edit 2": same task for PLR 9428031, but the JSON is requested
# with the PLR number as the key and Adverse / Non-Adverse as the value.
# NOTE(review): the prompt does not state the PLR number explicitly, so the
# model presumably has to take it from the ruling text — confirm.
prompt = f"""
Your task is to classify letter rulings as adverse or non-adverse by using
knowledge and context from the literature provided to you below, delimited
by triple dollar signs.

Literature: $$${letterRuling_context}$$$

Below is the letter ruling, delimited by triple backticks, which has to be classified as Adverse or Non Adverse.

Letter Ruling: ```{plr_9428031}```

Provide your output in json format with the key as PLR number and values: Adverse or Non-Adverse.
"""

# Send the prompt with get_completion's default model and show the raw reply.
response = get_completion(prompt)
print(response)

{
  "PLR 9428031": "Non-Adverse"
}


##### PDF 2

In [28]:
# convert PLR PDF to text
# The `with` block closes the document after extraction; the text of all pages
# is concatenated in page order.
with fitz.open("/Users/st414/Documents/PLR/test_plrs/PLR 200224023.pdf") as pdf_to_convert:
    plr_200224023 = "".join(page.get_text() for page in pdf_to_convert)

In [35]:
# Shorter prompt variant for PLR 200224023: asks for a JSON classification
# (Adverse / Non-Adverse) without specifying which keys the JSON should use.
prompt = f"""
Your task is to classify letter rulings as adverse or non-adverse by using
knowledge and context from the literature provided to you below, delimited
by triple dollar signs.

Literature: $$${letterRuling_context}$$$

Classify the letter ruling below, delimited by triple 
backticks, in JSON format as Adverse or Non-Adverse.

Letter Ruling: ```{plr_200224023}```
"""

# Send the prompt with get_completion's default model and show the raw reply.
response = get_completion(prompt)
print(response)

{
  "LetterRuling": "Adverse"
}


In [54]:
# Prompt variant "edit 2" applied to PLR 200224023: JSON is requested with the
# PLR number as the key and Adverse / Non-Adverse as the value.
# NOTE(review): the exact key format is not pinned by the prompt, so the key
# may or may not carry a "PLR " prefix.
prompt = f"""
Your task is to classify letter rulings as adverse or non-adverse by using
knowledge and context from the literature provided to you below, delimited
by triple dollar signs.

Literature: $$${letterRuling_context}$$$

Below is the letter ruling, delimited by triple backticks, which has to be classified as Adverse or Non Adverse.

Letter Ruling: ```{plr_200224023}```

Provide your output in json format with the key as PLR number and values: Adverse or Non-Adverse.
"""

# Send the prompt with get_completion's default model and show the raw reply.
response = get_completion(prompt)
print(response)

{
  "200224023": "Adverse"
}


##### PDF 3

In [56]:
# convert PLR PDF to text
# The `with` block closes the document after extraction; the text of all pages
# is concatenated in page order. The path (including the space before ".pdf")
# is kept exactly as it was.
with fitz.open("/Users/st414/Documents/PLR/test_plrs/PLR 201549019 .pdf") as pdf_to_convert:
    plr_201549019 = "".join(page.get_text() for page in pdf_to_convert)

In [58]:
# Prompt variant "edit 2" applied to PLR 201549019: JSON is requested with the
# PLR number as the key and Adverse / Non-Adverse as the value. The literature
# is wrapped in $$$ and the ruling in triple backticks.
prompt = f"""
Your task is to classify letter rulings as adverse or non-adverse by using
knowledge and context from the literature provided to you below, delimited
by triple dollar signs.

Literature: $$${letterRuling_context}$$$

Below is the letter ruling, delimited by triple backticks, which has to be classified as Adverse or Non Adverse.

Letter Ruling: ```{plr_201549019}```

Provide your output in json format with the key as PLR number and values: Adverse or Non-Adverse.
"""

# Send the prompt with get_completion's default model and show the raw reply.
response = get_completion(prompt)
print(response)

{
  "PLR 201549019": "Non-Adverse"
}


##### PDF 4

In [60]:
# convert PLR PDF to text
# The `with` block closes the document after extraction; the text of all pages
# is concatenated in page order.
with fitz.open("/Users/st414/Documents/PLR/test_plrs/PLR 202311001.pdf") as pdf_to_convert:
    plr_202311001 = "".join(page.get_text() for page in pdf_to_convert)

In [61]:
# Prompt variant "edit 2" applied to PLR 202311001: JSON is requested with the
# PLR number as the key and Adverse / Non-Adverse as the value. The literature
# is wrapped in $$$ and the ruling in triple backticks.
prompt = f"""
Your task is to classify letter rulings as adverse or non-adverse by using
knowledge and context from the literature provided to you below, delimited
by triple dollar signs.

Literature: $$${letterRuling_context}$$$

Below is the letter ruling, delimited by triple backticks, which has to be classified as Adverse or Non Adverse.

Letter Ruling: ```{plr_202311001}```

Provide your output in json format with the key as PLR number and values: Adverse or Non-Adverse.
"""

# Send the prompt with get_completion's default model and show the raw reply.
response = get_completion(prompt)
print(response)

{
  "PLR 202311001": "Adverse"
}


##### PDF 5

In [62]:
# convert PLR PDF to text
# The `with` block closes the document after extraction; the text of all pages
# is concatenated in page order.
with fitz.open("/Users/st414/Documents/PLR/test_plrs/PLR 202014005.pdf") as pdf_to_convert:
    plr_202014005 = "".join(page.get_text() for page in pdf_to_convert)

In [63]:
# Prompt variant "edit 2" applied to PLR 202014005: JSON is requested with the
# PLR number as the key and Adverse / Non-Adverse as the value.
# NOTE(review): the exact key format is not pinned by the prompt, so the key
# may or may not carry a "PLR " prefix.
prompt = f"""
Your task is to classify letter rulings as adverse or non-adverse by using
knowledge and context from the literature provided to you below, delimited
by triple dollar signs.

Literature: $$${letterRuling_context}$$$

Below is the letter ruling, delimited by triple backticks, which has to be classified as Adverse or Non Adverse.

Letter Ruling: ```{plr_202014005}```

Provide your output in json format with the key as PLR number and values: Adverse or Non-Adverse.
"""

# Send the prompt with get_completion's default model and show the raw reply.
response = get_completion(prompt)
print(response)

{
  "202014005": "Non-Adverse"
}
