In [1]:
import ast
import json
import os

from promptify.models.nlp.openai_model import OpenAI
from promptify.prompts.nlp.prompter import Prompter

In [2]:
# Read the OpenAI key from the environment instead of hardcoding it in the
# notebook, so the secret never ends up in a saved or shared copy.
# Falls back to "" (the original placeholder) when OPENAI_API_KEY is unset.
model = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))
# The prompter wraps the model; later cells call its generate_prompt() and fit().
nlp_prompter = Prompter(model)

In [3]:
# Load the binary-classification samples. The context manager closes the
# file handle, which a bare open() call would leave dangling.
with open("data/binary.json", 'r') as binary_file:
    data = json.load(binary_file)

In [4]:
# Number of samples loaded into `data`.
len(data)

10

In [5]:
# Use the first three samples as few-shot examples, echoing each raw record
# so its fields are visible in the cell output.
few_shot_samples = data[:3]
for record in few_shot_samples:
    print(record, "\n")

# Each example is a (text, label) pair taken from the record.
examples = [(record['text'], record['labels']) for record in few_shot_samples]

{'text': 'Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'labels': 'negative', 'score': '', 'complexity': ''} 

{'text': 'Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.', 'labels': 'positive', 'score': '', 'complexity': ''} 

{'text': 'So disappointed in wwe summerslam! I want to see john cena wins his 16th title', 'labels': 'negative', 'score': '', 'complexity': ''} 



In [6]:
# Render the binary-classification template into the final prompt text and
# print it for inspection.
prompt = nlp_prompter.generate_prompt(
    'binary_classification.jinja',
    label_0="positive",
    label_1="negative",
    examples=examples,
    text_input="Amazing customer service.",
    # Spelling fixed (was "Classificaion"); this string is rendered as the
    # first line of the prompt.
    description="Binary Classification System",
)
print(prompt)

Binary Classificaion System
You are a highly intelligent and accurate Binary Classification system. You take Passage as input and classify that as either positive or negative Category. Your output format is only [{'C':Category}] form, no other form.

Examples:

Input: Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.
Output: [{'C': 'negative' }]

Input: Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.
Output: [{'C': 'positive' }]

Input: So disappointed in wwe summerslam! I want to see john cena wins his 16th title
Output: [{'C': 'negative' }]

Input: Amazing customer service.
Output:


In [7]:
# Run the binary-classification template through the prompter's fit();
# the 'text' field of the result is parsed in the following cell.
output = nlp_prompter.fit(
    'binary_classification.jinja',
    label_0="positive",
    label_1="negative",
    examples=examples,
    text_input="Amazing customer service.",
    model_name="text-davinci-003",
)

In [8]:
# The response text is produced by an external model, so parse it as a Python
# literal with ast.literal_eval instead of eval(): well-formed output such as
# [{'C': 'positive'}] gives the same result, but no arbitrary code can run.
ast.literal_eval(output['text'].strip())

[{'C': 'positive'}]

In [9]:
# Load the multiclass samples (this rebinds `data`). The context manager
# closes the file handle, which a bare open() call would leave dangling.
with open("data/multiclass.json", 'r') as multiclass_file:
    data = json.load(multiclass_file)

In [10]:
# Collect the distinct category names that occur in the multiclass samples.
labels = {sample['category'] for sample in data}
labels

{'hate', 'joy', 'neutral', 'sadness', 'surprise', 'worry'}

In [15]:
# Render the multiclass template with the label set and a sample input,
# then print the resulting prompt text.
prompt = nlp_prompter.generate_prompt(
    'multiclass_classification.jinja',
    labels=labels,
    text_input="Amazing customer service.",
)
print(prompt)

You are a highly intelligent and accurate Multiclass Classification system. You take Passage as input and classify that as one of the following appropriate Categories:
{'surprise', 'neutral', 'hate', 'joy', 'worry', 'sadness'}
Your output format is only [{{'C': Appropriate Category from the list of provided Categories}}] form, no other form.

Input: Amazing customer service.
Output:


In [16]:
# Run the multiclass template through the prompter's fit(); the 'text'
# field of the result is parsed in the following cell.
output = nlp_prompter.fit(
    'multiclass_classification.jinja',
    labels=labels,
    text_input="Amazing customer service.",
    model_name="text-davinci-003",
)

In [18]:
# The response text is produced by an external model, so parse it as a Python
# literal with ast.literal_eval instead of eval(): well-formed output such as
# [{'C': 'joy'}] gives the same result, but no arbitrary code can run.
ast.literal_eval(output['text'].strip())

[{'C': 'joy'}]

In [52]:
# Load the paragraph samples (the list under the top-level 'samples' key).
# The context manager closes the file handle, which a bare open() call
# would leave dangling.
with open("data/paragraph.json", 'r') as paragraph_file:
    data = json.load(paragraph_file)['samples']

In [53]:
# Load the multilabel annotations (the list under the top-level 'samples'
# key). The context manager closes the file handle, which a bare open()
# call would leave dangling.
with open("data/optimized_multilabel.json", 'r') as multilabel_file:
    labels = json.load(multilabel_file)['samples']

In [54]:
# Join paragraphs to their annotations on 'id'. Every matching
# (paragraph, annotation) combination yields one example, ordered by
# paragraph first and annotation second. The annotation payload is
# serialised to a JSON string.
examples = [
    (entry['paragraph'], json.dumps(annotation['data']))
    for entry in data
    for annotation in labels
    if entry['id'] == annotation['id']
]

In [55]:
# Inspect the first (paragraph, JSON-encoded label data) pair.
examples[0]

('Leptomeningeal metastases (LM) occur in patients with breast cancer (BC) and lung cancer (LC). The cerebrospinal fluid (CSF) tumour microenvironment (TME) of LM patients is not well defined at a single-cell level. We did an analysis based on single-cell RNA sequencing (scRNA-seq) data and four patient-derived CSF samples of idiopathic intracranial hypertension (IIH)',
 '[{"main class": "Health", "1": "Medicine", "2": "Oncology", "3": "Metastasis", "4": "Breast cancer", "5": "Lung cancer", "6": "Cerebrospinal fluid", "7": "Tumor microenvironment", "8": "Single-cell RNA sequencing", "9": "Idiopathic intracranial hypertension", "branch": "Health", "group": "Clinical medicine"}]')

In [56]:
# Candidate categories offered to the multilabel prompt. They match the
# values under keys "1" to "9" of the first example's label data.
labels = [
    'Medicine',
    'Oncology',
    'Metastasis',
    'Breast cancer',
    'Lung cancer',
    'Cerebrospinal fluid',
    'Tumor microenvironment',
    'Single-cell RNA sequencing',
    'Idiopathic intracranial hypertension',
]

In [62]:
# Render the multilabel template for the healthcare domain, using the first
# example as the single few-shot demonstration, then print the prompt text.
prompt = nlp_prompter.generate_prompt(
    'multilabel_classification.jinja',
    n_output_labels=len(labels),
    domain="healthcare",
    labels=labels,
    examples=[examples[0]],
    text_input="The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection",
)
print(prompt)

You are a highly intelligent and accurate healthcare domain multi-label classification system. You take Passage as input and classify that into 9 appropriate healthcare domain Categories from the given category list:
['Medicine', 'Oncology', 'Metastasis', 'Breast cancer', 'Lung cancer', 'Cerebrospinal fluid', 'Tumor microenvironment', 'Single-cell RNA sequencing', 'Idiopathic intracranial hypertension'].
Your output format is only [{'main class': Main Classification Category ,'1': 2nd level Classification Category, '2': 3rd level Classification Category,...,'branch' : Appropriate branch of the Passage ,'group': Appropriate Group of the Passage}] form, no other form.

Examples:

Input: Leptomeningeal metastases (LM) occur in patients with breast cancer (BC) and lung cancer (LC). The cerebrospinal fluid (CSF) tumour microenvironment (TME) of LM patients is not well defined at a single-cell level. We did an analysis based on single-cell RNA sequencing (scRNA-seq) data and four patient-der

In [63]:
# Run the multilabel template through the prompter's fit() with the same
# arguments used to render the prompt.
# NOTE(review): unlike the earlier fit() calls, no model_name is passed here.
# Presumably the library default applies; confirm that is intended.
output = nlp_prompter.fit(
    'multilabel_classification.jinja',
    n_output_labels=len(labels),
    domain="healthcare",
    labels=labels,
    examples=[examples[0]],
    text_input="The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection",
)

In [64]:
# Show the full response dict (token counts plus the generated 'text').
output

{'prompt_tokens': 430,
 'completion_tokens': 59,
 'total_tokens': 489,
 'text': "\n\n[[{'main class': Main Classification Category ,'1': 2nd level Classification Category, '2': 3rd level Classification Category,...,'branch' : Appropriate branch of the Passage ,'group': Appropriate Group of the Passage}] form, no other form."}