In [None]:
from pydantic import BaseModel
from typing import List
from llama_index.core.program import LLMTextCompletionProgram

# Schema for one entry of the Task B answer: a single anti-inflammatory peptide
# extracted from the context. `#` comments are used instead of a class docstring
# because pydantic copies class docstrings into the generated JSON schema.
class taskB(BaseModel):
    # Name of the peptide.
    peptideName: str
    # Sequence(s) of the peptide, kept as one free-form string.
    peptideSequences: str
    # Content related to the peptide (presumably quoted or summarised from the
    # context -- the prompt only asks for "related content").
    relevantContent: str

# Top-level structured answer expected from the LLM; passed as `output_cls`
# to PydanticOutputParser further down in this notebook.
class information(BaseModel):
    # Task A answer: the prompt asks for YES or NO (whether the context
    # contains an anti-inflammatory peptide). Stored as a plain string.
    taskA: str
    # Task B answer: one `taskB` entry per peptide. The annotation resolves to
    # the module-level `taskB` class even though the field shares its name.
    taskB: List[taskB]

In [None]:
import os
# Send HTTP and HTTPS traffic through the proxy at 192.168.1.254:7890 and
# limit CUDA to devices 0 and 1. Applied in a single update call.
os.environ.update(
    {
        "http_proxy": "http://192.168.1.254:7890",
        "https_proxy": "http://192.168.1.254:7890",
        "CUDA_VISIBLE_DEVICES": "0,1",
    }
)


In [None]:
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts import PromptTemplate
from transformers import BitsAndBytesConfig

# Path of the local checkpoint loaded by HuggingFaceLLM below.
openchat_35_0106 = "/mnt/nas1/models/openchat-3.5-0106"
selected_model = openchat_35_0106

SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Never generate offensive or foul language.
"""

# The checkpoint is OpenChat 3.5, so the wrapper follows the OpenChat
# conversation template ("GPT4 Correct User: ...<|end_of_turn|>GPT4 Correct
# Assistant:") rather than the Llama-2 "[INST]<<SYS>>" markers, which this
# model was not trained on. The system rules are placed inside the user turn.
# NOTE(review): template taken from the openchat-3.5-0106 model card -- confirm
# against the tokenizer's chat_template before relying on it.
query_wrapper_prompt = PromptTemplate(
    "GPT4 Correct User: " + SYSTEM_PROMPT + "\n{query_str}<|end_of_turn|>GPT4 Correct Assistant:"
)

# do_sample=False selects greedy decoding.
llm = HuggingFaceLLM(
    model_name = selected_model,
    tokenizer_name=selected_model,
    context_window = 8192,
    max_new_tokens = 4096,
    generate_kwargs = {"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt = query_wrapper_prompt,
    device_map = "auto",
)

# Register the model as the default LLM for llama_index components.
from llama_index.core import Settings
Settings.llm = llm

In [None]:
from llama_index.llms.llama_api import LlamaAPI

import os

# The API key is read from the LLAMA_API_KEY environment variable so that the
# credential is never stored in the notebook. A key that was previously
# committed in plain text should be treated as compromised and rotated.
llama_api_key = os.environ.get("LLAMA_API_KEY")
if not llama_api_key:
    raise RuntimeError(
        "Set the LLAMA_API_KEY environment variable before running this cell."
    )

# Running this cell replaces whatever LLM was previously set as the default.
llm = LlamaAPI(api_key = llama_api_key, model = 'llama-7b-chat', temperature = 0.0)
from llama_index.core import Settings
Settings.llm = llm

In [None]:
# Prompt for the structured extraction. The field names mentioned here
# (taskA, taskB, peptideName, peptideSequences, relevantContent) match the
# `information` / `taskB` pydantic models so that the instructions agree with
# the schema used by PydanticOutputParser. The earlier "generate some example
# information" instruction was dropped because it contradicted the requirement
# to use only the given context.
# Trailing backslashes join the lines, so the template is a single line of text.
prompt_template_str = """\
The answers must be based solely on the information provided in the given context and must not rely on any prior knowledge or assumptions. \
The given context is: {contexts}. \
Please only use the given content to complete the following tasks. \
Task A: Only use YES or NO to answer whether the given content contains an anti-inflammatory peptide. Do not add redundant content. \
Task B: List the names, sequences, and relevant content of all anti-inflammatory peptides, using the fields peptideName, peptideSequences and relevantContent for each peptide. \
Return the answers as taskA and taskB. \
"""

from llama_index.core.output_parsers import PydanticOutputParser

# Program that fills the template, calls the default LLM and parses the
# completion into an `information` instance.
program = LLMTextCompletionProgram.from_defaults(
    output_parser=PydanticOutputParser(output_cls=information),
    prompt_template_str=prompt_template_str,
    verbose=True,
)

# Single abstract used as a smoke test for the extraction prompt.
example_abstract = "Boosting transcorneal permeability and pharmacological activity of drug poses a great challenge in the field of ocular drug delivery. In the present study, we propose a drug-peptide supramolecular hydrogel based on anti-inflammatory drug, dexamethasone (Dex), and Arg-Gly-Asp (RGD) motif for boosting transcorneal permeability and pharmacological activity via the ligand-receptor interaction. The drug-peptide (Dex-SA-RGD/RGE) supramolecular hydrogel comprised of uniform nanotube architecture formed spontaneously in phosphate buffered saline (PBS, pH\u00a0=\u00a07.4) without external stimuli. Upon storage at 4\u00a0\u00b0C, 25\u00a0\u00b0C, and 37\u00a0\u00b0C for 70 days, Dex-SA-RGD in hydrogel did not undergo significant hydrolysis, suggesting great long-term stability. In comparison to Dex-SA-RGE, Dex-SA-RGD exhibited a more potent in vitro anti-inflammatory efficacy in lipopolysaccharide (LPS)-activated RAW 264.7 macrophages via the inhibition of nuclear factor \u043aB (NF-\u03baB) signal pathway. More importantly, using drug-peptide supramolecular hydrogel labeled with 7-nitro-2,1,3-benzoxadiazole (NBD), the Dex-SA-K(NBD)RGD showed increased performance in terms of integrin targeting and cellular uptake compared to Dex-SA-K(NBD)RGE, as revealed by cellular uptake assay. On topical instillation in rabbit's eye, the proposed Dex-SA-K(NBD)RGD could effectively enhance the transcorneal distribution and permeability with respect to the Dex-SA-K(NBD)RGE. Overall, our findings demonstrate the performance of the ligand-receptor interaction for boosting transcorneal permeability and pharmacological activity of drug."
output = program(contexts = example_abstract)
# Variant for retrieved nodes (the keyword must be `contexts` to match the template variable):
# output = program(contexts = '{}'.format(''.join(['Content {}: {}'.format(index, node.text) for index,node in enumerate(response_nodes)])))
print(output)

In [1]:
import json
# Load the parsed PMC articles. The encoding is pinned to UTF-8 so that
# decoding does not depend on the locale of the machine running the notebook.
with open('/mnt/nas1/patent_data/anti-inflammation_peptide/parsed_pmc.json', 'r', encoding='utf-8') as f:
    js_file = json.load(f)
# Number of top-level entries in the loaded JSON.
print(len(js_file))

1491


In [10]:
# Flatten every parsed article that has a PMC id into a record of the form
# {"pmc": ..., "pmid": ..., "paragraph": [{"section": ..., "text": ...}, ...]}.
list_info = []
for item in js_file:
    # Articles without a PMC id are skipped.
    if item.get('pmc') is None:
        continue

    paragraphs = []
    for sec in item['paragraph']:
        # A paragraph sits directly under its root section when all three
        # section labels are identical; its text is then kept unchanged.
        at_root = sec['root_section'] == sec['father_section'] == sec['section']
        if at_root:
            text = sec['text']
        else:
            # Any other paragraph is prefixed with its own section title. This
            # also covers section == father_section != root_section, which
            # previously matched no branch and was stored with empty text.
            text = "{}: {}".format(sec['section'], sec['text'])
        paragraphs.append({"section": sec['root_section'], "text": text})

    # Named `record` rather than `information`: that name already refers to
    # the pydantic model defined earlier in this notebook, and rebinding it to
    # a dict would break a later re-run of the extraction program cell.
    record = {"pmc": item['pmc'], "pmid": item['pmid'], "paragraph": paragraphs}
    list_info.append(record)
print(len(list_info))

1486


In [12]:
# Write the merged records to disk as JSON with a 4-space indent.
merged_json_path = '/mnt/nas1/patent_data/anti-inflammation_peptide/parsed_pmc_merge.json'
with open(merged_json_path, 'w') as out_handle:
    json.dump(list_info, out_handle, indent=4)