# iLykei Lecture Series   
# Generative AI App Client   

### Y. Balasanov, A. Kobyshev, M. Tselishchev, &copy; iLykei 2023

In [1]:
# Compile the protobuf definitions (*.proto) in this folder into Python modules written to ./
!protoc --python_out=./ *.proto

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from scipy.spatial.distance import cdist
import openai
from openai import OpenAI
import pinecone
import os
import json
import numpy as np
import time

  from tqdm.autonotebook import tqdm


### Load PDF

In [2]:
# Read the source PDF from disk with PyPDFLoader
pdf_path = "./Generative_AI_App_Doc.pdf"
loader = PyPDFLoader(pdf_path)
data = loader.load()

### Split Document

In [3]:
# Chunking settings; sizes are measured with len(), i.e. in characters
splitter_settings = {
    "chunk_size": 1000,
    "chunk_overlap": 50,
    "length_function": len,
    "add_start_index": True,
}

# Break the loaded document into smaller texts
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
texts = text_splitter.split_documents(data)

### Get Embeddings

In [4]:
def _read_api_key(path):
    """Return the first line of the file at `path`, stripped of surrounding whitespace.

    Raises
    ------
    ValueError
        If that first line is empty, so a blank key file is reported here
        rather than as an authentication failure further down the notebook.
    """
    with open(path, 'r') as f:
        key = f.readline().strip()
    if not key:
        raise ValueError(f"No API key found on the first line of {path}")
    return key


# Setup OpenAI API Key: set both on the openai module and in the environment
openai_api_key = _read_api_key("OPENAI_KEY.txt")
openai.api_key = openai_api_key
os.environ['OPENAI_API_KEY'] = openai_api_key

# Setup Pinecone: initialise the client and open the index used by this app
# (pinecone is already imported in the notebook's import cell)
my_Pinecone_key = _read_api_key("my_pinecone_key.txt")
pinecone.init(api_key=my_Pinecone_key, environment='gcp-starter')
index = pinecone.Index(index_name='generative-ai-app')

In [5]:
# Initialize Embeddings Model
# NOTE(review): disallowed_special=() is presumably forwarded to the tokenizer so that
# text resembling special tokens is embedded as plain text instead of raising — confirm
# against the installed langchain version.
embeddings_model = OpenAIEmbeddings(disallowed_special=())

### Upload to Pinecone

In [6]:
from langchain.vectorstores import Pinecone

# Build the Pinecone vector store from the chunks in `texts`, using `embeddings_model`
# to embed them, under the index name 'generative-ai-app'.
# NOTE(review): presumably every re-run of this cell uploads the chunks again and
# duplicates them in the index — confirm before re-executing.
docs_upload = Pinecone.from_documents(texts, embeddings_model, index_name='generative-ai-app')

In [7]:
# Expose the uploaded vector store as a retriever for the question-answering chain
retriever = docs_upload.as_retriever()

In [13]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI

# Chat model that generates the answers
language_model = "gpt-4-1106-preview"
model = ChatOpenAI(
    model_name=language_model,
    temperature=0.2,
)

# Conversational retrieval chain: the chat model combined with the retriever
qa = ConversationalRetrievalChain.from_llm(
    model,
    retriever=retriever,
)

Define the event handler responding to test questions.

In [14]:
def question_handler(question_id, question):
    """Answer one test question with the retrieval chain and log the exchange.

    Parameters
    ----------
    question_id
        Identifier of the question supplied by the caller; only used for logging.
    question
        Text of the question, passed to the chain under the "question" key.

    Returns
    -------
    The value stored under the 'answer' key of the chain's response.
    """
    # Every question is answered on its own: the chain always receives an
    # empty chat history, so earlier questions never influence later ones.
    chat_history = []
    response = qa({"question": question, "chat_history": chat_history})
    # Label the id distinctly so it is not confused with the question text below.
    print(f"Question ID: {question_id}")
    print(f"Question: {question}")
    print(f"Response: {response}")
    return response['answer']

Instead of the line `answer = "I don't know"`, insert the code generating the answer by your app.

In [15]:
from Generative_AI_App_connection import connect

# Credentials file: login on the first line, password on the second.
# Blank lines (e.g. a trailing empty line) are ignored so they no longer
# break the unpacking below with an opaque error.
with open("my_credentials.txt", 'r') as f:
    lines = [line.strip() for line in f]
credentials = [line for line in lines if line]
if len(credentials) != 2:
    raise ValueError(
        "my_credentials.txt must contain exactly two non-empty lines: "
        f"login and password (found {len(credentials)})"
    )
login, password = credentials

# server options
host = 'datastream.ilykei.com' # do not change
port = 30095   # do not change
stream_name = 'Generative_AI_App'   # do not change
catch_handler_errors = True  # we recommend using TRUE during the test and FALSE during workshop

# make connection with your handler; question_handler is called for each
# incoming question and the value returned by connect() is kept in `result`
result = connect(host, port, login, password, stream_name,
                 question_handler, catch_handler_errors)

Connecting to datastream.ilykei.com:30095
Sending login message
Logged in successfully as  samuelswain2023@u.northwestern.edu
Question: 0
Question: What is the current stage of development for PancreXcel, and how many patients are involved in the ongoing trials?
Response: {'question': 'What is the current stage of development for PancreXcel, and how many patients are involved in the ongoing trials?', 'chat_history': [], 'answer': 'PancreXcel is currently in Phase II clinical trials, and there are 250 patients involved in these ongoing trials.'}
Question: 1
Question: What information must be included in PancreXcel's proposed labeling?
Question: 2
Question: What is the role of the Target Product Profile (TPP) in the NDA process?
Response: {'question': 'What is the role of the Target Product Profile (TPP) in the NDA process?', 'chat_history': [], 'answer': "The Target Product Profile (TPP) in the New Drug Application (NDA) process serves to identify key characteristics of the drug, in thi

In [16]:
# Check results: display the summary returned by connect()
# (in the recorded run: 'problems', 'n_signals', 'penalty' and 'score')
result

{'problems': [], 'n_signals': 10, 'penalty': 4, 'score': 96}