In [None]:
!pip install -Uqqq pip --progress-bar off
!pip install -qqq torch==2.1 --progress-bar off
!pip install -qqq transformers==4.34.1 --progress-bar off
!pip install -qqq accelerate==0.23.0 --progress-bar off
!pip install -qqq bitsandbytes==0.41.1 --progress-bar off
!pip install -qqq llava-torch==1.1.1 --progress-bar off

In [None]:
!pip install h5py
!pip install typing-extensions
!pip install wheel

In [None]:
import textwrap
from io import BytesIO

import requests
import torch
from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import SeparatorStyle, conv_templates
from llava.mm_utils import (
    KeywordsStoppingCriteria,
    get_model_name_from_path,
    process_images,
    tokenizer_image_token,
)
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from PIL import Image

[2023-12-02 03:12:15,599] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [None]:
# Called once, before the model is loaded below.
# NOTE(review): presumably this switches off PyTorch's default weight
# initialisation so that loading pretrained weights is faster — confirm in llava.utils.
disable_torch_init()

In [None]:
# Checkpoint identifier passed as `model_path` to load_pretrained_model below
# (presumably a Hugging Face Hub repository id — verify).
MODEL = "4bit/llava-v1.5-13b-3GB"
# Derive the model name from the identifier; the recorded output shows it is the
# last path component ('llava-v1.5-13b-3GB').
model_name = get_model_name_from_path(MODEL)
model_name  # bare expression so the notebook displays the derived name

'llava-v1.5-13b-3GB'

In [None]:
# Load the checkpoint and unpack the four objects returned by the builder.
# `tokenizer`, `model` and `image_processor` are module-level globals used by the
# helper functions defined later in this notebook; `context_len` is not used
# anywhere in the visible cells.
# load_4bit=True requests 4-bit loading (NOTE(review): presumably via the
# bitsandbytes package installed above — confirm in llava.model.builder).
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path=MODEL, model_base=None, model_name=model_name, load_4bit=True
)

In [None]:
# Conversation template used to format prompts. LLaVA v1.5 checkpoints are
# fine-tuned with the Vicuna-v1 chat format, and LLaVA's own CLI selects
# "llava_v1" for any model whose name contains "v1"; the older "llava_v0"
# template (with its "###" separators) does not match this checkpoint.
CONV_MODE = "llava_v1"

In [None]:
! pip install gradio

In [None]:
import gradio as gr

In [None]:
def process_image(image):
    """Turn a single image into the float16 pixel tensor fed to the model.

    Args:
        image: A ``PIL.Image.Image``, or an array accepted by
            ``Image.fromarray`` (Gradio's default ``gr.Image()`` component
            delivers NumPy arrays).

    Returns:
        The tensor produced by ``process_images`` for a one-image batch, moved
        to ``model.device`` with dtype ``torch.float16``.
    """
    from types import SimpleNamespace

    # The padding path in llava.mm_utils works on PIL images (it reads
    # ``.size``/``.mode``), so normalise array input to an RGB PIL image first.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    # process_images looks the option up with
    # ``getattr(model_cfg, "image_aspect_ratio", None)``. A plain dict has no
    # such attribute, so the previous ``{"image_aspect_ratio": "pad"}`` was
    # silently ignored and no padding was applied. An attribute-style object
    # makes the requested "pad" behaviour actually take effect.
    model_cfg = SimpleNamespace(image_aspect_ratio="pad")
    image_tensor = process_images([image], image_processor, model_cfg)
    return image_tensor.to(model.device, dtype=torch.float16)

In [None]:
def create_prompt(prompt: str):
    """Wrap a user question in the conversation template selected by ``CONV_MODE``.

    The image placeholder token and a newline are prepended to ``prompt``; the
    result is added as the first role's message, followed by an empty (``None``)
    message for the second role so the rendered prompt ends where the model's
    reply should begin.

    Args:
        prompt: The user's question about the image.

    Returns:
        A ``(prompt_text, conversation)`` tuple: the rendered prompt string and
        the conversation object it was rendered from.
    """
    conversation = conv_templates[CONV_MODE].copy()

    question_with_image = DEFAULT_IMAGE_TOKEN + "\n" + prompt
    conversation.append_message(conversation.roles[0], question_with_image)
    # None marks the turn that is still to be generated.
    conversation.append_message(conversation.roles[1], None)

    rendered = conversation.get_prompt()
    return rendered, conversation

In [None]:
def ask_image(image: Image.Image, prompt: str):
    """Answer a free-text question about an image with the loaded LLaVA model.

    Args:
        image: The image to ask about (any image object ``process_image``
            accepts).
        prompt: The user's question.

    Returns:
        The generated answer as a stripped string, without the prompt tokens
        and without the conversation's trailing stop string.

    Raises:
        ValueError: If ``image`` is ``None`` (e.g. the UI submitted no image).
    """
    if image is None:
        # Fail with a readable message instead of an obscure preprocessing error.
        raise ValueError("Please provide an image before asking a question.")

    image_tensor = process_image(image)
    full_prompt, conv = create_prompt(prompt)
    input_ids = (
        tokenizer_image_token(
            full_prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
        )
        .unsqueeze(0)  # add the batch dimension
        .to(model.device)
    )

    # Templates using the TWO separator style end a turn with ``sep2``; all
    # others end it with ``sep``.
    stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
    stopping_criteria = KeywordsStoppingCriteria(
        keywords=[stop_str], tokenizer=tokenizer, input_ids=input_ids
    )

    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=True,
            temperature=0.01,
            max_new_tokens=512,
            use_cache=True,
            stopping_criteria=[stopping_criteria],
        )

    # Decode only the newly generated tokens (everything after the prompt).
    answer = tokenizer.decode(
        output_ids[0, input_ids.shape[1] :], skip_special_tokens=True
    ).strip()

    # Generation is halted only once the stop keyword has already been
    # produced, so it is still sitting at the end of the decoded text; remove
    # it so callers receive just the answer.
    if stop_str and answer.endswith(stop_str):
        answer = answer[: -len(stop_str)].strip()
    return answer

In [None]:
# Stand-alone Gradio UI for the image question-answering function.
iface = gr.Interface(
    fn=ask_image,
    # gr.Image() hands the callback a NumPy array by default, while ask_image
    # is written for PIL images; request PIL explicitly.
    inputs=[gr.Image(type="pil"), "text"],
    outputs="text",
    live=False,
    title="Edvent - Queryable Engine",
    flagging_options=["Good", "Bad", "Mid"],
    description="Upload an image and ask away!",
)

# share=True additionally requests a public Gradio share link.
iface.launch(share=True)

In [None]:
!pip install langchain gradio pypdf pandas matplotlib tiktoken textract transformers openai==0.28.1 faiss-cpu

In [None]:
import os
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from transformers import GPT2TokenizerFast
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain

In [None]:
from getpass import getpass

# Never store API keys in a notebook: anything committed or shared with the
# file is exposed to every reader (any key that has ever been saved in a
# notebook should be treated as leaked and revoked). Use a key that is already
# present in the environment, otherwise ask for it interactively without
# echoing it to the output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

In [None]:
def process_pdf_and_query(pdf_content, query):
    """Answer ``query`` using the contents of a PDF.

    Every call loads the PDF, embeds its chunks into a fresh FAISS index,
    retrieves the chunks most similar to the question and passes them to a
    "stuff" question-answering chain backed by ``OpenAI(temperature=0)``.

    Args:
        pdf_content: Location of the PDF handed to ``PyPDFLoader`` (the UI
            collects it from the "Enter file path" textbox).
        query: The question to answer.

    Returns:
        The value returned by the QA chain's ``run`` call.
    """
    # Load the document and split it using the loader's default splitter.
    documents = PyPDFLoader(pdf_content).load_and_split()

    # Embed the chunks and index them for similarity search.
    vector_store = FAISS.from_documents(documents, OpenAIEmbeddings())

    qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
    relevant_docs = vector_store.similarity_search(query)
    return qa_chain.run(input_documents=relevant_docs, question=query)

In [None]:
# Stand-alone Gradio UI for the PDF question-answering function: two text
# boxes in (file path, question), plain text out.
inface = gr.Interface(
    fn=process_pdf_and_query,
    inputs=[
        gr.Textbox(label="Enter file path"),
        gr.Textbox(label="Ask your questions"),
    ],
    outputs="text",
    live=False,
    title="Edvent - Queryable Engine",
    flagging_options=["Good", "Bad", "Mid"],
    description="Upload your file and ask away!",
)

# Start the app and also request a public share link.
inface.launch(share=True)

In [None]:
# tts_demo = gr.Interface(
#     fn = transcribe_english,
#     inputs = "text",
#     outputs = gr.File(),
#     title=None,
#     description="Upload your audio and generate the presentation in English!",
#     cache_examples=False
# )
# tts_demo1 = gr.Interface(
#     fn = transcribe_kannada,
#     inputs = "text",
#     outputs = gr.File(),
#     title=None,
#     description="Upload your audio and generate the presentation in Kannada!",
#     cache_examples=False
# )

# tts_demo2 = gr.Interface(
#     fn = transcribe_hindi,
#     inputs = "text",
#     outputs = gr.File(),
#     title=None,
#     description="Upload your audio and generate the presentation in Hindi!",
#     cache_examples=False
# )

# PDF tab of the combined app. The title is left empty because the tabbed
# wrapper created below supplies the overall title.
inface = gr.Interface(
    fn=process_pdf_and_query,
    inputs=[
        gr.Textbox(label="Enter file path"),
        gr.Textbox(label="Ask your questions"),
    ],
    outputs="text",
    live=False,
    title=None,
    # flagging_options=["Good", "Bad", "Mid"],
    description="Upload your file and ask away!",
    theme=gr.themes.Soft(),
)

# Image tab of the combined app. The title is left empty because the tabbed
# wrapper created below supplies the overall title.
iface = gr.Interface(
    fn=ask_image,
    # gr.Image() hands the callback a NumPy array by default, while ask_image
    # is written for PIL images; request PIL explicitly.
    inputs=[gr.Image(type="pil"), "text"],
    outputs="text",
    live=False,
    title=None,
    # flagging_options=["Good", "Bad", "Mid"],
    description="Upload an image and ask away!",
    theme=gr.themes.Soft(),
)

# Combine the PDF and image interfaces into one app with a tab for each.
demo = gr.TabbedInterface(
    [inface, iface],
    ["PDF Queryable Engine", "Image Queryable Engine"],
    title="Edvent - Queryable Engine",
    theme=gr.themes.Soft(),
)
# Start the app and also request a public share link.
demo.launch(share=True)