In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install torch==2.0.0



In [None]:
# Companion packages pinned next to torch==2.0.0; installing them in a single
# command lets pip resolve the three pins together.
%pip install torchaudio==2.0.0 torchvision==0.15.1 torchtext==0.15.1



In [None]:
# %pip targets the kernel's own environment.
# NOTE(review): no visible cell imports anyascii — confirm it is still needed.
%pip install anyascii



In [None]:
# One resolver run for all three packages, installed into the kernel's environment.
# NOTE(review): these are unpinned — consider pinning versions compatible with torch==2.0.0.
%pip install fastai tokenizers torchdata



In [None]:
# Pinned xformers build; %pip installs it into the running kernel's environment.
%pip install xformers==0.0.19



In [None]:
# Core LLM / retrieval stack. `langchain-community` is listed explicitly because
# the notebook imports `langchain_community.document_loaders` further down.
%pip install -qU transformers accelerate einops langchain langchain-community bitsandbytes faiss-gpu sentence_transformers

In [None]:
# NOTE(review): bitsandbytes and accelerate are already installed/upgraded by the
# combined `-qU` install cell above; this cell looks redundant — confirm before removing.
%pip install -i https://pypi.org/simple/ bitsandbytes
%pip install --upgrade accelerate

Looking in indexes: https://pypi.org/simple/


In [None]:
# NOTE(review): accelerate is already installed by earlier cells; likely redundant.
%pip install -qqq accelerate

In [None]:
# NOTE(review): bitsandbytes is already installed by earlier cells; likely redundant.
%pip install -i https://pypi.org/simple/ bitsandbytes

Looking in indexes: https://pypi.org/simple/


In [None]:
# pypdf is the PDF parsing backend used by the PyPDFLoader cell below.
%pip install pypdf



In [None]:
# from huggingface_hub import notebook_login
# from google.colab import userdata
# hf_token = userdata.get('hftoken')
# notebook_login()

In [None]:
import os
from getpass import getpass

from torch import cuda, bfloat16
import transformers

model_id = 'google/gemma-2b'

# Label of the current CUDA device, or 'cpu' when no GPU is visible.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
# BitsAndBytesConfig exposes quant_type / double_quant / compute_dtype only for
# the 4-bit path (`bnb_4bit_*`); the former `bnb_8bit_*` keyword arguments were
# not recognised options, so 8-bit loading is configured by the flag alone.
bnb_config = transformers.BitsAndBytesConfig(load_in_8bit=True)

# begin initializing HF items, you need an access token
# Never commit the token: read it from the HF_TOKEN environment variable, or
# prompt for it interactively. The token that used to be hard-coded here must be
# treated as leaked and revoked in the Hugging Face account settings.
hf_auth = os.environ.get('HF_TOKEN') or getpass('Hugging Face access token: ')

# `token=` replaces the deprecated `use_auth_token=` argument.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

# `trust_remote_code=True` was dropped: Gemma ships with transformers, so there
# is no reason to allow execution of code downloaded from the model repository.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model loaded on cuda:0


In [None]:
# Load the tokenizer matching `model_id`; `token=` replaces the deprecated
# `use_auth_token=` argument for authenticating against the gated repository.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)



In [None]:
# Text sequences that should end generation when they appear at the tail of the output.
stop_list = ['\nHuman:', '\n```\n']

# add_special_tokens=False keeps the BOS token out of each stop sequence. With
# BOS prepended, a stop sequence can never equal the tail of generated text, so
# the stopping criterion would never fire.
stop_token_ids = [
    tokenizer(x, add_special_tokens=False)['input_ids'] for x in stop_list
]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [None]:
import torch

# torch.as_tensor accepts both plain id lists and existing tensors, so this cell
# can be re-run safely (the legacy torch.LongTensor constructor is not meant to
# be fed tensors that already live on another device).
stop_token_ids = [
    torch.as_tensor(x, dtype=torch.long, device=device) for x in stop_token_ids
]
stop_token_ids


[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [None]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stop generation once the sequence ends with one of the stop sequences.

    The stop sequences are read from the module-level ``stop_token_ids`` list
    (1-D tensors of token ids). Only the first sequence of the batch
    (``input_ids[0]``) is inspected.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the tail of ``input_ids[0]`` equals a stop sequence.

        Args:
            input_ids: token ids generated so far, indexed as ``[batch, position]``.
            scores: unused; part of the ``StoppingCriteria`` call signature.

        Returns:
            True if generation should stop, False otherwise.
        """
        sequence = input_ids[0]
        for stop_ids in stop_token_ids:
            stop_len = len(stop_ids)
            # A sequence shorter than the stop sequence cannot match; comparing
            # tensors of different lengths would raise instead of returning False.
            if stop_len == 0 or sequence.shape[-1] < stop_len:
                continue
            # Compare on the device the generated ids live on, which may differ
            # from the device the stop tensors were created on.
            if torch.equal(sequence[-stop_len:], stop_ids.to(sequence.device)):
                return True
        return False

# Bundle the custom criterion into the list container that is handed to the
# text-generation pipeline as `stopping_criteria`.
stopping_criteria = StoppingCriteriaList(
    [StopOnTokens()]
)

In [None]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    # temperature only takes effect in sampling mode; without do_sample=True the
    # value is ignored and transformers warns about the unused flag
    do_sample=True,
    temperature=0.1,  # 'randomness' of outputs; must be > 0, lower is more deterministic
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [None]:
# Smoke test: run one prompt through the pipeline and show the text of the
# first returned generation.
res = generate_text("What is Principal Component Analysis?")
first_generation = res[0]
print(first_generation["generated_text"])

What is Principal Component Analysis?
 nobody 2018-09-17T14:36:57+00:00
Principal Component Analysis (PCA) is a popular dimensionality reduction technique used to simplify complex datasets while retaining most of their information. It transforms the original features into a new set of orthogonal features called principal components, which are linear combinations of the original features that capture most of the variation in the data.

The main idea behind PCA is that the first few principal components often capture a large proportion of the variance in the data, while the remaining components explain a smaller amount of variance. By keeping only the first few principal components, PCA reduces the dimensionality of the dataset while preserving the most important information.

Here are some key concepts related to PCA:

1. Linear Combination: PCA transforms the original features into a new set of linear combinations called principal components. These components are created by taking the 

In [None]:
# Import from langchain_community, consistent with the document-loader import
# used later in this notebook; the `langchain.llms` path is a deprecated shim.
from langchain_community.llms import HuggingFacePipeline

# Wrap the transformers pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
# llm(prompt="Explain me the difference between Data Lakehouse and Data Warehouse.")

In [None]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("BrnoCompSpeed_Review_of_Traffic_Camera_Calibration.pdf")
# Use load() rather than load_and_split(): the latter already chunks the pages
# with a default text splitter, and the notebook applies its own
# RecursiveCharacterTextSplitter in a later cell, so the text would be split
# twice with two different overlap settings.
documents = loader.load()

In [None]:
# Show a compact summary instead of dumping every loaded document into the output.
print(f"Loaded {len(documents)} document(s)")
if documents:
    print(documents[0].page_content[:500])



In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the loaded documents into chunks (size 500, overlap 50) for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
all_splits = text_splitter.split_documents(documents)

In [None]:
# Import from langchain_community, matching the document-loader import used
# earlier; the `langchain.embeddings` / `langchain.vectorstores` paths are
# deprecated shims.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
# Reuse the device chosen in the model-loading cell ('cuda:N' or 'cpu') instead
# of hard-coding "cuda", so the embedding model also works on a CPU-only runtime.
model_kwargs = {"device": device}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Retrieval-augmented chat chain: the LLM answers from chunks fetched by the
# vector-store retriever, and the source documents are returned with the answer.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

In [None]:
# No previous turns yet for this conversation.
chat_history = []

query = "What is camera calibration?"
# Chain.invoke() is the supported entry point; calling the chain object
# directly (`chain({...})`) is deprecated. The returned dict is the same.
result = chain.invoke({"question": query, "chat_history": chat_history})

# print(result['answer'])

In [None]:
# The answer text is split on whitespace and everything before the first
# "Question:" token is dropped, so only the question and the model's reply are
# printed.
returned = result["answer"].split()
marker = "Question:"
# Fall back to the whole text when the marker is absent rather than silently
# printing an empty line.
start = returned.index(marker) if marker in returned else 0
ans = returned[start:]
print(" ".join(ans))

Question: What is camera calibration? Helpful Answer: Camera calibration is the process of determining the intrinsic parameters of a camera, such as focal length, principal point, distortion coefficients, etc. This is done to correct for optical distortions and ensure accurate 3D measurements from a single monocular camera.


In [None]:
import locale

# Force the reported preferred encoding to UTF-8 (presumably a workaround for
# shell commands failing on a non-UTF-8 locale — confirm it is still needed).
# The replacement keeps the optional `do_setlocale` parameter of the real
# function, so callers using locale.getpreferredencoding(False) do not raise
# TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [None]:
# gTTS provides the text-to-speech step below; %pip installs into the kernel's environment.
%pip install gTTS



In [None]:
from gtts import gTTS
import os
import shutil
import subprocess

language = "en"
# NOTE(review): result["answer"] is the full generated text (the pipeline is
# built with return_full_text=True), not just the extracted answer — confirm
# that reading all of it aloud is intended.
myobj = gTTS(text=result["answer"], lang=language, slow=False)

myobj.save("welcome.mp3")

# Playing the converted file
# Only invoke mpg321 when it is installed: the previous unconditional
# os.system call returned 32512 (command not found) on this runtime.
mpg321_path = shutil.which("mpg321")
if mpg321_path:
    subprocess.run([mpg321_path, "welcome.mp3"], check=False)
else:
    print("mpg321 not found; use the IPython Audio player in the next cell instead.")

32512

In [None]:
from IPython.display import Audio, display

# Embed an in-notebook player for the generated speech file and start playback
# immediately.
audio_player = Audio("welcome.mp3", autoplay=True)
display(audio_player)