# Welcome to my GPT WebApp

## Install Dependencies

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 --upgrade
!pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
!pip install langchain einops accelerate transformers bitsandbytes

## Import Dependencies

In [None]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

In [None]:
# Check whether CUDA is available; the result is displayed as this cell's output
torch.cuda.is_available() 

## Build Pipeline

In [None]:
# Hugging Face Hub checkpoint used for both the tokenizer and the model
model_id = "tiiuae/falcon-40b-instruct"

# Tokenizer that matches the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Keyword arguments controlling how the checkpoint is loaded:
# bfloat16 weights, the checkpoint's custom modelling code enabled,
# automatic device placement, and explicit cache / offload directories.
load_options = dict(
    cache_dir="./workspace/",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    offload_folder="offload",
)
model = AutoModelForCausalLM.from_pretrained(model_id, **load_options)

# Put the model into evaluation mode before generating
model.eval()

# Default generation settings baked into the pipeline:
# sample from the 10 most likely tokens, return a single sequence,
# and cap the length at 400 tokens.
generation_options = dict(
    max_length=400,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# Text-generation pipeline wrapping the loaded model and tokenizer
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    **generation_options,
)

In [None]:
# Try the pipeline on a sample question; the generated result is shown as the cell output.
# NOTE(review): the prompt text is passed to the model exactly as written here.
pipeline('Who is Kim Kardasian?')