In [1]:
import langchain
import os
import yaml

In [2]:
# Load API credentials
with open('api_key.yaml', 'r') as f:
    config = yaml.safe_load(f)
os.environ['OPENAI_API_KEY'] = config['OPEN_AI_KEY']
os.environ['HUGGINGFACEHUB_API_TOKEN'] = config['HUGGING_FACE_TOKEN_KEY']

## Components

### - Models
    - LLMs: 20+ integrations models
    - Chat Models
    - Text Embedding models: 10+ integrations

### - Prompts
    - Prompt Templates
    - Output Parsers: 5+ implementations
        - Retry/fixing logic
    - Example Selectors: 5+ implementations

### - Indexes
    - Document Loaders: 50+ implementations
    - Text Splitters: 10+ implementations
    - Vector stores
    - Retrievers

### - Chains
    - Prompt + LLM + Output parsing
    - Can be used as building blocks for longer chains
    - More application specific chains: 20 + types

### - Agents
    - Agent types: 5+
        - Algorithms for getting LLMs to use tools
    - Agent toolkits: 10+
        - Agents armed with specific tools for a specific application

In [1]:
# As it is now there are six models being covered in Longchain
# LLMs and prompts
# Chains
# Data Augmented Generation
# Agents
# Memory
# Evaluation

## LLMs and Prompts
LLMs take a string as an input (prompt) and output a string (completion).

In [4]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub

llm = OpenAI(
    model="text-davinci-003"
)

llm_hugging_face = HuggingFaceHub(
    repo_id='google/flan-t5-xl'
)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# The LLM takes a prompt as an input and outputs a completion
# prompt = "My name is Jerry and I am looking for a senior data scientist or machine learning engineer job"
# completion = llm_hugging_face(prompt)

In [7]:
# embeddings
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
embeddings = OpenAIEmbeddings()
embeddings_hugging_face = HuggingFaceEmbeddings(
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
)

Downloading (…)e9125/.gitattributes: 100%|██████████| 1.18k/1.18k [00:00<00:00, 391kB/s]
Downloading (…)_Pooling/config.json: 100%|██████████| 190/190 [00:00<00:00, 95.3kB/s]
Downloading (…)7e55de9125/README.md: 100%|██████████| 10.6k/10.6k [00:00<00:00, 5.31MB/s]
Downloading (…)55de9125/config.json: 100%|██████████| 612/612 [00:00<00:00, 204kB/s]
Downloading (…)ce_transformers.json: 100%|██████████| 116/116 [00:00<00:00, 38.7kB/s]
Downloading (…)125/data_config.json: 100%|██████████| 39.3k/39.3k [00:00<00:00, 9.82MB/s]
Downloading pytorch_model.bin: 100%|██████████| 90.9M/90.9M [00:01<00:00, 71.1MB/s]
Downloading (…)nce_bert_config.json: 100%|██████████| 53.0/53.0 [00:00<00:00, 27.0kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 55.9kB/s]
Downloading (…)e9125/tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 13.3MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 350/350 [00:00<00:00, 175kB/s]
Downloading (…)9125/train_script.py: 100%|█

In [14]:
# The embeddings model takes a text as an input and outputs a list of floats
text = "My name is Jerry and I am looking for a senior data scientist or machine learning engineer job"
text_embedding = embeddings_hugging_face.embed_query(text)

print(text_embedding)
print(len(text_embedding))

[-0.06501688808202744, -0.04788777232170105, 0.051248155534267426, 0.06370953470468521, -0.020586827769875526, -0.13232406973838806, -0.025029074400663376, -0.06336953490972519, -0.07616551220417023, -0.05247809365391731, -0.07098755240440369, -0.03720574080944061, 0.03114102967083454, -0.0573573000729084, -0.034847892820835114, 0.0413450226187706, -0.048038505017757416, -0.01105794683098793, 0.02295728586614132, -0.13795898854732513, 0.01054036058485508, 0.004916073754429817, -0.018240058794617653, -0.11778751015663147, 0.00014174864918459207, -0.00906381756067276, 0.03587860241532326, 0.049300871789455414, 0.0018293778412044048, 0.011244402267038822, -0.04029207304120064, -0.01543509867042303, 0.03291197866201401, 0.10186728835105896, 0.02767772413790226, 0.05389564111828804, -0.05304254591464996, 0.014578720554709435, 0.03618146851658821, 0.04540051147341728, -0.032780278474092484, -0.002884884597733617, -0.07596230506896973, -0.04300385341048241, -0.052731454372406006, 0.0220257993

## Models, Prompts and Parsers

In [20]:
import openai
import os
import yaml

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
# Load API credentials
with open('api_key.yaml', 'r') as f:
    config = yaml.safe_load(f)
os.environ['OPENAI_API_KEY'] = config['OPEN_AI_KEY']
os.environ['HUGGINGFACEHUB_API_TOKEN'] = config['HUGGING_FACE_TOKEN_KEY']

openai.api_key = os.environ['OPENAI_API_KEY']

In [21]:
def get_completion(prompt, model = "gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model = model,
        messages = messages,
        temperature = 0.5
    )

    return response.choices[0].message['content']

In [24]:
get_completion("What is AI")

'AI stands for Artificial Intelligence. It refers to the development of computer systems or machines that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and problem-solving. AI aims to create intelligent machines that can learn, reason, and adapt to new situations, ultimately mimicking or augmenting human intelligence. AI can be categorized into two types: narrow AI, which is designed for specific tasks, and general AI, which has the ability to understand, learn, and apply knowledge across various domains.'

In [26]:
paragraph_1 = """
I am really sad that people are facing massive layoffs... \
The same situation may happen to me as well. \
The best you can do is to stay positive and work hard!!! \
The future will be better!!!
"""

In [31]:
style = """
Canada English \ 
into a positive tone
"""

In [32]:
prompt = f"""
Translate the text that is delimited by triple backticks
into a style that is {style}.
text: '''{paragraph_1}'''
"""

In [33]:
print(prompt)


Translate the text that is delimited by triple backticks
into a style that is 
Canada English \ 
into a positive tone
.
text: '''
I am really sad that people are facing massive layoffs... The same situation may happen to me as well. The best you can do is to stay positive and work hard!!! The future will be better!!!
'''



In [34]:
response = get_completion(prompt=prompt)
response

### Model

In [37]:
 # model
from langchain.chat_models import ChatOpenAI

chat = ChatOpenAI(temperature=0.3)
chat

ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.3, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None)

### Prompt (template)

In [39]:
# prompt template
from langchain.prompts import ChatPromptTemplate

# this template_1 is a translate template
template_1 = """
translate the text that in delimiated by double quos into a style that is {style}. \
text: "{text}"
"""


prompt_template = ChatPromptTemplate.from_template(
    template = template_1
)

In [43]:
prompt_template.messages[0].prompt.input_variables

['style', 'text']

In [56]:
# feed the input string with 1) text 2) style into the prompt template
paragraph_1 = """
I really hate school!!! \
I want to fk you all off, school sucks!!! \
"""
style = """
English \
in a rude and disrespectful tone
""" 

In [57]:
prompt_msg = prompt_template.format_messages(
    style = style,
    text = paragraph_1
)

In [58]:
prompt_response = chat(prompt_msg)

In [59]:
# generate the content back
prompt_response.content

"I absolutely despise school!!! I couldn't care less about any of you, school is absolutely garbage!!!"

### Parse (can treat it as the reverse of prompt, to extract the key information from the paragraph)

In [None]:
# use case
# to parse the review/comment with parse template to extract the key entity