# LangChain

The source idea is here: https://youtu.be/aywZrzNaKjs

## Installing libraries. Service functions

In [None]:
!pip -q install langchain==0.0.137
!pip -q install openai

In [None]:
!pip -q install --upgrade tiktoken

In [None]:
# !pip -q install openai chromadb

In [None]:
import os
import ipywidgets as widgets

class bcolors:
    """ANSI escape sequences used to colour and style printed text."""

    # Foreground colours.
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKCYAN = "\033[96m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Reset sequence: append after styled text to restore the default look.
    ENDC = "\033[0m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

class MyTools:
    """Notebook helper utilities; currently only the OpenAI API-key prompt."""

    @classmethod
    def set_key(cls):
        """Show a masked input form and store the entered OpenAI API key.

        Renders a password field, an "Authorization" button and an output
        area. When the button is clicked, the entered value is stripped of
        surrounding whitespace and written to ``os.environ["OPENAI_API_KEY"]``,
        a confirmation is printed, and the input widgets are hidden. An empty
        entry is rejected with a message and the widgets stay visible so the
        user can try again.

        Returns:
            None. The key is stored only as a side effect of the button click.
        """
        password_input = widgets.Password(
            description='Enter OpenAi API key:',
            layout=widgets.Layout(width='500px'),
            style={'description_width': 'initial', 'white-space': 'pre-wrap', 'overflow': 'auto'})
        login_button = widgets.Button(description='Authorization')
        output = widgets.Output()

        def on_button_clicked(_):
            # Pasted keys often carry stray spaces or newlines; strip them so
            # the stored value is exactly the key.
            api_key = password_input.value.strip()

            # Remove any earlier message so repeated clicks do not stack text.
            # Done before entering `with output:` to avoid nesting the
            # output-capture context.
            output.clear_output()

            with output:
                if not api_key:
                    # Do not overwrite the environment with an empty key.
                    print(f'{bcolors.FAIL}{bcolors.BOLD}API key is empty - please enter it and try again.{bcolors.ENDC}')
                    return
                os.environ["OPENAI_API_KEY"] = api_key
                print(f'{bcolors.OKGREEN}{bcolors.BOLD}Key saved!{bcolors.ENDC}')

            # Hide the form only after a key has actually been stored.
            password_input.layout.display = 'none'
            login_button.layout.display = 'none'

        login_button.on_click(on_button_clicked)
        # NOTE(review): `display` is not imported in the visible part of this
        # file; presumably it is provided by the IPython/Jupyter runtime.
        display(widgets.VBox([password_input, login_button, output]))



## Set up the OpenAI API key

In [None]:
# Render the key-entry form. The key is written to os.environ["OPENAI_API_KEY"]
# only after the button is clicked, so do that before running the cells below.
# `set_key` is a classmethod, so the `tools` instance is not needed for the call.
tools = MyTools()
MyTools.set_key()

## 1. Basic query with OpenAI wrapper

In [None]:
# Run basic query with OpenAI wrapper
from langchain.llms import OpenAI
# `llm` is reused by the prompt-template and chain cells further down.
llm = OpenAI(model_name="text-davinci-003")
# The wrapper is called directly with a plain prompt string; as the last
# expression in the cell, its result is shown as the cell output.
llm("explain large language models in one sentence")

In [None]:
# Import the chat message schema and ChatOpenAI in order to query chat models (GPT-3.5-turbo or GPT-4)

from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import ChatOpenAI

In [None]:
# A chat model takes a list of role-tagged messages instead of a bare string:
# the system message sets the persona, the human message carries the request.
messages = [
    SystemMessage(content="You are an expert data scientist"),
    HumanMessage(content="Write a Python script that trains a neural network on simulated data "),
]

chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
response = chat(messages)

# The reply is a message object; its text is in `.content`.
print(response.content)

Sure! Here's an example of a Python script that trains a neural network on simulated data using the Keras library:

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Generate simulated data
np.random.seed(0)
X = np.random.rand(100, 2)
y = np.random.randint(2, size=(100, 1))

# Create a neural network model
model = Sequential()
model.add(Dense(4, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=10)

# Evaluate the model
loss, accuracy = model.evaluate(X, y)
print(f'Loss: {loss}, Accuracy: {accuracy}')
```

In this script, we first generate simulated data using `np.random.rand` and `np.random.randint`. We then create a neural network model using the `Sequential` class from Keras. The model consists of two dense layers with ReLU and sigmoid activations, resp

## 2. Prompt Template

In [None]:
# Build a reusable prompt with a single {concept} placeholder.

from langchain import PromptTemplate

template = """
You are an expert data scientist with an expertise in building deep learning models.
Explain the concept of {concept} in a couple of lines
"""

# `input_variables` names the placeholder used inside `template`.
prompt = PromptTemplate(template=template, input_variables=["concept"])

In [None]:
# Inspect the PromptTemplate object (a bare expression is echoed as the cell output).
prompt

PromptTemplate(input_variables=['concept'], output_parser=None, partial_variables={}, template='\nYou are an expert data scientist with an expertise in building deep learning models.\nExplain the concept of {concept} in a couple of lines\n', template_format='f-string', validate_template=True)

In [None]:
# Run LLM with PromptTemplate:
# fill the {concept} placeholder, then pass the rendered prompt string to the LLM.

llm(prompt.format(concept="autoencoder"))

'\nAn autoencoder is a type of artificial neural network that is used to learn a compressed representation (encoding) of the input data, usually for the purpose of dimensionality reduction. The network is trained to reconstruct the original input from the encoded representation.'

In [None]:
# Same template, different concept: only the placeholder value changes.
llm(prompt.format(concept="regularization"))

'\nRegularization is a technique used in machine learning to reduce the complexity of models by adding a penalty to the loss function. This penalty term reduces the magnitude of the weights of the model, which in turn reduces overfitting and improves the generalization of the model.'

## 3. Chains

In [None]:
# Bind the language model and the prompt template into a single chain.

from langchain.chains import LLMChain

chain = LLMChain(prompt=prompt, llm=llm)

# Only the value for the prompt's input variable is passed to the chain.
print(chain.run("autoencoder"))


Autoencoders are a type of deep learning model that can be used to learn an efficient representation of data (called an "encoding") by training a neural network to learn to reconstruct its input. This can be used for data compression, feature extraction, and dimensionality reduction.


In [None]:
# Second prompt: takes a concept description ({ml_concept}) and asks for a
# simplified re-explanation. It is wrapped in its own chain on the same LLM.

second_prompt = PromptTemplate(
    template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words",
    input_variables=["ml_concept"],
)
chain_two = LLMChain(prompt=second_prompt, llm=llm)

In [None]:
# Compose the two chains sequentially: the output of `chain` becomes the
# input of `chain_two`.

from langchain.chains import SimpleSequentialChain

overall_chain = SimpleSequentialChain(
    chains=[chain, chain_two],
    verbose=True,
)

# Only the first chain's input variable has to be supplied.
explanation = overall_chain.run("autoencoder")
print(explanation)



[1m> Entering new SimpleSequentialChain chain...[0m




[36;1m[1;3m
An autoencoder is a type of artificial neural network that is used to learn a compressed representation of data (known as an encoding) by training the network to reconstruct its original input. This is done by forcing the network to learn a latent space representation of the data that is smaller than the original input.[0m




[33;1m[1;3m

An autoencoder is a type of computer program that can learn how to do something. It can learn by looking at examples and then trying to figure out how to do the same thing. Autoencoders are used to learn how to take data and make it smaller. 

Think of it like having a bunch of boxes and you want to put them in a smaller box. You can take each of the boxes and place them in the smaller box, but it doesn't always work out perfectly. You might have to stack the boxes, or put some of them on top of each other. That's what an autoencoder can do - it can look at the data and figure out how to make it fit into a smaller space. 

The way an autoencoder works is by training itself to take a large amount of data and then create a smaller representation of that data. This is known as an encoding. It does this by learning a "latent space" representation of the data, which is a kind of map of how the data is organized. This map is smaller than the original data, but still contains i

## 4. Embeddings and VectorStores

In [None]:
# Split the explanation produced above into small, non-overlapping document
# chunks (chunk_size=100, chunk_overlap=0).

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)

# create_documents takes a list of raw strings; here it is the single explanation.
texts = text_splitter.create_documents([explanation])

In [None]:
# Inspect the list of document chunks produced by the splitter.
texts

[Document(page_content='An autoencoder is a type of computer program that can learn how to do something. It can learn by', metadata={}),
 Document(page_content='looking at examples and then trying to figure out how to do the same thing. Autoencoders are used to', metadata={}),
 Document(page_content='learn how to take data and make it smaller.', metadata={}),
 Document(page_content='Think of it like having a bunch of boxes and you want to put them in a smaller box. You can take', metadata={}),
 Document(page_content="each of the boxes and place them in the smaller box, but it doesn't always work out perfectly. You", metadata={}),
 Document(page_content="might have to stack the boxes, or put some of them on top of each other. That's what an autoencoder", metadata={}),
 Document(page_content='can do - it can look at the data and figure out how to make it fit into a smaller space.', metadata={}),
 Document(page_content='The way an autoencoder works is by training itself to take a large am

In [None]:
# The raw text of a chunk is stored in its `page_content` attribute.
texts[0].page_content

'An autoencoder is a type of computer program that can learn how to do something. It can learn by'

In [None]:
# Import and instantiate OpenAI embeddings
from langchain.embeddings import OpenAIEmbeddings
# NOTE(review): `model_name="ada"` looks like the legacy embedding-model naming;
# confirm against the pinned langchain version which model this resolves to
# and whether the library default would be preferable.
embeddings = OpenAIEmbeddings(model_name="ada")

In [None]:
# Turn the first text chunk into a vector with the embedding
# NOTE(review): `tiktoken` is not referenced directly in this cell; presumably it
# is imported because the embeddings client depends on it. Confirm before removing.
import tiktoken
query_result = embeddings.embed_query(texts[0].page_content)
# Prints the whole embedding vector (a long list of floats).
print(query_result)

[-0.029225335606222633, 0.04170081280416132, 0.0073318982398839355, 0.02499185435697045, 0.028795852487133452, 0.017138439703634947, -0.017659956117086515, 0.0056293027832758444, 0.029327592959535742, -0.0007490398511220017, -0.03828539596934931, -0.02897991721321313, 0.02476688780715259, 0.02875494880075017, 0.0187745685325153, -0.014305893880526888, 0.05787802705365969, -0.010128654268729172, 0.016586247388041014, 0.002252232455956256, 0.014305893880526888, -0.0072858825240252915, 0.05427854363012291, -0.012772024267604742, 0.036731074513235526, 0.020676567597596802, 0.02495095067058717, 0.03941023244372882, 0.0015044707868260728, -0.023887467863137488, -0.028959465370021486, -0.0024005064564505602, 0.04585248481800183, 0.02098334152018123, 0.031454558574435104, 0.02769146599330049, 0.03824449228296603, 0.0006806548153571801, -0.029961592647896242, -0.011013186242219634, 0.006447366732054747, -0.02235359825021534, 0.0047089810155225564, 0.013252636063350478, -0.010302493073213027, -0