In [1]:
# Locate a .env file and load its variables into the process environment

from dotenv import load_dotenv, find_dotenv

dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

# 1 - Model (LLM wrappers)

In [4]:
# Send a single prompt to a completion model through the OpenAI wrapper

from langchain.llms import OpenAI

llm = OpenAI(model_name='text-davinci-003')

# The bare call on the last line makes the completion the cell's output
one_line_question = 'explain large language models in a sentence'
llm(one_line_question)

'\n\nLarge language models are deep neural networks that are trained to generate contextualized representations of words, phrases, and sentences.'

In [5]:
# Import the chat message schema and ChatOpenAI to query chat models such as GPT-3.5-turbo or GPT-4

from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

from langchain.chat_models import ChatOpenAI

In [9]:
# Chat model wrapper configured with temperature 0.3
chat = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.3)

# A system message followed by the human request, in that order
system_message = SystemMessage(content='You are an expert data scientist')
task_message = HumanMessage(content='Write a Python script that trains a neural network on simulated data')
messages = [system_message, task_message]

response = chat(messages)

In [10]:
# Print the text of the chat model's reply
print(response.content)

Sure, here's an example script that trains a simple neural network on simulated data using the Keras library:

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Generate some simulated data
X = np.random.rand(1000, 10)
y = np.sum(X, axis=1)

# Define the neural network architecture
model = Sequential()
model.add(Dense(32, input_dim=10, activation='relu'))
model.add(Dense(1, activation='linear'))

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
model.fit(X, y, epochs=100, batch_size=32)

# Evaluate the model on some test data
X_test = np.random.rand(100, 10)
y_test = np.sum(X_test, axis=1)
loss = model.evaluate(X_test, y_test)
print('Test loss:', loss)
```

This script generates a random matrix `X` with 1000 rows and 10 columns, and a corresponding vector `y` that is the sum of each row of `X`. It then defines a neural network with one hidden layer of 32 units and a linear output layer. Th

# 2 - Prompts

In [11]:
# Import PromptTemplate and define a template with one placeholder, {concept}

from langchain import PromptTemplate

# Same text as a triple-quoted block: leading newline, two lines, trailing newline
template = (
    "\n"
    "You are an expert data scientist with an expertise in building deep learning models.\n"
    "Explain the concept of {concept} in a couple of lines\n"
)

prompt = PromptTemplate(template=template, input_variables=['concept'])

In [12]:
# Display the PromptTemplate's repr as the cell output
prompt

PromptTemplate(input_variables=['concept'], output_parser=None, partial_variables={}, template='\nYou are an expert data scientist with an expertise in building deep learning models.\nExplain the concept of {concept} in a couple of lines\n', template_format='f-string', validate_template=True)

In [13]:
# Fill the template with a concept, then send the rendered prompt to the LLM

regularization_prompt = prompt.format(concept='regularization')
llm(regularization_prompt)

'\nRegularization is a technique used to reduce the complexity of a model by penalizing certain parameters to prevent them from becoming too large. This helps to reduce the overfitting of the model and improve the generalization of the model to unseen data.'

In [14]:
# Same template, different concept: render the prompt and send it to the LLM

autoencoder_prompt = prompt.format(concept='autoencoder')
llm(autoencoder_prompt)

'\nAutoencoders are a type of neural network that learn to compress and reconstruct input data from its own output. They are used for unsupervised learning tasks, such as dimensionality reduction, denoising, and feature extraction.'

# 3 - Chains

In [15]:
# Bundle the language model and the prompt template into a single LLMChain

from langchain.chains import LLMChain

chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain on one concept and print the generated text
autoencoder_summary = chain.run('autoencoder')
print(autoencoder_summary)


Autoencoders are a type of neural network that use an encoding-decoding architecture to learn an efficient representation of input data. This representation is used to reconstruct the original input data, and the network is trained to optimize the reconstruction error. Autoencoders are popular for data compression, feature learning, and anomaly detection.


In [20]:
# Second prompt: takes a concept description and asks for an "explain like I'm five" version

eli5_template = "Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words"
second_prompt = PromptTemplate(
    template=eli5_template,
    input_variables=['ml_concept'],
)

chain_two = LLMChain(llm=llm, prompt=second_prompt)

In [21]:
# Combine the two chains sequentially: the first chain's output becomes the second chain's input

from langchain.chains import SimpleSequentialChain

chain_steps = [chain, chain_two]
overall_chain = SimpleSequentialChain(chains=chain_steps, verbose=True)

# Only the input for the first chain is supplied here
explanation = overall_chain.run("autoencoder")
print(explanation)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3m
An autoencoder is a type of artificial neural network used to learn efficient data codings in an unsupervised manner. It is composed of an encoder, which learns to map the input data into a hidden representation, and a decoder, which learns to reconstruct the input data from the hidden representation.[0m
[33;1m[1;3m

An autoencoder is like an invisible robot that helps us figure out how to store information in the most efficient way. To do this, it uses something called artificial neural networks. 

Artificial neural networks are like a complex network of computers that can process information in a very smart way. The autoencoder uses this network to learn how to store information in the most efficient way possible. 

The autoencoder is made up of two parts: an encoder and a decoder. The encoder's job is to take the information and figure out how to store it in the most efficient way. It does this by creating a hi

# 4 - Embeddings and VectorStores

In [22]:
# Import the text splitter and break the explanation from the previous section into document chunks

from langchain.text_splitter import RecursiveCharacterTextSplitter

# chunk_size=100 with no overlap between consecutive chunks
splitter_options = dict(chunk_size=100, chunk_overlap=0)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

source_texts = [explanation]
texts = text_splitter.create_documents(source_texts)

In [23]:
# Each chunk's text is read from its "page_content" attribute;
# here the first chunk is shown as the cell output.

texts[0].page_content

'An autoencoder is like an invisible robot that helps us figure out how to store information in the'

In [25]:
# Import the OpenAI embeddings wrapper and create an embedding client

from langchain.embeddings import OpenAIEmbeddings

embedding_model = "ada"
embeddings = OpenAIEmbeddings(model=embedding_model)

In [30]:
# Turn the first text chunk into a vector with the embedding

query_result = embeddings.embed_query(texts[0].page_content)

# Print only the vector's length (its dimensionality). Printing the vector
# itself would dump every component into the cell output.
print(len(query_result))

1536

In [27]:
# Import the Pinecone client and initialise it from environment variables

import os
import pinecone
from langchain.vectorstores import Pinecone

# Credentials are read from the environment rather than hard-coded in the notebook
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pinecone_environment = os.getenv('PINECONE_ENV')

pinecone.init(api_key=pinecone_api_key, environment=pinecone_environment)

  from tqdm.autonotebook import tqdm


In [32]:
# Upload the document chunks to the named Pinecone index, using the embedding client

index_name = "langchain-quickstart"
search = Pinecone.from_documents(
    texts,
    embeddings,
    index_name=index_name,
)

In [33]:
# Query the vector store for the chunks most similar to a natural-language question

query = "What is magical about an autoencoder?"
result = search.similarity_search(query=query)

# The result is a list of Document objects; print it as-is
print(result)

[Document(page_content='efficient way possible.', metadata={}), Document(page_content='An autoencoder is like an invisible robot that helps us figure out how to store information in the', metadata={}), Document(page_content='information and figure out how to store it in the most efficient way. It does this by creating a', metadata={}), Document(page_content='computer can use to store the data in the most efficient way.', metadata={})]


# 5 - Agents

In [34]:
# Import Python REPL tool and instantiate Python agent

from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.llms.openai import OpenAI

# Build the agent's two parts first: a temperature-0 LLM and the Python REPL tool
agent_llm = OpenAI(temperature=0, max_tokens=1000)
repl_tool = PythonREPLTool()

# verbose=True makes the executor print its intermediate steps
agent_executor = create_python_agent(llm=agent_llm, tool=repl_tool, verbose=True)

In [35]:
# Execute the Python agent on a small math task

# NOTE(review): in the recorded trace the REPL observations are empty, so the
# final answer is not visibly backed by tool output — verify the result independently.
agent_executor.run("Find the roots (zeros) of the quadratic function 3 * x**2 + 2*x -1")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to solve a quadratic equation
Action: Python REPL
Action Input: import numpy as np[0m

Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3m I can use numpy to solve the equation
Action: Python REPL
Action Input: np.roots([3,2,-1])[0m

Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: (-1.0, 0.3333333333333333)[0m

[1m> Finished chain.[0m


'(-1.0, 0.3333333333333333)'