## dotenv and chat model setup

In [21]:
import os

from dotenv import find_dotenv, load_dotenv

# Locate the .env file first, then load its entries into the process environment.
# override=True lets values from the file replace variables that are already set.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path, override=True)

True

In [22]:
from langchain_groq import ChatGroq

# Chat model accessed through Groq, configured with a low sampling temperature.
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.1,
)

# Smoke test: send a single question and print the text of the reply.
question = "What is the capital of France?"
response = llm.invoke(question)
print(response.content)

The capital of France is Paris.


### Splitting and embedding text using LangChain

In [10]:
import builtins

# Read the whole speech into memory as a single string.
# NOTE(review): builtins.open is called explicitly -- presumably because the name
# `open` was shadowed somewhere in this kernel session; confirm before simplifying.
# The encoding is pinned so decoding does not depend on the platform's locale
# default (assumes the file is stored as UTF-8 -- TODO confirm).
with builtins.open("churchill_speech.txt", "r", encoding="utf-8") as f:
    churchill_speech = f.read()

In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunk size and overlap are measured with len(),
# i.e. in characters of the input string.
splitter_settings = {
    "chunk_size": 100,
    "chunk_overlap": 20,
    "length_function": len,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)


In [14]:
# Split the speech into documents with the splitter configured above.
chunk = text_splitter.create_documents([churchill_speech])

# Preview the first document's text, then report how many documents were produced.
first_document = chunk[0]
print(first_document.page_content)

print(f"Number of chunks: {len(chunk)}")

Winston Churchill Speech - We Shall Fight on the Beaches
We Shall Fight on the Beaches
June 4, 1940
Number of chunks: 300


In [27]:
from langchain_ollama import OllamaEmbeddings

# Embedding model accessed through Ollama.
# An earlier version of this cell used model="llama3.2:1b" instead.
embedding_model_name = "granite-embedding:30m"
embeddings = OllamaEmbeddings(model=embedding_model_name)


In [28]:
# Sanity check: embed one short string. As the last expression of the cell, the
# returned vector (a list of floats) is displayed in full as the cell output.
embeddings.embed_query("Hello, world!")

[-0.014020913,
 0.020698927,
 0.025318453,
 0.030996142,
 -0.014270018,
 -0.030164946,
 0.0034748819,
 -0.009558959,
 0.04326238,
 0.032545805,
 0.010579122,
 -0.026071867,
 0.014671314,
 -0.049787726,
 -0.016744599,
 0.04604847,
 -0.067450754,
 -0.0010768307,
 0.011744954,
 0.0011922532,
 -0.029830711,
 -0.09787039,
 0.00644962,
 -0.044513036,
 -0.02393714,
 -0.073956296,
 0.008636561,
 0.011485706,
 -0.007264876,
 -0.20042428,
 0.021676565,
 0.03024629,
 -0.06650216,
 -0.055927057,
 -0.05618756,
 -0.06853635,
 0.07058183,
 -0.031773612,
 0.039032932,
 0.04069257,
 -0.0074569876,
 -0.050709903,
 -0.08009088,
 0.015561068,
 0.09609409,
 -0.0010460769,
 -0.038580686,
 0.079659685,
 -0.00011509497,
 0.02388783,
 -0.0010435545,
 -0.03586251,
 -0.05159879,
 0.003935109,
 0.04465009,
 -0.04639948,
 -0.02400431,
 -0.037323315,
 -0.007983015,
 -0.034837738,
 0.069088146,
 -0.0021365804,
 0.041331872,
 -0.042668216,
 0.014704898,
 0.02469132,
 0.025557864,
 0.036149286,
 -0.053136803,
 -0.0645

### Inserting embeddings into a Pinecone index

In [30]:
import pinecone
# NOTE(review): this LangChain wrapper class is also named Pinecone -- presumably
# why the client below is reached through the module as pinecone.Pinecone.
# NOTE(review): newer LangChain releases appear to deprecate this import path in
# favour of a dedicated integration package -- confirm against the installed version.
from langchain.vectorstores import Pinecone

# Create the Pinecone client. No API key is passed here -- presumably it is read
# from the environment (e.g. a PINECONE_API_KEY entry in the .env file); TODO confirm.
pc = pinecone.Pinecone()