## CSV dosyasındaki verinin yüklenmesini gerçekleştirme

In [11]:

from langchain_community.document_loaders.csv_loader import CSVLoader

# Read the TMDB movie CSV into LangChain documents. In the recorded run each
# CSV row became one Document carrying `source` and `row` metadata.
filepath = "tmdb_5000_movies.csv"
loader = CSVLoader(file_path=filepath, encoding="utf-8")
data = loader.load()

print(f"Toplam {len(data)} film yüklendi.")

Toplam 4803 film yüklendi.


## Veriyi parçalarına ayırma (Chunking işlemi)

In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into smaller chunks (chunk_size=1000, no overlap
# between consecutive chunks). One CSV row may yield several chunks: in the
# recorded run 4803 rows produced 9348 chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(data)

print(f"Number of documents after splitting: {len(docs)}")
# Show one chunk as a sanity check of the split result.
docs[7]

Number of documents after splitting: 9348


Document(metadata={'source': 'tmdb_5000_movies.csv', 'row': 3}, page_content='original_title: The Dark Knight Rises\noverview: Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent\'s crimes to protect the late attorney\'s reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham\'s finest. The Dark Knight resurfaces to protect a city that has branded him an enemy.\npopularity: 112.31295\nproduction_companies: [{"name": "Legendary Pictures", "id": 923}, {"name": "Warner Bros.", "id": 6194}, {"name": "DC Entertainment", "id": 9993}, {"name": "Syncopy", "id": 9996}]\nproduction_countries: [{"iso_3166_1": "US", "name": "United States of America"}]\nrelease_date: 2012-07-16\nrevenue: 1084939099\nruntime: 165\nspoken_languages: [{"iso_639_1": "en", "name": "English"}]\nstatus: Released\ntagline: The Leg

## Google Generative AI Embeddings'i kullanarak Embedding Oluşturma İşlemi


In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv


In [None]:
import os

# Load environment variables from the local .env file into the process
# environment so the Google client can find GOOGLE_API_KEY.
load_dotenv()

api_key = os.getenv("GOOGLE_API_KEY")

# Treat an empty value the same as a missing one. Never echo any part of the
# key: notebook outputs are saved with the file and would leak the secret
# when the notebook is shared or committed.
if not api_key:
    print("HATA: API anahtarı .env dosyasından okunamadı!")
else:
    print("Anahtar başarıyla yüklendi.")

In [None]:
# Embedding client used for both indexing and querying.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
)

# Smoke test: embed a short string and display the first few components to
# confirm the API key and model name work.
vector = embeddings.embed_query("hello, world!")
vector[:5]

## ChromaDB üzerine kayıt işlemi

In [17]:
from langchain_chroma import Chroma

In [18]:
import time

# Batch size and pause between batches; the original intent was to stay
# under the per-minute request limit of the embedding API.
BATCH_SIZE = 50
PAUSE_SECONDS = 10

# Start from an EMPTY collection. Building the store with
# Chroma.from_documents(documents=docs, ...) would embed every chunk in one
# unthrottled call (which triggered the recorded 429 error), and the batch
# loop below would then insert all the same chunks a second time.
vector_store = Chroma(embedding_function=embeddings)

# NOTE(review): the recorded error reports a free-tier quota of 1000 embed
# requests per day, while there are 9348 chunks. Pausing between batches only
# helps with per-minute limits and cannot lift a daily cap — consider indexing
# a subset of `docs` or using a persisted collection across several runs.
for start in range(0, len(docs), BATCH_SIZE):
    batch = docs[start:start + BATCH_SIZE]
    vector_store.add_documents(batch)
    print(f"Uploaded batch {start // BATCH_SIZE}. Sleeping to avoid rate limit...")
    time.sleep(PAUSE_SECONDS)  # Wait between batches

GoogleGenerativeAIError: Error embedding content (RESOURCE_EXHAUSTED): 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 1000, model: gemini-embedding-1.0\nPlease retry in 2.362869553s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/embed_content_free_tier_requests', 'quotaId': 'EmbedContentRequestsPerDayPerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-embedding-1.0'}, 'quotaValue': '1000'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '2s'}]}}

In [16]:
# Expose the vector store as a retriever that returns the 5 most similar
# chunks for a query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

NameError: name 'vector_store' is not defined

In [15]:
# Quick retrieval sanity check against the indexed movie chunks.
retrieved_docs = retriever.invoke("high budget expensive movies")

# A bare `len(...)` in the middle of a cell is not displayed, so print it.
print(f"Retrieved {len(retrieved_docs)} documents.")

# The retriever is configured with k=5, so valid indices are 0..4 and
# `retrieved_docs[5]` raises IndexError. Index from the end instead so the
# cell keeps working for any k, and guard against an empty result.
if retrieved_docs:
    print(retrieved_docs[-1].page_content)


NameError: name 'retriever' is not defined

## Google Gemini API Yapısını Kullanarak LLM Tetikleme İşlemleri

`temperature` parametresi, modelin yanıtlarındaki rastgelelik düzeyini belirler:

- Düşük Değerler (0.1-0.4): Daha kesin ve tutarlı cevaplar verilir. Model daha tahmin edilebilir hale gelir.
- Orta Değerler (0.5-0.7): Hem mantıklı hem de yaratıcı cevaplar verilir.
- Yüksek Değerler (0.8-1.0): Daha rastgele ve yaratıcı, ancak bazen tutarsız yanıtlar verilebilir.

In [14]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used to generate the final answers.
model = ChatGoogleGenerativeAI(
    # Gemini 2.5 model
    model="gemini-2.5-flash-lite",
    # Cap the length of each generated answer.
    max_tokens=500,
    # Low temperature for more consistent answers
    # (Gemini 3.0+ defaults to 1.0).
    temperature=0.3,
)

In [19]:

from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain


ModuleNotFoundError: No module named 'langchain.chains'

In [None]:
# System message for the question-answering chain. The "{context}"
# placeholder is left as a literal template variable to be filled in with the
# retrieved documents when the prompt template is rendered.
#
# The name must be a valid identifier (`system_prompt`, not `system prompt`),
# and adjacent string literals are concatenated verbatim, so each sentence
# needs its own trailing space to avoid running into the next one.
system_prompt = (
    "You are assistant for question-answering tasks. "
    "Use the following context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer."
    "\n\n"
    "{context}"
)



In [None]:
# Chat prompt: the system instructions (including the retrieved context
# placeholder) followed by the user's question in "{input}".
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])


## Soru-Cevap Zinciri Oluşturma (LLM + PROMPT)


In [None]:
# Combine the chat model and the prompt into a chain that answers a question
# from a list of documents. The chat model in this notebook is bound to the
# name `model`; `llm` is never defined and would raise NameError.
question_answering_chain = create_stuff_documents_chain(llm=model, prompt=prompt)


## RAG Zinciri Oluşturma (RAG + LLM)


In [None]:
# Full RAG pipeline: fetch relevant chunks with the retriever, then pass them
# to the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering_chain,
)

## Kullanıcı sorgusunu çalıştırma

In [None]:
# Run a user question through the RAG chain and print the generated answer.
# NOTE(review): the indexed data is movie metadata, so this question looks
# unrelated to the corpus; with the current system prompt the model is
# presumably expected to say it does not know — confirm this is intended.
response = rag_chain.invoke(
    {"input": "Explain the transformer architecture?"}
)

print(response["answer"])