## CSV dosyasındaki verinin yüklenmesini gerçekleştirme

In [None]:

from langchain_community.document_loaders.csv_loader import CSVLoader

# Dataset path, resolved relative to the notebook's working directory.
filepath = "IMDb_Top_1000_Movies_Dataset.csv"

# Read the CSV as UTF-8 and load it into a list of LangChain documents
# (per the CSVLoader documentation, one document per row).
loader = CSVLoader(file_path=filepath, encoding="utf-8")
data = loader.load()

movie_count = len(data)
print(f"Toplam {movie_count} film yüklendi.")

## Veriyi parçalarına ayırma (Chunking işlemi)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 1000 units with no overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(data)

chunk_count = len(docs)
print(f"Number of documents after splitting: {chunk_count}")

# Display one sample chunk as the cell output.
docs[7]

## Google Generative AI Embeddings'i kullanarak Embedding Oluşturma İşlemi


In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv


In [None]:
import os

# Load environment variables from the .env file into the process environment.
load_dotenv()

api_key = os.getenv("GOOGLE_API_KEY")

# Treat an empty value the same as a missing one, and never echo any part of
# the secret: cell outputs are stored inside the notebook file and are easily
# shared or committed along with it.
if not api_key:
    print("HATA: API anahtarı .env dosyasından okunamadı!")
else:
    print("Anahtar başarıyla yüklendi.")

## Hugging Face alternatifi

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Alternative embedding backend based on a sentence-transformers model.
# NOTE: the following cell rebinds `embeddings` to the Google model.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Smoke test: embed a short string and show the first five components.
vector = embeddings.embed_query("hello, world!")
vector[:5]


In [None]:
# Rebind `embeddings` to the Google Generative AI embedding model; this is the
# object the vector store cell further down picks up.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
)

# Smoke test: embed a short string and show the first five components.
vector = embeddings.embed_query("hello, world!")
vector[:5]

## ChromaDB üzerine kayıt işlemi

In [None]:
from langchain_chroma import Chroma

In [None]:
# Embed every chunk with the current `embeddings` object and index the
# results in a Chroma vector store.
vector_store = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [None]:
# Similarity-search retriever over the vector store, limited to the 5 best hits.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [None]:
# Sanity-check the retriever with a sample query.
retrieved_docs = retriever.invoke("high budget expensive movies")

# Print the hit count explicitly: a bare expression in the middle of a cell
# is not displayed.
print(f"Retrieved {len(retrieved_docs)} documents")

# Show the last hit. The retriever is configured with k=5, so valid indices
# are 0..4; indexing with 5 raised IndexError. Using -1 stays valid for any k,
# and the guard covers an empty result list.
if retrieved_docs:
    print(retrieved_docs[-1].page_content)


## Google Gemini API Yapısını Kullanarak LLM Tetikleme İşlemleri

`temperature` parametresi, modelin yanıtlarındaki rastgeleliği kontrol eder:

- Düşük Değerler (0.1-0.4): Daha kesin ve daha tutarlı cevaplar verilir. Model daha tahmin edilebilir hale gelir.
- Orta Değerler (0.5-0.7): Hem mantıklı hem de yaratıcı cevaplar verilir.
- Yüksek Değerler (0.7-1): Daha rastgele ve yaratıcı, ancak bazen tutarsız yanıtlar verilebilir.

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that generates the final answers.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",  # Gemini 2.5 model
    max_tokens=500,
    # Set explicitly instead of relying on the default; per the original
    # note, Gemini 3.0+ defaults to 1.0.
    temperature=0.3,
)

In [None]:

from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain


In [None]:
# System message for the question-answering prompt. `{context}` is a template
# placeholder, not an f-string field; it is left for the prompt template to
# fill in.
# Adjacent string literals are concatenated without any separator, so each
# sentence carries its own trailing space to keep them from running together.
system_prompt = (
    "You are assistant for question-answering tasks. "
    "Use the following context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer."
    "\n\n"
    "{context}"
)



In [None]:
# Two-message chat template: the system instructions (carrying the {context}
# placeholder) followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])


## Soru-Cevap Zinciri Oluşturma (LLM + PROMPT)


In [None]:
# Combine the chat model and the prompt into a chain that stuffs the supplied
# documents into the prompt's {context} slot.
# The chat model is bound to `model` in this notebook; the previous reference
# to an undefined `llm` raised NameError.
question_answering_chain = create_stuff_documents_chain(llm=model, prompt=prompt)


## RAG Zinciri Oluşturma (RAG + LLM)


In [None]:
# Full RAG pipeline: the retriever fetches documents for the input, then the
# question-answering chain produces the answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering_chain,
)

## Kullanıcı sorgusunu çalıştırma

In [None]:
# Run a user question through the RAG chain and print the generated answer.
# NOTE(review): this question looks unrelated to the movie dataset loaded
# above, so the model will probably answer that it does not know - confirm
# whether a movie-related query was intended.
user_query = {"input": "Explain the transformer architecture?"}
response = rag_chain.invoke(user_query)

answer_text = response["answer"]
print(answer_text)