# 🤖 Abschlussprojekt: RAG-System mit AI Index 2025

In [None]:
# Summary
# This code splits a large text extracted from a PDF into chunks,
# converts the text chunks into numeric vectors and stores them in a vector database.
# It then uses the GPT-3.5 model together with that database
# to answer questions about the PDF content, both in a chat context and grounded in the document.
# In addition, calls and results are traced with LangSmith.

# 1. Install the required libraries
# (`%pip` is an IPython magic, so this line only works inside a notebook kernel.)
%pip install pymupdf langchain chromadb sentence-transformers langsmith langchain-community openai --quiet

# 2. Configure environment variables
import os
import warnings

# Turn on LangSmith tracing for the LangChain calls made further down.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# SECURITY: API keys must never live in source code or in a shared notebook.
# Any key that was ever hard-coded here has to be treated as leaked and be
# revoked. Provide fresh keys through the process environment instead
# (shell export, a git-ignored .env file, or the notebook host's secret store).
for _key_name in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(_key_name):
        # Warn instead of raising so the notebook can still be imported or
        # inspected; the API clients fail on their own when a key is missing.
        warnings.warn(
            f"{_key_name} is not set; export it before running the cells below.",
            stacklevel=2,
        )

# 3. Open the PDF file and extract its text
import fitz

pdf_path = "hai_ai_index_report_2025.pdf"

# Open the document as a context manager so the underlying file handle is
# released as soon as the text has been pulled out, even if extraction fails.
with fitz.open(pdf_path) as doc:
    # Concatenate the text of every page, in page order, into one string.
    raw_text = "".join(page.get_text() for page in doc)

print(f"Toplam karakter sayısı: {len(raw_text)}")

# 4. Split the text into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_text(raw_text)
print(f"Chunk sayısı: {len(chunks)}")

# 5. Wrap every chunk in a LangChain Document object
from langchain.docstore.document import Document

# The position of a chunk in `chunks` is stored as its "chunk_id" metadata.
documents = [
    Document(page_content=chunk_text, metadata={"chunk_id": position})
    for position, chunk_text in enumerate(chunks)
]

# 6. Set up the embedding model and the Chroma vector store
from langchain_community.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Give every chunk a stable id derived from its "chunk_id" metadata. Without
# explicit ids, each run of this cell stores the chunks under fresh ids, so
# re-running against the persisted "rag_index" directory keeps adding copies
# of the same text and the retriever starts returning duplicates.
document_ids = [f"chunk-{document.metadata['chunk_id']}" for document in documents]

db = Chroma.from_documents(
    documents=documents,
    embedding=embedding_function,
    ids=document_ids,
    persist_directory="rag_index",
)
# NOTE(review): recent Chroma releases appear to persist automatically, which
# would make this call redundant — confirm against the installed version.
db.persist()

# 7. Create the LLM and the conversation memory
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

# The memory is registered under the "chat_history" key.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# 8. Build the conversational retrieval chain (chat + document search)
from langchain.chains import ConversationalRetrievalChain

chain_options = {
    "llm": llm,
    "retriever": db.as_retriever(),
    "memory": memory,
    "return_source_documents": False,
}
conversation_chain = ConversationalRetrievalChain.from_llm(**chain_options)

# 9. LangSmith tracing helper
from langsmith import traceable


@traceable(name="AI-Trend-RAG-Run")
def run_with_trace(query: str):
    """Send ``query`` through ``conversation_chain`` as a LangSmith-traced call.

    The call is wrapped by ``traceable`` under the name "AI-Trend-RAG-Run".

    Args:
        query: Question text handed to ``conversation_chain.run``.

    Returns:
        Whatever ``conversation_chain.run`` returns for ``query``.
    """
    answer = conversation_chain.run(query)
    return answer

# 10. Example query and its output
response = run_with_trace("What are the main AI investment trends in 2024?")
print(response)


def _run_question_batch(label, questions):
    """Ask each question independently and print the question and its answer.

    Args:
        label: Prefix printed before the running number (e.g. "Test").
        questions: Iterable of question strings to send to the chain.
    """
    for number, question in enumerate(questions, start=1):
        # The chain shares one conversation memory. Left untouched, earlier
        # questions leak into later ones, so the very same question can get a
        # different answer depending on what was asked before it. Clearing
        # the history makes every question an independent, comparable run.
        memory.clear()
        print(f"\n{label} #{number}: {question}")
        # Go through the traced helper so these runs are recorded the same
        # way as the example query above.
        print(run_with_trace(question))


# 11. Try the chain with a set of test questions
test_questions = [
    "What are the main AI investment trends in 2024?",
    "What happened in AI investments last year?",
    "Give me key trends in artificial intelligence funding in 2024.",
    "Summarize AI investment highlights for 2024.",
    "Which sectors received the most AI funding in 2024?",
]

_run_question_batch("Test", test_questions)

# 12. Try different prompt variations
variations = [
    "What happened in AI investments last year?",
    "Give me key trends in artificial intelligence funding in 2024.",
    "Summarize AI investment highlights for 2024.",
]

_run_question_batch("Prompt", variations)

Note: you may need to restart the kernel to use updated packages.
Toplam karakter sayısı: 822820
Chunk sayısı: 1033
In 2024, the main AI investment trend was a significant increase in global private AI investment, which grew by 44.5% compared to 2023. Additionally, funding for generative AI saw a sharp increase, attracting $33.9 billion in 2024, representing an 18.7% increase from the previous year and over 8.5 times the investment of 2022. Generative AI also accounted for more than a fifth of all AI-related private investment in 2024.

Test #1: What are the main AI investment trends in 2024?
The percentage increase in global private AI investment in 2024 compared to 2023 was 44.5%.

Test #2: What happened in AI investments last year?
In 2024, the main AI investment trend was a significant increase in global private AI investment, which grew by 44.5% compared to 2023. Additionally, funding for generative AI saw a sharp increase, attracting $33.9 billion, an 18.7% increase from the prev