<a href="https://colab.research.google.com/github/kmk4444/System_engineering/blob/main/system_engineering_v9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# chromadb is the Chroma vector-store package (PyPI's "chroma" is a different project); peft provides AutoPeftModelForCausalLM used below.
pip install langchain openai pypdf chromadb streamlit langchain_openai langchain_community transformers peft bitsandbytes accelerate torch faiss-gpu faiss-cpu langchain_chroma langchain_experimental sentence-transformers cohere rank_bm25 nltk scikit-learn

In [None]:
pip install --upgrade huggingface-hub transformers

In [None]:
import os

from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

# SECURITY: access tokens must never be committed. The token that used to be
# hard-coded on this line has been published with the notebook and should be
# revoked in the Hugging Face account settings.
# The token is now taken from the HF_TOKEN environment variable; when it is
# unset, None is passed and login() asks for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

# Hücre 1: Gerekli Kütüphaneler ve İndirme Fonksiyonları

In [None]:
# Gerekli kütüphanelerin import edilmesi
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import torch
import os
from langchain_core.prompts import PromptTemplate
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')
from nltk.tokenize import sent_tokenize

# Prompt fragments consumed by generate_prompt_engineering():
#   - "system_multiple" and "lang_eng" are concatenated into the system message;
#   - every "<skill>_simpler" entry is listed as one technique in the user message.
# "system" and "lang_default" are not referenced by the code visible in this
# notebook export.
# NOTE: these values are runtime prompt text; editing them changes model behavior.
templates = {
    "system": "You are a professional prompt engineer. Apply the mentioned prompt engineering technique and provide ONLY the improved prompt without any additional commentary or explanations.",
    "system_multiple": "You are a professional prompt engineer. Thoroughly apply EVERY prompt engineering technique listed in the [Prompt Engineering Techniques to Apply] section. Use these techniques to enhance the original prompt provided below, ensuring the enhancement is clear and effective. Provide ONLY the improved version of the prompt without any additional commentary or explanations.",
    "lang_default": "Identify the language of the user's original prompt in the [original] section. You MUST provide the enhanced version of the prompt in the **same language** as the user's original prompt. You'll be penalized if you translate it into another language unless explicitly requested by the user.",
    "lang_eng": "If the original prompt is not in English, first translate it into English before proceeding with the improvement process.",
    "deeper_understanding_simpler": "Explain to me as if I’m a beginner in System Engineering. Example: Change \"Explain system architecture.\" to \"Explain system architecture to beginners.\"",
    "task_decomposition_simpler": "For complex or multi-step tasks, divide the original prompt into a series of simpler, more manageable sub-prompts. This approach allows the model to focus on one part of the task at a time, generating more detailed and coherent responses for each step.",
    "fewshot_prompting_simpler": "Improve the original prompt by adding a couple of relevant examples that demonstrate the kind of answer or information being requested. Incorporate those examples smoothly into the prompt to make the desired response clear."
}

# Constants
DATA_PATH = "/content/drive/MyDrive/data"  # directory that holds the PDF files
CHROMA_PATH = "chroma"  # directory where the Chroma database is stored


# Embedding model setup
def initialize_embeddings():
    """Create the HuggingFace embedding wrapper used for vector search.

    Returns:
        HuggingFaceEmbeddings: Wrapper configured with the
        ``sentence-transformers/all-MiniLM-l6-v2`` model name.
    """
    embedding_model_name = "sentence-transformers/all-MiniLM-l6-v2"
    return HuggingFaceEmbeddings(model_name=embedding_model_name)


embeddings = initialize_embeddings()

def initialize_colbertv2_model():
    """
    Load the ColBERTv2 tokenizer and model.

    Both objects come from the ``colbert-ir/colbertv2.0`` checkpoint.

    Returns:
        tuple: ``(colbert_tokenizer, colbert_model)`` in that order.
    """
    checkpoint = "colbert-ir/colbertv2.0"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )


# Usage: load once at module level; the reranker receives these as arguments.
colbert_tokenizer, colbert_model = initialize_colbertv2_model()

# Modelin başlatılması
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

def initialize_model():
    """Load the PEFT causal-LM checkpoint and its tokenizer.

    Both are loaded from ``kmk4444/Llama-3.2-1B-Instruct_fine_sis``; the model
    is placed with ``device_map="auto"``.

    Returns:
        tuple: ``(tokenizer, model)`` in that order.
    """
    checkpoint = "kmk4444/Llama-3.2-1B-Instruct_fine_sis"

    # 4-bit quantization switch; currently turned off.
    use_4bit = False

    peft_model = AutoPeftModelForCausalLM.from_pretrained(
        checkpoint,
        load_in_4bit=use_4bit,
        device_map="auto",
    )
    chat_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    return chat_tokenizer, peft_model


# Instantiate the tokenizer and model once; main() uses these module-level objects.
tokenizer, model = initialize_model()



# Hücre 2: Yardımcı Fonksiyonlar

In [None]:
# Rerank helper
def rerank_with_colbertv2(query, documents, colbert_tokenizer, colbert_model):
    """Sort documents by embedding similarity to the query, best match first.

    The query and every document are encoded with ``colbert_model``; the
    ``last_hidden_state`` of each is mean-pooled over the token axis into a
    single vector, and documents are scored by cosine similarity against the
    query vector.

    Args:
        query: Query text.
        documents: Iterable of objects exposing a ``page_content`` string.
        colbert_tokenizer: Callable tokenizer returning model keyword inputs.
        colbert_model: Callable model whose output has ``last_hidden_state``.

    Returns:
        list: The input documents ordered by descending similarity. Documents
        with equal scores keep their input order.
    """
    def embed(text):
        # One pooled vector per text: average the token embeddings.
        tokens = colbert_tokenizer(text, return_tensors='pt', padding=True, truncation=True)
        return colbert_model(**tokens).last_hidden_state.mean(dim=1)

    # Pure inference: keep ALL forward passes (query and documents) under
    # no_grad so no autograd graph is recorded for any of them.
    with torch.no_grad():
        query_embedding = embed(query)
        scored = [
            (
                doc,
                torch.nn.functional.cosine_similarity(
                    query_embedding, embed(doc.page_content)
                ).item(),
            )
            for doc in documents
        ]

    # list.sort is stable, so ties preserve the retrieval order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return [doc for doc, _ in scored]

# Load the documents and split them into chunks
def load_and_split_documents(data_path, embeddings):
    """Load the PDFs found under ``data_path`` and split them into chunks.

    Args:
        data_path: Directory handed to ``PyPDFDirectoryLoader``.
        embeddings: Not used by this function; present in the signature only.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``
        (chunk size 1000, overlap 50, measured with ``len``).
    """
    pages = PyPDFDirectoryLoader(data_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
    )
    chunks = splitter.split_documents(pages)
    return chunks

# Build the custom documents
def create_custom_documents(splitted_documents):
    """Rebuild each chunk as a ``Document`` with a normalized metadata dict.

    The new metadata always carries ``source``, ``title``, ``description`` and
    ``language`` (taken from the chunk when present, otherwise a placeholder
    string) plus ``doc_id``, the chunk's position in the input.

    Args:
        splitted_documents: Iterable of chunks exposing ``page_content`` and a
            ``metadata`` mapping.

    Returns:
        list: One new ``Document`` per input chunk, in input order.
    """
    def normalized_metadata(metadata, doc_id):
        return {
            "source": metadata.get("source", "Unknown Source"),
            "title": metadata.get("title", "No Title"),
            "description": metadata.get("description", "No Description"),
            "language": metadata.get("language", "Unknown Language"),
            "doc_id": doc_id,
        }

    return [
        Document(
            page_content=chunk.page_content,
            metadata=normalized_metadata(chunk.metadata, position),
        )
        for position, chunk in enumerate(splitted_documents)
    ]

# Set up the vector store and its retriever
def initialize_vectorstore(custom_documents, embeddings, persist_directory):
    """Index the documents in Chroma and return an MMR retriever over them.

    Args:
        custom_documents: Documents to index.
        embeddings: Embedding object passed to ``Chroma.from_documents``.
        persist_directory: Directory passed to Chroma as ``persist_directory``.

    Returns:
        The retriever from ``as_retriever`` configured with
        ``search_type="mmr"``, ``k=10`` and ``lambda_mult=0.40``.

    Raises:
        Exception: Any error from Chroma is printed and then re-raised.
    """
    mmr_settings = {'k': 10, 'lambda_mult': 0.40}
    try:
        # Hand the documents and the embedding object to Chroma.
        store = Chroma.from_documents(
            custom_documents,
            embeddings,
            persist_directory=persist_directory,
        )
        retriever = store.as_retriever(search_type="mmr", search_kwargs=mmr_settings)
    except Exception as e:
        print(f"Error initializing vectorstore: {e}")
        raise e
    return retriever

# Fetch the relevant documents
def retrieve_relevant_documents(retriever, prompt):
    """Return whatever ``retriever.get_relevant_documents`` yields for ``prompt``."""
    matches = retriever.get_relevant_documents(prompt)
    return matches

def get_relevant_documents_with_bm25(documents, query):
    """Run a BM25 search over ``documents`` and also return the retriever.

    Args:
        documents: Documents used to build the ``BM25Retriever``.
        query: Search text.

    Returns:
        tuple: ``(matching_documents, bm25_retriever)``; the retriever has its
        ``k`` attribute set to 5 and can be reused by the caller.
    """
    keyword_retriever = BM25Retriever.from_documents(documents=documents)
    keyword_retriever.k = 5

    return keyword_retriever.get_relevant_documents(query=query), keyword_retriever

def get_relevant_documents_for_hybrid_search(query, retriever1, retriever2, weight1, weight2):
    """Query two retrievers through a weighted ``EnsembleRetriever``.

    Args:
        query: Search text.
        retriever1: First retriever; paired with ``weight1``.
        retriever2: Second retriever; paired with ``weight2``.
        weight1: Weight given to ``retriever1``.
        weight2: Weight given to ``retriever2``.

    Returns:
        The documents returned by the ensemble for ``query``.
    """
    combined = EnsembleRetriever(
        retrievers=[retriever1, retriever2],
        weights=[weight1, weight2],
    )
    return combined.get_relevant_documents(query)

# Build the final prompt
def generate_final_prompt(prompt, context_data, chat_history, relevant_documents):
    """Assemble the RAG prompt and a one-line-per-document source summary.

    Args:
        prompt: The question placed in the ``###Question###`` section.
        context_data: Text placed in the ``###Context###`` section.
        chat_history: Sequence of dicts with ``role`` and ``content`` keys,
            rendered as ``role: content`` lines.
        relevant_documents: Documents whose ``metadata`` holds ``doc_id``,
            ``title`` and ``source``.

    Returns:
        tuple: ``(final_prompt, metadata_info)``, both strings.
    """
    history_lines = []
    for message in chat_history:
        history_lines.append(f"{message['role']}: {message['content']}")
    history_prompt = "\n".join(history_lines)

    source_lines = []
    for document in relevant_documents:
        meta = document.metadata
        source_lines.append(
            f"Belge {meta['doc_id']} - Başlık: {meta['title']}, Kaynak: {meta['source']}"
        )
    metadata_info = "\n".join(source_lines)

    final_prompt = f"""
###Instruction###
You are an expert assistant dedicated to providing answers for beginner systems engineers. Follow these guidelines:

1. Deliver clear, concise, and expert-level information in Turkish.
2. Use the provided documents (###Context###) and historical conversation (###Previous Conversations###) data to answer questions (###Question###). Reference these documents while interpreting and generating your answers.
3. Ensure that your answers are coherent and free from any repetitive or duplicate content.
4. Each sentence should provide unique and valuable information.
5. You have to answer just in TURKISH.

###Previous Conversations###
{history_prompt}

###Question###
{prompt}

###Context###
This is the information we have to answer the question: {context_data}
"""
    return final_prompt, metadata_info

# Helper that strips near-duplicate sentences
def remove_repetitions(response, threshold=0.7):
    """Drop sentences that are near-duplicates of an earlier kept sentence.

    The text is split into sentences, each sentence is turned into a TF-IDF
    vector, and a sentence is discarded when its cosine similarity to an
    earlier *kept* sentence exceeds ``threshold``.

    Args:
        response: Text to clean. ``None``, empty and whitespace-only input
            yield an empty string.
        threshold: Similarity above which a later sentence counts as a repeat.

    Returns:
        str: The kept sentences joined with single spaces.
    """
    # Nothing to analyse; TfidfVectorizer would raise on an empty corpus.
    if not response or not response.strip():
        return ""

    sentences = sent_tokenize(response)

    # Fewer than two sentences cannot contain a repetition.
    if len(sentences) < 2:
        return ' '.join(sentences)

    try:
        vectors = TfidfVectorizer().fit_transform(sentences).toarray()
    except ValueError:
        # Raised when no sentence contains an indexable token (e.g. the text
        # is only punctuation); there is nothing to compare, keep everything.
        return ' '.join(sentences)

    cosine_matrix = cosine_similarity(vectors)

    unique_sentences = []
    seen_indices = set()  # indices already judged to be repeats

    for i, sentence in enumerate(sentences):
        if i in seen_indices:
            continue
        unique_sentences.append(sentence)
        # Mark every later sentence that is too similar to this kept one.
        for j in range(i + 1, len(sentences)):
            if cosine_matrix[i, j] > threshold:
                seen_indices.add(j)

    return ' '.join(unique_sentences)

# RAG over multiple PDFs
def rag_with_multiple_pdfs(prompt, chat_history):
    """Run the retrieval pipeline for ``prompt`` and build the final prompt.

    Steps, in order: load and chunk the PDFs from ``DATA_PATH``, index them in
    Chroma, run the Chroma (MMR) search, run the BM25 search, run the weighted
    BM25 + Chroma ensemble search, rerank the ensemble results, strip repeated
    sentences from the joined context, and assemble the final prompt.

    Args:
        prompt: The question used for every search and placed in the prompt.
        chat_history: Conversation history forwarded to ``generate_final_prompt``.

    Returns:
        tuple: ``(final_prompt, metadata_info, chroma_documents,
        bm25_documents, reranked_documents)``.
    """
    chunks = load_and_split_documents(DATA_PATH, embeddings)
    corpus = create_custom_documents(chunks)

    semantic_retriever = initialize_vectorstore(corpus, embeddings, CHROMA_PATH)
    semantic_hits = retrieve_relevant_documents(semantic_retriever, prompt)

    keyword_hits, keyword_retriever = get_relevant_documents_with_bm25(corpus, prompt)

    # BM25 gets 0.2 of the ensemble weight; Chroma gets the remainder.
    bm25_weight = 0.2
    hybrid_hits = get_relevant_documents_for_hybrid_search(
        query=prompt,
        retriever1=keyword_retriever,
        retriever2=semantic_retriever,
        weight1=bm25_weight,
        weight2=1-bm25_weight,
    )

    # Reorder the hybrid results with the ColBERTv2-based reranker.
    reranked_hits = rerank_with_colbertv2(prompt, hybrid_hits, colbert_tokenizer, colbert_model)

    joined_context = " ".join([hit.page_content for hit in reranked_hits])
    deduplicated_context = remove_repetitions(joined_context)
    final_prompt, metadata_info = generate_final_prompt(
        prompt, deduplicated_context, chat_history, reranked_hits
    )
    return final_prompt, metadata_info, semantic_hits, keyword_hits, reranked_hits

def generate_prompt_engineering(prompt, tokenizer, model):
    """Ask the model to rewrite ``prompt`` using the configured techniques.

    The system message is ``templates["system_multiple"]`` followed by
    ``templates["lang_eng"]``. The user message lists the ``*_simpler``
    technique templates and then the original prompt between ``[original]``
    and ``[improved]`` markers.

    Args:
        prompt: The user's original prompt.
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``decode``.
        model: Model exposing ``device`` and ``generate``.

    Returns:
        str: Only the newly generated text, with special tokens removed.
    """
    system_message = templates["system_multiple"] + '\n' + templates["lang_eng"]

    skills = ["deeper_understanding", "task_decomposition", "fewshot_prompting"]
    technique_lines = []
    for idx, skill in enumerate(skills, start=1):
        template = templates[f"{skill}_simpler"]
        technique_lines.append(f"{idx}. {skill}: {template}\n")

    # The user's text is concatenated directly instead of going through a
    # format-string template, so braces in any fragment are never interpreted.
    user_message = (
        "[Prompt Engineering Techniques to Apply]\n"
        + "".join(technique_lines)
        + "Based on [Prompt engineering techniques to apply], refine the prompt provided below. Ensure that each technique is fully incorporated to achieve a clear and effective improvement:\n\n[original]\n"
        + f"{prompt}\n[improved]\n"
    )

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message}
    ]

    # return_dict=True makes the return type explicit: a mapping holding both
    # input_ids and attention_mask, whatever the library's default is. The
    # whole mapping is moved to the model's device and forwarded to generate()
    # so the attention mask is actually used.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.70,
        top_p=0.90,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    )

    # generate() returns prompt + continuation; keep the continuation only.
    response = outputs[0][prompt_length:]
    return tokenizer.decode(response, skip_special_tokens=True)

# Generate the LLaMA 3 answer
def generate_llama3_response(prompt_input, tokenizer, model):
    """Generate the assistant's answer for an already assembled prompt.

    Args:
        prompt_input: Text sent as the user turn.
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``decode``.
        model: Model exposing ``device`` and ``generate``.

    Returns:
        str: Only the newly generated text, with special tokens removed.
    """
    SYS_PROMPT = """You are an expert assistant dedicated to guiding and empowering beginner systems engineers. Follow these guidelines:
1. Provide clear, concise, and expert-level information in Turkish.
2. Use the provided documents and historical conversation data to deliver insightful answers. Previous conversation history and document information will be given to you in the user's prompt.
3. Ensure that your answers are free from any repetitive or duplicate content and sentences. You have to eliminate repetitive sentences. DON'T REPEAT THE SAME SENTENCES!
4. If unsure, confidently state "Bilmiyorum" without speculating.
5. Your responses should be motivational and tailored to empower beginners in understanding systems engineering principles.
6. Only respond in TURKISH."""

    messages = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": prompt_input}
    ]

    # return_dict=True makes the return type explicit: a mapping holding both
    # input_ids and attention_mask, whatever the library's default is. The
    # whole mapping is moved to the model's device and forwarded to generate()
    # so the attention mask is actually used.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.85,
        top_p=0.90,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    )

    # generate() returns prompt + continuation; keep the continuation only.
    response = outputs[0][prompt_length:]
    return tokenizer.decode(response, skip_special_tokens=True)

def evaluate_answer_relevance(question, answer, tokenizer, model):
    """
    Ask the language model whether ``answer`` properly answers ``question``.

    The model is prompted (in Turkish) to reply 'Evet' or 'Hayır'; the reply is
    produced with greedy decoding, limited to 50 new tokens.

    Args:
        question: The user's question.
        answer: The candidate answer to judge.
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``decode``.
        model: Model exposing ``device`` and ``generate``.

    Returns:
        bool: True when the model's reply contains "evet" in any letter case,
        otherwise False.
    """
    evaluation_prompt = f"Soru: {question}\nCevap: {answer}\nBu cevap soruyu doğru ve tam olarak yanıtlıyor mu? Lütfen 'Evet' veya 'Hayır' şeklinde cevap ver."

    messages = [
        {"role": "user", "content": evaluation_prompt}
    ]

    # return_dict=True makes the return type explicit: a mapping holding both
    # input_ids and attention_mask, whatever the library's default is. The
    # whole mapping is moved to the model's device and forwarded to generate()
    # so the attention mask is actually used.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Greedy decoding: temperature/top_p are sampling-only settings and are
    # deliberately not passed together with do_sample=False.
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,
        repetition_penalty=1.0,
        pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    )

    # generate() returns prompt + continuation; keep the continuation only.
    evaluation_response = outputs[0][prompt_length:]
    evaluation_text = tokenizer.decode(evaluation_response, skip_special_tokens=True).strip()

    # Case-insensitive so that "evet" / "EVET" are accepted as well as "Evet".
    return "evet" in evaluation_text.casefold()


def select_best_response(question, responses, tokenizer, model):
    """
    Use the language model to pick the best answer among ``responses``.

    The candidates are numbered from 1 and the model is asked (in Turkish) to
    reply with the number of the best one.

    Args:
        question (str): The user's question.
        responses (list): Candidate answers; must not be empty.
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``decode``.
        model: Model exposing ``device`` and ``generate``.

    Returns:
        str: The chosen answer. The first answer is returned when the model's
        reply contains no number or the number is out of range.

    Raises:
        ValueError: If ``responses`` is empty.
    """
    import re

    if not responses:
        # Fail before spending a generation call on an impossible choice.
        raise ValueError("responses must contain at least one answer")
    if len(responses) == 1:
        # Only one candidate: no need to ask the model.
        return responses[0]

    # Number the candidates starting at 1.
    numbered_responses = "\n".join([f"{idx+1}. {resp}" for idx, resp in enumerate(responses)])

    selection_prompt = f"Kullanıcının sorusu: {question}\n\nVerilen cevaplar:\n{numbered_responses}\n\nBu cevaplar arasından soruya en iyi yanıt veren hangisidir? Lütfen sadece cevabın numarasını yaz."

    messages = [
        {"role": "user", "content": selection_prompt}
    ]

    # return_dict=True makes the return type explicit: a mapping holding both
    # input_ids and attention_mask, whatever the library's default is. The
    # whole mapping is moved to the model's device and forwarded to generate()
    # so the attention mask is actually used.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Greedy decoding: temperature/top_p are sampling-only settings and are
    # deliberately not passed together with do_sample=False.
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=False,
        repetition_penalty=1.0,
        pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    )

    # generate() returns prompt + continuation; keep the continuation only.
    selection_response = outputs[0][prompt_length:]
    selection_text = tokenizer.decode(selection_response, skip_special_tokens=True).strip()

    # Take the first integer anywhere in the reply so that answers such as
    # "2", "2." and "Cevap 2" are all understood.
    number = re.search(r"\d+", selection_text)
    if number is None:
        # Unintelligible reply: fall back to the first answer.
        return responses[0]

    selected_index = int(number.group()) - 1
    if 0 <= selected_index < len(responses):
        return responses[selected_index]

    # Out-of-range choice: fall back to the first answer.
    return responses[0]


# Hücre 3: Ana Kod Çalıştırma

In [None]:
def _print_section(title):
    """Print a blank line, then ``title`` framed by two 50-character ``=`` rules."""
    rule = "=" * 50
    print("\n" + rule)
    print(title)
    print(rule)


def _print_documents(documents):
    """Print each document's ``doc_id`` and text, followed by an empty line."""
    for doc in documents:
        print(f"ID: {doc.metadata['doc_id']} || {doc.page_content}\n")


def main():
    """Run one question through the whole pipeline and print every stage.

    Reads a message from standard input, rewrites it with
    ``generate_prompt_engineering``, retrieves context with
    ``rag_with_multiple_pdfs``, then generates an answer. The answer is checked
    with ``evaluate_answer_relevance``; on failure generation is retried with
    feedback, up to three attempts in total. If no attempt passes,
    ``select_best_response`` chooses among the collected answers.

    Uses the module-level ``tokenizer`` and ``model``.
    """
    # Start an empty conversation and record the user's message in it.
    chat_history = []
    prompt = input("Mesajınızı Giriniz: ")
    chat_history.append({"role": "user", "content": prompt})

    # Rewrite the user's message into an optimized prompt.
    optimized_prompt = generate_prompt_engineering(prompt, tokenizer, model)
    _print_section("OPTİMİZE EDİLMİŞ PROMPT:")
    print(optimized_prompt)

    # Build the final prompt and collect the retrieved documents.
    (
        final_prompt,
        metadata_info,
        chroma_relevant_documents,
        bm25_documents,
        hybrid_search_documents,
    ) = rag_with_multiple_pdfs(optimized_prompt, chat_history)

    # Show the BM25, hybrid and semantic (Chroma) results, in that order.
    _print_section("KARAKTER BAZLI ARAMA | BM25:")
    _print_documents(bm25_documents)

    _print_section("HİBRİT ARAMA:")
    _print_documents(hybrid_search_documents)

    _print_section("SEMANTİK ARAMA:")
    _print_documents(chroma_relevant_documents)

    max_attempts = 3  # upper bound on generation attempts
    is_relevant = False
    previous_responses = []  # every answer produced so far

    for attempt in range(max_attempts):
        if attempt == 0:
            request = final_prompt
        else:
            # Retry: prepend feedback that quotes the previous answer.
            feedback = f"Cevabın kullanıcının sorusuna tam olarak yanıt vermedi. Önceki cevabın: '{previous_responses[-1]}'. Lütfen kullanıcının sorusuna odaklanarak daha uygun bir cevap ver."
            request = f"{feedback}\n\n{final_prompt}"

        response = generate_llama3_response(request, tokenizer, model)

        # Judge the answer against the user's original message.
        is_relevant = evaluate_answer_relevance(prompt, response, tokenizer, model)
        previous_responses.append(response)

        if is_relevant:
            break

    if not is_relevant:
        # No attempt passed the check: let the model pick the best of them.
        response = select_best_response(prompt, previous_responses, tokenizer, model)

    # Record the chosen answer in the conversation history.
    chat_history.append({"role": "assistant", "content": response})

    _print_section("ASİSTAN YANITI:")
    print(response)



if __name__ == "__main__":
    main()
