# 1.  Installations and Settings 🛠️

In [1]:
%%bash
pip install -qqq -U langchain-huggingface
pip install -qqq -U langchain
pip install -qqq -U langchain-community
pip install -qqq -U faiss-cpu

# 2.  Setting up your LLM 🧠

In [2]:
from langchain_huggingface import HuggingFaceEndpoint

# This info's at the top of each HuggingFace model page
hf_model = "mistralai/Mistral-7B-Instruct-v0.3"

llm = HuggingFaceEndpoint(repo_id = hf_model)

  from .autonotebook import tqdm as notebook_tqdm


# 3.  Retrieval Augmented Generation 🔃

## 3.1 loading Data

In [None]:
AsyncChromiumLoader

In [2]:
# load dependancies
from langchain_community.document_loaders import AsyncHtmlLoader

In [4]:
urls = [
    "https://www.daad.de/en/studying-in-germany/scholarships/daad-scholarships/",
    "https://www.daad.de/en/studying-in-germany/universities/the-right-degree-programme/", 
    "https://www2.daad.de/deutschland/stipendium/datenbank/en/21148-scholarship-database/", 
    "https://www2.daad.de/deutschland/stipendium/datenbank/en/21148-scholarship-database/?status=&origin=&subjectGrps=&daad=&intention=&q=&page=1&detail=57742130",
    "https://www2.daad.de/deutschland/stipendium/datenbank/en/21148-scholarship-database/?status=&origin=&subjectGrps=&daad=&intention=&q=&page=1&detail=57135739",
    "https://www2.daad.de/deutschland/stipendium/datenbank/en/21148-scholarship-database/?status=&origin=&subjectGrps=&daad=&intention=&q=&page=1&detail=57507783",
    "https://www.daad.de/en/studying-in-germany/universities/universities/",
    "https://www.daad.de/en/studying-in-germany/universities/haw/",
    "https://www.daad.de/en/studying-in-germany/universities/dual-studies/",
    "https://www.daad.de/en/studying-in-germany/scholarships/daad-scholarships/",
    "https://www.daad.de/en/studying-in-germany/scholarships/information-for-scholarship-applicants/#requirements",
    "https://www.daad.de/en/studying-in-germany/scholarships/funding-options/",
    "https://www.daad.de/en/studying-in-germany/requirements/", 
    "https://www.daad.de/en/studying-in-germany/requirements/overview/", 
    "https://www.daad.de/en/studying-in-germany/requirements/application-process/",
    "https://www.daad.de/en/studying-in-germany/requirements/enrolling/",
    "https://www.daad.de/en/studying-in-germany/requirements/studienkollegs/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/visa/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/registering/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/health-insurance/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/renting/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/german-language/learning/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/german-language/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/finances/",
    "https://www.daad.de/en/studying-in-germany/living-in-germany/safety/",
    "https://www.daad.de/en/studying-in-germany/work-career/",
    "https://www.daad.de/en/studying-in-germany/work-career/career-planning/",
    "https://www.daad.de/en/studying-in-germany/work-career/side-jobs/",
    "https://www.daad.de/en/studying-in-germany/work-career/work-placements/",
    "https://www.daad.de/en/studying-in-germany/advisory-service/publications/",
    "https://www.daad.de/en/studying-in-germany/advisory-service/psychological-wellbeing/",
    "https://www.daad.de/en/the-daad/mobility-with-a-disability/"
]

In [5]:
from langchain_community.document_loaders import AsyncHtmlLoader

loader = AsyncHtmlLoader(urls)
docs = loader.load()

Fetching pages: 100%|##########| 32/32 [00:08<00:00,  3.96it/s]


## 3.2 Splitting the document

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=800,
                                               chunk_overlap=150)

docs = text_splitter.split_documents(docs)

## 3.3 Creating vectors with embeddings

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

# embeddings
embedding_model = "sentence-transformers/all-MiniLM-l6-v2"
embeddings_folder = "docs"

embeddings = HuggingFaceEmbeddings(model_name=embedding_model,
                                   cache_folder=embeddings_folder)

  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## 3.4 Creating a vector database

In [8]:
from langchain.vectorstores import FAISS

vector_db = FAISS.from_documents(docs, embeddings)

In [9]:
vector_db.save_local("docs")

In [10]:
retriever = vector_db.as_retriever(search_kwargs={"k": 3}), # top 2 results only, speed things up

In [11]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key = 'chat_history',
                                  return_messages = True,
                                  output_key = 'answer')

In [18]:
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

hf_model = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceEndpoint(repo_id=hf_model)

embedding_model = "sentence-transformers/all-MiniLM-l6-v2"
embeddings_folder = "docs"

embeddings = HuggingFaceEmbeddings(model_name=embedding_model,
                                   cache_folder=embeddings_folder)

vector_db = FAISS.load_local("docs", embeddings, allow_dangerous_deserialization=True)

retriever = vector_db.as_retriever(search_kwargs={"k": 3})

memory = ConversationBufferMemory(memory_key = 'chat_history',
                                  return_messages = True,
                                  output_key = 'answer')  # Set output_key to 'answer'

template = """You are a nice chatbot having a conversation with a human. Answer the question based only on the following context and previous conversation. Keep your answers short and succinct.

Previous conversation:
{chat_history}

Context to answer question:
{context}

New human question: {question}
Response:"""

prompt = PromptTemplate(template = template,
                        input_variables = ["context", "question"])

# chain
chain = ConversationalRetrievalChain.from_llm(llm,
                                              retriever = retriever,
                                              memory = memory,
                                              return_source_documents = True,
                                              combine_docs_chain_kwargs = {"prompt": prompt})



In [17]:
while True:
    user_input = input("You: ")

    # Check for exit condition
    if user_input.lower() == 'end':
        print("Ending the conversation. Goodbye!")
        break

    # Get the response from the conversation chain
    response = chain.invoke(user_input)

    # Print the chatbot's response
    print("Chatbot:", response["answer"])

You:  Who makes a decision my daad application?


Chatbot:   The DAAD scholarships are awarded based on professional criteria by an independent and voluntary selection committee. One of the most important criteria is the candidate's academic qualification. The selection committee is responsible for making the decision on the allocation of scholarships.


You:  who makes the decision on the application?


Chatbot:  The DAAD scholarships are awarded based on professional criteria by an independent and voluntary selection committee. One of the most important criteria is the candidate's academic qualification. The selection committee is responsible for making the decision on the allocation of scholarships.


You:  how do I apply for a daad scholarship? 


Chatbot:   To apply for a DAAD scholarship, please read the call for applications for your chosen scholarship programme in the <a class="js-link" href="https://www2.daad.de/deutschland/stipendium/datenbank/en/21148-scholarship-database/">scholarship database</a>. Applications for this scholarship programme are possible in the period June until the stated application deadline. Click on "Application portal" at the bottom of the page to go to the DAAD portal. There you will be provided with an online application form to enter your application data. This is what you have to do:

1. Register in the DAAD portal (<a href="https://www.meindaad.de/de/help/wie-kann-ich-meine-bewerbung-einreichen-i/">Read notes about registering in the portal &gt;&gt;</a>)
2. Request recommendation form


You:  end


Ending the conversation. Goodbye!


In [None]:
a 30m² studio flat costs around 696 EUR in Frankfurt

In [19]:
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import streamlit as st

In [20]:
print('\n'.join(f'{m.__name__}=={m.__version__}' for m in globals().values() if hasattr(m, '__version__')))


streamlit==1.35.0
