# FAQ Search using sqlitesearch

This notebook demonstrates how to use `sqlitesearch` to build a simple FAQ search
for DataTalks.Club courses.

In [None]:
import requests

## Fetch the courses index

In [None]:
base_faq_url = 'https://datatalks.club/faq'
courses_index_url = f'{base_faq_url}/json/courses.json'

# Bound the request so an unresponsive server cannot hang the notebook,
# and fail loudly on HTTP errors instead of trying to JSON-decode an error page.
response = requests.get(courses_index_url, timeout=30)
response.raise_for_status()

# Parsed JSON index of courses; each entry's 'path' is used below to fetch its FAQ.
courses_index = response.json()
courses_index

## Load all FAQ documents

In [None]:
# Flat list collecting the FAQ entries of every course in the index.
documents = []

# One session for all per-course requests so the connection can be reused.
with requests.Session() as session:
    for course in courses_index:
        course_path = course['path']
        course_url = f'{base_faq_url}/{course_path}'

        # Time-box each request and surface HTTP errors (e.g. a 404 for a
        # stale path) rather than failing later inside .json().
        response = session.get(course_url, timeout=30)
        response.raise_for_status()

        course_data = response.json()
        documents.extend(course_data)

len(documents)

## Inspect a document

In [None]:
# Display one loaded record as a sample of the data; the index built below
# reads its 'section', 'question', 'answer' and 'course' fields.
documents[5]

## Create the search index

In [None]:
from sqlitesearch import TextSearchIndex

# 'section', 'question' and 'answer' are registered as text fields and
# 'course' as a keyword field (used for filtering later); the index is
# stored in the 'faq.db' file.
index = TextSearchIndex(
    db_path='faq.db',
    keyword_fields=['course'],
    text_fields=['section', 'question', 'answer'],
)

# NOTE(review): if 'faq.db' is left over from a previous run, confirm how
# fit() behaves on an already-populated index before re-running this cell.
index.fit(documents)

## Search with filtering and boosting

In [None]:
# Free-text query to run against the index.
question = 'I just discovered the course. Can I join now?'

# Filter on the 'course' keyword field.
filter_dict = {'course': 'llm-zoomcamp'}

# Per-field boosts: 'question' gets 3, 'section' gets 0.5; 'answer' is not listed.
boost_dict = {'question': 3, 'section': 0.5}

search_results = index.search(
    question,
    num_results=5,
    boost_dict=boost_dict,
    filter_dict=filter_dict,
)

search_results

## Define a search function

In [None]:
def search(question, course='llm-zoomcamp', num_results=5):
    """Search the FAQ index for entries relevant to ``question``.

    Args:
        question: Free-text query string.
        course: Value passed as the ``course`` filter. Defaults to
            ``'llm-zoomcamp'``, the value this function previously hard-coded.
        num_results: Number of results requested from the index. Defaults to 5,
            the value this function previously hard-coded.

    Returns:
        Whatever ``index.search`` returns. The module-level ``index`` is looked
        up at call time, so rebinding ``index`` changes which index is queried.
    """
    filter_dict = {
        'course': course
    }

    # Boost values: 3 for the 'question' field, 0.5 for 'section'.
    boost_dict = {
        'question': 3,
        'section': 0.5,
    }

    return index.search(
        question,
        filter_dict=filter_dict,
        boost_dict=boost_dict,
        num_results=num_results
    )

## Try different queries

In [None]:
# Full-sentence question, run through search() defined above.
search('How do I get a certificate?')

In [None]:
# Another full-sentence question against the same index.
search('What are the prerequisites?')

In [None]:
# Short keyword-style query rather than a full question.
search('homework deadline')

## Persistence

The index is saved to `faq.db`. You can reopen it later without reindexing:

In [None]:
# Close the index object created earlier.
index.close()

# Construct a new index object with the same field configuration, pointing at
# the existing 'faq.db' file. fit() is deliberately not called again.
index = TextSearchIndex(
    db_path='faq.db',
    keyword_fields=['course'],
    text_fields=['section', 'question', 'answer'],
)

# search() reads the global `index` at call time, so it now queries the
# reopened index.
search('certificate')