Give me Python code for question answering with a RoBERTa model. The context should come from a few Cyrillic PDF files in the pdf_files directory, a few Cyrillic HTML files in the html_files directory, and a questions-answers.csv file in the csv_files directory.

In [None]:
import os
import pandas as pd
from transformers import pipeline
from bs4 import BeautifulSoup
import pdfplumber

# Load the extractive question-answering pipeline once, at module import time;
# answer_question() below reuses this single instance for every call.
# NOTE(review): "deepset/roberta-base-squad2" appears to be an English SQuAD 2.0
# checkpoint, while the sources this script reads are Cyrillic. A multilingual
# checkpoint (e.g. an XLM-RoBERTa model fine-tuned on SQuAD) is presumably a
# better fit -- confirm before relying on the answers.
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Function to extract text from PDFs
def extract_text_from_pdfs(directory):
    """Return the concatenated text of every PDF file found in *directory*.

    Files are processed in sorted name order so that the resulting context is
    reproducible (``os.listdir`` returns entries in arbitrary order), and the
    ``.pdf`` extension is matched case-insensitively so ``REPORT.PDF`` is not
    silently skipped.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            visited).

    Returns:
        The text of every page that yielded text, each followed by a newline.
        An empty string when no PDF produced any text.

    Raises:
        OSError: If *directory* cannot be listed (for example, it does not
            exist).
    """
    page_texts = []
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".pdf"):
            continue
        with pdfplumber.open(os.path.join(directory, filename)) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                # Skip pages for which no text was extracted.
                if page_text:
                    page_texts.append(page_text + "\n")
    # Join once at the end instead of growing a string with repeated "+=".
    return "".join(page_texts)

# Function to extract text from HTML files
def extract_text_from_htmls(directory):
    """Return the visible text of every HTML file found in *directory*.

    Each file is opened in binary mode and handed to BeautifulSoup as bytes,
    so the parser detects the document encoding itself (for example from a
    ``<meta charset>`` declaration). Cyrillic pages are frequently saved as
    windows-1251 or KOI8-R; decoding them as UTF-8 up front would raise
    ``UnicodeDecodeError``.

    ``<script>`` and ``<style>`` elements are removed before text extraction
    because their contents are code, not readable context. Text fragments are
    joined with a single space so that adjacent elements such as
    ``<p>a</p><p>b</p>`` do not run together.

    Files are processed in sorted name order and the ``.html`` / ``.htm``
    extensions are matched case-insensitively.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            visited).

    Returns:
        The extracted text of each document, each followed by a newline.
        An empty string when the directory holds no HTML files.

    Raises:
        OSError: If *directory* cannot be listed or a file cannot be read.
    """
    documents = []
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith((".html", ".htm")):
            continue
        # Bytes in, so BeautifulSoup performs the encoding detection.
        with open(os.path.join(directory, filename), "rb") as html_file:
            soup = BeautifulSoup(html_file, "html.parser")
        # Drop non-visible code blocks so they do not pollute the QA context.
        for tag in soup(["script", "style"]):
            tag.decompose()
        documents.append(soup.get_text(separator=" ", strip=True) + "\n")
    return "".join(documents)

# Function to extract context from a CSV file
def extract_context_from_csv(directory):
    """Build context text from the question/answer CSV files in *directory*.

    Every ``.csv`` file (extension matched case-insensitively, files visited
    in sorted name order) is read with pandas and each row is rendered as
    ``"Q: <question> A: <answer>"`` on its own line.

    Rows where either the question or the answer is missing are skipped;
    otherwise pandas' NaN placeholder would be formatted as the literal text
    "nan" and end up in the QA context.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            visited).

    Returns:
        One ``Q: ... A: ...`` line per complete row, each terminated by a
        newline. An empty string when there are no CSV files or no complete
        rows.

    Raises:
        OSError: If *directory* cannot be listed or a file cannot be read.
        KeyError: If a CSV file lacks a ``question`` or ``answer`` column.
    """
    lines = []
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".csv"):
            continue
        df = pd.read_csv(os.path.join(directory, filename))
        # Keep only rows that have both a question and an answer.
        pairs = df[["question", "answer"]].dropna()
        lines.extend(
            f"Q: {question} A: {answer}\n"
            for question, answer in zip(pairs["question"], pairs["answer"])
        )
    return "".join(lines)

# Function to answer a question given a context
def answer_question(question, context):
    """Answer *question* from *context* using the module-level QA pipeline.

    Args:
        question: The question text.
        context: The text in which to look for the answer.

    Returns:
        Whatever ``qa_pipeline`` returns for the question/context pair, or
        the plain string ``"No context available for answering."`` when
        *context* is empty or contains only whitespace.
    """
    has_context = bool(context.strip())
    if has_context:
        return qa_pipeline(question=question, context=context)
    # Nothing to search in: report that instead of invoking the model.
    return "No context available for answering."

# Main function
if __name__ == "__main__":
    # Source directories, one per supported input format.
    pdf_directory, html_directory, csv_directory = (
        "pdf_files",
        "html_files",
        "csv_files",
    )

    # Build a single context string: PDF text first, then HTML text, then
    # the question/answer pairs from the CSV files.
    context_parts = [
        extract_text_from_pdfs(pdf_directory),
        extract_text_from_htmls(html_directory),
        extract_context_from_csv(csv_directory),
    ]
    combined_context = "".join(context_parts)

    # Sample question written in Cyrillic.
    question = "Какво е RoBERTa?"
    result = answer_question(question, combined_context)

    # Report the outcome.
    print(f"Question: {question}")
    if not isinstance(result, dict):
        # answer_question returned a plain message rather than a prediction.
        print(result)
    else:
        print(f"Answer: {result['answer']}")
        print(f"Confidence Score: {result['score']:.2f}")