/* 安裝函式庫 */
1. ModuleNotFoundError: No module named 'docx'
=> conda install conda-forge::python-docx

2. ImportError: cannot import name 'cached_download' from 'huggingface_hub' (C:\Users\Fgadmin\anaconda3\envs\LLM-RAG\lib\site-packages\huggingface_hub\__init__.py)
Downgrading huggingface_hub from 0.26.1 to 0.20.2 worked for me.
=> conda install -c conda-forge huggingface_hub=0.20.2

/* 使用 Google 大型語言模型 API */
https://aistudio.google.com/app/apikey?hl=zh-tw => Get API key
API 和服務 => 已啟用的 API 和服務 => 啟用 Generative Language API

In [3]:
import gradio as gr
import docx
import requests
import torch  # Import torch for topk function
from sentence_transformers import SentenceTransformer, util

# Load the sentence-embedding model used for semantic search. It is created
# once at module level and reused by retrieve_relevant_content to embed both
# the document paragraphs and the task query.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')  # Lightweight model for document search

# Function to read and extract text from the uploaded DOCX file
def read_docx(file):
    """Return the text of every paragraph in an uploaded DOCX file.

    Only the ``name`` attribute of ``file`` is used; it is handed to
    ``docx.Document`` to open the document. The result is a list holding
    one string per paragraph, in document order.
    """
    document = docx.Document(file.name)
    return [paragraph.text for paragraph in document.paragraphs]

# Function to perform document retrieval using semantic search
def retrieve_relevant_content(task, paragraphs):
    """Pick the paragraphs most relevant to ``task`` via semantic search.

    Args:
        task: One of the task labels offered in the UI dropdown.
        paragraphs: Paragraph strings extracted from the document.

    Returns:
        A string in every case: the top-ranked paragraphs (at most five)
        joined by single spaces, ``"Invalid task selected."`` when ``task``
        is not recognised, or ``""`` when there is no non-blank paragraph
        to search.
    """
    # Each task maps to the query sentence used to rank the paragraphs.
    task_queries = {
        "立法院擬答": "向立法院起草正式的模擬回應，回覆應明確指出要點並提供專業的語氣",
        "新聞稿": "寫出一個具備教育專業語氣的新聞稿",
        "總結內容": "總結出重點摘要",
        "生成教育相關內容": "挑選出與教育內容相關的段落，並給予更深入的詳細解說",
    }
    query = task_queries.get(task)
    if query is None:
        # Keep the return type a plain string on every path; callers
        # interpolate the result directly into text.
        return "Invalid task selected."

    # Blank paragraphs carry no content and would only take up result slots.
    candidates = [text for text in paragraphs if text and text.strip()]
    if not candidates:
        # Nothing to embed or rank.
        return ""

    # Embed the paragraphs and the query, then rank by cosine similarity.
    paragraph_embeddings = model.encode(candidates, convert_to_tensor=True)
    query_embedding = model.encode(query, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, paragraph_embeddings)[0]

    top_k = min(5, len(candidates))  # Never ask for more than are available
    top_results = torch.topk(scores, k=top_k)

    # Convert the index tensor to plain ints before indexing the list.
    relevant_paragraphs = [candidates[idx] for idx in top_results.indices.tolist()]
    return " ".join(relevant_paragraphs)

# Function to interact with the Gemini LLM API
def generate_response(task, file):
    """Generate text for ``task`` from an uploaded DOCX file via the Gemini API.

    The document is read, the paragraphs most relevant to the task are
    retrieved, and a prompt built from them is posted to the
    ``generateContent`` endpoint.

    Args:
        task: The task label selected in the UI dropdown.
        file: The uploaded file object, or ``None`` when nothing was uploaded.

    Returns:
        The generated text on success; otherwise a human-readable message
        describing what went wrong. This function reports failures through
        its return value so the UI can display them.
    """
    # Guard against clicking "Generate" before filling in the form.
    if not task:
        return "Invalid task selected."
    if file is None:
        return "Error: no DOCX file uploaded."

    # Read file content as a list of paragraphs
    paragraphs = read_docx(file)

    # Retrieve relevant content
    relevant_content = retrieve_relevant_content(task, paragraphs)
    if not isinstance(relevant_content, str):
        # An unrecognised task may come back as a (message, []) pair rather
        # than text; show the message instead of sending it to the model.
        return relevant_content[0]

    # Define the API endpoint and your API key
    api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
    api_key = " 你的 API key "  # Replace with your actual API key

    headers = {"Content-Type": "application/json"}

    # Construct the API prompt based on relevant content
    prompt = f"{task} 根據以下內容: {relevant_content}"
    payload = {"contents": [{"parts": [{"text": prompt}]}]}

    # Make the API request. The key goes through ``params`` so it is
    # URL-encoded properly, and a timeout keeps the UI from hanging forever.
    try:
        response = requests.post(
            api_url,
            params={"key": api_key},
            json=payload,
            headers=headers,
            timeout=60,
        )
    except requests.RequestException as exc:
        # Report only the exception type: its message can embed the request
        # URL, which carries the API key.
        return f"Error: request failed ({type(exc).__name__})"

    # Check if the request was successful
    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    try:
        response_json = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        return "No generated text found"

    # Print the entire response for debugging
    print("Response JSON:", response_json)

    # Extract the generated text. A candidate may lack "content"/"parts"
    # (for example when generation stops without producing text), so every
    # lookup failure maps to the same fallback message.
    try:
        return response_json["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError):
        return "No generated text found"

# Gradio Interface
# Task labels shown in the dropdown; each one is a task handled by
# retrieve_relevant_content.
dropdown_choices = ["立法院擬答", "新聞稿", "總結內容", "生成教育相關內容"]

# Build the UI: a title, a task selector, a DOCX upload and a text output.
with gr.Blocks() as interface:
    gr.Markdown("## Gemini-Integrated Chatbot with RAG")
    task = gr.Dropdown(choices=dropdown_choices, label="Select Task")
    file = gr.File(label="Upload DOCX File", file_types=[".docx"])  # Restrict uploads to .docx
    output = gr.Textbox(label="Generated Output")
    
    submit_btn = gr.Button("Generate")
    # On click, call generate_response with the selected task and uploaded
    # file, and show its return value in the output textbox.
    submit_btn.click(generate_response, inputs=[task, file], outputs=output)

# Launch the interface
interface.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




--------


Response JSON: {'candidates': [{'content': {'parts': [{'text': '## 康芮颱風路徑南修 恐增為強颱 氣象署：最快明天下午發海警\n\n**台北訊**  中央氣象署預報中心科長林伯東今（29）日上午表示，康芮颱風目前在鵝鑾鼻東南方1050公里海面朝西前進，未來朝西北方向移動靠近台灣，預計周四及周五影響最大，11月1日2日逐漸遠離。康芮颱風目前持續增強中，有可能增為中度颱風，甚至不排除增為強颱。\n\n林伯東指出，上午8點最新路徑預測與凌晨兩點相較再往南修，且颱風速度稍稍快一點，不排除登陸，但路徑仍有變化，仍有不確定性。如果從東半邊通過，中南部下雨不明顯，如果比較偏南通過，南部甚至中部下雨就要留意一下。\n\n根據最新預測，康芮颱風速度稍稍快一點，明天下午到晚上可能發海警，周三上半天發陸警。\n\n林伯東提醒，康芮颱風暴風半徑大，全台都會受影響，未來一周台灣天氣都會受其影響。請民眾密切注意氣象資訊，做好防颱準備。 \n'}], 'role': 'model'}, 'finishReason': 'STOP', 'index': 0, 'safetyRatings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE'}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE'}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE'}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE'}]}], 'usageMetadata': {'promptTokenCount': 330, 'candidatesTokenCount': 286, 'totalTokenCount': 616}, 'modelVersion': 'gemini-1.5-flash-001'}
Response JSON: {'candidates': [{'conten