Phần Backend

In [260]:
import os
import pandas as pd
import numpy as np
import openai
from dotenv import load_dotenv
from openai import AzureOpenAI
from sklearn.neighbors import NearestNeighbors
import json


In [261]:
def split_text(text, max_length=1000,min_length=100):
    """Split text into chunks of max_length."""
    words = text.split()
    chunks = []
    current_chunk = []

    for word in words:
        current_chunk.append(word)
        if len(' '.join(current_chunk)) < max_length and len(' '.join(current_chunk)) > min_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = []

        # If the last chunk didn't reach the minimum length, add it anyway
        if current_chunk:
            chunks.append(' '.join(current_chunk))

        return chunks


    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

In [262]:
def create_embeddings(client,text, model=os.getenv('AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT')):
    """Create embeddings for the given text using OpenAI API."""
    embeddings = client.embeddings.create(input = text, model=model).data[0].embedding
    return embeddings

In [263]:
def chatbot(client,user_input,flattened_df):
    # Convert the question to a query vector
    query_vector = create_embeddings(client, user_input)

    # Fit NearestNeighbors on the embeddings if not already fitted
    embeddings_matrix = np.array(flattened_df['embeddings'].tolist())
    nbrs = NearestNeighbors(n_neighbors=3, metric='cosine').fit(embeddings_matrix)

    # Find the most similar documents
    distances, indices = nbrs.kneighbors([query_vector])

    # add documents to query to provide context
    history = []
    for index in indices[0]:
        history.append(flattened_df['chunks'].iloc[index])

    # combine the history and the user input
    history.append(user_input)

    # create a message object
    messages = [
        {"role": "system", "content": "You are an AI assistant that helps with AI questions."},
        {"role": "user", "content": history[-1]}
    ]

    # use chat completion to generate a response
    response = client.chat.completions.create(
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        temperature=0.7,
        max_tokens=800,
        messages=messages
    )

    return response.choices[0].message.content.strip()

In [264]:
def summarize_for_chart(client,answer):
    """Extract a concise description to use for image generation."""
    prompt = f"Please describe an illustration for the following content in a clear, concise, and unambiguous way, under 200 words:\n\n{answer}"

    messages = [
        {"role": "system", "content": "You are an assistant specialized in crafting prompts for image generation."},
        {"role": "user", "content": prompt}
    ]

    response = client.chat.completions.create(
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        temperature=0.5,
        max_tokens=200,
        messages=messages
    )

    return response.choices[0].message.content.strip()



In [265]:
def draw(prompt):
    drawbot = AzureOpenAI(
        api_key=os.environ['AZURE_OPENAI_API_DALLE_KEY'],  # this is also the default, it can be omitted
        api_version = os.environ['AZURE_OPENAI_API_DALLE_VERSION'],  # e.g. "2023-06-01-preview"
        azure_endpoint= os.environ['AZURE_OPENAI_DALLE_ENDPOINT']
    )

    model = "dall-e-3"
    response = drawbot.images.generate(
        model=model,
        prompt=prompt,
        size="1024x1024",
        n=1
    )
    generation_response = json.loads(response.model_dump_json())
    # Set the directory for the stored image
    image_dir = os.path.join(os.curdir, 'images')

    # If the directory doesn't exist, create it
    if not os.path.isdir(image_dir):
        os.mkdir(image_dir)

    # Initialize the image path (note the filetype should be png)
    image_url = os.path.join(image_dir, 'generated-image.png')

    # Retrieve the generated image
    image_url = generation_response["data"][0]["url"] 
    return image_url

In [266]:
# List to store data before creating DataFrame
data = []

# Your file paths
data_paths = [
    "data/frameworks.md",# ?WT.mc_id=academic-105485-koreyst
    "data/own_framework.md",#?WT.mc_id=academic-105485-koreyst
    "data/perceptron.md"#?WT.mc_id=academic-105485-koreyst
]

# Read each file and collect content
for path in data_paths:
    with open(path, 'r', encoding='utf-8') as file:
        file_content = file.read()
        data.append({'path': path, 'text': file_content})

# Create DataFrame from the list
df = pd.DataFrame(data)

# Display first few rows
print(df.head())


                    path                                               text
0     data/frameworks.md  # Neural Network Frameworks\n\nAs we have lear...
1  data/own_framework.md  # Introduction to Neural Networks. Multi-Layer...
2     data/perceptron.md  # Introduction to Neural Networks: Perceptron\...


In [267]:
load_dotenv()  # Load environment variables from .env file

True

In [268]:
splitted_df = df.copy()
splitted_df['chunks'] = splitted_df['text'].apply(lambda x: split_text(x, 400, 300))
splitted_df

Unnamed: 0,path,text,chunks
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,[#]
1,data/own_framework.md,# Introduction to Neural Networks. Multi-Layer...,[#]
2,data/perceptron.md,# Introduction to Neural Networks: Perceptron\...,[#]


In [269]:
flattened_df = splitted_df.explode('chunks')
flattened_df.head()

Unnamed: 0,path,text,chunks
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,#
1,data/own_framework.md,# Introduction to Neural Networks. Multi-Layer...,#
2,data/perceptron.md,# Introduction to Neural Networks: Perceptron\...,#


In [270]:
client = AzureOpenAI(
    api_key = os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version = os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
  )

In [271]:
embeddings = []
for chunk in flattened_df['chunks']:
    embeddings.append(create_embeddings(client,chunk))

# store the embeddings in the dataframe
flattened_df['embeddings'] = embeddings

flattened_df.head()

Unnamed: 0,path,text,chunks,embeddings
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,#,"[0.0079061733558774, -0.0016227177111431956, 0..."
1,data/own_framework.md,# Introduction to Neural Networks. Multi-Layer...,#,"[0.0079061733558774, -0.0016227177111431956, 0..."
2,data/perceptron.md,# Introduction to Neural Networks: Perceptron\...,#,"[0.008603407070040703, -0.002345751039683819, ..."


Phần UI

In [272]:
def chatbot_with_image(user_input, history=[]):
    # Sinh câu trả lời văn bản
    answer = chatbot(client, user_input, flattened_df)

    # Tóm tắt nội dung để tạo prompt ảnh
    description = summarize_for_chart(client,answer)

    # Sinh ảnh minh họa
    try:
        image_url = draw(description)
    except openai.BadRequestError as e:
        print("⚠️ Prompt bị chặn:", e)
        image_url = None


    # Trả về dạng message (text + image)
    bot_message = f"{answer}\n\n![Minh họa]({image_url})"#
    
    # Gradio Chatbot expects history = [(user, bot), ...]
    history.append((user_input, bot_message))
    return history, history


In [273]:
import gradio as gr

with gr.Blocks() as demo:
    chatbot_ui = gr.Chatbot()
    user_input = gr.Textbox(show_label=False, placeholder="Nhập câu hỏi...")
    state = gr.State([])

    def respond(user_message, chat_history):
        return chatbot_with_image(user_message, chat_history)

    user_input.submit(respond, [user_input, state], [chatbot_ui, state])

demo.launch()

  chatbot_ui = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7884
* To create a public link, set `share=True` in `launch()`.


