In [None]:
import os
import time
import string

import numpy as np
import pandas as pd

from datetime import datetime, timedelta
from collections import Counter

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.dates as mdates

from tqdm import tqdm

# Font
from matplotlib import font_manager
font_path = "/workspace/fonts/"
font_list = os.listdir(font_path)
for font_file in font_list:
    try:
        font_manager.fontManager.addfont(font_path + font_file)
    except:
        raise Exception(f"Cannot Load {font_path+font_file}")


'''
N_ROW = 1
N_COL = 2
X_SIZE = 6
Y_SIZE = 4
DPI = 300
# plt.rcParams['font.family'] = ['NanumSquare', 'Helvetica']
plt.rcParams['font.family'] = ['Helvetica', 'NanumSquare']

fig=plt.figure(figsize = (X_SIZE*N_COL,Y_SIZE*N_ROW), dpi=DPI)
spec = gridspec.GridSpec(ncols=N_COL, nrows=N_ROW, figure=fig)#, width_ratios=[1,1,.1], wspace=.3)
axes = []

axi=0
ax = fig.add_subplot(spec[axi//N_COL,axi%N_COL]) # row, col
ax.text(-.05, 1.02, '%s'%(string.ascii_uppercase[axi]), transform=ax.transAxes, size=12, weight='bold')

'''

In [1]:
import torch.nn.functional as F
from transformers import AutoModel

instruction = "Given a question, retrieve passages that answer the question"
queries = [
    "are judo throws allowed in wrestling?", 
    "how to become a radiology technician in michigan?",
]

passages = [
    "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
    "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan.",
]

model = AutoModel.from_pretrained(
    "kakaocorp/kanana-nano-2.1b-embedding",
    trust_remote_code=True,
).to("cuda")

max_length = 512
query_embeddings = model.encode(queries, instruction=instruction, max_length=max_length)
passage_embeddings = model.encode(passages, instruction="", max_length=max_length)

# get the embeddings with DataLoader (spliting the datasets into multiple mini-batches)
# batch_size = 2
# query_embeddings = model._do_encode(queries, batch_size=batch_size, instruction=instruction, max_length=max_length)
# passage_embeddings = model._do_encode(passages, batch_size=batch_size, instruction="", max_length=max_length)

query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
passage_embeddings = F.normalize(passage_embeddings, p=2, dim=1)


scores = (query_embeddings @ passage_embeddings.T) * 100
print(scores.tolist())

# Output:
# [[84.36527252197266, 31.752296447753906], [35.940425872802734, 81.82719421386719]]

config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

configuration_kanana2vec.py:   0%|          | 0.00/10.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/kakaocorp/kanana-nano-2.1b-embedding:
- configuration_kanana2vec.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_kanana2vec.py:   0%|          | 0.00/9.50k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/kakaocorp/kanana-nano-2.1b-embedding:
- modeling_kanana2vec.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/4.17G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

RuntimeError: No CUDA GPUs are available

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread

model_name = "LGAI-EXAONE/EXAONE-Deep-2.4B-AWQ"
streaming = True    # choose the streaming option

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Choose your prompt:
#   Math example (AIME 2024)
prompt = r"""Let $x,y$ and $z$ be positive real numbers that satisfy the following system of equations:
\[\log_2\left({x \over yz}\right) = {1 \over 2}\]\[\log_2\left({y \over xz}\right) = {1 \over 3}\]\[\log_2\left({z \over xy}\right) = {1 \over 4}\]
Then the value of $\left|\log_2(x^4y^3z^2)\right|$ is $\tfrac{m}{n}$ where $m$ and $n$ are relatively prime positive integers. Find $m+n$.

Please reason step by step, and put your final answer within \boxed{}."""
#   Korean MCQA example (CSAT Math 2025)
prompt = r"""Question : $a_1 = 2$인 수열 $\{a_n\}$과 $b_1 = 2$인 등차수열 $\{b_n\}$이 모든 자연수 $n$에 대하여\[\sum_{k=1}^{n} \frac{a_k}{b_{k+1}} = \frac{1}{2} n^2\]을 만족시킬 때, $\sum_{k=1}^{5} a_k$의 값을 구하여라.

Options :
A) 120
B) 125
C) 130
D) 135
E) 140
 
Please reason step by step, and you should write the correct option alphabet (A, B, C, D or E) within \\boxed{}."""

messages = [
    {"role": "user", "content": prompt}
]
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt"
)

if streaming:
    streamer = TextIteratorStreamer(tokenizer)
    thread = Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids.to("cuda"),
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=32768,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        streamer=streamer
    ))
    thread.start()

    for text in streamer:
        print(text, end="", flush=True)
else:
    output = model.generate(
        input_ids.to("cuda"),
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=32768,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
    )
    print(tokenizer.decode(output[0]))


# RAG Example

In [None]:
# # I. Data Load
path = '/Data/Data/KISTI/'
N_docs = 10

splits = load_pdfs_from_folder(path)

# 텍스트와 메타데이터 추출
texts = [doc.page_content for doc in splits]
metadatas = [doc.metadata for doc in splits]

# II. Vector Embedding Model
# model_embedding = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
model_embedding = "jhgan/ko-sroberta-multitask"

from sentence_transformers import SentenceTransformer
from langchain_chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# HuggingFace용 래퍼 생성
embedding_function = HuggingFaceEmbeddings(model_name=model_embedding)

# 벡터 저장소 생성
reset_DB()
vectorstore = Chroma.from_texts(
    texts=texts,
    embedding=embedding_function,  # 임베딩 직접 전달
    metadatas=metadatas,
    persist_directory="./vectorstore",
)
retriever = vectorstore.as_retriever()
retriever.search_kwargs["k"] = N_docs  # 상위 20개 문서를 반환

# III. LLM Model
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# EXAONE 모델 로드
model_name = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 모델을 GPU로 이동
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

def generate_response(prompt):
    messages = [
        {"role": "system", "content": "너는 KISTI 규정집을 참조하여 질문에 관련된 규정 항목들을 활용해 상황을 설명하고, 필요한 경우 여러 항목을 종합적으로 참조하는 챗봇이야. 또한 규정 해석에 도움이 될 만한 추가 정보도 제공해."},
        {"role": "user", "content": prompt}
    ]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    )
    output = model.generate(
        input_ids.to(device),
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=1024,
        do_sample=False,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)


# IV. RAG Chain
from langchain_core.prompts import PromptTemplate

# Prompt 설정
prompt_template = PromptTemplate(
    template="""
    You are a regulation-based question-answering assistant. Use the following retrieved regulation contexts to answer the question. Reference multiple related regulations if applicable, and provide interpretations or additional insights if they help clarify the regulation.

    Question: {question} 

    Context: {context} 

    Answer:
    """,
    input_variables=["question", "context"]
)

prompt = PromptTemplate.from_template(prompt_template)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

def rag_chain(question):
    # 검색
    retrieved_docs = retriever.get_relevant_documents(question)
    context = format_docs(retrieved_docs)
    
    # Prompt 작성 및 응답 생성
    formatted_prompt = prompt.format(question=question, context=retrieved_docs)
    return generate_response(formatted_prompt), context

# 테스트
result = rag_chain("난 선임연구원인데, 내년에 육아 관련해서 휴가가 조금 많이 필요한 상황이야. 어떻게 하면 될까?")
print(result)
