In [None]:
# 필요한 라이브러리 설치
!pip install faiss-cpu
!pip install sentence-transformers
!pip install transformers
!pip install fastapi uvicorn nest_asyncio pyngrok
!pip install datasets

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m65.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from to

In [None]:
# ================================
# 1. Sentence-BERT 임베딩
# ================================
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List
import numpy as np
import faiss

from sentence_transformers import SentenceTransformer
from transformers import pipeline, GPT2Tokenizer, GPTNeoForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Hub id shared by the GPT-Neo weights and their tokenizer
base_checkpoint = "EleutherAI/gpt-neo-1.3B"

# FastAPI application object that the /qa route is registered on
app = FastAPI()

# Sentence-BERT encoder used for sentence embeddings
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# GPT-Neo tokenizer; the EOS token is reused as the padding token
tokenizer = GPT2Tokenizer.from_pretrained(base_checkpoint)
tokenizer.pad_token = tokenizer.eos_token

# GPT-Neo weights and the text-generation pipeline built on top of them
model = GPTNeoForCausalLM.from_pretrained(base_checkpoint)
qa_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
from datasets import Dataset
import torch
import json
from google.colab import files

# Interactive Colab upload widget. The returned value is not referenced again in
# the visible notebook; the file is read back from disk by name below.
uploaded = files.upload()

# 2. Load the data
# The JSON file is iterated as a sequence of records, and the formatting step reads
# item['Question'] and item['Answer'] from each record.
# Presumably the file looks like this (verify against the actual file):
# [
#   {"Question": "What is BERT?", "Answer": "BERT is a natural language processing model."}
# ]
# NOTE(review): the earlier example here used lowercase "question"/"context"/"answer"
# keys, which does not match the keys the formatting step actually reads.

with open("qa_augmented_500.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

Saving qa_augmented_500.json to qa_augmented_500.json


In [None]:
# 3. Build the training texts: each record becomes one {"text": ...} entry whose
#    value is the question line followed by the answer line.
formatted_data = [
    {"text": f"질문: {record['Question']}\n" + f"답변: {record['Answer']}"}
    for record in raw_data
]

In [None]:
# 4. Convert to a Dataset
# Builds a `datasets.Dataset` from the list of {"text": ...} records.
dataset = Dataset.from_list(formatted_data)

def tokenize(batch):
    """
    Tokenize the "text" column of a batch.

    Every example is padded to, and truncated at, 512 tokens.
    Returns whatever the module-level `tokenizer` returns for the batch.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 512,
    }
    return tokenizer(batch["text"], **encode_options)

# Apply `tokenize` over the dataset in batches (batched=True).
tokenized_dataset = dataset.map(tokenize, batched=True)

# Collator configured with mlm=False, i.e. not masked-language-modeling.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
# 5. Training configuration
# Mixed precision is enabled only when a CUDA device is available.
use_fp16 = torch.cuda.is_available()

training_args = TrainingArguments(
    # Output and checkpointing
    output_dir="./gptneo-finetuned-qa",
    overwrite_output_dir=True,
    save_steps=250,
    save_total_limit=1,
    # Schedule and batch sizing
    num_train_epochs=5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # Optimisation
    learning_rate=5e-5,
    warmup_steps=50,
    fp16=use_fp16,
    # Logging
    logging_steps=25,
)

In [None]:
# 6. Build the Trainer
# The tokenizer is passed as `processing_class`: the `tokenizer=` keyword of
# `Trainer.__init__` is deprecated and emits a FutureWarning when used.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    data_collator=data_collator,
)

  trainer = Trainer(


In [None]:
# 7. Start training
# NOTE(review): the recorded output of this cell shows an interactive wandb API-key
# prompt before training starts — confirm whether W&B logging is intended.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcirclehalf17[0m ([33mcirclehalf17-no-job[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
25,0.788
50,0.1646
75,0.0851
100,0.0477
125,0.0386
150,0.0312
175,0.0264
200,0.0277
225,0.0255
250,0.0253


TrainOutput(global_step=625, training_loss=0.0635282283782959, metrics={'train_runtime': 493.47, 'train_samples_per_second': 5.066, 'train_steps_per_second': 1.267, 'total_flos': 9280935690240000.0, 'train_loss': 0.0635282283782959, 'epoch': 5.0})

In [None]:
# 8. Save the fine-tuned model and tokenizer to Google Drive
from google.colab import drive

# Target directory on Drive; the serving section loads the checkpoint from this path.
SAVE_DIR = "/content/drive/MyDrive/gptneo-finetuned-qa"

# Mount Drive explicitly so this cell does not depend on a mount performed outside
# the notebook. Without a mount the path would be a plain directory on the VM disk.
drive.mount("/content/drive")

model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

('/content/drive/MyDrive/gptneo-finetuned-qa/tokenizer_config.json',
 '/content/drive/MyDrive/gptneo-finetuned-qa/special_tokens_map.json',
 '/content/drive/MyDrive/gptneo-finetuned-qa/vocab.json',
 '/content/drive/MyDrive/gptneo-finetuned-qa/merges.txt',
 '/content/drive/MyDrive/gptneo-finetuned-qa/added_tokens.json')

In [None]:
# ================================
# 1. Sentence-BERT 임베딩
# ================================
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List
import numpy as np
import faiss

from sentence_transformers import SentenceTransformer
from transformers import pipeline, GPT2Tokenizer, GPTNeoForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling

from google.colab import drive

# Directory on Google Drive that holds the fine-tuned GPT-Neo checkpoint
FINETUNED_MODEL_DIR = "/content/drive/MyDrive/gptneo-finetuned-qa"

# Mount Drive explicitly so loading works on a fresh kernel instead of relying on
# a mount performed outside the notebook.
drive.mount("/content/drive")

# FastAPI application object that the /qa route is registered on
app = FastAPI()

# Load the Sentence-BERT model used for sentence embeddings
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load the fine-tuned GPT-Neo model and its tokenizer from Drive
model = GPTNeoForCausalLM.from_pretrained(FINETUNED_MODEL_DIR)
tokenizer = GPT2Tokenizer.from_pretrained(FINETUNED_MODEL_DIR)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token
qa_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


In [None]:
def get_bert_embedding(text):
    """
    Encode a single text with the Sentence-BERT model.

    The text is wrapped in a one-element list and encoded with
    `normalize_embeddings=True` (intended for cosine-similarity search).
    The encoder output is returned cast to float32.
    """
    return embedding_model.encode([text], normalize_embeddings=True).astype('float32')

In [None]:
# ================================
# 2. FAISS 인덱스 생성
# ================================
def create_faiss_index(embeddings):
    """
    Build a FAISS inner-product index over `embeddings`.

    The index dimension is taken from `embeddings.shape[1]`, and all rows
    are added before the index is returned. Inner product is used as the
    cosine-similarity score, which presumes the vectors are already
    normalized by the caller.
    """
    ip_index = faiss.IndexFlatIP(embeddings.shape[1])
    ip_index.add(embeddings)
    return ip_index

In [None]:
# ================================
# 3. GPT-Neo 응답 생성 (Pipeline 사용)
# ================================
def generate_gpt_response(question):
    """
    Generate an answer for `question` with the GPT-Neo text-generation pipeline.

    The prompt uses the same "질문: ...\\n답변:" layout as the fine-tuning texts.
    Only the first answer is returned: the prompt prefix is removed from the
    generated text, and anything from a further "질문:" or "답변:" marker onward
    is dropped. Taking the text after the *last* "답변:" would return the answer
    to a follow-on question whenever the model keeps generating extra turns.

    Args:
        question: Question text inserted into the prompt.

    Returns:
        The generated answer with surrounding whitespace stripped. An answer
        that itself contains one of the markers is cut at that marker.
    """
    prompt = f"질문: {question}\n답변:"
    outputs = qa_generator(
        prompt,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.5,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    generated = outputs[0]['generated_text']

    # The pipeline output is expected to begin with the prompt; drop it.
    if generated.startswith(prompt):
        completion = generated[len(prompt):]
    else:
        # Fallback: keep everything after the first answer marker.
        completion = generated.split("답변:", 1)[-1]

    # Keep only the first answer; discard any further generated turns.
    for marker in ("질문:", "답변:"):
        completion = completion.split(marker, 1)[0]

    return completion.strip()

In [None]:
# Q&A dataset: the FAQ questions that are embedded and indexed below.
# An incoming question is matched to the most similar entry of this list.
qa_texts = [
    "예선대회 결과는 언제 공지되나요?",
    "문제 정답과 점수는 공개되지 않나요?",
    "채점 기준은 어떻게 되나요?",
    "대회 기간을 놓쳤습니다. 어떻게 해야 하나요?",
    "답안 작성이 되지 않습니다.",
    "대회 원서접수를 했는데 문제보기가 되지 않습니다.",
    "대회 기간 중인데 문제보기가 되지 않습니다.",
    "예선대회 진행방식은 어떻게 되나요?",
    "대회 관련 기출 문제들은 블로그 등에 올려도 되나요?",
    "접속자가 많아 접수가 어렵습니다.",
    "신청 기간 이후 추가 접수 가능한가요?",
    "학교에 다니지 않는 사람은 어떤 부문에 참가해야 하나요?",
    "해외거주 중인데 참가 가능한가요?",
    "팀명은 꼭 7자 이내로만 작성해야 하나요?",
    "대회 참가 접수를 확인하고 싶습니다.",
    "대회 원서 접수 시 팀원이 모두 가입해야 하나요?",
    "다른 부문끼리 팀 구성이 가능한가요?",
    "같은 학교 학생끼리만 팀 구성이 가능한가요?",
    "대회 참가 자격이 궁금합니다.",
    "대회 개최 일정은 어떻게 되나요?",
    "대회 관련 문의사항은 어디로 문의하나요?"
]

# Embed every FAQ question, stack the per-question arrays row-wise,
# and build the search index over the stacked matrix.
faq_vectors = [get_bert_embedding(faq_question) for faq_question in qa_texts]
embeddings = np.vstack(faq_vectors)
index = create_faiss_index(embeddings)

# --------------------------
# 입력/출력 모델 정의
# --------------------------
# Request body of the POST /qa endpoint.
class QARequest(BaseModel):
    question: str  # question text read by qa_endpoint

# Response body of the POST /qa endpoint (used as its response_model).
class QAResponse(BaseModel):
    response: str  # generated answer text

# --------------------------
# FastAPI 라우터
# --------------------------
@app.post("/qa", response_model=QAResponse)
def qa_endpoint(request: QARequest):
    """
    Answer a question by matching it to the closest FAQ question.

    The incoming question is embedded and its single nearest neighbour is
    looked up in the FAISS index. The answer is generated for the matched
    FAQ question, not for the raw user question. Both questions are printed
    to stdout.
    """
    user_question = request.question

    # Top-1 search; the similarity scores are not used.
    _scores, neighbor_ids = index.search(get_bert_embedding(user_question), 1)
    matched_faq = qa_texts[neighbor_ids[0][0]]

    print("질문: ", user_question)
    print("유사질문: ", matched_faq)

    # Generate the answer from the matched FAQ question.
    return QAResponse(response=generate_gpt_response(matched_faq))

In [None]:
# 2. Expose the FastAPI app through an ngrok tunnel and start the server
import os
from getpass import getpass

from pyngrok import ngrok
import nest_asyncio
import uvicorn

# Local port served by uvicorn and forwarded by the ngrok tunnel
SERVER_PORT = 3000

# 3. ngrok configuration
# The auth token must never be stored in the notebook. It is read from the
# NGROK_AUTHTOKEN environment variable, or prompted for when that is unset.
# NOTE(review): a token was previously hard-coded in this cell; treat it as
# leaked and revoke it in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)
ngrok.kill()  # shut down any tunnel left over from a previous run
public_url = ngrok.connect(SERVER_PORT)  # expose the local port publicly
print("🔗 Public URL:", public_url.public_url)

# 4. Avoid event-loop conflicts (Colab-specific)
nest_asyncio.apply()

# 5. Run uvicorn
uvicorn.run(app, host="0.0.0.0", port=SERVER_PORT)

🔗 Public URL: https://66e8-35-234-37-58.ngrok-free.app


INFO:     Started server process [3776]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:3000 (Press CTRL+C to quit)


INFO:     123.213.153.172:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     123.213.153.172:0 - "GET /openapi.json HTTP/1.1" 200 OK
질문:  예선 결과는 언제 나와?
유사질문:  예선대회 결과는 언제 공지되나요?
INFO:     123.213.153.172:0 - "POST /qa HTTP/1.1" 200 OK
