In [1]:
# Run this notebook on an A100 GPU runtime.
# Upload the backend folder to Google Drive before continuing.
import os
os.chdir('/content/drive/MyDrive/final_project/backend')  # set this to the location of the backend folder on Google Drive

In [None]:
!pip install -qqq accelerate==0.26.1 peft==0.8.2 bitsandbytes==0.42.0 transformers==4.37.2 langchain faiss-gpu sentence-transformers

In [None]:
!pip install -U langchain-community

In [None]:
!pip -qqq install gradio==3.45.0 --use-deprecated=legacy-resolver typing_extensions --upgrade

In [None]:
!pip install pymysql uvicorn pyngrok fastapi

In [None]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    )
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.memory import VectorStoreRetrieverMemory
from langchain.memory import ConversationBufferWindowMemory
from langchain.docstore import InMemoryDocstore
from langchain.vectorstores import FAISS
import faiss

from transformers import ElectraTokenizer, ElectraForSequenceClassification, pipeline

# 4-bit NF4 quantization settings used when loading the language model;
# float16 is the compute dtype handed to bitsandbytes.
compute_dtype = torch.float16

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

# Load the PEFT checkpoint and its tokenizer from the same repository id, so the
# two cannot drift apart when MODEL_DIR is changed.
MODEL_DIR = "hskhyl/EEVE-finetuned-05-13_1"
model = AutoPeftModelForCausalLM.from_pretrained(
    MODEL_DIR,
    quantization_config=quant_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# Sentence-embedding model; embeddings are normalized at encode time.
model_name = "jhgan/ko-sbert-nli"
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    encode_kwargs=encode_kwargs,
)

# Empty in-memory FAISS L2 index that backs the retriever-based memory.
# embedding_size must equal the vector length produced by `hf`
# (768 is assumed for the model above -- TODO confirm if model_name changes).
embedding_size = 768
index = faiss.IndexFlatL2(embedding_size)
embedding_fn = hf.embed_query
vectorstore = FAISS(embedding_fn, index, InMemoryDocstore({}), {})

# Memory that retrieves the 2 nearest stored entries from the vector store.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=2))
retriever_memory = VectorStoreRetrieverMemory(retriever=retriever, return_docs=False)

# Window memory configured with k=1. The original cell also built a second,
# default-configured ConversationBufferWindowMemory() and discarded it; that
# no-op statement has been removed.
buffer_memory = ConversationBufferWindowMemory(k=1, return_messages=False)

In [None]:
# Load the second PEFT checkpoint (presumably the DPO-tuned variant, judging by
# its repository id) with the quantization settings already defined as
# `quant_config` in the model-loading cell above. That cell also provides the
# `AutoPeftModelForCausalLM` import, so the identical config is no longer
# re-declared here.
# NOTE: this rebinds MODEL_DIR; after this cell it names the DPO repository,
# not the checkpoint loaded into `model`.
MODEL_DIR = "hskhyl/05-13_1-dpo"
model_dpo = AutoPeftModelForCausalLM.from_pretrained(
    MODEL_DIR,
    quantization_config=quant_config,
    device_map="auto",
)

In [None]:
# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# One repository id supplies both the classifier and its tokenizer.
# NOTE: this rebinds `model_name`, which the model-loading cell used for the
# embedding model.
model_name = "nlp04/korean_sentiment_analysis_kcelectra"
sentiment_analyzer = pipeline(
    'sentiment-analysis',
    model=model_name,
    tokenizer=model_name,
    device=device,
)

In [None]:
from transformers import pipeline

# One repository id supplies both the text2text model and its tokenizer.
converter_model = "KoJLabs/bart-speech-style-converter"

# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
device = 0 if torch.cuda.is_available() else -1

speech_style_converter = pipeline(
    'text2text-generation',
    model=converter_model,
    tokenizer=converter_model,
    device=device,
)

In [11]:
import uvicorn
from threading import Thread

def run_server():
    """Serve the ASGI app referenced as "main:app" with uvicorn on 0.0.0.0:8000.

    The call runs in whichever thread invokes it, so it is meant to be used as
    a thread target rather than called directly from a notebook cell.
    """
    uvicorn.run("main:app", host="0.0.0.0", port=8000, log_level="info")

# Start the server only once. Re-running this cell while the previous server
# thread is still alive would launch a second uvicorn that cannot bind port
# 8000 and would replace `thread` with a reference to that failed attempt.
_previous_thread = globals().get("thread")
if not (isinstance(_previous_thread, Thread) and _previous_thread.is_alive()):
    thread = Thread(target=run_server)
    thread.start()

In [None]:
# After running the cell above, wait about 3 minutes before running the cells below.

In [12]:
from pyngrok import ngrok
import getpass
import os

# Paste the ngrok auth token between the quotes. If it is left empty, the
# NGROK_AUTH_TOKEN environment variable is used, and failing that a hidden
# prompt asks for it, so the secret does not have to be saved in the notebook
# and an empty token is caught here rather than when the tunnel is opened.
NGROK_AUTH_TOKEN = ''
if not NGROK_AUTH_TOKEN:
    NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass.getpass("ngrok auth token: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)



In [None]:
# Open an ngrok tunnel to local port 8000 (the port the uvicorn server is started on).
public_url = ngrok.connect(8000)
print("Public URL:", public_url)
# After running, two addresses are printed; copy the first (left-hand) one into BACKEND_URL.

In [None]:
# ngrok_process = ngrok.get_ngrok_process()
# try:
#     # Print the ngrok process logs in real time
#     ngrok_process.proc.wait()
# except KeyboardInterrupt:
#     print("Shutting down ngrok...")
#     ngrok.kill()

In [None]:
import json
import requests
import gradio as gr
BACKEND_URL = ""

def res(message: str, history, tone: str) -> str:
    """Send one chat message to the backend and return its reply.

    Used as the `fn` callback of the Gradio chat interface.

    Args:
        message: Text the user typed.
        history: Chat history supplied by the chat interface; not used here
            and not sent to the backend.
        tone: Speaking style selected by the user, forwarded as "tone".

    Returns:
        The value stored under "result" in the backend's JSON response.

    Raises:
        ValueError: If BACKEND_URL has not been filled in.
        requests.HTTPError: If the backend answers with an error status.
        requests.Timeout: If the backend does not answer in time.
    """
    if not BACKEND_URL:
        raise ValueError("BACKEND_URL is empty; set it to the public ngrok URL of the backend")

    payload = {"msg": message, "tone": tone}
    response = requests.post(
        # Tolerate a pasted URL that ends with "/" so the path is not "//counselor".
        BACKEND_URL.rstrip("/") + "/counselor",
        # json= serializes the payload and also sets the JSON Content-Type header.
        json=payload,
        # Generous limit (seconds) so a hung backend cannot block the chat forever.
        timeout=300,
    )
    # Surface HTTP errors directly instead of a KeyError on "result" below.
    response.raise_for_status()
    return response.json()["result"]

# Text box the user types into.
chat_textbox = gr.Textbox(placeholder="고민을 얘기해주세요🙌", container=False, scale=1)

# Speaking styles offered to the user; supplied to `res` as the extra `tone` input.
tone_choices = ["멘토스", "전문적인 상담사", "문어체", "안드로이드", "아재", "entp", "할아버지", "나루토", "선비", "소심한"]
tone_radio = gr.Radio(choices=tone_choices, label="말투 선택", value="멘토스")

# Every option of the chat UI gathered in one place, then unpacked into ChatInterface.
chat_interface_options = dict(
    fn=res,
    textbox=chat_textbox,
    title="멘토스(Mental Mate Talk on Support)",
    description="멘토스는 당신의 고민을 들어주며 격려해주는 상담친구에요😊",
    theme="soft",
    submit_btn="보내기",
    retry_btn="다시 보내기 ↩",
    undo_btn="이전 대화 삭제 ❌",
    clear_btn="전체 대화 삭제 💫",
    additional_inputs=tone_radio,
    additional_inputs_accordion_name="말투를 변경하고 싶으시면 클릭해주세요😀",
)
demo = gr.ChatInterface(**chat_interface_options)

# Enable request queuing, then launch with a public share link and debug output.
demo.queue().launch(debug=True, share=True)