In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install -U sentence-transformers



In [None]:
!pip install streamlit transformers torch



In [None]:
!pip install -q streamlit

In [None]:
!pip install streamlit-option-menu



In [None]:
%%writefile app.py
import streamlit as st
from streamlit_option_menu import option_menu
import random
import os
import re
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import torch
from transformers import GPT2LMHeadModel, PreTrainedTokenizerFast
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import pairwise_distances

# Reproducibility helper
def reset_seeds(seed):
    """Seed every random number generator used by the app.

    Args:
        seed (int): Value applied to Python's ``random`` module, NumPy and
            PyTorch (CPU and every CUDA device).

    Note:
        ``PYTHONHASHSEED`` is exported to the environment here; it is read at
        interpreter start-up, so it only affects processes launched afterwards.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the currently selected device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can select different kernels between runs, which
    # defeats the deterministic flag above, so switch it off explicitly.
    torch.backends.cudnn.benchmark = False
# Seed all random number generators once, each time the script is executed.
SEED = 42
reset_seeds(SEED)

# Root data folder on the mounted Google Drive. The trailing slash is required:
# loaded_model() appends 'model/<checkpoint name>' directly to this string.
DATA_PATH = '/content/drive/MyDrive/DACON 경진대회/한솔 도배하자 질의응답/최종 제출/data/'

# Similarity scoring
@st.cache_resource
def _load_similarity_model(model_name='jhgan/ko-sroberta-multitask'):
    """Build the sentence-embedding model once and let Streamlit cache it."""
    return SentenceTransformer(model_name)


def evaluate_similarity(input_text, generated_text, alpha=0.95):
    """Score how closely ``generated_text`` matches ``input_text``.

    The score is a weighted blend of two measures:
    ``alpha * cosine(sentence embeddings) + (1 - alpha) * jaccard(whitespace tokens)``.

    Args:
        input_text (str): Reference text.
        generated_text (str): Candidate text to score.
        alpha (float): Weight of the embedding cosine similarity; the Jaccard
            token overlap receives ``1 - alpha``.

    Returns:
        float: The blended similarity score.
    """
    # Loading the embedding model is expensive; reuse the cached instance
    # instead of re-creating it for every scored candidate.
    bert_model = _load_similarity_model()
    input_embedding = bert_model.encode(input_text)
    generated_embedding = bert_model.encode(generated_text)
    cosine_sim = 1 - pairwise_distances([input_embedding], [generated_embedding], metric='cosine')[0][0]

    input_tokens = set(input_text.split())
    generated_tokens = set(generated_text.split())
    all_tokens = input_tokens | generated_tokens
    # Both texts can be empty/whitespace-only; avoid dividing by zero.
    jaccard_sim = len(input_tokens & generated_tokens) / len(all_tokens) if all_tokens else 0.0

    return alpha * cosine_sim + (1 - alpha) * jaccard_sim

# Model loading
@st.cache_resource
def loaded_model(DATA_PATH):
    """Load the fine-tuned KoGPT2 checkpoint stored under ``DATA_PATH``.

    Args:
        DATA_PATH: Data root (with trailing slash); the checkpoint is read
            from ``<DATA_PATH>model/<checkpoint name>``.

    Returns:
        The ``GPT2LMHeadModel`` restored from that checkpoint directory.
    """
    checkpoint_name = 'kogpt2_120000_3epoch_0.21946loss'
    checkpoint_dir = f'{DATA_PATH}model/{checkpoint_name}'
    return GPT2LMHeadModel.from_pretrained(checkpoint_dir)

# Tokenizer loading
@st.cache_resource
def loaded_tokenizer():
    """Create the ``skt/kogpt2-base-v2`` tokenizer with the app's special tokens.

    Returns:
        The tokenizer returned by ``AutoTokenizer.from_pretrained``.
    """
    # Special-token strings passed through to the tokenizer constructor.
    special_tokens = {
        'bos_token': '</s>',
        'eos_token': '</s>',
        'unk_token': '<unk>',
        'pad_token': '<pad>',
        'mask_token': '<mask>',
    }
    return AutoTokenizer.from_pretrained("skt/kogpt2-base-v2", **special_tokens)

# Module-level model and tokenizer instances; main() passes them to Chatbot().
model = loaded_model(DATA_PATH)
tokenizer = loaded_tokenizer()

# Answer generation
def Chatbot(input_text, tokenizer, model, max_length=200, temperature=0.87, top_k=27, top_p=0.7, num_samples=5, generated=True):
    """Sample several candidate answers and return the one closest to the question.

    The question is wrapped as ``<q>{question}</s><a>``, ``num_samples``
    completions are sampled from ``model``, and each is scored against the
    question with ``evaluate_similarity``. The highest-scoring answer wins.

    Args:
        input_text (str): Raw user question.
        tokenizer: Tokenizer providing ``encode`` and ``decode``.
        model: Language model providing ``generate``.
        max_length (int): ``max_length`` forwarded to ``model.generate``
            (total sequence length, prompt included).
        temperature (float): Sampling temperature.
        top_k (int): Top-k sampling cutoff.
        top_p (float): Nucleus sampling threshold.
        num_samples (int): Number of candidate answers to sample.
        generated (bool): Unused; kept so existing callers keep working.

    Returns:
        str | None: The best candidate answer, or ``None`` when
        ``num_samples`` is less than 1.
    """
    question = input_text.strip()
    prompt = '<q>' + question + '</s><a>'
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    prompt_token_count = input_ids.shape[-1]

    best_generated_text = None
    best_similarity_score = float('-inf')
    for _ in range(num_samples):
        result_ids = model.generate(input_ids,
                                    max_length=max_length,
                                    temperature=temperature,
                                    top_k=top_k,
                                    top_p=top_p,
                                    do_sample=True,
                                    num_return_sequences=1,
                                    )
        # Keep only the tokens produced after the prompt. Slicing the decoded
        # string by character count is fragile: it assumes decode() reproduces
        # the prompt exactly and that the output always ends with '</s>', so it
        # cuts real characters when generation stops at max_length.
        answer_ids = result_ids[0][prompt_token_count:]
        generated_text = tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

        # Score against the bare question, not the prompt with its <q>/<a> markup.
        similarity_score = evaluate_similarity(question, generated_text)
        # Always accept the first candidate so a NaN score cannot leave us with None.
        if best_generated_text is None or similarity_score > best_similarity_score:
            best_similarity_score = similarity_score
            best_generated_text = generated_text
    return best_generated_text

# App entry point
def main():
    """Render the Streamlit UI: a sidebar menu and the inquiry (chatbot) page.

    The inquiry page shows a banner image, a question box and a submit button.
    On submit, the question is answered with ``Chatbot`` using the module-level
    ``tokenizer`` and ``model``. The second menu entry currently renders no page.
    """
    with st.sidebar:
        choice = option_menu("Menu", ["문의하기", "사업제휴"],
                             # One icon per menu entry.
                             icons=['house', 'kanban'],
                             menu_icon="app-indicator", default_index=0,
                             styles={
                                 "container": {"padding": "4!important", "background-color": "#fafafa"},
                                 "icon": {"color": "black", "font-size": "25px"},
                                 "nav-link": {"font-size": "16px", "color": "black", "text-align": "left", "margin":"0px", "--hover-color": "#cae8e6"},
                                 "nav-link-selected": {"background-color": "#08c7b4"},
                             }
        )
    if choice == "문의하기":
        url = "https://www.hansoldeco.co.kr/views/res/imgs/page/inquiry/sec_01_img_01.png"
        st.image(url)
        st.title('한솔데코와 함께하세요.\n이곳은 도배하자 상담실입니다.\n무엇을 도와드릴까요?')
        user_input = st.text_input(label="질문", value="")
        submit_button = st.button("Submit")
        if submit_button:
            if not user_input.strip():
                # Tell the user why nothing happened instead of silently ignoring the click.
                st.warning("질문을 입력해 주세요.")
            else:
                # Generate the consultation answer.
                with st.spinner("답변 생성 중입니다..."):
                    answer = Chatbot(user_input, tokenizer, model, num_samples=5, generated=False, max_length=200, temperature=0.5, top_k=20, top_p=0.95)
                if answer:
                    st.success(f"답변완료 : \n\n{answer}")
                else:
                    # Chatbot can return None or an empty string; do not show that as a successful answer.
                    st.error("답변을 생성하지 못했습니다. 다시 시도해 주세요.")

if __name__ == "__main__":
    main()

Overwriting app.py


In [None]:
!pip install streamlit



In [None]:
!pip install streamlit_option_menu
from streamlit_option_menu import option_menu



In [None]:
# `import urllib` alone does not guarantee the `request` submodule is loaded.
import urllib.request

# Look up this machine's public IPv4 address; the printed label below names it
# as the password / endpoint IP for localtunnel.
with urllib.request.urlopen('https://ipv4.icanhazip.com', timeout=10) as response:
    public_ip = response.read().decode('utf8').strip("\n")
print("Password/Enpoint IP for localtunnel is:", public_ip)

# An IPv4 address (xx.xxx.xx.xxx or xx.xxx.xxx.xxx) is printed to the right of "Password/Enpoint IP for localtunnel is:".

Password/Enpoint IP for localtunnel is: 34.69.236.149


In [None]:
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
[K[?25h+ localtunnel@2.0.2
updated 1 package and audited 36 packages in 0.587s

3 packages are looking for funding
  run `npm fund` for details

found 2 [93mmoderate[0m severity vulnerabilities
  run `npm audit fix` to fix them, or `npm audit` for details

[33m[39m
[33m   ╭────────────────────────────────────────────────────────────────╮[39m
   [33m│[39m                                                                [33m│

In [None]:
!streamlit run app.py &>/content/logs.txt &

In [None]:
# 실행하기 전에 런타임 재시작 한 번 해주자

In [None]:
!npx localtunnel --port 8501

# "your url is:" 우측에 사이트 주소가 생성된다.

[K[?25hnpx: installed 22 in 2.809s
your url is: https://pretty-wombats-design.loca.lt
^C


In [None]:
# 테라죠의 기대수명이 어떻게 돼? 그리고 장판은 어떤 장점이 있어?

In [None]:
# 청고벽돌의 단점은 무엇이며, 도배하기 적합한 계절은 언제인가요?
# 청고벽돌은 다른 외장재보다 시공비가 많이 드는 편이며, 오래된 수입 벽돌을 사용하면 강도가 약할 수 있다는 점이 단점입니다. 또 도배하기 좋은 계절은 봄과 가을입니다.
# 봄과 가을은 계절적으로 선선하고 상대적으로 습기가 적으므로 하자가 발생할 요인이 적어 도배를 하기에 최적의 시기입니다.