### dependency 설치(매 pull마다 실행하기)

In [None]:
%pip install --upgrade pip
%pip install -r requirements.txt

# llama index를 사용한 gemini 챗봇

### .env 파일을 통해 API key 설정

In [16]:
from dotenv import load_dotenv
import os

#load .env
load_dotenv()

GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')

### png 불러오기(tesseract를 사용한 OCR)

In [None]:
from IPython.display import display, Markdown
from llama_index.core import Document
from PIL import Image
import pytesseract

# 문서의 형식 = 텍스트 
# tesseract 를 사용하기 위해서는 따로 설치를 하여야 합니다
# 튜토리얼 : https://www.allmyuniverse.com/implementing-python-ocr-with-tesseract/ 
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

image_path = r'data_png\dwld.png'
doc2_text = pytesseract.image_to_string(Image.open(image_path), lang='kor')

# 객체로 변환하기 
doc2 = Document(text=doc2_text)
# Document 객체 출력
print(doc2)

### pdf 학습 데이터 위치 설정 후 불러오기

In [1]:
from llama_index.core import SimpleDirectoryReader

# 학습 데이터 위치 설정 후 불러오기
input_dir = r"data_pdf"
reader = SimpleDirectoryReader(input_dir=input_dir)
doc1 = reader.load_data()

input_dir = r"resources/data"
reader = SimpleDirectoryReader(input_dir=input_dir)
doc2 = reader.load_data()

doc1 += doc2

### pdf OCR로 텍스트 변환(예정)
https://medium.com/@dr.booma19/extracting-text-from-pdf-files-using-ocr-a-step-by-step-guide-with-python-code-becf221529ef

### 파일 결합

In [None]:
# all_docs = doc1 + [doc2] 
# print(all_docs)

### 임베딩 다운로드

In [3]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model_ko = HuggingFaceEmbedding(model_name="bespin-global/klue-sroberta-base-continue-learning-by-mnr") 



### llama index 설정

In [5]:
from llama_index.core import VectorStoreIndex
from llama_index.llms.gemini import Gemini
from llama_index.core import ServiceContext

llm = Gemini(model_name='models/gemini-1.5-flash', request_timeout=120.0)

service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_ko)
index = VectorStoreIndex.from_documents(doc1,service_context=service_context,show_progress=True)

index.storage_context.persist()

query_engine = index.as_query_engine()


  service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_ko)
Parsing nodes: 100%|██████████| 390/390 [00:01<00:00, 303.92it/s]
Generating embeddings: 100%|██████████| 677/677 [21:05<00:00,  1.87s/it]


### 멀티턴 질문과 답변

In [6]:
# 멀티 턴 대화를 위한 history 리스트
history = []

def ask_query(query, history):
    # 대화 히스토리에 현재 쿼리를 추가
    history.append({"role": "user", "content": query})
    
    # 대화 히스토리를 문자열로 병합
    formatted_history = "\n".join([f"{item['role']}: {item['content']}" for item in history])
    
    # 쿼리 엔진에 현재 히스토리를 전달하여 응답 생성
    response = query_engine.query(formatted_history)
     
    # 응답을 히스토리에 추가
    history.append({"role": "ai", "content": response})
    
    return response



### 예시 대화

In [None]:
# query1 = "해운법에 대해 설명"
# response1 = ask_query(query1, history)
# print(f"Model: {response1}")

# query2 = "더 자세히 알려줘"
# response2 = ask_query(query2, history)
# print(f"Model: {response2}")

# query3 = "칸예 웨스트에 대해 설명해줘"
# response3 = ask_query(query3, history)
# print(f"Model: {response3}")

## Flask로 웹 서버 구동하기

In [7]:
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/chatbot', methods=['POST'])
def chatbot_response():
    user_input = request.json.get('message')
    # 여기에 챗봇 로직을 추가하세요.
    response = ask_query(user_input, history)
    return jsonify({'response': str(response)})

if __name__ == '__main__':
    app.run(port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [02/Sep/2024 13:59:37] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 13:59:53] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 13:59:59] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:00:06] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:00:15] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:00:22] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:00:54] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:01:08] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:01:17] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:01:36] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:27:13] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:27:33] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:34:50] "POST /chatbot HTTP/1.1" 200 -
127.0.0.1 - - [02/Sep/2024 14:35:05] "POST /chatbot HTTP/1.