In [1]:
from dotenv import load_dotenv
import os
import shutil
import pandas as pd

In [2]:
from typing import List, Tuple

In [3]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

In [4]:
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.document_transformers import LongContextReorder

In [5]:
from langchain_openai import ChatOpenAI
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [6]:
import tiktoken

In [26]:
class Evaluator:
    """RAG pipeline that reviews Python snippets against the PEP 8 style guide.

    The pipeline reads the PEP 8 text, splits it on markdown headers, indexes the
    chunks in a Chroma vector store and a BM25 retriever, and asks a chat model
    to review each code snippet using the retrieved chunks as context.
    """

    # Settings shared by the methods that build or reload the vector store.
    EMBEDDING_MODEL_NAME = "BAAI/bge-m3"
    PEP8_SAVE_DIRECTORY = "./pep8_chroma"

    def __init__(self):
        self.system = "Evaluator"

    def load_env(self) -> None:
        """Load environment variables from the local ``.env`` file.

        ``load_dotenv`` is the only call needed here: the values are read later
        through ``os.getenv`` where they are used (e.g. ``OPENAI_API_KEY`` in
        ``chat_llm``). The file is expected to provide the LangSmith settings
        (``LANGCHAIN_TRACING_V2``, ``LANGCHAIN_ENDPOINT``, ``LANGCHAIN_API_KEY``)
        and the LM Studio URLs (``LM_URL``, ``LM_LOCAL_URL``).
        """
        load_dotenv('.env')

    def docs_load(self) -> str:
        """Read ``input/pep8.txt`` and return its text.

        :return: the document text, or an empty string when the file could not
            be loaded (the error is printed, not raised).
        """
        try:
            loaded = TextLoader("input/pep8.txt", encoding="utf-8").load()
        except FileNotFoundError:
            print("파일을 찾을 수 없습니다. 경로를 확인하세요.")
            return ""
        except Exception as e:
            print(f"오류가 발생했습니다: {e}")
            return ""

        # Always hand a string to text_split(), which requires one.
        docs = "\n\n".join(doc.page_content for doc in loaded)

        print("Documents loaded:", docs)
        return docs

    def text_split(self, corpus):
        """Split markdown text into chunks at level 1-3 headers.

        :param corpus: markdown text to split.
        :return: the list of chunks produced by ``MarkdownHeaderTextSplitter``.
        """
        # (header marker, metadata key under which the header text is stored)
        headers_to_split_on = [
            ("#", "Header 1"),
            ("##", "Header 2"),
            ("###", "Header 3"),
        ]

        splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

        chunks = splitter.split_text(corpus)
        print("Document chunks after splitting:", chunks)
        # Note from the original author: measured with the tiktoken
        # "o200k_base" encoding, the largest chunk was 2028 tokens.

        return chunks

    def _build_embedding_model(self, device="cuda"):
        """Create the embedding model used for both indexing and querying."""
        return HuggingFaceEmbeddings(
            model_name=self.EMBEDDING_MODEL_NAME,
            model_kwargs={'device': device},
            encode_kwargs={'normalize_embeddings': True},
        )

    def pep8_docs_embedding(self, chunk, device="cuda"):
        """Embed the chunks and build the dense and BM25 retrievers.

        Any existing vector store directory is deleted first, so every call
        rebuilds the store from scratch.

        :param chunk: document chunks to index (output of ``text_split``).
        :param device: device passed to the embedding model; defaults to
            ``"cuda"`` as before, pass ``"cpu"`` on machines without a GPU.
        :return: ``(pep8_db, pep8_bm_db)`` - the Chroma retriever and the BM25
            retriever.
        """
        model = self._build_embedding_model(device)

        pep8_save_directory = self.PEP8_SAVE_DIRECTORY

        print("\n잠시만 기다려주세요.\n\n")

        # Remove a previously persisted store so the rebuild starts clean.
        if os.path.exists(pep8_save_directory):
            shutil.rmtree(pep8_save_directory)
            print(f"디렉토리 {pep8_save_directory}가 삭제되었습니다.\n")

        print("코딩 스타일 가이드 PEP8 문서 벡터화를 시작합니다. ")
        pep8_db = Chroma.from_documents(chunk, model, persist_directory=pep8_save_directory).as_retriever()
        pep8_bm_db = BM25Retriever.from_documents(
            chunk
        )
        print("코딩 스타일 가이드 PEP8 문서 데이터베이스가 생성되었습니다.\n")

        return pep8_db, pep8_bm_db

    def load_pep8_vector_store(self, query, device="cuda"):
        """Reload the persisted Chroma store and print the hits for ``query``.

        :param query: text to search for.
        :param device: device passed to the embedding model (default ``"cuda"``).
        :return: the Chroma retriever, or ``None`` when the store directory does
            not exist. Both paths return a single value so callers can assign
            the result to one name.
        """
        pep8_save_directory = self.PEP8_SAVE_DIRECTORY

        # Nothing to load unless pep8_docs_embedding() has persisted a store.
        if not os.path.exists(pep8_save_directory):
            print("벡터 저장소가 존재하지 않습니다. 먼저 저장소를 생성하십시오.")
            return None

        model = self._build_embedding_model(device)

        print("벡터 저장소를 불러옵니다...\n")
        pep8_db = Chroma(persist_directory=pep8_save_directory, embedding_function=model).as_retriever()

        # invoke() replaces the deprecated get_relevant_documents().
        results = pep8_db.invoke(query)

        print(f"쿼리 '{query}'에 대한 검색 결과:")
        for idx, result in enumerate(results):
            print(f"{idx + 1}. {result}")

        return pep8_db

    def chat_llm(self):
        """Create the chat model used to evaluate code.

        :return: a streaming ``ChatOpenAI`` instance that echoes tokens to
            stdout as they arrive.
        """

        # Alternative backend: LM Studio API
        # llm = ChatOpenAI(
        #     model_name="bartowski/gemma-2-9b-it-GGUF",
        #     base_url=os.getenv("LM_LOCAL_URL"),
        #     api_key="lm-studio",
        #     temperature=0,
        #     streaming=True,
        #     callbacks=[StreamingStdOutCallbackHandler()],
        # )

        # OpenAI API
        llm = ChatOpenAI(
            model_name="gpt-4o-mini",
            api_key=os.getenv("OPENAI_API_KEY"),
            temperature=0,
            streaming=True,
            callbacks=[StreamingStdOutCallbackHandler()],
        )

        return llm

    def format_docs(self, docs):
        """Join the ``page_content`` of each document with blank lines."""
        return "\n\n".join(document.page_content for document in docs)

    def reorder_documents(self, docs):
        """Reorder retrieved documents with ``LongContextReorder`` and join them.

        :param docs: documents returned by the retriever.
        :return: one string containing the reordered documents' text.
        """
        reordering = LongContextReorder()
        reordered_docs = reordering.transform_documents(docs)
        combined = self.format_docs(reordered_docs)

        return combined

    def evaluate(self, llm, pep8_db, pep8_bm_db, query):
        """Review one code snippet against PEP 8 using retrieved context.

        :param llm: chat model from ``chat_llm``.
        :param pep8_db: dense (Chroma) retriever.
        :param pep8_bm_db: BM25 retriever.
        :param query: the code snippet to review; it is used both as the
            retrieval query and as the code shown to the model.
        :return: the model's response text.
        """

        # NOTE(review): EnsembleRetriever does not appear to declare a
        # `search_type` field, so this argument is probably ignored - confirm
        # against the installed LangChain version before relying on MMR here.
        ensemble_retriever = EnsembleRetriever(
            retrievers=[pep8_bm_db, pep8_db],
            weights=[0.5, 0.5],
            search_type="mmr",
        )

        # Prompt: system instructions plus retrieved context, then the code.
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    """
                    You are a large language model performing a crucial role in a RAG-based coding proficiency evaluation system. 
                    Your primary task is to evaluate the code input by the user. Analyze the code based on the official PEP8 coding style guide and provide feedback on style and rules. 
                    Include clear and specific explanations to help the user understand, and suggest improvements when necessary.
                    
                    **Role:**
                    1. Evaluate Python code provided by the user according to the PEP8 coding style guide.
                    2. Analyze the code and provide feedback on aspects such as style, readability, and consistency.
                    3. If errors or inefficiencies are found, identify them and offer specific suggestions for correction.
                    4. Clearly explain parts of the code that violate the PEP8 coding style guide and mention the specific rules involved.
                    5. Deliver feedback as clearly and concisely as possible to ensure the user can easily understand.
                    6. When responding to questions, provide relevant examples or links to appropriate documentation related to the code.
                    
                    **Instructions:**
                    - Always evaluate the code following the rules specified in the PEP8 coding style guide.
                    - Feedback should be specific and practical. For example, clearly explain issues such as variable naming conventions, indentation, line length, and so on.
                    - In cases of PEP8 guideline violations, explicitly describe which rules have been breached and offer suggestions on how to correct the code according to those rules.
                    - Provide specific and actionable suggestions to help the user write better code.
                    - Responses should be objective, with clear references to the rules that form the basis of the evaluation.
                    
                    Based on these guidelines, evaluate the given code according to the PEP8 standards and clearly explain which parts violate the PEP8 coding style guide, providing feedback accordingly.
                    
                    **Important**: All responses, except for the code, must be written in Korean.
                    
                    **Context:** {context}

                    """,
                ),
                (
                    "human",
                    """
                    **Input Code**                
                    ```python
                    {question}
                    ```               
                    """
                ),
            ]
        )

        # The same input feeds retrieval ("context") and the prompt ("question").
        chain = {
                    "context": ensemble_retriever | RunnableLambda(self.reorder_documents),
                    "question": RunnablePassthrough()
                } | prompt | llm | StrOutputParser()

        response = chain.invoke(query)

        # ChatOpenAI models from chat_llm() already stream tokens to stdout via
        # their callback; print explicitly only for other model types.
        if not isinstance(llm, ChatOpenAI):
            print("\n\n{}".format(response))

        return response

    def evaluate_code_list_load(self, file_path='input/code.xlsx', sheet_name='example'):
        """Read the code snippets to evaluate from an Excel sheet.

        :param file_path: path of the workbook.
        :param sheet_name: sheet to read; its first row is used as the header.
        :return: the non-empty values of the ``Code`` column as a list, or an
            empty list when the sheet has no ``Code`` column.
        """
        df = pd.read_excel(file_path, sheet_name=sheet_name)

        if 'Code' in df.columns:
            code_list = df['Code'].dropna().tolist()
            return code_list
        else:
            return []

    def evaluate_result_save(self, inputs, outputs):
        """Write the inputs and responses to ``output/output.md`` as markdown.

        :param inputs: list of code snippets that were evaluated.
        :param outputs: list of model responses, in the same order as
            ``inputs``.
        """
        file_name = 'output/output.md'

        parts = ["# Evaluation Results\n\n", "## Table of Contents\n"]

        # Table of contents
        for idx in range(len(inputs)):
            parts.append(f"- [Input {idx + 1}](#input-{idx + 1})\n")
            parts.append(f"  - [Response {idx + 1}](#response-{idx + 1})\n")

        parts.append("\n---\n\n")

        # One section per (input, response) pair
        for idx, (input_code, response) in enumerate(zip(inputs, outputs)):
            parts.append(f"## Input {idx + 1}\n")
            parts.append("```python\n")
            parts.append(f"{input_code}\n")
            parts.append("```\n\n")

            parts.append(f"### Response {idx + 1}\n")
            parts.append(f"{response}\n\n")

            # Separator between sections
            parts.append("---\n\n")

        # Create the output directory so the first run does not fail.
        os.makedirs(os.path.dirname(file_name), exist_ok=True)

        with open(file_name, 'w', encoding='utf-8') as file:
            file.write("".join(parts))
        print(f"Results saved to {file_name}")

    def run(self) -> None:
        """Run the whole pipeline: load, split, index, evaluate, and save."""
        self.load_env()

        # Read the document
        docs = self.docs_load()

        # Split the document
        chunks = self.text_split(docs)

        # Embed the document
        pep8_db, pep8_bm_db = self.pep8_docs_embedding(chunks)

        # Create the large language model
        llm = self.chat_llm()

        # Read the code list (via self, not a notebook-level global)
        code_list = self.evaluate_code_list_load()

        # Evaluate each snippet
        response = []
        for query in code_list:
            response.append(self.evaluate(llm, pep8_db, pep8_bm_db, query))

        # Save the results
        self.evaluate_result_save(code_list, response)

In [27]:
# NOTE(review): binding the instance to `eval` shadows Python's built-in
# eval() for the rest of the kernel session; later cells rely on this name.
eval = Evaluator()
eval.load_env()

# Read the document
docs = eval.docs_load()

# Split the document
chunks = eval.text_split(docs)

Documents loaded: # Introduction

This document gives coding conventions for the Python code comprising
the standard library in the main Python distribution. Please see the
companion informational PEP describing `style guidelines for the C code
in the C implementation of Python <7>`{.interpreted-text role="pep"}.

This document and `257`{.interpreted-text role="pep"} (Docstring
Conventions) were adapted from Guido\'s original Python Style Guide
essay, with some additions from Barry\'s style guide[^1].

This style guide evolves over time as additional conventions are
identified and past conventions are rendered obsolete by changes in the
language itself.

Many projects have their own coding style guidelines. In the event of
any conflicts, such project-specific guides take precedence for that
project.

# A Foolish Consistency is the Hobgoblin of Little Minds

One of Guido\'s key insights is that code is read much more often than
it is written. The guidelines provided here are intended to

In [28]:
# Inspect the first chunk returned by text_split().
chunks[0]

Document(metadata={'Header 1': 'Introduction'}, page_content='This document gives coding conventions for the Python code comprising\nthe standard library in the main Python distribution. Please see the\ncompanion informational PEP describing `style guidelines for the C code\nin the C implementation of Python <7>`{.interpreted-text role="pep"}.  \nThis document and `257`{.interpreted-text role="pep"} (Docstring\nConventions) were adapted from Guido\\\'s original Python Style Guide\nessay, with some additions from Barry\\\'s style guide[^1].  \nThis style guide evolves over time as additional conventions are\nidentified and past conventions are rendered obsolete by changes in the\nlanguage itself.  \nMany projects have their own coding style guidelines. In the event of\nany conflicts, such project-specific guides take precedence for that\nproject.')

In [10]:
# Print the SQLite library version bundled with this Python.
# NOTE(review): presumably a check of Chroma's minimum SQLite requirement - confirm.
import sqlite3
print(sqlite3.sqlite_version)

3.37.2


In [36]:
# Configure the embedding model (same settings the Evaluator class uses).
model_name = "BAAI/bge-m3"
model_kwargs = {'device': 'cuda'}  # run the model on the GPU
encode_kwargs = {'normalize_embeddings': True}  # request normalized embedding vectors
model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [39]:
%%time
# Embed the text of every chunk; `embedded_documents` holds one vector per chunk.
print("문서 임베딩을 시작합니다. 잠시만 기다려주세요...\n")
embedded_documents = model.embed_documents([doc.page_content for doc in chunks])

문서 임베딩을 시작합니다. 잠시만 기다려주세요...

CPU times: user 5.76 s, sys: 18.2 ms, total: 5.78 s
Wall time: 5.58 s


In [68]:
# Check the result: model name, vector dimension, and the first 16 values of
# the first five document vectors.
print("[HuggingFace Endpoint Embedding]")
print(f"Model: \t\t{model_name}")
print(f"Dimension: \t{len(embedded_documents[0])}")
preview_lines = [
    f'[Document {doc_no}]\n{embedded_documents[doc_no][:16]}'
    for doc_no in range(5)
]
print('\n'.join(preview_lines))

[HuggingFace Endpoint Embedding]
Model: 		BAAI/bge-m3
Dimension: 	1024
[Document 0]
[-0.0358884371817112, -0.041736066341400146, -0.034454215317964554, 0.02067151665687561, 0.00270713004283607, -0.09040525555610657, 0.005794009659439325, 0.007708392571657896, 0.02329755201935768, 0.001958679873496294, -0.007051785010844469, 0.028489846736192703, -0.05900341644883156, -0.017196299508213997, -0.05294771492481232, 0.008109518326818943]
[Document 1]
[-0.05479702726006508, -0.023096546530723572, -0.04644380137324333, 0.015030707232654095, -0.005018358118832111, -0.06523799151182175, 0.015516090206801891, -0.02104056440293789, -0.002936915960162878, 0.027825305238366127, -0.007027186453342438, 0.00505776097998023, -0.03668908774852753, 0.004750032909214497, -0.05426468700170517, -0.005971211474388838]
[Document 2]
[-0.020047342404723167, -0.008060406893491745, -0.0022888879757374525, 0.031096067279577255, -0.0034301672130823135, -0.034766070544719696, 0.022972211241722107, 0.0477028079330921

In [64]:
# Query (embedded_query): embed a sample code snippet and preview the first 16 values.
query = 'a += 10; print(a)'
embedded_query = model.embed_query(query)
print(embedded_query[:16])

[-0.0224461667239666, 0.014571323990821838, -0.043716076761484146, 0.014743060804903507, -0.024649787694215775, 0.01899437978863716, 0.006989574059844017, 0.017510078847408295, 0.017405442893505096, -0.01815807819366455, -0.007677202578634024, 0.030567040666937828, -0.01583309844136238, 0.002230739453807473, 0.008771724067628384, 0.021787129342556]


In [57]:
# Similarity scores (cosine similarity): one score per document chunk.
# The model was configured with normalize_embeddings=True, so the plain dot
# product below is intended to act as cosine similarity.
import numpy as np

np.array(embedded_query) @ np.array(embedded_documents).T

array([0.35931882, 0.40320418, 0.46186205, 0.39569324, 0.41861094,
       0.44317534, 0.41720873, 0.43840305, 0.39853774, 0.40682932,
       0.4120885 , 0.47416488, 0.44385979, 0.35942035, 0.39352727,
       0.38457362, 0.44058082, 0.40110728, 0.37018366, 0.38472645,
       0.44271015, 0.42698861, 0.43680771, 0.42477556, 0.38721468,
       0.40391742, 0.3693216 , 0.40038005, 0.43143371, 0.39558437,
       0.44324094, 0.48497413, 0.37725883, 0.34357159, 0.44575177,
       0.36294052, 0.43142491])

In [58]:
# Chunk indices ordered from highest to lowest score: argsort() sorts
# ascending, and [::-1] reverses the order.
sorted_idx = (np.array(embedded_query) @ np.array(embedded_documents).T).argsort()[::-1]
sorted_idx

array([31, 11,  2, 34, 12, 30,  5, 20, 16,  7, 22, 28, 36, 21, 23,  4,  6,
       10,  9, 25,  1, 17, 27,  8,  3, 29, 14, 24, 19, 15, 32, 18, 26, 35,
       13,  0, 33])

In [59]:
# Show the query followed by the two highest-scoring chunks.
print(f"[Query] {query}\n====================================")
for i, idx in enumerate(sorted_idx[:2]):
    print(f"[{i}] {chunks[idx]}")
    print()

[Query] a += 10; print(a)
[0] page_content='Constants are usually defined on a module level and written in all
capital letters with underscores separating words. Examples include
`MAX_OVERFLOW` and `TOTAL`.' metadata={'Header 1': 'Naming Conventions', 'Header 2': 'Prescriptive: Naming Conventions', 'Header 3': 'Constants'}

[1] page_content='Avoid extraneous whitespace in the following situations:  
-   Immediately inside parentheses, brackets or braces:  
``` {. .good}
# Correct:
spam(ham[1], {eggs: 2})
```  
``` {. .bad}
# Wrong:
spam( ham[ 1 ], { eggs: 2 } )
```  
-   Between a trailing comma and a following close parenthesis:  
``` {. .good}
# Correct:
foo = (0,)
```  
``` {. .bad}
# Wrong:
bar = (0, )
```  
-   Immediately before a comma, semicolon, or colon:  
``` {. .good}
# Correct:
if x == 4: print(x, y); x, y = y, x
```  
``` {. .bad}
# Wrong:
if x == 4 : print(x , y) ; x , y = y , x
```  
-   However, in a slice the colon acts like a binary operator, and
should have equal am

In [11]:
# Embed the documents and build both retrievers (Chroma and BM25).
import warnings
# Silences every warning for the rest of the kernel session, not only this cell.
warnings.filterwarnings('ignore')
pep8_db, pep8_bm_db = eval.pep8_docs_embedding(chunks)

  from tqdm.autonotebook import tqdm, trange



잠시만 기다려주세요.


코딩 스타일 가이드 PEP8 문서 벡터화를 시작합니다. 
코딩 스타일 가이드 PEP8 문서 데이터베이스가 생성되었습니다.



In [23]:
# Sample retrieval query.
# NOTE(review): the saved output of the following cell reports the query
# 'a += 10; print(a)', so this value was not the one in effect when that cell
# last ran (cells were executed out of order).
query = 'if statement?'

In [69]:
# Reload the persisted Chroma store and print the hits for `query`.
# This rebinds the global `pep8_db`, which the embedding cell also assigns.
pep8_db = eval.load_pep8_vector_store(query)

벡터 저장소를 불러옵니다...

쿼리 'a += 10; print(a)'에 대한 검색 결과:
1. page_content='Constants are usually defined on a module level and written in all
capital letters with underscores separating words. Examples include
`MAX_OVERFLOW` and `TOTAL`.' metadata={'Header 1': 'Naming Conventions', 'Header 2': 'Prescriptive: Naming Conventions', 'Header 3': 'Constants'}
2. page_content='Avoid extraneous whitespace in the following situations:  
-   Immediately inside parentheses, brackets or braces:  
``` {. .good}
# Correct:
spam(ham[1], {eggs: 2})
```  
``` {. .bad}
# Wrong:
spam( ham[ 1 ], { eggs: 2 } )
```  
-   Between a trailing comma and a following close parenthesis:  
``` {. .good}
# Correct:
foo = (0,)
```  
``` {. .bad}
# Wrong:
bar = (0, )
```  
-   Immediately before a comma, semicolon, or colon:  
``` {. .good}
# Correct:
if x == 4: print(x, y); x, y = y, x
```  
``` {. .bad}
# Wrong:
if x == 4 : print(x , y) ; x , y = y , x
```  
-   However, in a slice the colon acts like a binary operator, a

In [29]:
# Create the large language model used for the evaluation.
llm = eval.chat_llm()

In [31]:
# Read the list of code snippets to evaluate (defaults: input/code.xlsx, sheet 'example').
code_list = eval.evaluate_code_list_load()

In [32]:
# Print the test data (code snippets).
print(code_list)

["def MyFunction():\n    a=10\n    b = 20\n    sum=a+b\n    print('Sum:', sum )", 'def calculate_area(radius):\n  if radius > 0:\n       area = 3.14 * radius ** 2\n  return area', 'def print_message(message):\n    print("This is a very long message that exceeds the maximum line length recommended by PEP8 style guide, which is typically 79 characters.")\n    print( "Another message" )', 'import math, sys\n\ndef compute_square_root(number):\n    x = 100\n    return math.sqrt(number)', 'def convertToCelsius(tempFahrenheit):\n    pi = 3.14\n    temp_C = (tempFahrenheit - 32) * 5/9\n    return temp_C', 'def add_numbers(a, b):\n    return a+b\ndef subtractNumbers(a, b):\n    return a - b', 'def greet(name):\n    print("Hello, " + name + \'!\'); \n\ngreet("Alice")', 'def generate_list():\n    my_list = [ 1,  2 ,3 , 4, 5 ]\n    print(sum(len(str(x)) for x in my_list))\n\ngenerate_list()', 'def multiply_values(a, b, c,\n                    d, e, f):\n    return a * b * c * d * e * f', 'def chec

In [34]:
# Evaluate each code snippet; responses are collected in the same order as code_list.
# Note: the loop variable rebinds the global `query` used by earlier cells.
response = []
for query in code_list:
    response.append(eval.evaluate(llm, pep8_db, pep8_bm_db, query))

코드에 대한 PEP8 스타일 가이드에 따른 평가 및 피드백은 다음과 같습니다.

1. **함수 이름**: 
   - `MyFunction`은 PEP8에 따라 소문자로 시작하고 단어 사이에 언더스코어(_)를 사용하는 것이 좋습니다. 따라서 `my_function`으로 변경하는 것이 좋습니다.
   - 관련 규칙: [PEP8 - Naming Conventions](https://www.python.org/dev/peps/pep-0008/#function-names)

2. **변수 할당**:
   - `a=10`과 같이 변수 할당 시 등호(=) 양쪽에 공백이 없습니다. PEP8에서는 할당 연산자 양쪽에 공백을 두는 것을 권장합니다. 따라서 `a = 10`으로 수정해야 합니다.
   - 관련 규칙: [PEP8 - Whitespace in Expressions and Statements](https://www.python.org/dev/peps/pep-0008/#whitespace-in-expressions-and-statements)

3. **일관성 있는 공백**:
   - `b = 20`은 올바르게 공백이 있지만, `sum=a+b`는 공백이 없습니다. 모든 변수 할당에서 일관되게 공백을 사용하는 것이 좋습니다. 따라서 `sum = a + b`로 수정해야 합니다.

4. **출력문**:
   - `print('Sum:', sum )`에서 `sum` 뒤에 불필요한 공백이 있습니다. PEP8에서는 괄호 뒤에 공백을 두지 않도록 권장합니다. 따라서 `print('Sum:', sum)`으로 수정해야 합니다.

5. **들여쓰기**:
   - 함수 내부의 코드 블록은 4개의 공백으로 들여쓰기가 되어 있어야 합니다. 현재 코드에서는 이 부분이 잘 지켜지고 있습니다.

최종적으로 수정된 코드는 다음과 같습니다:

```python
def my_function():
    a = 10
    b = 20
    sum = a + b
    print('Sum:', sum)
``

In [35]:
# Save the inputs and responses as markdown.
eval.evaluate_result_save(code_list, response)

Results saved to output/output.md
