# 받아쓰기 시험 유형 만들기

- 받아쓰기 지문 만들기
- 받아쓰기 평가?


In [1]:
import json
from typing import List

from tqdm.notebook import tqdm
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser, CommaSeparatedListOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema import HumanMessage, AIMessage, StrOutputParser
import pandas as pd

In [2]:
# Chat model that backs the sentence-generation chain built below.
model = ChatOpenAI(model="gpt-4-1106-preview", temperature=1.0)

## 받아쓰기를 위한 문장 샘플링하기

In [3]:
csv_parser = CommaSeparatedListOutputParser()

In [4]:
csv_format_instruction = csv_parser.get_format_instructions()

In [5]:
# Prompt (in Korean) asking for 10 everyday sentences likely to appear on an
# English dictation test, without numbering or bullet points. The parser's
# comma-separated-list format instructions are pre-filled as a partial
# variable, so the template has no remaining inputs.
sent_gen_prompt_template = PromptTemplate.from_template(template="영어 받아쓰기 시험에 나올 법한 일상적인 문장들을 10개 나열해줘. 번호나 bulletpoint 없이\n{format_instruction}",
                                                        partial_variables={"format_instruction": csv_format_instruction})

In [6]:
sent_gen_chain = sent_gen_prompt_template | model | csv_parser

In [7]:
sent_list = sent_gen_chain.invoke({})
sent_list

['Could you help me find my glasses',
 "I'm going to the grocery store later",
 'The weather is really nice today',
 'Please turn off the lights when you leave',
 'What time is our appointment tomorrow',
 "Don't forget to bring your homework",
 'How much does this cost',
 'Make sure to water the plants',
 'I need to charge my phone',
 'Are we out of milk again']

## 문장에 대한 오디오 파일 만들기

In [8]:
from openai import OpenAI

In [9]:
client = OpenAI()

In [10]:
def gen_speech_file(text, output_file_path, voice="alloy"):
    """Synthesize ``text`` with the OpenAI text-to-speech endpoint and save it.

    The audio format requested from the API is derived from the suffix of
    ``output_file_path`` so that, for example, a ``.wav`` path really receives
    WAV data. When no ``response_format`` is sent the endpoint answers with
    its default format (MP3), which would otherwise be written into a file
    whose name claims a different container.

    Args:
        text: Sentence to be spoken.
        output_file_path: Destination path; its extension selects the format.
        voice: TTS voice name (alloy, echo, fable, onyx, nova or shimmer).
    """
    from pathlib import Path

    supported_formats = {"mp3", "opus", "aac", "flac", "wav", "pcm"}
    suffix = Path(output_file_path).suffix.lower().lstrip(".")

    request = {"model": "tts-1", "voice": voice, "input": text}
    if suffix in supported_formats:
        request["response_format"] = suffix
    # Unknown or missing suffix: leave the API default untouched.

    response = client.audio.speech.create(**request)
    response.stream_to_file(output_file_path)

In [11]:
!mkdir -p ./data/writing__dictation

In [12]:
save_dir = "./data/writing__dictation"

In [13]:
sent_list

['Could you help me find my glasses',
 "I'm going to the grocery store later",
 'The weather is really nice today',
 'Please turn off the lights when you leave',
 'What time is our appointment tomorrow',
 "Don't forget to bring your homework",
 'How much does this cost',
 'Make sure to water the plants',
 'I need to charge my phone',
 'Are we out of milk again']

In [14]:
# Synthesize one audio file per sentence and remember which file belongs to it.
record_list = []

for i, q in tqdm(enumerate(sent_list), total=len(sent_list)):
    # Files are numbered by the sentence's position in sent_list.
    # NOTE(review): the suffix is .wav; the actual audio encoding is decided
    # inside gen_speech_file - confirm the two agree.
    output_file_path = f"{save_dir}/question_{i}.wav"
    gen_speech_file(q, output_file_path)

    # One row per sentence; these rows become the DataFrame saved as CSV.
    record = {"sentence": q, "audio_file_path": output_file_path}
    record_list.append(record)

  0%|          | 0/10 [00:00<?, ?it/s]

In [15]:
df = pd.DataFrame(record_list)
df

Unnamed: 0,sentence,audio_file_path
0,Could you help me find my glasses,./data/writing__dictation/question_0.wav
1,I'm going to the grocery store later,./data/writing__dictation/question_1.wav
2,The weather is really nice today,./data/writing__dictation/question_2.wav
3,Please turn off the lights when you leave,./data/writing__dictation/question_3.wav
4,What time is our appointment tomorrow,./data/writing__dictation/question_4.wav
5,Don't forget to bring your homework,./data/writing__dictation/question_5.wav
6,How much does this cost,./data/writing__dictation/question_6.wav
7,Make sure to water the plants,./data/writing__dictation/question_7.wav
8,I need to charge my phone,./data/writing__dictation/question_8.wav
9,Are we out of milk again,./data/writing__dictation/question_9.wav


In [16]:
df.to_csv(f"{save_dir}/sent_and_audio.csv", index=False)

## 받아쓰기 평가

### 기계적 방법

#### 나이브한 방법

In [21]:
def grade_dictation(correct_script, student_response):
    """Grade a dictation by comparing words position by position.

    Both texts are split on whitespace and the i-th word of the answer is
    compared with the i-th word of the script. A single dropped word therefore
    shifts every following word out of alignment; this is the deliberately
    naive baseline.

    Returns:
        dict with
        "correct"   - positions where both words are identical,
        "incorrect" - positions that differ, plus any surplus answer words,
        "missing"   - how many words the answer is short of the script.
    """
    expected = correct_script.split()
    given = student_response.split()

    # zip stops at the shorter sequence, i.e. the overlapping positions.
    matched = sum(1 for want, got in zip(expected, given) if want == got)
    mismatched = min(len(expected), len(given)) - matched

    length_gap = len(expected) - len(given)
    if length_gap < 0:
        # The answer is longer than the script: surplus words are wrong,
        # and nothing is missing.
        mismatched -= length_gap
        length_gap = 0

    return {"correct": matched, "incorrect": mismatched, "missing": length_gap}

# Example usage
correct_script = "This is an example of a correct English sentence."
student_response = "This is example of a correct English sentence."

result = grade_dictation(correct_script, student_response)
print(result)

{'correct': 2, 'incorrect': 6, 'missing': 1}


#### 최장 공통 부분 수열(Longest Common Subsequence, LCS)

In [22]:
def lcs(X, Y):
    """Return the length of the longest common subsequence of X and Y.

    X and Y are sequences (e.g. lists of words or strings) whose items are
    compared with ``==``. Uses the standard dynamic programme, keeping only
    the previous table row while the next one is built.
    """
    # Row for "no items of X consumed yet": all zeros.
    prev_row = [0] * (len(Y) + 1)

    for x_item in X:
        row = [0]  # column 0: empty prefix of Y
        for col, y_item in enumerate(Y, start=1):
            if x_item == y_item:
                # Extend the subsequence that ended just before both items.
                row.append(prev_row[col - 1] + 1)
            else:
                # Drop one item from either side and keep the better result.
                row.append(max(prev_row[col], row[col - 1]))
        prev_row = row

    return prev_row[-1]

def grade_dictation(correct_script, student_response):
    """Grade a dictation using the word-level longest common subsequence.

    Both texts are split on whitespace. Words that belong to the LCS count as
    correct; remaining answer words are incorrect and remaining script words
    are missing.

    Returns:
        dict with the integer counts "correct", "incorrect" and "missing".
    """
    reference_tokens = correct_script.split()
    answer_tokens = student_response.split()

    matched = lcs(reference_tokens, answer_tokens)

    return {
        "correct": matched,
        "incorrect": len(answer_tokens) - matched,
        "missing": len(reference_tokens) - matched,
    }

# Example usage
correct_script = "This is an example of a correct English sentence."
student_response = "This an exampel of correct English"

result = grade_dictation(correct_script, student_response)
print(result)

{'correct': 5, 'incorrect': 1, 'missing': 4}


#### 르벤슈타인 거리(Levenshtein distance)

In [29]:
def levenshtein(a, b):
    """Return the Levenshtein (edit) distance between sequences a and b.

    Items are compared with ``==``, so the inputs may be strings or lists of
    words. Only one table row is kept at a time, and that row is sized by the
    shorter input.
    """
    # Keep the shorter sequence in `a` so each row has min(len) + 1 cells.
    if len(a) > len(b):
        a, b = b, a
    width = len(a)

    # Distance from the empty prefix of b to every prefix of a.
    row = list(range(width + 1))

    for i, b_item in enumerate(b, start=1):
        above = row
        row = [i]  # turning i items of b into an empty prefix of a
        for j, a_item in enumerate(a, start=1):
            # A substitution costs 1 only when the two items differ.
            substitution = above[j - 1] + (a_item != b_item)
            row.append(min(above[j] + 1, row[j - 1] + 1, substitution))

    return row[width]

def grade_dictation(correct_script, student_response):
    """Grade a dictation with the word-level Levenshtein distance.

    Both texts are split on whitespace and compared word by word (exact,
    case- and punctuation-sensitive matching).

    Args:
        correct_script: Reference sentence.
        student_response: What the student wrote.

    Returns:
        dict with
        "levenshtein_distance" - number of word insertions, deletions and
            substitutions separating the two texts,
        "accuracy" - ``(total - distance) / total`` where ``total`` is the
            word count of the longer text; 1.0 when both texts are empty.
    """
    correct_words = correct_script.split()
    student_words = student_response.split()

    total_words = max(len(correct_words), len(student_words))
    if total_words == 0:
        # Both texts are empty (or whitespace only): they match trivially.
        # Returning here avoids the 0 / 0 division below.
        return {
            "levenshtein_distance": 0,
            "accuracy": 1.0
        }

    # Compute the Levenshtein distance
    distance = levenshtein(correct_words, student_words)

    # The distance never exceeds the longer length, so accuracy is in [0, 1].
    accuracy = (total_words - distance) / total_words

    return {
        "levenshtein_distance": distance,
        "accuracy": accuracy
    }

# Example usage
correct_script = "let's go for a picnic"
student_response = "Let's go for a picnic."

result = grade_dictation(correct_script, student_response)
print(result)


{'levenshtein_distance': 2, 'accuracy': 0.6}


### LLM을 사용하는 평가 방법

In [30]:
from langchain_openai import ChatOpenAI
from typing import Union
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

In [31]:
model = ChatOpenAI(model="gpt-4-1106-preview", temperature=0.8) # CoT needs to produce diverse samples, so the temperature is raised

In [32]:
# Structured result requested from the grading LLM. The field descriptions
# below are embedded in the JSON-schema format instructions sent to the model.
# (Documented with comments rather than a class docstring so the generated
# schema stays unchanged.)
class Evaluation(BaseModel):
    # Free-text reasoning behind the grade
    # (description reads: "reasoning for the dictation evaluation").
    reason: str = Field(description="받아쓰기 평가를 위한 추론")
    # Integer grade; the description asks for 0-10 points, but no bound is
    # enforced by this field definition.
    score: int = Field(description="받아쓰기 점수. 0~10점")

In [33]:
parser = JsonOutputParser(pydantic_object=Evaluation)
format_instructions = parser.get_format_instructions()
format_instructions

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"reason": {"title": "Reason", "description": "\\ubc1b\\uc544\\uc4f0\\uae30 \\ud3c9\\uac00\\ub97c \\uc704\\ud55c \\ucd94\\ub860", "type": "string"}, "score": {"title": "Score", "description": "\\ubc1b\\uc544\\uc4f0\\uae30 \\uc810\\uc218. 0~10\\uc810", "type": "integer"}}, "required": ["reason", "score"]}\n```'

In [34]:
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

# Grading prompt (in Korean): "This is an English dictation test. Evaluate the
# user's response by comparing it with the Reference." The student's answer
# fills {input} and the reference sentence fills {ref}, each on its own line,
# followed by the JSON format instructions (pre-filled as a partial variable).
# The separator before "Reference:" is "\n"; it used to be "\R", an invalid
# escape that left a literal backslash in the prompt and no line break.
human_prompt_template = HumanMessagePromptTemplate.from_template(
                            "영어 받아쓰기 시험이다. 사용자의 응답을 Reference와 비교하여 평가하라.\n사용자: {input}\nReference: {ref}\n{format_instructions}",
                            partial_variables={"format_instructions": format_instructions})

# Single-message chat prompt wrapping the human message above.
prompt = ChatPromptTemplate.from_messages(
    [
        human_prompt_template,
    ])

In [35]:
eval_chain = prompt | model | parser

In [36]:
# Smoke-test the grader with one answer/reference pair.
# NOTE(review): "input" (the student's answer) ends with "work" while "ref"
# does not, yet the recorded output says the user omitted "work" - the two
# values look swapped; confirm which one is meant to be the reference.
eval_chain.invoke({"input": "I need to go grocery shopping after work",
                   "ref": "I need to go grocery shopping after"})

{'reason': "User's response is missing the word 'work' at the end of the sentence.",
 'score': 9}