# 의견 말하기 유형 질문 데이터 만들기

- 의견 말하기 유형 질문 생성 만들기
- 질문에 대한 오디오 만들기


## 질문 생성 Chain

In [1]:
import json
from typing import List

from tqdm.notebook import tqdm
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser, CommaSeparatedListOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema import HumanMessage, AIMessage, StrOutputParser
import pandas as pd


For example, replace imports like: `from langchain.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
model = ChatOpenAI(model="gpt-3.5-turbo")

### 질문 주제 샘플링하기

In [3]:
csv_parser = CommaSeparatedListOutputParser()

In [4]:
csv_format_instruction = csv_parser.get_format_instructions()

In [5]:
subjet_prompt_template = PromptTemplate.from_template(template="영어 TOEIC 시험에 나올 법한 일상적인 주제를 단어 형식으로 만들어줘.\n{format_instruction}",
                                                      partial_variables={"format_instruction": csv_format_instruction})

In [6]:
subject_chain = subjet_prompt_template | model | csv_parser

In [7]:
subject_list = subject_chain.invoke({})
subject_list

['commuting',
 'technology',
 'leisure activities',
 'shopping',
 'education',
 'travel',
 'health and wellness',
 'social media',
 'work environment',
 'sports']

In [8]:
subject_list = subject_list[:4]
subject_list

['commuting', 'technology', 'leisure activities', 'shopping']

### 질문 만들기

In [10]:
#기존 프롬프트에서 요구사항 하나만 바꿔서 문제 유형 바꾸기
template = """\
- 영어 TOEIC Speaking 시험 중에 의견 말하기(express an opinion)에 나올 법한 {input} 주제 관련 질문 하나 만들어줘
- 상대방에게 그렇게 생각한 이유와 주장을 이야기하도록 요구해줘
- 한 문장만 만들어줘
- 여러 예시 만들지마
- 영어로
- 예시
Some people prefer to take a job that does not pay well but does provide a lot of time off from work. What is your opinion about taking a job with a low salary that has a lot of vacation time? Give reasons for your opinion.
"""

question_prompt_template = PromptTemplate.from_template(template=template)

In [11]:
question_chain = question_prompt_template | model | StrOutputParser()

In [12]:
question_list = []
for subject in tqdm(subject_list):
    question_list.append(question_chain.invoke({"input": subject}))

  0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
question_list

['What do you think about commuting long distances to work every day? Give reasons for your opinion.',
 'What is your opinion on the impact of technology on interpersonal relationships? Give reasons for your opinion.',
 "What do you think about participating in outdoor activities like hiking and camping for leisure? Explain why you enjoy or don't enjoy these activities.",
 'What is your opinion on people who prefer online shopping over traditional shopping at brick-and-mortar stores? Share your reasons for your opinion.']

## 질문에 대한 오디오 파일 만들기

In [14]:
from openai import OpenAI

In [15]:
client = OpenAI()

In [16]:
def gen_speech_file(text, output_file_path):
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy", # alloy, echo, fable, onyx, nova, and shimmer
        input=text
    )
    response.stream_to_file(output_file_path)

In [17]:
!mkdir -p ./data/speaking__express_an_opinion

명령 구문이 올바르지 않습니다.


In [18]:
save_dir = "./data/speaking__express_an_opinion"

In [19]:
question_list

['What do you think about commuting long distances to work every day? Give reasons for your opinion.',
 'What is your opinion on the impact of technology on interpersonal relationships? Give reasons for your opinion.',
 "What do you think about participating in outdoor activities like hiking and camping for leisure? Explain why you enjoy or don't enjoy these activities.",
 'What is your opinion on people who prefer online shopping over traditional shopping at brick-and-mortar stores? Share your reasons for your opinion.']

In [20]:
record_list = []

for i, q in tqdm(enumerate(question_list), total=len(question_list)):
    output_file_path = f"{save_dir}/question_{i}.wav"
    gen_speech_file(q, output_file_path)

    record = {"question": q, "audio_file_path": output_file_path}
    record_list.append(record)

  0%|          | 0/4 [00:00<?, ?it/s]

  response.stream_to_file(output_file_path)


In [21]:
df = pd.DataFrame(record_list)
df

Unnamed: 0,question,audio_file_path
0,What do you think about commuting long distanc...,./data/speaking__express_an_opinion/question_0...
1,What is your opinion on the impact of technolo...,./data/speaking__express_an_opinion/question_1...
2,What do you think about participating in outdo...,./data/speaking__express_an_opinion/question_2...
3,What is your opinion on people who prefer onli...,./data/speaking__express_an_opinion/question_3...


In [22]:
df.to_csv(f"{save_dir}/question_and_audio.csv", index=False)

In [23]:
from IPython.display import Audio

In [24]:
Audio(f"{save_dir}/question_2.wav")

In [25]:
Audio(f"{save_dir}/question_0.wav")

In [26]:
subject_list

['commuting', 'technology', 'leisure activities', 'shopping']