# 목소리 2개짜리 음성 만들기

In [1]:
import pandas as pd
from pydub import AudioSegment, effects
from tqdm import tqdm
import random

In [2]:
def generate_samples(sample_size, data_size):
    """Draw ``sample_size`` distinct index pairs ``(i, j)`` with ``i < j``.

    Every pair is drawn uniformly from all unordered pairs of distinct
    indices in ``range(data_size)``; duplicates are rejected and redrawn.

    Args:
        sample_size: Number of distinct pairs to return. Values <= 0 yield
            an empty list.
        data_size: Number of rows available; indices run 0..data_size - 1.

    Returns:
        A list of ``(i, j)`` tuples, in arbitrary order.

    Raises:
        ValueError: If more pairs are requested than can exist for
            ``data_size`` rows.
    """
    if sample_size <= 0:
        return []

    # Number of unordered pairs; guard small/negative sizes explicitly so the
    # product below cannot turn a negative data_size into a positive count.
    max_pairs = data_size * (data_size - 1) // 2 if data_size >= 2 else 0
    if sample_size > max_pairs:
        # Without this check the rejection loop below would never terminate.
        raise ValueError(
            f"cannot draw {sample_size} distinct pairs from {data_size} rows "
            f"(only {max_pairs} pairs exist)"
        )

    indices = range(data_size)
    sampled_pairs = set()
    while len(sampled_pairs) < sample_size:
        # Picking two distinct indices at once keeps every pair equally
        # likely; drawing i first and then j > i would favour large i.
        i, j = sorted(random.sample(indices, 2))
        sampled_pairs.add((i, j))

    return list(sampled_pairs)

In [11]:
def generate(data_frame, sampled_pairs, stored_dir):
    """Mix pairs of OGG clips and return a metadata table for the mixes.

    For every ``(i, j)`` in ``sampled_pairs`` the rows at positions ``i`` and
    ``j`` of ``data_frame`` are loaded, normalized with
    ``pydub.effects.normalize``, overlaid so that both play simultaneously,
    and exported to ``./{stored_dir}/{id_i}_{id_j}.ogg``.

    Args:
        data_frame: Table with at least the columns ``id``, ``path``,
            ``label1``, ``label2`` and ``len``; rows are addressed by
            position (``iloc``).
        sampled_pairs: Iterable of ``(i, j)`` positional row indices.
        stored_dir: Output directory, relative to the current working
            directory. It is created if it does not exist.

    Returns:
        A DataFrame with columns ``id``, ``path``, ``label1``, ``label2``,
        ``len`` holding one row per exported mix.
    """
    import os

    # Exporting into a missing directory fails, so create it up front.
    os.makedirs(f"./{stored_dir}", exist_ok=True)

    # Rows are gathered in a list and turned into a frame once at the end;
    # concatenating inside the loop re-copies the whole table every time.
    rows = []

    for i, j in tqdm(sampled_pairs):
        row_i = data_frame.iloc[i]
        row_j = data_frame.iloc[j]

        # Load both OGG files and normalize each one.
        audio1 = effects.normalize(
            AudioSegment.from_file(row_i['path'], format="ogg")
        )
        audio2 = effects.normalize(
            AudioSegment.from_file(row_j['path'], format="ogg")
        )

        # Mix the clips so they play at the same time. overlay() keeps the
        # duration of the segment it is called on, so the longer clip must be
        # the base; otherwise the tail of the longer clip is cut off.
        if len(audio1) >= len(audio2):
            combined = audio1.overlay(audio2)
        else:
            combined = audio2.overlay(audio1)

        cur_id = "_".join([row_i['id'], row_j['id']])
        path = f"./{stored_dir}/{cur_id}.ogg"

        # Write the mixed clip to disk.
        combined.export(path, format="ogg")

        rows.append({
            'id': cur_id,
            'path': path,
            'label1': row_i['label1'],
            # NOTE(review): the second label is read from row_j's 'label2'
            # column (not 'label1') - confirm this matches the input schema.
            'label2': row_j['label2'],
            # Presumably 'len' is the clip duration, so the mix is as long as
            # the longer input - TODO confirm the unit against the CSV.
            'len': max(row_i['len'], row_j['len']),
        })

    return pd.DataFrame(rows, columns=['id', 'path', 'label1', 'label2', 'len'])

# toy_sample_two_voice

In [None]:
# Load the toy sample metadata; the trailing expression displays (rows, columns).
df = pd.read_csv("train_sample.csv")
df.shape

In [None]:
# Toy run: request 10 row-index pairs over the rows of df and choose the
# directory name that is passed to generate() as the output location.
data_size = df.shape[0]
sample_size = 10
sampled_pairs = generate_samples(sample_size, data_size)
stored_dir = "toy_sample_two_voice"

In [None]:
# Run generate() over the sampled pairs and keep the DataFrame it returns.
mixed_df = generate(df, sampled_pairs, stored_dir)

In [None]:
# Preview the first three rows of the result.
mixed_df.head(3)

In [None]:
# Save (left disabled for the toy run)
# mixed_df.to_csv("train_sample_two_voice.csv", index=False)

# train

In [None]:
# Load the full metadata and keep only rows whose 'path' contains "train";
# the trailing expression displays the (rows, columns) of the subset.
whole_df = pd.read_csv("train_final.csv")
df = whole_df[whole_df['path'].str.contains('train')]
df.shape

In [None]:
# Train run: request 10000 row-index pairs over the filtered rows and choose
# the directory name that is passed to generate() as the output location.
data_size = df.shape[0]
sample_size = 10000
sampled_pairs = generate_samples(sample_size, data_size)
stored_dir = "train_two_voice"

In [None]:
# Run generate() over the sampled pairs and keep the DataFrame it returns.
mixed_df = generate(df, sampled_pairs, stored_dir)

In [None]:
# Preview the first three rows of the result.
mixed_df.head(3)

In [None]:
# Save the result table as CSV, without the index column.
mixed_df.to_csv("train_two_voice.csv", index=False)

# validate

In [12]:
# Load the full metadata and keep only rows whose 'path' contains "validate";
# the trailing expression displays the (rows, columns) of the subset.
whole_df = pd.read_csv("train_final.csv")
df = whole_df[whole_df['path'].str.contains('validate')]
df.shape

(10256, 5)

In [13]:
# Validation run: request 10000 row-index pairs over the filtered rows and
# choose the directory name that is passed to generate() as the output location.
data_size = df.shape[0]
sample_size = 10000
sampled_pairs = generate_samples(sample_size, data_size)
stored_dir = "validate_two_voice"

In [14]:
# Run generate() over the sampled pairs and keep the DataFrame it returns.
mixed_df = generate(df, sampled_pairs, stored_dir)

100%|██████████| 10000/10000 [55:28<00:00,  3.00it/s] 


In [15]:
# Preview the first three rows of the result.
mixed_df.head(3)

Unnamed: 0,id,path,label1,label2,len
0,ILTREVGW_VOIEUVXP,./validate_two_voice/ILTREVGW_VOIEUVXP.ogg,real,real,3686.0
1,KDUKURKF_ELXMJWNP,./validate_two_voice/KDUKURKF_ELXMJWNP.ogg,real,real,2908.0
2,FETAYQNT_ZMGYAKOJ,./validate_two_voice/FETAYQNT_ZMGYAKOJ.ogg,real,fake,2372.0


In [16]:
# Save the result table as CSV, without the index column.
mixed_df.to_csv("validate_two_voice.csv", index=False)