In [None]:
# Extract the A/B/C class archives and the augmented-audio archive into /content/
# (-q suppresses unzip's per-file listing). Later cells read audio from
# /content/<class>_class/... by absolute path.
!unzip -q /content/A_class.zip -d /content/
!unzip -q /content/B_class.zip -d /content/
!unzip -q /content/C_class.zip -d /content/
!unzip -q /content/augmented_audio.zip -d /content/

In [2]:
import pandas as pd
import numpy as np

def make_score_table(file_path):
  """Load a judgement spreadsheet and turn its marks into numeric scores.

  The sheet is read with ``pd.read_excel``. Its 'Unnamed: 0' column is moved
  to the front and renamed to 'Text'. Every cell equal to a "pass" mark
  ('〇', 'O') becomes 1.0 and every cell equal to a "fail" mark
  ('✖', 'X', '×') becomes 0.0; all other cells are left as they are.

  :param file_path: Path of the Excel file to read.
  :return: DataFrame with a 'Text' column followed by the score columns.
  """
  mark_to_score = {'〇': 1.0, 'O': 1.0, '✖': 0.0, 'X': 0.0, '×': 0.0}
  return (
      pd.read_excel(file_path)
      # set_index + reset_index moves 'Unnamed: 0' to the first column.
      .set_index('Unnamed: 0')
      .reset_index()
      .replace(mark_to_score)
      .rename(columns={'Unnamed: 0': 'Text'})
  )


In [3]:
# One judgement spreadsheet per class, loaded in A, B, C order.
A_score_table, B_score_table, C_score_table = (
    make_score_table(file_path=sheet_path)
    for sheet_path in (
        '/content/語音辨識判定.xlsx',
        '/content/判定のコピー.xlsx',
        '/content/語音辨識判定(SHIH).xlsx',
    )
)

In [4]:
# Build one row per scored recording: where the audio file lives, which word
# it contains, and the 1.0 / 0.0 judgement taken from the class's score table.
NUM_RECORDINGS = 8  # '音檔1'..'音檔8' (originally written as 10-2 — TODO confirm why)
NUM_WORDS = 23      # rows 0..22 of every score table are read
DATASET_COLUMNS = ['audio_path', 'text', 'score']

rows = []

for class_name, score_table in zip(['A', 'B', 'C'], [A_score_table, B_score_table, C_score_table]):
  for person in range(NUM_RECORDINGS):
    for index in range(NUM_WORDS):
      score = score_table.loc[index, f'音檔{person+1}']
      text = score_table.loc[index, 'Text']
      # Cells without a judgement carry no label, so they are left out.
      if pd.isna(score):
        continue
      audio_path = f'/content/{class_name}_class/{class_name}_class_audio_{person+1}/{text}.mp3'
      rows.append({'audio_path': audio_path, 'text': text, 'score': score})

# Passing the column list keeps the three-column schema even when no row was
# collected; a bare pd.DataFrame([]) would have no columns at all.
df = pd.DataFrame(rows, columns=DATASET_COLUMNS)

print(df)


                                     audio_path   text  score
0      /content/A_class/A_class_audio_1/わたし.mp3    わたし    1.0
1    /content/A_class/A_class_audio_1/わたしたち.mp3  わたしたち    1.0
2      /content/A_class/A_class_audio_1/あなた.mp3    あなた    1.0
3     /content/A_class/A_class_audio_1/あのひと.mp3   あのひと    0.0
4     /content/A_class/A_class_audio_1/あのかた.mp3   あのかた    1.0
..                                          ...    ...    ...
536     /content/C_class/C_class_audio_8/だれ.mp3     だれ    1.0
537    /content/C_class/C_class_audio_8/どなた.mp3    どなた    1.0
538    /content/C_class/C_class_audio_8/～さい.mp3    ～さい    1.0
539   /content/C_class/C_class_audio_8/なんさい.mp3   なんさい    1.0
540   /content/C_class/C_class_audio_8/おいくつ.mp3   おいくつ    1.0

[541 rows x 3 columns]


In [5]:
import librosa
import soundfile as sf
import os
def convert_mp3_to_wav(mp3_file_path):
    """
    Convert an MP3 file to WAV format.

    The WAV file is written next to the source file, using the same path with
    the extension replaced by '.wav'.

    :param mp3_file_path: Path of the MP3 file.
    :return: Path of the converted WAV file.
    """
    path_without_ext, _ = os.path.splitext(mp3_file_path)
    wav_file_path = path_without_ext + '.wav'

    samples, sample_rate = librosa.load(mp3_file_path, sr=None)
    sf.write(wav_file_path, samples, sample_rate)
    return wav_file_path

def augment_audio(file_path, pitch_shift_steps, time_stretch_rate):
    """
    Apply a small pitch shift and time stretch to an audio file and save the result.

    The augmented audio is written under '/content/augmented/', at the input
    path with every '/content/' occurrence removed. Missing output directories
    are created before writing.

    :param file_path: Path of the audio file to augment.
    :param pitch_shift_steps: Pitch shift in semitones; may be positive or
        negative. 0 skips the pitch shift.
    :param time_stretch_rate: Time-stretch rate; greater than 1 speeds the
        audio up, less than 1 slows it down. 1 skips the time stretch.
    """

    # Load the audio file (sr=None is passed through to librosa.load).
    y, sr = librosa.load(file_path, sr=None)

    # Pitch shift
    if pitch_shift_steps != 0:
        y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift_steps)

    # Time stretch
    if time_stretch_rate != 1:
        y = librosa.effects.time_stretch(y, rate=time_stretch_rate)

    # Save the augmented audio. The per-class / per-speaker directory must be
    # created first, otherwise the write fails on a fresh runtime.
    augmented_file_path = '/content/augmented/' + file_path.replace('/content/', '')
    os.makedirs(os.path.dirname(augmented_file_path), exist_ok=True)
    sf.write(augmented_file_path, y, sr)
    print("增量後的音頻已保存為：" + augmented_file_path)




In [6]:
import random

def generate_random(attr):
  """Draw a random augmentation parameter that is not too close to "no change".

  Values are drawn uniformly and redrawn until they fall outside an excluded
  middle band:

  * 'pitch': drawn from [-1.3, 1.3], rejecting values in [-1.05, 1.05].
  * 'time':  drawn from [0.7, 1.3], rejecting values in [0.95, 1.05].

  :param attr: 'pitch' or 'time'.
  :return: The accepted float, or None when attr is neither 'pitch' nor 'time'.
  """
  if attr == 'pitch':
    low, high = -1.3, 1.3
    reject_low, reject_high = -1.05, 1.05
  elif attr == 'time':
    low, high = 0.7, 1.3
    reject_low, reject_high = 0.95, 1.05
  else:
    return None

  # Rejection sampling: keep drawing until the value leaves the excluded band.
  while True:
    candidate = random.uniform(low, high)
    if not (reject_low <= candidate <= reject_high):
      return candidate

# Augment only the recordings whose score is 0, each with freshly drawn
# pitch and time parameters (pitch is drawn before time).
for _, audio in df.iterrows():
  if audio['score'] != 0:
    continue
  pitch_steps = generate_random(attr='pitch')
  stretch_rate = generate_random(attr='time')
  augment_audio(
      file_path=audio['audio_path'],
      pitch_shift_steps=pitch_steps,
      time_stretch_rate=stretch_rate,
  )

增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_1/あのひと.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_1/きょうし.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_1/かいしゃいん.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_1/エンジニア.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_2/あのかた.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_2/おいくつ.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_4/エンジニア.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_4/おいくつ.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_6/あのひと.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_7/せんせい.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_7/エンジニア.mp3
增量後的音頻已保存為：/content/augmented/A_class/A_class_audio_8/おいくつ.mp3
增量後的音頻已保存為：/content/augmented/B_class/B_class_audio_1/だいがく.mp3
增量後的音頻已保存為：/content/augmented/B_class/B_class_audio_2/わたし.mp3
增量後的音頻已保存為：/content/augmented/B_class/B_class_audio_2/わたしたち.mp3
增量後的音頻已保存為：/content/augmented/B_class/B_class_audi

In [7]:
# Recursively add /content/augmented to augmented_audio.zip in the current directory.
# NOTE(review): the entries are stored as 'content/augmented/...' (see the
# output below), so extracting with `-d /content/` would presumably land in
# /content/content/augmented — confirm the intended extraction directory.
!zip -r augmented_audio.zip /content/augmented


updating: content/augmented/ (stored 0%)
updating: content/augmented/B_class/ (stored 0%)
updating: content/augmented/B_class/B_class_audio_1/ (stored 0%)
updating: content/augmented/B_class/B_class_audio_1/だいがく.mp3 (deflated 14%)
updating: content/augmented/B_class/B_class_audio_4/ (stored 0%)
updating: content/augmented/B_class/B_class_audio_4/ぎんこういん.mp3 (deflated 14%)
updating: content/augmented/B_class/B_class_audio_4/エンジニア.mp3 (deflated 11%)
updating: content/augmented/B_class/B_class_audio_4/いしゃ.mp3 (deflated 19%)
updating: content/augmented/B_class/B_class_audio_4/だれ.mp3 (deflated 23%)
updating: content/augmented/B_class/B_class_audio_4/せんせい.mp3 (deflated 22%)
updating: content/augmented/B_class/B_class_audio_6/ (stored 0%)
updating: content/augmented/B_class/B_class_audio_6/みなさん.mp3 (deflated 16%)
updating: content/augmented/B_class/B_class_audio_6/あのひと.mp3 (deflated 9%)
updating: content/augmented/B_class/B_class_audio_6/がくせい.mp3 (deflated 9%)
updating: content/augmented/B_cla