# Sentence and AudioFile Check

In [3]:
import pandas as pd

# Read the sentence / audio-file mapping sheet and show it as the cell output
# so the pairing of sentences and recordings can be checked by eye.
Sentence_1000_sample = pd.read_excel(io='01_Sentence_AudioName_sample.xlsx')
Sentence_1000_sample

Unnamed: 0,NewNO,Sentence,Audio_File,Sent_NO
0,1,Can we have three sodas?,0001_1_3.wav,14
1,2,I think so.,0002_1_2.wav,52


# Native Speaker Reference Speech vs. Student's Speech
- DTW distance computed on z-normalized F0 (pitch) contours, so the comparison reflects contour shape rather than absolute pitch level

In [22]:
import librosa
import numpy as np
import pandas as pd
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis
import matplotlib.pyplot as plt
import os
from scipy import stats
from dtaidistance import ed

In [8]:
def extract_f0(file_path):
    """Return the voiced fundamental-frequency (F0) contour of an audio file.

    The audio is loaded with ``librosa.load`` (default settings), pitch is
    tracked with ``librosa.pyin`` between the notes C2 and C7, and frames for
    which no pitch was estimated (NaN) are dropped.

    Parameters
    ----------
    file_path : str or path-like
        Path of an audio file readable by ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D array holding the F0 estimate of every frame that has one. The
        array is empty when no frame received a pitch estimate.
    """
    y, sr = librosa.load(file_path)
    # Hand the loader's sample rate to pyin explicitly: otherwise pyin uses its
    # own default, which only matches as long as the loading rate is untouched.
    f0, _voiced_flag, _voiced_probs = librosa.pyin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
    )
    # Clean NaN values (frames without a pitch estimate).
    return f0[~np.isnan(f0)]

In [27]:
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import os

# ---------------------------------------------------------------------------
# Compare every student recording with its native-speaker reference using DTW
# on z-normalized F0 contours, save one annotated warping plot per recording,
# and collect the distances into an Excel sheet.
# ---------------------------------------------------------------------------

# Load the combined DataFrame. Each row supplies the sentence text
# ('Sentence'), the student's audio file name ('Audio_File') and the number of
# the matching reference recording ('Sent_NO').
Sentence_1000 = pd.read_excel('01_Sentence_AudioName_sample.xlsx')  # Replace with your actual file path

# Directory paths
reference_dir = 'NS_178_split'  # Native Speaker Reference Audio
test_files_dir = 'Y_RaterA_1000_wav'  # Students' Speaking Audio
visualization_dir = "warping_Y_RaterA_1000_script_NS_zNormal"  # Warping plots

# Create the plot directory once instead of re-checking it for every row.
os.makedirs(visualization_dir, exist_ok=True)

# Load the caption font once (it does not change between rows). Fall back to
# Pillow's built-in font when the TrueType file is not installed, so a missing
# font does not abort the whole run.
try:
    font = ImageFont.truetype("Times New Roman.ttf", 20)
except OSError:
    print("Font 'Times New Roman.ttf' not found; using Pillow's default font")
    font = ImageFont.load_default()

# Accumulates one result record (dict) per successfully processed row.
results = []

# Process each row in the DataFrame
for _, row in Sentence_1000.iterrows():
    sentence = row['Sentence']

    # Reference file
    reference_file_num = row['Sent_NO']
    reference_file_path = os.path.join(reference_dir, f"{reference_file_num}.wav")

    # Test file
    test_file_name = row['Audio_File']
    test_file_path = os.path.join(test_files_dir, test_file_name)

    # Check if files exist
    if not os.path.exists(reference_file_path) or not os.path.exists(test_file_path):
        print(f"File not found: {reference_file_path} or {test_file_path}")
        continue

    # Extract the F0 contours of the reference and the student recording.
    f0_reference = extract_f0(reference_file_path)
    f0_test = extract_f0(test_file_path)

    # A recording without any pitched frame yields an empty contour, which
    # cannot be z-normalized or aligned; skip the pair and report it.
    if f0_reference.size == 0 or f0_test.size == 0:
        print(f"No voiced frames detected: {reference_file_path} or {test_file_path}")
        continue

    # Z-normalize both contours, then measure their DTW distance.
    f0_reference_z = stats.zscore(f0_reference)
    f0_test_z = stats.zscore(f0_test)
    distance = dtw.distance(f0_reference_z, f0_test_z)

    # Visualization
    path = dtw.warping_path(f0_reference_z, f0_test_z)
    base_name = os.path.splitext(os.path.basename(test_file_path))[0]
    # The sentence becomes part of the file name; replace path separators so it
    # cannot point into a (non-existent) subdirectory.
    sentence_for_name = sentence.replace('/', '_').replace('\\', '_')
    visualization_file = os.path.join(
        visualization_dir,
        "NS_normal_warping_" + base_name + "_" + sentence_for_name + ".png",
    )
    dtwvis.plot_warping(f0_reference_z, f0_test_z, path, filename=visualization_file)

    # Add sentence text to the image
    img = Image.open(visualization_file)
    draw = ImageDraw.Draw(img)
    text_position = (150, img.height - 62)  # Adjust as needed
    draw.text(text_position, sentence, font=font, fill="black")
    img.save(visualization_file)

    # Store results
    results.append({
        'Sent_NO': reference_file_num,
        'Test File': test_file_name,
        'NS_DTW Distance': distance,
        'Sentence': sentence,
    })


# Create DataFrame and display results
df = pd.DataFrame(results)
print(df)

# Save DataFrame as Excel file
excel_file_path = 'DTW_NS_Results_1000_z_Normal.xlsx'
df.to_excel(excel_file_path, index=False)

print(f"Results saved to {excel_file_path}")

     Sent_NO                                        Test File   
0         14     0001_1_20220527_5596514_39689_03B_1_SS_3.wav  \
1         52     0002_1_20220523_5582218_39688_02B_1_SS_2.wav   
2          5     0003_1_20220516_5618660_39691_02B_1_SS_4.wav   
3         45     0004_1_20220529_5603120_39692_01B_1_SS_1.wav   
4         40  0005_1_20220519_5582175_39688_01B_1000_SS_3.wav   
..       ...                                              ...   
995       84     0996_1_20220502_5602052_39688_03B_1_SS_3.wav   
996      148     0997_1_20220526_5624152_39692_02A_1_SS_4.wav   
997      115     0998_1_20220514_5595805_39693_03B_1_SS_4.wav   
998       71     0999_1_20220509_5605010_39691_03A_1_SS_3.wav   
999       24     1000_1_20220508_5617383_39693_03A_1_SS_3.wav   

     NS_DTW Distance                     Sentence  
0           4.268282     Can we have three sodas?  
1           2.715634                  I think so.  
2           5.468992         Are you still tired?  
3          

In [None]:
# End