In [None]:
import json
import os
from time import perf_counter
from time import time

import pandas as pd

from predict import Wav2Vec2Aligner

In [None]:
# Load model and resources
# Root locations for the input data, the generated results and the saved model.
DATA_PATH = '/data'
RESULT_PATH = '/result'
MODEL_PATH = '/code/saved_models/'

# Inputs: audio files and their lyrics JSON live in sibling folders under DATA_PATH.
SONGS_PATH = os.path.join(DATA_PATH, 'songs')
LYRICS_PATH = os.path.join(DATA_PATH, 'lyrics')
# Outputs: per-file timing table and the zipped predictions.
TIME_SUBMISSION_PATH = os.path.join(RESULT_PATH, 'time_submission.csv')
JUPYTER_SUBMISSION_PATH = os.path.join(RESULT_PATH, 'jupyter_submission.zip')

LYRICS_FILE_EXTENSION = 'json'

# Ensure result directory is created.
# exist_ok=True replaces the separate exists() check, so there is no window
# between "check" and "create" in which another process could create the directory.
os.makedirs(RESULT_PATH, exist_ok=True)

# Load models
# NOTE(review): cuda=False presumably selects CPU inference -- confirm in predict.py.
model = Wav2Vec2Aligner(MODEL_PATH, cuda=False)

In [None]:
# Read test cases
# Build one record per file in SONGS_PATH, paired with the lyrics JSON that
# shares its base name in LYRICS_PATH. Each record holds:
#   "sent"      - flat list of every word['d'] value, in file order
#   "num_words" - number of words in each sentence, in file order
#   "wav_path"  - path of the audio file
#   "out_path"  - path of the JSON file to be produced under RESULT_PATH
test_cases = []
# os.listdir returns entries in arbitrary order; sorting keeps runs reproducible.
for song_file in sorted(os.listdir(SONGS_PATH)):
    # splitext drops only the final extension, so base names that themselves
    # contain dots are preserved instead of being cut at the first dot.
    filename, _ = os.path.splitext(song_file)
    lyrics_file = os.path.join(LYRICS_PATH, f'{filename}.{LYRICS_FILE_EXTENSION}')

    # Decode explicitly as UTF-8 rather than relying on the platform's locale default.
    with open(lyrics_file, 'r', encoding='utf-8') as f:
        label = json.load(f)

    # The file handle is no longer needed once the JSON is parsed.
    # Each sentence keeps its words under 'l'; each word keeps its text under 'd'.
    lyric = [word['d'] for sentence in label for word in sentence['l']]
    num_words_per_sentence = [len(sentence['l']) for sentence in label]

    test_cases.append(
        {
            "sent": lyric,
            "num_words": num_words_per_sentence,
            "wav_path": os.path.join(SONGS_PATH, song_file),
            "out_path": os.path.join(RESULT_PATH, filename + '.json')
        }
    )

In [None]:
# Save prediction
# Run the aligner on every test case and record how long each call takes.
# NOTE(review): align_single_sample presumably writes its result to
# item['out_path'] -- confirm in predict.py.
prediction_times = []
num_test_cases = len(test_cases)
for i, item in enumerate(test_cases, start=1):
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments during the run.
    t1 = perf_counter()
    model.align_single_sample(item)
    t2 = perf_counter()

    file_name = item['wav_path']
    # Elapsed time in whole milliseconds (truncated).
    prediction_time = int((t2 - t1) * 1000)
    print(f'{i}/{num_test_cases} {file_name}: {prediction_time} ms')

    prediction_times.append((file_name, prediction_time))

# Save prediction time
# One row per test case: the audio path and its elapsed milliseconds.
time_submission_df = pd.DataFrame(data=prediction_times, columns=['fname', 'time'])
time_submission_df

In [None]:
# Write time
time_submission_df.to_csv(
    TIME_SUBMISSION_PATH,
    index=False, # Don't save index
)

# Package outputs
!cd $RESULT_PATH && mkdir jupyter_submission && mv *.json jupyter_submission && zip -r jupyter_submission.zip jupyter_submission && rm -fr jupyter_submission