# In [None]:
import os

import pandas as pd

def synchronize_audio_video(audio_df, video_df):
    """Inner-join video rows with audio rows on a whole-number time key.

    The video key is ``video_df['timestamp']`` rounded to the nearest whole
    number; the audio key is ``audio_df['start_timestep']`` cast to ``int``
    (for float input the cast drops the fractional part rather than
    rounding). Rows are paired wherever the two keys are equal.

    Neither input frame is modified: the temporary key columns are added to
    copies and removed from the result before it is returned.

    Args:
        audio_df: DataFrame with a ``start_timestep`` column.
        video_df: DataFrame with a ``timestamp`` column.

    Returns:
        A new DataFrame holding the video columns followed by the audio
        columns, one row per matching (video, audio) pair. Rows without a
        match on either side are discarded (inner join). Column names that
        occur in both inputs receive pandas' default ``_x`` / ``_y``
        suffixes.

    Raises:
        KeyError: If ``timestamp`` or ``start_timestep`` is missing.
        ValueError: If ``start_timestep`` holds values that cannot be cast
            to ``int`` (e.g. NaN).
    """
    # assign() returns new frames, so the caller's DataFrames do not pick up
    # the helper key columns as a side effect of calling this function.
    video_keyed = video_df.assign(
        rounded_timestamp=video_df['timestamp'].round()
    )
    audio_keyed = audio_df.assign(
        merge_key=audio_df['start_timestep'].astype(int)
    )

    merged_df = pd.merge(
        video_keyed,
        audio_keyed,
        left_on='rounded_timestamp',
        right_on='merge_key',
        how='inner',
    )

    # The key columns only exist to drive the join; strip them from the output.
    return merged_df.drop(columns=['rounded_timestamp', 'merge_key'])

# Input CSVs: audio feature files and video feature files, one per split
# (train / dev / test). Paths are relative to the working directory.
audio_train_path = '1sec/SEWA_features_wav2vec_1_seconds_train.csv'
audio_dev_path = '1sec/SEWA_features_wav2vec_1_seconds_dev.csv'
audio_test_path = '1sec/SEWA_features_wav2vec_1_seconds_test.csv'

video_train_path = 'SEWA_radiant_fog_160_train.csv'
video_dev_path = 'SEWA_radiant_fog_160_dev.csv'
video_test_path = 'SEWA_radiant_fog_160_test.csv'

# Directory that receives the merged CSVs.
output_dir = 'SynchronizedData-Fusion'

# Create the output directory before doing any work: DataFrame.to_csv does
# not create missing parent directories, so without this the script would
# fail only at the very end, after all loading and merging was done.
os.makedirs(output_dir, exist_ok=True)

# Load the datasets
audio_train = pd.read_csv(audio_train_path)
audio_dev = pd.read_csv(audio_dev_path)
audio_test = pd.read_csv(audio_test_path)

video_train = pd.read_csv(video_train_path)
video_dev = pd.read_csv(video_dev_path)
video_test = pd.read_csv(video_test_path)

# Synchronize and merge each split's audio rows with its video rows
merged_train = synchronize_audio_video(audio_train, video_train)
merged_dev = synchronize_audio_video(audio_dev, video_dev)
merged_test = synchronize_audio_video(audio_test, video_test)

# Save the synchronized and merged datasets (row index is not written)
merged_train.to_csv(f'{output_dir}/merged_train.csv', index=False)
merged_dev.to_csv(f'{output_dir}/merged_dev.csv', index=False)
merged_test.to_csv(f'{output_dir}/merged_test.csv', index=False)

print("Synchronization and merging complete. Files saved.")
