In [66]:
# Importing the required libraries
import os
import json
import pandas as pd
import datetime
# Import the required classes from the mutagen library
from mutagen.wave import WAVE
from mutagen import id3

In [67]:
# Load the CSV file into a pandas dataframe
df = pd.read_csv(filepath_or_buffer='rain_files.csv')

# Preview the top five rows of the dataframe
df.head(n=5)

Unnamed: 0,timestamp,filename,rain,rain_class,total_rain
0,2023-02-14 10:00:00,SMM00894_20230214_100000.wav,0,no rain,0.0
1,2023-02-14 10:05:00,SMM00894_20230214_100500.wav,0,no rain,0.0
2,2023-02-14 10:10:00,SMM00894_20230214_101011.wav,0,no rain,0.0
3,2023-02-14 10:15:00,SMM00894_20230214_101500.wav,0,no rain,0.0
4,2023-02-14 10:20:00,SMM00894_20230214_102000.wav,0,no rain,0.0


In [68]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,rain,total_rain
count,48208.0,48208.0
mean,0.028336,0.027746
std,0.165931,0.300219
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,1.0,16.8


In [69]:
# Write ID3 metadata into every .wav file listed in the dataframe, then drop
# the rows whose file is missing and save the cleaned dataframe back to disk.

# Folder containing the .wav files
# NOTE(review): absolute, machine-specific path; consider moving it to a config cell.
folder = r'/home/maciel/sound_of_rainfall'
missing_files = []
for index, row in df.iterrows():
    # Build full path
    full_path = os.path.join(folder, row['filename'])
    # Rows without a matching .wav file are skipped here and dropped below
    if not os.path.isfile(full_path):
        print(f"{row['filename']} does not exist in {folder}. Skipping ...\nRow is marked to be deleted.")
        missing_files.append(index)
        continue
    print(f"Processing {row['filename']}...")

    # Open the file and start from an empty tag container.
    audio = WAVE(full_path)
    if audio.tags is None:
        # No ID3 chunk yet: let WAVE create its own tag object.
        audio.add_tags()
    else:
        # Keep the tag object WAVE loaded and only empty it. Replacing it with a
        # plain id3.ID3() would make save() write the tag at the start of the
        # file instead of inside the RIFF container.
        audio.tags.clear()
    tags = audio.tags

    # Title: original filename
    tags.add(id3.TIT2(encoding=3, text=row['filename']))
    # Artist: location
    tags.add(id3.TPE1(encoding=3, text='Campus'))
    # Album: category ['no rain', 'light', 'moderate', 'heavy', 'violent']
    tags.add(id3.TALB(encoding=3, text=row['rain_class']))
    # Composer: day (06:00-17:59) or night
    dt = datetime.datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
    tags.add(id3.TCOM(encoding=3, text='day' if 6 <= dt.hour < 18 else 'night'))
    # Genre: year
    tags.add(id3.TCON(encoding=3, text=str(dt.year)))
    # Comments: additional metadata, serialized as JSON
    comment_json = json.dumps({'timestamp': row['timestamp'], 'rain': row['rain'], 'total_rain': row['total_rain']})
    tags.add(id3.COMM(encoding=3, text=comment_json))

    # Save the metadata
    audio.save()

# Drop the rows corresponding to the missing files and renumber the index
df = df.drop(index=missing_files).reset_index(drop=True)

# Save the updated dataframe
# NOTE(review): this overwrites the CSV that was read as input.
df.to_csv('rain_files.csv', index=False)

SMM00894_20230214_100000.wav does not exist in /home/maciel/sound_of_rainfall. Skipping ...
Row is marked to be deleted.
Processing SMM00894_20230214_100500.wav...
Processing SMM00894_20230214_101011.wav...
Processing SMM00894_20230214_101500.wav...
Processing SMM00894_20230214_102000.wav...
Processing SMM00894_20230214_102500.wav...
Processing SMM00894_20230214_103000.wav...
Processing SMM00894_20230214_103500.wav...
Processing SMM00894_20230214_104000.wav...
Processing SMM00894_20230214_104500.wav...
Processing SMM00894_20230214_105000.wav...
Processing SMM00894_20230214_105500.wav...
Processing SMM00894_20230214_110000.wav...
Processing SMM00894_20230214_110500.wav...
Processing SMM00894_20230214_111000.wav...
Processing SMM00894_20230214_111500.wav...
Processing SMM00894_20230214_112000.wav...
Processing SMM00894_20230214_112500.wav...
Processing SMM00894_20230214_113000.wav...
Processing SMM00894_20230214_113500.wav...
Processing SMM00894_20230214_114000.wav...
Processing SMM00894