In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import mido
from mido import Message, MidiFile, MidiTrack
import subprocess
import numpy as np
from pychord import find_chords_from_notes

In [3]:
def clean_screen_data(file, new_file="None"):
    """
    Clean the given screentime data by dropping unnecessary columns, renaming columns, and converting timestamp to datetime format.
    
    Parameters:
    - file: csv file containing screentime data to be cleaned
    - new_file: csv file to which to save the cleaned data (optional)

    Returns:
    - The cleaned data as a dataframe
    """
    # Read in screen.csv
    df = pd.read_csv(file)

    # Drop the first device_id column
    df = df.drop(columns=['device_id'])

    # Rename the device_id.1 column to screen_status
    df = df.rename(columns={'device_id.1': 'screen_status'})

    # Convert timestamp to datetime format
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

    # Drop the _id columns
    df = df.drop(columns=['_id'])
    
    # Save the cleaned data to a new file if requested
    if new_file != "None":
        df.to_csv(new_file, index=False)

    return df

In [4]:
# Call clean_screen_data and save the cleaned data as clean_df
clean_df = clean_screen_data('../data/screen.csv', 'clean_screen.csv')
clean_df

Unnamed: 0,timestamp,screen_status
0,2022-02-02 20:35:04.892999936,2
1,2022-02-02 20:36:37.041999872,3
2,2022-02-02 20:39:04.928999936,2
3,2022-02-02 20:42:14.048000000,3
4,2022-02-02 20:42:55.027000064,2
...,...,...
29603,2023-01-17 04:25:14.417999872,2
29604,2023-01-17 05:59:51.760000000,3
29605,2023-01-17 06:00:47.371000064,2
29606,2023-01-17 06:03:27.064999936,3


In [5]:
def process_screentime(df, new_file="None"):
    """
    Process the screentime data by calculating screentimes as times elapsed between an unlock event and the next lock.  
    
    Parameters:
    - df: dataframe containing cleaned screentime data to be processed
    - new_file: csv file to which to save the processed data (optional)

    Returns:
    - A new dataframe with a column for timestamps (of lock events) and elapsed screen times in minutes.
    """
    # Convert string timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    
    # Create dictionaries to map dates to screen times and last unlock times
    screen_times = []
    last_unlock_time = {}

    # Iterate through the rows of the dataframe, keeping track of the most recent unlock time for each day
    # For each lock event, calculate the time elapsed since the last unlock time and add it to the running screen time total for the current day
    for index, row in df.iterrows():
        timestamp = pd.to_datetime(row['timestamp'])
        date = timestamp.date()
        status = row['screen_status']
        
        if status == 3:  # Unlock event
            last_unlock_time[date] = timestamp
        elif status == 2 and date in last_unlock_time:  # Lock event
            unlock_time = last_unlock_time.pop(date)
            elapsed_time = (timestamp - unlock_time).seconds / 60
            screen_times.append({'Timestamp': timestamp, 'Screen Time (Mins)': elapsed_time})

    # Save results to a new dataframe
    screen_time_df = pd.DataFrame(screen_times)

    # Save the processed data to a new CSV file if requested
    if new_file != "None":
        screen_time_df.to_csv(new_file, index=False)
    
    return screen_time_df


In [6]:
processed_df = process_screentime(clean_df, 'processed_screen.csv')
processed_df

Unnamed: 0,Timestamp,Screen Time (Mins)
0,2022-02-02 20:39:04.928999936,2.450000
1,2022-02-02 20:42:55.027000064,0.666667
2,2022-02-02 20:48:00.084999936,1.083333
3,2022-02-02 20:51:32.307000064,0.966667
4,2022-02-02 21:00:30.535000064,5.816667
...,...,...
14710,2023-01-17 03:50:27.337999872,2.316667
14711,2023-01-17 03:53:52.377999872,1.800000
14712,2023-01-17 04:25:14.417999872,31.316667
14713,2023-01-17 06:00:47.371000064,0.916667


In [7]:
def intervalize_screentime(df, start_time, end_time, interval='day', new_file="None"):
    """
    Group the screentime data into the specified intervals (hours, days, weeks, or months) 
    and calculate screen time totals for each interval.
    
    Parameters:
    - df: processed dataframe containing timestamps and screen time data
    - start_time: start time for the interval (inclusive)
    - end_time: end time for the interval (non-inclusive)
    - interval: interval by which to group the data by (hour, day, week, month) (default: day)
    - new_file: csv file to which to save the intervalized data (optional)

    Returns:
    - A new dataframe with the total screen time for each interval
    """
    df = df.copy()
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    
    # Filter out rows outside specified time range
    mask = (df['Timestamp'] >= start_time) & (df['Timestamp'] < end_time)
    df = df.loc[mask]
    
    # Create column to group rows into specified intervals (hours, days, weeks, or months)
    col_name = interval.capitalize()
    if (interval == 'hour'):
        # Add Hour column where each timestamp is rounded down to the nearest full hour
        df[col_name] = df['Timestamp'].dt.floor('H')
    elif (interval == 'day'):
        # Add Day column containing only the date portion of each timestamp
        df[col_name] = df['Timestamp'].dt.date
    elif (interval == 'week'):
        # Add Week column where each timestamp is converted to a weekly period 
        # and apply lambda to get start of that week
        df[col_name]  = df['Timestamp'].dt.to_period('W').apply(lambda r: r.start_time)
    elif (interval == 'month'):
        # Add Month column where each timestamp is converted to a monthly period
        # and apply lambda to get start of that month
        df[col_name]  = df['Timestamp'].dt.to_period('M').apply(lambda r: r.start_time)
        
    # Group dataframe by specified interval and sum screentime for each interval
    intervalized_df = df.groupby(col_name, as_index=False)['Screen Time (Mins)'].sum() 
    
    # Save the intervalized data to a new CSV file if requested
    if new_file != "None":
        intervalized_df.to_csv(new_file, index=False)
    
    # Return dataframe containing total screen time for each interval
    return intervalized_df

In [9]:
# Compute the total screen time for each day from February to December 2022
start_time = pd.to_datetime('2022-02-02 00:00:00')
end_time = pd.to_datetime('2022-12-02 00:00:00')
interval = 'day'
intervalized_df = intervalize_screentime(processed_df, start_time, end_time, interval, 'intervalized_screen.csv')
intervalized_df

Unnamed: 0,Day,Screen Time (Mins)
0,2022-02-02,22.266667
1,2022-02-03,41.850000
2,2022-02-04,207.216667
3,2022-02-05,53.616667
4,2022-02-06,136.966667
...,...,...
285,2022-11-27,349.000000
286,2022-11-28,221.383333
287,2022-11-29,66.966667
288,2022-11-30,131.416667


In [None]:
# Find the days with the min, max, and median (sorted) screentimes
min_day = intervalized_df.loc[intervalized_df['Screen Time (Mins)'].idxmin()]
max_day = intervalized_df.loc[intervalized_df['Screen Time (Mins)'].idxmax()]

# Sort the dataframe by screen time in descending order
sorted_df = intervalized_df.sort_values(by='Screen Time (Mins)', ascending=False)
median_day = sorted_df.iloc[len(sorted_df) // 2]

# Print the days with the min, max, and median screentimes
print(f"Day with min screen time: {min_day['Day']}, Screen Time: {min_day['Screen Time (Mins)']} mins")
print(f"Day with max screen time: {max_day['Day']}, Screen Time: {max_day['Screen Time (Mins)']} mins")
print(f"Day with median screen time: {median_day['Day']}, Screen Time: {median_day['Screen Time (Mins)']} mins")

Day with min screen time: 2022-08-23, Screen Time: 1.35 mins
Day with max screen time: 2022-05-25, Screen Time: 687.0833333333334 mins
Day with median screen time: 2022-06-26, Screen Time: 191.88333333333333 mins


In [None]:
# Sonification rules:
# - Each interval is represented by a simple chord (3 notes)
# - The chord is major if the screen time is below the target, minor if above
# - The longer the screen time, the lower the pitch
# - Each chord plays for the specified duration

In [None]:
# List of note names (C, C#, D, D#, ..., B)
NOTE_LIST = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Convert MIDI note number to note name and octave, based on 60 = C4
def midi_to_note(midi_number):
    note_name = NOTE_LIST[midi_number % 12]  
    octave = (midi_number // 12) - 1   
    return note_name, octave

# Map screentime value to a MIDI pitch between 36 (C2) and 84 (C6),
# normalized to the range [0, max] where max is the maximum of the max recorded screentime and a benchmark value based on the interval
def map_screentime_to_pitch(screen_time, max_screentime, interval='day'):
    min_pitch, max_pitch = 36, 84
    
    # Set a benchmark value based on the interval
    if interval == 'day':
        benchmark = 300
    elif interval == 'week':
        benchmark = 2100
    elif interval == 'month':
        benchmark = 9000
    else:
        benchmark = 60
    return int(np.interp(screen_time, [0, max(max_screentime, benchmark)], [max_pitch, min_pitch])) 

def sonify_screentime(df, target_screentime, interval='day', note_duration=2, output_midi="screentime.mid", output_wav="screentime.wav", soundfont="Grand_Piano.sf2"):
    """
    Map screentime data to chords and generate a MIDI and WAV file from this data. 
    
    Parameters:
    - df: intervalized dataframe containing screentime totals per interval
    - target_screentime: threshold (in minutes) determining major/minor chords
    - interval: interval represented by screentime (default: 'day')
    - note_duration: duration of each note in seconds (default: 2)
    - output_midi: name of output MIDI file (default: "screentime.mid")
    - output_wav: name of output WAV file (default: "screentime.wav")
    - soundfont: path to soundfont file for FluidSynth (default: "Grand_Piano.sf2")

    Returns:
    - A new dataframe with added 'Chord' and 'Octave' columns showing the chords and octaves used for each datapoint
    """
       
    # Print each parameter
    print("Sonifying screentime...")
    print(f"Target Screentime: {target_screentime}")
    print(f"Interval: {interval}")
    print(f"Note Duration: {note_duration}")
    print(f"Output MIDI: {output_midi}")
    print(f"Output WAV: {output_wav}")
    print(f"Soundfont: {soundfont}")
    print()
    
    # Create a new MIDI file and track
    midi = MidiFile()
    track = MidiTrack()
    midi.tracks.append(track)

    # Set tempo as 60 BPM
    tempo = mido.bpm2tempo(60)
    track.append(mido.MetaMessage('set_tempo', tempo=tempo))
    
    # Add MIDI controls for sustain (64), reverb (91), and expression (11)
    track.append(Message('control_change', control=64, value=127, time=0))
    track.append(Message('control_change', control=91, value=127, time=0))
    track.append(Message('control_change', control=11, value=50, time=0))

    # Create lists to store chords and octaves
    chords = []
    chord_octaves = []

    # Get max screentime across the data
    max_screentime = df['Screen Time (Mins)'].max()
    
    # How much to overlap the chords-- to reduce choppiness
    overlap_ratio = 0.6
    
    # Velocity (volume) of notes
    velocity = 50

    # Set the note duration (in ticks) and space between chords
    midi.ticks_per_beat = 480
    full_duration_ticks = int(note_duration * midi.ticks_per_beat) 
    interval_between_chords = int(full_duration_ticks * (1 - overlap_ratio))  
    
    # Store scheduled note-off events and track the absolute tick position in the track
    scheduled_note_offs = [] 
    
    # Process all chords
    for i, (_, row) in enumerate(df.iterrows()):
        screentime = row['Screen Time (Mins)']
        
        # Determine base note
        base_note = map_screentime_to_pitch(screentime, max_screentime, interval=interval)

        # Select chord type: major if screentime < target, minor if screentime >= target
        if screentime < target_screentime:
            chord_notes = [base_note, base_note + 4, base_note + 7]
        else:
            chord_notes = [base_note, base_note + 3, base_note + 7]
        
        # Convert notes' MIDI numbers to note names and their octaves
        note_names = [] 
        octaves = []   
        for n in chord_notes:
            note_name, octave = midi_to_note(n)
            note_names.append(note_name)
            octaves.append(octave)
            
        chord_octave = octaves[0]  # Consider base note's octave as the chord octave

        # Get chord name using pychord and store it, along with the constituent notes and octave
        chord_matches = find_chords_from_notes(note_names)
        chord_name = chord_matches[0] if chord_matches else "Unknown"
        chords.append(f"{chord_name} ({', '.join(note_names)})")
        chord_octaves.append(chord_octave)
        
        # Calculate note-on time for this chord: first chord in track starts at 0, rest at intervals
        note_on_time = 0 if i == 0 else interval_between_chords 
        
        # Flush scheduled note-offs before adding new notes
        for j, (note, ticks_remaining) in enumerate(scheduled_note_offs):
            time = ticks_remaining if j == 0 else 0
            track.append(Message('note_off', note=note, velocity=0, time=time))
        scheduled_note_offs = []
            
        # Add each note to the chord-- first note starts at note-on time, others follow immediately
        for j, note in enumerate(chord_notes):
            time = note_on_time if i > 0 and j == 0 else 0
            track.append(Message('note_on', note=note, velocity=velocity, time=time))
            
        for note in chord_notes:
            scheduled_note_offs.append((note, full_duration_ticks)) # Schedule note-off time

        # Every 4 notes, rearticulate sustain pedal to prevent muddiness
        if i > 0 and i % 4 == 0:
            track.append(Message('control_change', control=64, value=0, time=0))
            track.append(Message('control_change', control=64, value=127, time=0))
    
    # Flush scheduled note-offs (for last chord) 
    for j, (note, ticks_remaining) in enumerate(scheduled_note_offs):
        time = ticks_remaining if j == 0 else 0
        track.append(Message('note_off', note=note, velocity=0, time=time))
        
    track.append(Message('control_change', control=64, value=0, time=20))
    
    # Dummy control change to prevent silence at end
    track.append(Message('note_on', note=1, velocity=1, time=20))
    track.append(Message('note_off', note=1, velocity=0, time=10))
    
    # Meta message for DAWs
    track.append(mido.MetaMessage('end_of_track', time=0))
    
    # Save MIDI file
    midi.save(output_midi)

    # Convert MIDI to WAV using FluidSynth with tuned reverb, chorus, and 44100 sample rate
    subprocess.run([
        "fluidsynth", "-ni", "-g", "1.2",
        "-R", "1", "-C", "3",
        soundfont, output_midi,
        "-F", output_wav, "-r", "44100"
    ])

    # Add 'Chord' and 'Octave' columns to the dataframe
    df['Chord'] = chords
    df['Octave'] = chord_octaves

    # Print message confirming completion and outputted files
    print(f"Generated {output_wav} from {output_midi}")
    
    # Return dataframe with added 'Chord' and 'Octave' columns
    return df


# Function to implement the entire pipeline
def sonify_screentime_data(file, start_time, end_time, target_screentime, interval, note_duration, output_midi="screentime.mid", output_wav="screentime.wav", soundfont="Grand_Piano.sf2"):
    """
    Clean, process, intervalize, and sonify screentime data.
    
    Parameters:
    - file: csv file containing screentime data
    - start_time: start time for the interval (inclusive)
    - end_time: end time for the interval (non-inclusive)
    - target_screentime: target screentime in minutes
    - interval: interval by which to group the data by ('hour', 'day', 'week', 'month')
    - note_duration: duration of each note 
    """
    # Clean the screentime data
    clean_df = clean_screen_data(file)

    # Process the screentime data
    processed_df = process_screentime(clean_df)

    # Intervalize the screentime data
    intervalized_df = intervalize_screentime(processed_df, start_time, end_time, interval)

    # Sonify the screentime data
    df = sonify_screentime(intervalized_df, target_screentime, interval, note_duration, output_midi, output_wav, soundfont)
    
    # Return final dataframe
    return df

In [None]:
# Sonify the screen time data for each day in February 2022 with 180 minutes as the target screentime and 120 BPM, 2 seconds per note
sonified_screen = sonify_screentime(intervalized_df, 180, 'day', 120, 2, soundfont="Grand_Piano.sf2")
sonified_screen.to_csv('sonified_screen.csv', index=False)
sonified_screen