In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import mido
from mido import Message, MidiFile, MidiTrack
import subprocess
import numpy as np
from pychord import find_chords_from_notes

In [2]:
def clean_screen_data(file, new_file="None"):
    """
    Clean the given screentime data by dropping unnecessary columns, renaming columns, and converting timestamp to datetime format.
    
    Parameters:
    - file: csv file containing screentime data to be cleaned
    - new_file: csv file to which to save the cleaned data (optional)

    Returns:
    - The cleaned data as a dataframe
    """
    # Read in screen.csv
    df = pd.read_csv(file)

    # Drop the first device_id column
    df = df.drop(columns=['device_id'])

    # Rename the device_id.1 column to screen_status
    df = df.rename(columns={'device_id.1': 'screen_status'})

    # Convert timestamp to datetime format
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

    # Drop the _id columns
    df = df.drop(columns=['_id'])
    
    # Save the cleaned data to a new file if requested
    if new_file != "None":
        df.to_csv(new_file, index=False)

    return df

# Call clean_screen_data and save the cleaned data as clean_df
clean_df = clean_screen_data('../data/screen.csv', 'clean_screen.csv')
clean_df

Unnamed: 0,timestamp,screen_status
0,2022-02-02 20:35:04.892999936,2
1,2022-02-02 20:36:37.041999872,3
2,2022-02-02 20:39:04.928999936,2
3,2022-02-02 20:42:14.048000000,3
4,2022-02-02 20:42:55.027000064,2
...,...,...
29603,2023-01-17 04:25:14.417999872,2
29604,2023-01-17 05:59:51.760000000,3
29605,2023-01-17 06:00:47.371000064,2
29606,2023-01-17 06:03:27.064999936,3


In [4]:
def process_screentime(df, new_file="None"):
    """
    Process the screentime data by calculating screentimes as times elapsed between an unlock event and the next lock.  
    
    Parameters:
    - df: dataframe containing cleaned screentime data to be processed
    - new_file: csv file to which to save the processed data (optional)

    Returns:
    - A new dataframe with a column for timestamps (of lock events) and elapsed screen times in minutes.
    """
    # Convert string timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    
    # Create dictionaries to map dates to screen times and last unlock times
    screen_times = []
    last_unlock_time = {}

    # Iterate through the rows of the dataframe, keeping track of the most recent unlock time for each day
    # For each lock event, calculate the time elapsed since the last unlock time and add it to the running screen time total for the current day
    for index, row in df.iterrows():
        timestamp = pd.to_datetime(row['timestamp'])
        date = timestamp.date()
        status = row['screen_status']
        
        if status == 3:  # Unlock event
            last_unlock_time[date] = timestamp
        elif status == 2 and date in last_unlock_time:  # Lock event
            unlock_time = last_unlock_time.pop(date)
            elapsed_time = (timestamp - unlock_time).seconds / 60
            screen_times.append({'Timestamp': timestamp, 'Screen Time (Mins)': elapsed_time})

    # Save results to a new dataframe
    screen_time_df = pd.DataFrame(screen_times)

    # Save the processed data to a new CSV file if requested
    if new_file != "None":
        screen_time_df.to_csv(new_file, index=False)
    
    return screen_time_df
    
processed_df = process_screentime(clean_df, 'processed_screen.csv')
processed_df


Unnamed: 0,Timestamp,Screen Time (Mins)
0,2022-02-02 20:39:04.928999936,2.450000
1,2022-02-02 20:42:55.027000064,0.666667
2,2022-02-02 20:48:00.084999936,1.083333
3,2022-02-02 20:51:32.307000064,0.966667
4,2022-02-02 21:00:30.535000064,5.816667
...,...,...
14710,2023-01-17 03:50:27.337999872,2.316667
14711,2023-01-17 03:53:52.377999872,1.800000
14712,2023-01-17 04:25:14.417999872,31.316667
14713,2023-01-17 06:00:47.371000064,0.916667


In [5]:
def intervalize_screentime(df, start_time, end_time, interval='day', new_file="None"):
    """
    Group the screentime data into the specified intervals (hours, days, weeks, or months) 
    and calculate screen time totals for each interval.
    
    Parameters:
    - df: processed dataframe containing timestamps and screen time data
    - start_time: start time for the interval (inclusive)
    - end_time: end time for the interval (non-inclusive)
    - interval: interval by which to group the data by (hour, day, week, month) (default: day)
    - new_file: csv file to which to save the intervalized data (optional)

    Returns:
    - A new dataframe with the total screen time for each interval
    """
    df = df.copy()
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    
    # Filter out rows outside specified time range
    mask = (df['Timestamp'] >= start_time) & (df['Timestamp'] < end_time)
    df = df.loc[mask]
    
    # Create column to group rows into specified intervals (hours, days, weeks, or months)
    col_name = interval.capitalize()
    if (interval == 'hour'):
        # Add Hour column where each timestamp is rounded down to the nearest full hour
        df[col_name] = df['Timestamp'].dt.floor('H')
    elif (interval == 'day'):
        # Add Day column containing only the date portion of each timestamp
        df[col_name] = df['Timestamp'].dt.date
    elif (interval == 'week'):
        # Add Week column where each timestamp is converted to a weekly period 
        # and apply lambda to get start of that week
        df[col_name]  = df['Timestamp'].dt.to_period('W').apply(lambda r: r.start_time)
    elif (interval == 'month'):
        # Add Month column where each timestamp is converted to a monthly period
        # and apply lambda to get start of that month
        df[col_name]  = df['Timestamp'].dt.to_period('M').apply(lambda r: r.start_time)
        
    # Group dataframe by specified interval and sum screentime for each interval
    intervalized_df = df.groupby(col_name, as_index=False)['Screen Time (Mins)'].sum() 
    
    # Save the intervalized data to a new CSV file if requested
    if new_file != "None":
        intervalized_df.to_csv(new_file, index=False)
    
    # Return dataframe containing total screen time for each interval
    return intervalized_df


# Compute the total screen time for each day in the month of February 2022
start_time = pd.to_datetime('2022-02-02 00:00:00')
end_time = pd.to_datetime('2022-03-02 00:00:00')
interval = 'day'
intervalized_df = intervalize_screentime(processed_df, start_time, end_time, interval, 'intervalized_screen.csv')
intervalized_df

Unnamed: 0,Day,Screen Time (Mins)
0,2022-02-02,22.266667
1,2022-02-03,41.85
2,2022-02-04,207.216667
3,2022-02-05,53.616667
4,2022-02-06,136.966667
5,2022-02-07,167.5
6,2022-02-08,186.316667
7,2022-02-09,79.0
8,2022-02-10,53.65
9,2022-02-11,74.3


In [None]:
# Sonification rules:
# - Each interval is represented by a simple chord (3 notes)
# - The chord is major if the screen time is below the target, minor if above
# - The longer the screen time, the lower the pitch
# - Each chord plays for the specified duration

In [11]:
# List of note names (C, C#, D, D#, ..., B)
NOTE_LIST = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Convert MIDI note number to note name and octave, based on 60 = C4
def midi_to_note(midi_number):
    note_name = NOTE_LIST[midi_number % 12]  
    octave = (midi_number // 12) - 1   
    return note_name, octave

# Map screentime value to a MIDI pitch between 36 (C2) and 84 (C6),
# normalized to the range [0, max] where max is the maximum of the max recorded screentime and a benchmark value based on the interval
def map_screentime_to_pitch(screen_time, max_screentime, interval='day'):
    min_pitch, max_pitch = 36, 84
    
    # Set a benchmark value based on the interval
    if interval == 'day':
        benchmark = 300
    elif interval == 'week':
        benchmark = 2100
    elif interval == 'month':
        benchmark = 9000
    else:
        benchmark = 60
    return int(np.interp(screen_time, [0, max(max_screentime, benchmark)], [max_pitch, min_pitch])) 

def sonify_screentime(df, target_screentime, interval='day', bpm=120, note_duration=2, output_midi="screentime.mid", output_wav="screentime.wav", soundfont="Grand_Piano.sf2"):
    """
    Map screentime data to chords and generate a MIDI and WAV file from this data. 
    
    Parameters:
    - df: intervalized dataframe containing screentime totals per interval
    - target_screentime: threshold (in minutes) determining major/minor chords
    - interval: interval represented by screentime (default: 'day')
    - bpm: tempo for generated MIDI file (default: 120)
    - note_duration: duration of each note in seconds (default: 2)
    - output_midi: name of output MIDI file (default: "screentime.mid")
    - output_wav: name of output WAV file (default: "screentime.wav")
    - soundfont: path to soundfont file for FluidSynth (default: "Grand_Piano.sf2")

    Returns:
    - A new dataframe with added 'Chord' and 'Octave' columns showing the chords and octaves used for each datapoint
    """
    
    # Print each parameter
    print("Sonifying screentime...")
    print(f"Target Screentime: {target_screentime}")
    print(f"Interval: {interval}")
    print(f"BPM: {bpm}")
    print(f"Note Duration: {note_duration}")
    print(f"Output MIDI: {output_midi}")
    print(f"Output WAV: {output_wav}")
    print(f"Soundfont: {soundfont}")
    print()
    
    # Create a new MIDI file and track
    midi = MidiFile()
    track = MidiTrack()
    midi.tracks.append(track)

    # Set tempo
    tempo = mido.bpm2tempo(bpm)
    track.append(mido.MetaMessage('set_tempo', tempo=tempo))

    # Create lists to store chords and octaves
    chords = []
    chord_octaves = []

    # Get max screentime across the data
    max_screentime = df['Screen Time (Mins)'].max()
    
    for _, row in df.iterrows():
        screentime = row['Screen Time (Mins)']
        
        # Determine base note
        base_note = map_screentime_to_pitch(screentime, max_screentime, interval=interval)

        # Select chord type: major if screentime < target, minor if screentime >= target
        if screentime < target_screentime:
            chord_notes = [base_note, base_note + 4, base_note + 7]
        else:
            chord_notes = [base_note, base_note + 3, base_note + 7]
        
        # Convert notes' MIDI numbers to note names and their octaves
        note_names = [] 
        octaves = []   
        for n in chord_notes:
            note_name, octave = midi_to_note(n)
            note_names.append(note_name)
            octaves.append(octave)
            
        chord_octave = octaves[0]  # Consider base note's octave as the chord octave

        # Get chord name using pychord and store it, along with the constituent notes and octave
        chord_matches = find_chords_from_notes(note_names)
        chord_name = chord_matches[0] if chord_matches else "Unknown"
        chords.append(f"{chord_name} ({', '.join(note_names)})")
        chord_octaves.append(chord_octave)

        # Build the chord
        for note in chord_notes:
            track.append(Message('note_on', note=note, velocity=64, time=0))

        # Release chord after 2 seconds
        track.append(Message('note_off', note=chord_notes[0], velocity=64, time=mido.second2tick(note_duration, midi.ticks_per_beat, tempo)))
        track.append(Message('note_off', note=chord_notes[1], velocity=64, time=0))
        track.append(Message('note_off', note=chord_notes[2], velocity=64, time=0))

    # Save MIDI file
    midi.save(output_midi)

    # Convert MIDI to WAV using FluidSynth
    subprocess.run(["fluidsynth", "-ni", soundfont, output_midi, "-F", output_wav, "-r", "44100"])

    # Add 'Chord' and 'Octave' columns to the dataframe
    df['Chord'] = chords
    df['Octave'] = chord_octaves

    # Print message confirming completion and outputted files
    print(f"Generated {output_wav} from {output_midi}")
    
    # Return dataframe with added 'Chord' and 'Octave' columns
    return df  

# Sonify the screen time data for each day in February 2022 with 180 minutes as the target screentime and 120 BPM, 2 seconds per note
sonified_screen = sonify_screentime(intervalized_df, 180, 'day', 120, 2, soundfont="Grand_Piano.sf2")
sonified_screen.to_csv('sonified_screen.csv', index=False)
sonified_screen

Sonifying screentime...
Target Screentime: 180
Interval: day
BPM: 120
Note Duration: 2
Output MIDI: screentime.mid
Output WAV: screentime.wav
Soundfont: Grand_Piano.sf2





FluidSynth runtime version 2.4.4
Copyright (C) 2000-2025 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of Creative Technology Ltd.

Rendering audio to file 'screentime.wav'..
Generated screentime.wav from screentime.mid


Unnamed: 0,Day,Screen Time (Mins),Chord,Octave
0,2022-02-02,22.266667,"G# (G#, C, D#)",5
1,2022-02-03,41.85,"F (F, A, C)",5
2,2022-02-04,207.216667,"Dm (D, F, A)",3
3,2022-02-05,53.616667,"D# (D#, G, A#)",5
4,2022-02-06,136.966667,"D (D, F#, A)",4
5,2022-02-07,167.5,"A (A, C#, E)",3
6,2022-02-08,186.316667,"F#m (F#, A, C#)",3
7,2022-02-09,79.0,"B (B, D#, F#)",4
8,2022-02-10,53.65,"D# (D#, G, A#)",5
9,2022-02-11,74.3,"C (C, E, G)",5
