In [None]:
import os
import sys
import librosa
import matplotlib.pyplot as plt
import librosa.display
import IPython.display as ipd
import numpy as np
import mysql.connector as mysql
import bokeh.plotting
import bokeh.models
import bokeh.io
import datetime
import pandas as pd
import math
import re

In [None]:
def getCombinedWavs(directory, use_directory_starttime=True):
    """Load all ``<digits>-<end>.wav`` parts in ``directory`` and join them into one signal.

    Parts are read in sorted filename order at their native sampling rate.
    The digits after the dash in a filename are taken as the unix time (ms) at
    which that part ends; its start is derived from its sample count. Timing of
    every part and the gap to the previous part are printed along the way.

    Args:
        directory: Folder holding the parts. When ``use_directory_starttime``
            is true, the digits following the last ``-`` in this string are
            used as the start time of the combined recording.
        use_directory_starttime: If false, the computed start of the first
            part is used as the start time instead.

    Returns:
        Tuple ``(y, sr, unixtime_start)``: the concatenated samples, the
        sampling rate shared by all parts, and the start time in unix ms.

    Raises:
        ValueError: If no usable part is found, or the start time cannot be
            read from ``directory``.
        Exception: If the parts do not all share one sampling rate.
    """
    part_pattern = re.compile(r"^\d*-(\d*)\.wav")
    sr = None
    parts = []
    first_file_start = None
    last_endtime = None
    total_gap = 0
    for filename in sorted(os.listdir(directory)):
        match = part_pattern.search(filename)
        if match is None or not match.group(1):
            # Stray files (notes, .DS_Store, ...) carry no end time; skip them
            # instead of failing while loading or parsing them.
            print (f'Skipping {filename}: not a <start>-<end>.wav part')
            continue
        y_part, sr_part = librosa.load(os.path.join(directory, filename), sr=None)

        file_end = int(match.group(1))
        file_start = file_end - int(len(y_part) * 1000 / sr_part)
        if sr is None:
            sr = sr_part
            first_file_start = file_start
        elif sr != sr_part:
            raise Exception("Sampling rate mismatch")
        # Collect the parts and concatenate once after the loop; concatenating
        # on every iteration copies the growing array each time.
        parts.append(y_part)
        nr = len(parts)
        if last_endtime is None:
            print (f'part {nr:3d}, {len(y_part):7d} samples from unixtime {file_start} to {file_end}.')
        else:
            gap = file_start - last_endtime
            total_gap += gap
            print (f'part {nr:3d}, {len(y_part):7d} samples from unixtime {file_start} to {file_end}. Gap: {gap}. Total gap: {total_gap}')
        last_endtime = file_end

    if not parts:
        raise ValueError(f'No <start>-<end>.wav parts found in {directory}')

    if use_directory_starttime:
        dir_match = re.search(r".*-(\d*)", directory)
        if dir_match is None or not dir_match.group(1):
            raise ValueError(f'Cannot read a start time from directory name {directory}')
        unixtime_start = int(dir_match.group(1))
    else:
        unixtime_start = first_file_start

    y = np.concatenate(parts)
    nr = len(parts)
    unixtime_end = int(unixtime_start + len(y) * 1000 / sr)
    print (f'total:    {len(y):7d} samples from unixtime {unixtime_start} to {unixtime_end}')
    print (f'Read {nr} parts from {directory}. {y.shape[0]} samples: {y.shape[0]/sr:.1f} seconds at {sr} samples/sec, starting at unixtime {unixtime_start}')
    return y, sr, unixtime_start

def getData(directory, use_directory_starttime=True, user_id=8):
    """Load a recording and the key events logged while it was being made.

    The audio comes from ``getCombinedWavs``. Key events are the ``keydown``
    and ``keyup`` rows of ``logger_traces`` for ``user_id`` whose ``unix_time``
    lies between the start and the end of the recording (both inclusive).

    Connection settings default to the local development database and can be
    overridden with the environment variables ``LOGGER_DB_USER``,
    ``LOGGER_DB_PASSWORD``, ``LOGGER_DB_HOST``, ``LOGGER_DB_PORT`` and
    ``LOGGER_DB_NAME``.

    Args:
        directory: Folder with the wav parts, passed to ``getCombinedWavs``.
        use_directory_starttime: Passed to ``getCombinedWavs``.
        user_id: Value matched against ``logger_traces.user_id``.

    Returns:
        Dict with keys ``wav``, ``sr``, ``audio_unixtime_ms``, ``up_events``
        and ``down_events``. Each event is a
        ``(time, unix_time, type, data)`` row, ordered by ``unix_time``.

    Exits the interpreter with status 1 (after printing the error) when the
    database connection cannot be opened.
    """
    wav, sr, audio_unixtime_ms = getCombinedWavs(directory, use_directory_starttime)
    audio_length_ms = int(len(wav) / sr * 1000)
    audio_end_ms = audio_unixtime_ms + audio_length_ms

    print (f'Recording starts at unix time {audio_unixtime_ms} and lasts until {audio_end_ms}')

    try:
        conn = mysql.connect(
            user=os.environ.get("LOGGER_DB_USER", "root"),
            password=os.environ.get("LOGGER_DB_PASSWORD", "1234"),
            host=os.environ.get("LOGGER_DB_HOST", "localhost"),
            port=int(os.environ.get("LOGGER_DB_PORT", 3306)),
            database=os.environ.get("LOGGER_DB_NAME", "imagedescription")
        )
    except mysql.Error as e:
        print(f"Error connecting to MariaDB Platform: {e}")
        sys.exit(1)

    # ORDER BY makes "the first event" well defined for callers; without it
    # the row order is up to the database.
    query = ("SELECT time, unix_time, type, data "
             "FROM logger_traces "
             "WHERE type IN ('keydown', 'keyup') "
             "AND user_id = %s "
             "AND unix_time BETWEEN %s AND %s "
             "ORDER BY unix_time")

    # Release cursor and connection even when the query fails, so repeated
    # calls (e.g. in a loop over directories) do not pile up open connections.
    try:
        cur = conn.cursor()
        try:
            cur.execute(query, (user_id, audio_unixtime_ms, audio_end_ms))
            events = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    up_events = [event for event in events if event[2] == 'keyup']
    down_events = [event for event in events if event[2] == 'keydown']

    print(f'Read {len(up_events)} keyup events and {len(down_events)} keydown events.')

    return {
        'wav': wav,
        'sr': sr,
        'audio_unixtime_ms': audio_unixtime_ms,
        'up_events': up_events,
        'down_events': down_events
    }

def plotMatplotlib(data):
    """Draw the waveform with keydown/keyup markers using matplotlib.

    Args:
        data: Dict as returned by ``getData`` / ``getDataSubset``.

    The x axis is in seconds since the start of the recording. Keydown events
    are drawn at y=0 and keyup events at y=0.01.
    """
    wav = data['wav']
    sr = data['sr']
    audio_unixtime_ms = data['audio_unixtime_ms']
    up_events = data['up_events']
    down_events = data['down_events']

    plt.figure(figsize=(12, 4))
    plt.ylim((0, 0.02))
    # librosa deprecated waveplot in favour of waveshow and later removed it;
    # use whichever the installed version provides.
    draw_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
    draw_wave(wav, sr=sr, alpha=0.1)

    # Event times are unix milliseconds; convert to seconds from audio start.
    down_seconds = [(event[1] - audio_unixtime_ms) / 1000 for event in down_events]
    up_seconds = [(event[1] - audio_unixtime_ms) / 1000 for event in up_events]
    plt.scatter(down_seconds, [0] * len(down_events))
    plt.scatter(up_seconds, [0.01] * len(up_events))

def plotBokeh(data, adjust_ms=0):
    """Show the waveform with labelled key events in an interactive Bokeh plot.

    Args:
        data: Dict as returned by ``getData`` / ``getDataSubset``.
        adjust_ms: Offset in milliseconds added to every event time before
            plotting; the audio itself is not shifted.

    The x axis is unix time in milliseconds, rendered as date/time labels.
    Keydown events are green, keyup events red; each is labelled with its
    ``data`` field, with a leading ``Key`` removed.
    """
    wav = data['wav']
    sr = data['sr']
    audio_unixtime_ms = data['audio_unixtime_ms']
    up_events = data['up_events']
    down_events = data['down_events']

    # Build the columns with vectorised arithmetic; a Python lambda per sample
    # is very slow for recordings with millions of samples.
    # int64 is explicit so that Index * 1000 cannot overflow a 32-bit default.
    df_sound = pd.DataFrame({
        'Index': np.arange(len(wav), dtype='int64'),
        'wav': wav,
    })
    df_sound['unixtime'] = audio_unixtime_ms + df_sound['Index'] * 1000 / sr
    # Wall-clock time (UTC, whole seconds) of each sample. This has to be
    # derived from the unix timestamp, not from the bare sample index.
    df_sound['time'] = pd.to_datetime(df_sound['unixtime'] // 1000, unit='s')

    bokeh.io.output_notebook()
    p = bokeh.plotting.figure()
    # Use one label format at every zoom level.
    tick_format = ['%d-%m    %H:%M:%S.%3N']
    datetimeTickFormatter = bokeh.models.DatetimeTickFormatter(
        microseconds = tick_format,
        milliseconds = tick_format,
        seconds = tick_format,
        minsec = tick_format,
        minutes = tick_format,
        hourmin = tick_format,
        hours = tick_format,
        days = tick_format,
        months = tick_format,
        years = tick_format)
    p.xaxis.formatter = datetimeTickFormatter
    p.xaxis.major_label_orientation = math.pi/2
    p.line(x='unixtime', y='wav', source=df_sound)

    def plotEvents(events, colour="black", y_offset=0):
        """Add one marker plus a vertical text label per event to ``p``."""
        df = pd.DataFrame({
            'x': [e[1] + adjust_ms for e in events],
            'y': [0.015] * len(events),
            # Drop the 'Key' prefix ('KeyA' -> 'A'); other values are kept.
            'text': [e[3][3:] if e[3].startswith('Key') else e[3] for e in events],
        })
        source = bokeh.models.ColumnDataSource(df)

        p.scatter(x='x', y='y', source=source, size=10, color=colour, alpha=0.5)
        p.add_layout(bokeh.models.LabelSet(x='x', y='y', text='text', source=source, x_offset=5, y_offset=y_offset, render_mode='canvas', angle=math.pi/2, text_color=colour))

    plotEvents(down_events, "green", 20)
    plotEvents(up_events, "red", -120)

    bokeh.plotting.show(p)
    
def diffPeekAmplAndKeyDown(directory):
    """Print the time difference between the loudest sample and the first keydown.

    Args:
        directory: Recording folder, passed to ``getData``.

    The "first" keydown is the first keydown row returned by ``getData``.
    When the recording has no keydown events a notice is printed instead, so
    a batch run over many directories is not aborted by one of them.
    """
    data = getData(directory)
    down_events = data['down_events']
    if not down_events:
        print (f'{directory} has no keydown events, nothing to compare')
        return
    down_event = down_events[0][1]
    # NOTE(review): argmax picks the largest signed sample; if the peak of
    # interest can be negative this should probably use the absolute value.
    max_index = np.argmax(data['wav'])
    max_time = data['audio_unixtime_ms'] + int(max_index * 1000 / data['sr'])
    print (f'{directory} max ampl at {max_time}, keydown at {down_event}, difference {max_time-down_event}')

def getDataSubset(data, offset_ms, length_ms=2000):
    """Cut a time window out of a recording dict.

    Args:
        data: Dict with the keys ``wav``, ``sr``, ``audio_unixtime_ms``,
            ``up_events`` and ``down_events``.
        offset_ms: Start of the window in milliseconds from the audio start.
        length_ms: Length of the window in milliseconds.

    Returns:
        A new dict with the same keys. ``wav`` is sliced to the window,
        ``audio_unixtime_ms`` is moved to the window start, and the event
        lists keep only events strictly inside the window (an event exactly
        on either boundary is left out).
    """
    rate = data['sr']
    window_start = data['audio_unixtime_ms'] + offset_ms
    window_end = window_start + length_ms

    # Convert the window boundaries from milliseconds to sample positions.
    first_sample = int(offset_ms*rate/1000)
    end_sample = int((offset_ms+length_ms)*rate/1000)

    def inside_window(events):
        # Index 1 of an event holds its unix time in milliseconds.
        return [event for event in events if window_start < event[1] < window_end]

    return {
        'wav': data['wav'][first_sample:end_sample],
        'sr': rate,
        'audio_unixtime_ms': window_start,
        'up_events': inside_window(data['up_events']),
        'down_events': inside_window(data['down_events'])
    }
    
def aap(x, adjust_ms=0):
    """Plot a recording twice: as raw waveform and as a per-frame STFT summary.

    Args:
        x: Recording path relative to ``../web/data/videos/``.
        adjust_ms: Event offset in milliseconds, applied to the first plot.

    The second plot replaces the waveform with the magnitude of the STFT
    summed over all frequency bins, one value per frame of about 1 ms.
    """
    data = getData(f'../web/data/videos/{x}')
    plotBokeh(data, adjust_ms=adjust_ms)

    wav = data['wav']
    sr = data['sr']
    # Aim for one frame per millisecond; never let the hop drop to 0 samples.
    hop_length = max(1, int(sr / 1000))
    s = librosa.stft(y=wav, hop_length=hop_length)
    data['wav'] = abs(s.sum(axis=0))
    # The hop is a whole number of samples, so the real frame rate is only
    # exactly 1000 Hz when sr is a multiple of 1000 (e.g. 44100 / 44 is about
    # 1002.3). Using the real rate keeps the time axis aligned with the events.
    data['sr'] = sr / hop_length
    # NOTE(review): adjust_ms is not applied to this second plot, as in the
    # original; confirm whether that is intended.
    plotBokeh(data)

In [None]:
# Compare peak amplitude and first keydown for every recording under `topdir`.
topdir = '16k'
# os.listdir returns entries in arbitrary order and may include plain files;
# keep only directories and sort them so the output is reproducible.
dirs = sorted(
    entry for entry in os.listdir(f'../web/data/videos/{topdir}')
    if os.path.isdir(f'../web/data/videos/{topdir}/{entry}')
)

for directory in dirs: #['niels-step_5-1616998952444']:
    diffPeekAmplAndKeyDown(f'../web/data/videos/{topdir}/{directory}')

In [None]:
# Lorem ipsum, typed slowly: load the recording and plot its first 10 seconds,
# with the events shifted by 50 ms.
data = getData('../web/data/videos/logitech/niels-lang-1617089612145')
first_window = getDataSubset(data, offset_ms=0, length_ms=10000)
plotBokeh(first_window, adjust_ms=50)

In [None]:
# Same recording, 10 seconds starting 310 s in; events shifted by 50 ms.
late_window = getDataSubset(data, offset_ms=310000, length_ms=10000)
plotBokeh(late_window, adjust_ms=50)

In [None]:
# Ad-hoc query for the 'recorder-mark' rows in a fixed id range.
# It is only stored in `q` here; nothing in this cell executes it.
q = (
    "select * from logger_traces lt "
    "where type='recorder-mark' "
    "and id between 150929 and 151867"
)
