In [1]:
import glob
import pandas as pd
import matplotlib.pyplot as plt
import csv

In [2]:
def windows_activities(in_dir, times_file, savefile, window_length_seconds = 1, sampling_freq = 500, overlap = 0.75):
    '''
    Generates and saves a csv file with the initial and final time indices of all possible windows for all the activities
    Parameters:
        in_dir: folder where the csv files of the recordings are. Kept for interface compatibility:
                the recordings are opened through the paths stored in the 'File' column of times_file
        times_file: csv file with the initial and final time indices of the detected activities
                    (columns 'File', 'Initial time index of activity', 'Final time index of activity';
                    a final index of -1 means "until the end of the recording")
        savefile: csv file where the initial and final time indices of the extracted windows of the activities will be saved
                  (columns 'File', 'Start', 'End', 'Label')
        window_length_seconds: window length (in seconds)
        sampling_freq: sampling frequency (in Hz) of the recordings
        overlap: overlap of consecutive windows, as a fraction of the window length (0.75 = 75%)
    Raises:
        ValueError: if overlap and window length give a step smaller than one sample,
                    because the windows would never advance
    '''

    df_times = pd.read_csv(times_file)

    window_length = int(sampling_freq * window_length_seconds)

    # Label and number of samples of every recording, so that each csv is read only once
    # even when it contains several activities
    recording_info = {}

    savelist = [['File', 'Start', 'End', 'Label']]
    for i, row in df_times.iterrows():
        if i%100 == 0:
            print('{}/{}'.format(i, len(df_times)))

        file = row['File']
        initial = row['Initial time index of activity']
        final = row['Final time index of activity']

        if file not in recording_info:
            df_activity = pd.read_csv(file)
            # The label of the recording is taken from the first row of its 'gen_label' column
            recording_info[file] = (df_activity['gen_label'].iloc[0], len(df_activity))
        label, n_samples = recording_info[file]

        # -1 marks an activity that lasts until the end of the recording
        if final == -1:
            final = n_samples

        start = initial

        # Slide forward while the window ends strictly before the end of the activity
        while start + window_length < final:
            savelist.append([file, start, start + window_length, label])
            next_start = int(start + (1 - overlap) * window_length)
            if next_start <= start:
                raise ValueError('overlap = {} with a window of {} samples gives a step below one sample; '
                                 'windows would never advance'.format(overlap, window_length))
            start = next_start

        # Last window: from final time backwards. For activities shorter than the window it starts
        # before `initial`; it is skipped when it would start before the recording (negative index)
        last_start = final - window_length
        if last_start >= 0:
            savelist.append([file, last_start, final, label])

    with open(savefile, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(savelist)

    print('Initial and final times of windows for all activities saved in {}'.format(savefile))

In [3]:
def windows_low(in_dir, times_low_activity, savefile, window_length_seconds = 1, sampling_freq = 500, overlap = 0.75):
    '''
    Generates and saves a csv file with the initial and final time indices of all possible windows for "low activity"
    Parameters:
        in_dir: folder where the csv files of the recordings are. Kept for interface compatibility:
                the recordings are opened through the paths stored in the 'File' column of times_low_activity
        times_low_activity: csv file with the initial and final time indices of "low activities"
                            (columns 'File', 'Initial time index of low', 'Final time index of low';
                            a final index of -1 means "until the end of the recording")
        savefile: csv file where the initial and final time indices of the extracted windows of "low activities" will be saved
                  (columns 'File', 'Start', 'End')
        window_length_seconds: window length (in seconds)
        sampling_freq: sampling frequency (in Hz) of the recordings
        overlap: overlap of consecutive windows, as a fraction of the window length (0.75 = 75%)
    Raises:
        ValueError: if overlap and window length give a step smaller than one sample,
                    because the windows would never advance
    '''
    df_times = pd.read_csv(times_low_activity)

    window_length = int(sampling_freq * window_length_seconds)

    # Number of samples of the recordings that had to be opened, so that each csv is read only once
    recording_lengths = {}

    savelist = [['File', 'Start', 'End']]
    for i, row in df_times.iterrows():
        if i%100 == 0:
            print('{}/{}'.format(i, len(df_times)))

        file = row['File']
        initial = row['Initial time index of low']
        final = row['Final time index of low']

        # -1 marks a segment that lasts until the end of the recording
        if final == -1:
            if file not in recording_lengths:
                recording_lengths[file] = len(pd.read_csv(file))
            final = recording_lengths[file]

        # Activity is shorter than window_length
        if final - initial < window_length:
            continue

        start = initial

        # Slide forward while the window ends strictly before the end of the segment
        while start + window_length < final:
            savelist.append([file, start, start + window_length])
            next_start = int(start + (1 - overlap) * window_length)
            if next_start <= start:
                raise ValueError('overlap = {} with a window of {} samples gives a step below one sample; '
                                 'windows would never advance'.format(overlap, window_length))
            start = next_start

        # Last window: from final time backwards (the segment is at least one window long here)
        savelist.append([file, final - window_length, final])

    with open(savefile, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(savelist)

    print('Initial and final times of windows for "low activities" saved in {}'.format(savefile))
    

In [6]:
def _segment_windows(initial, final, window_length, overlap):
    '''
    Returns the list of (start, end) index pairs of the windows of one segment that goes from
    index `initial` to index `final`: windows sliding forward from `initial` while they end
    strictly before `final`, followed by one last window that ends exactly at `final`.
    The last window is omitted when it would start at a negative index.
    Raises ValueError if the step between windows is smaller than one sample.
    '''
    windows = []
    start = initial
    while start + window_length < final:
        windows.append((start, start + window_length))
        next_start = int(start + (1 - overlap) * window_length)
        if next_start <= start:
            raise ValueError('overlap = {} with a window of {} samples gives a step below one sample; '
                             'windows would never advance'.format(overlap, window_length))
        start = next_start

    # Last window: from final time backwards
    last_start = final - window_length
    if last_start >= 0:
        windows.append((last_start, final))
    return windows


def windows_all(in_dir, times_activities, times_low_activity, savefile, window_length_seconds = 1, sampling_freq = 500, overlap = 0.75):
    '''
    Generates and saves a csv file with the initial and final time indices of all possible windows for all the activities including "low activity"
    Parameters:
        in_dir: folder where the csv files of the recordings are. Kept for interface compatibility:
                the recordings are opened through the paths stored in the 'File' column of the times files
        times_activities: csv file with the initial and final time indices of the detected activities
                          (columns 'File', 'Initial time index of activity', 'Final time index of activity')
        times_low_activity: csv file with the initial and final time indices of "low activities"
                            (columns 'File', 'Initial time index of low', 'Final time index of low')
        savefile: csv file where the initial and final time indices of the extracted windows of the activities will be saved
                  (columns 'File', 'Start', 'End', 'Label'; "low activity" windows get the label -1)
        window_length_seconds: window length (in seconds)
        sampling_freq: sampling frequency (in Hz) of the recordings
        overlap: overlap of consecutive windows, as a fraction of the window length (0.75 = 75%)
    In both times files a final index of -1 means "until the end of the recording".
    Raises:
        ValueError: if overlap and window length give a step smaller than one sample,
                    because the windows would never advance
    '''

    window_length = int(sampling_freq * window_length_seconds)

    # Per-recording caches so that each csv is read at most once per section
    recording_labels = {}
    recording_lengths = {}

    savelist = [['File', 'Start', 'End', 'Label']]

    ## Activities excluding "low activity"
    print('\nStarting window extraction for activities')

    # Read from the function argument, not from a notebook-level global
    df_times = pd.read_csv(times_activities)

    for i, row in df_times.iterrows():
        if i%100 == 0:
            print('{}/{}'.format(i, len(df_times)))

        file = row['File']
        initial = row['Initial time index of activity']
        final = row['Final time index of activity']

        if file not in recording_labels:
            df_activity = pd.read_csv(file)
            # The label of the recording is taken from the first row of its 'gen_label' column
            recording_labels[file] = df_activity['gen_label'].iloc[0]
            recording_lengths[file] = len(df_activity)
        label = recording_labels[file]

        if final == -1:
            final = recording_lengths[file]

        # Activities shorter than the window are kept: they get a single window ending at `final`
        for start, end in _segment_windows(initial, final, window_length, overlap):
            savelist.append([file, start, end, label])


    ## "low activity"
    print('\nStarting window extraction for "low activity"')
    df_times = pd.read_csv(times_low_activity)

    label = -1

    for i, row in df_times.iterrows():
        if i%100 == 0:
            print('{}/{}'.format(i, len(df_times)))

        file = row['File']
        initial = row['Initial time index of low']
        final = row['Final time index of low']

        if final == -1:
            if file not in recording_lengths:
                recording_lengths[file] = len(pd.read_csv(file))
            final = recording_lengths[file]

        # Activity is shorter than window_length
        if final - initial < window_length:
            continue

        for start, end in _segment_windows(initial, final, window_length, overlap):
            savelist.append([file, start, end, label])

    with open(savefile, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(savelist)

    print('Initial and final times of windows for activites including "low activity" saved in {}'.format(savefile))

# -------------------------------------- End of functions -------------------------------------------

In [8]:
### Parameters ###

# Folder where the csv files of the recordings are
in_dir = r'C:\Users\Rafael\OneDrive - Delft University of Technology\Data\Split/'

# csv file with the initial and final time indices of the detected activities
times_file = r'C:\Users\Rafael\OneDrive - Delft University of Technology\Activity detection\activity_times.csv'

# csv file with the initial and final time indices of the "low activities"
times_low_activity = r'C:\Users\Rafael\OneDrive - Delft University of Technology\Activity detection\low_activity_times.csv'

window_length_seconds = 1 # Length of each window, in seconds
sampling_freq = 500 # Sampling frequency of the recordings, in Hz
overlap = 0.75 # Overlap of consecutive windows, as a fraction of the window length (0.75 = 75%)

# csv where the extracted windows will be saved; the file name encodes the window length and the overlap
savefile = r'C:\Users\Rafael\OneDrive - Delft University of Technology\Data\windows\times_windows_all_{}_{}.csv'.format(window_length_seconds, overlap)


Starting window extraction for activities
0/867
100/867
200/867
300/867
400/867
500/867
600/867
700/867
800/867

Starting window extraction for "other activity"
0/1470
100/1470
200/1470
300/1470
400/1470
500/1470
600/1470
700/1470
800/1470
900/1470
1000/1470
1100/1470
1200/1470
1300/1470
1400/1470
Initial and final times of windows for activites including "other activity" saved in C:\Users\Rafael\OneDrive - Delft University of Technology\Data\windows\times_windows_all_1_0.75.csv


In [None]:
## Main: extract the windows of the detected activities and of the "low activities" into one csv
windows_all(
    in_dir=in_dir,
    times_activities=times_file,
    times_low_activity=times_low_activity,
    savefile=savefile,
    window_length_seconds=window_length_seconds,
    sampling_freq=sampling_freq,
    overlap=overlap,
)

In [None]:
## Extract only the windows of the detected (high) activities

# Output csv for these windows; the file name encodes the window length and the overlap
times_windows_activities = (
    r'C:\Users\Rafael\OneDrive - Delft University of Technology\Data\windows\times_windows_activities_{}_{}.csv'
    .format(window_length_seconds, overlap)
)

windows_activities(
    in_dir=in_dir,
    times_file=times_file,
    savefile=times_windows_activities,
    window_length_seconds=window_length_seconds,
    sampling_freq=sampling_freq,
    overlap=overlap,
)

In [None]:
## Extract only the windows of the "low activities"

# Output csv for these windows; the file name encodes the window length and the overlap
times_windows_low = (
    r'C:\Users\Rafael\OneDrive - Delft University of Technology\Data\windows\times_windows_low_{}_{}.csv'
    .format(window_length_seconds, overlap)
)

windows_low(
    in_dir=in_dir,
    times_low_activity=times_low_activity,
    savefile=times_windows_low,
    window_length_seconds=window_length_seconds,
    sampling_freq=sampling_freq,
    overlap=overlap,
)