In [2]:
# =====
# File parsers
# =====
import os
from pathlib import Path
import re
import pandas as pd

def get_pdr():
    """Gets the 'Parkinson's Disease Repository' parent folder

    Walks up from the current working directory and returns the nearest
    ancestor whose folder name contains 'Parkinson-Disease-Repository'.

    Returns
    =====
    path: str
        Absolute path of the matching folder

    Raises
    =====
    FileNotFoundError
        If neither an ancestor of the working directory nor the working
        directory itself has a matching folder name
    """
    marker = 'Parkinson-Disease-Repository'
    start = Path(os.getcwd())

    # Path.name is the last path component on every platform, so there is no
    # need to split on a Windows-only separator. Path.parents is finite, so
    # the search terminates even when nothing matches.
    for candidate in start.parents:
        if marker in candidate.name:
            return str(candidate)

    # Ancestors keep priority; the working directory itself is only a
    # fallback for when the code is run from the repository root.
    if marker in start.name:
        return str(start)

    raise FileNotFoundError(
        "No folder containing '" + marker + "' found at or above " + str(start)
    )
    
def get_all_files(parent):
    """Collects all files located in parent and in every folder below it

    The tree is traversed with os.walk using its default settings.

    Parameters
    =====
    parent: str
        Parent directory for which to collect all files

    Returns
    =====
    all_files: ()[]
        List of files, exist as tuple of '(file name, path)', where path is
        the walked folder joined with the file name. Empty if parent holds no
        files (or cannot be walked).
    """
    # os.path.join picks the separator of the running platform; a hard-coded
    # backslash only yields usable paths on Windows.
    return [
        (file, os.path.join(root, file))
        for root, _dirs, files in os.walk(parent)
        for file in files
    ]

def get_csv_files(all_files):
    """Gets the list of csv files from all_files

    Works by exclusion: rather than keeping names ending in '.csv', it drops
    every file whose name ends in '.ipynb' or '.png' and keeps the rest.
    The comparison is case-sensitive.

    Parameters
    =====
    all_files: ()[]
        List of files existing as tuple as per get_all_files()

    Returns
    =====
    new_files: ()[]
        List of files existing as tuple as per get_all_files(), without the
        ignored file types
    """
    # Literal suffixes (dot included) so that only real extensions are
    # rejected; an unescaped '.' in a regex would also reject e.g. 'mypng'.
    ignored_suffixes = ('.ipynb', '.png')

    return [
        (file, path)
        for file, path in all_files
        if not file.endswith(ignored_suffixes)
    ]

def extract_json_from_name(file):
    """Builds a JSON-style dictionary from the given file name

    Placeholder: the file argument is not inspected yet and an empty
    dictionary is returned on every call.

    Parameters
    =====
    file: (), tuple
        As per get_all_files()

    Returns
    =====
    json: dictionary
        Intended layout (not populated yet):
        {
            name: str # the name of the file
        }
    """
    return {}

def structure_csv_files(files, json = False):
    """
    Structures the CSV files into an easy-to-read dataframe format

    Every file name is also printed to standard output, one per line.

    Parameters
    =====
    files: ()[]
        As per get_all_files() structure, list of files
    json: boolean
        Whether the information should be returned as JSON.
        Currently ignored: a dataframe is always returned.

    Returns
    =====
    df: dataframe
        One row per file with the columns
        name: the file name
        path: the file path
        time: the timestamp found in the name, including an optional
              ' am'/' pm' suffix kept verbatim, or a missing value when the
              name holds no timestamp
        test: the text after the last underscore of the name, extension
              included (e.g. 'Accelerometer.csv')
    """
    # Raw string: the pattern is unchanged, but '\d' and '\.' are no longer
    # invalid escape sequences of a normal string literal.
    time_pattern = re.compile(r'\d\d\d\d-\d\d-\d\dT[\d]+.\d\d\.\d\d\.\d\d\d( [ap]m)?')

    def _extract_time(name):
        """Return the timestamp embedded in name, or None if there is none."""
        found = time_pattern.search(name)
        return found.group() if found else None

    df = pd.DataFrame(files, columns = ["name", "path"])

    # Get time from name
    # TODO: the am/pm suffix is not converted to a 24-hour time yet
    df['time'] = df['name'].apply(_extract_time)

    # Last underscore-separated part of the name
    df['test'] = df['name'].apply(lambda name : name.rsplit('_', 1)[-1])

    # Echo the processed file names (plain loop, no throwaway Series)
    for name in df['name']:
        print(name)

    return df

# Locate the repository folder, list every file below its archived-data
# folder, filter that list and tabulate what is left.
directory = get_pdr()
# os.path.join instead of a hard-coded backslash so the folder is also found
# on platforms whose path separator is not '\'
directory = get_all_files(os.path.join(directory, 'Data (Archived)'))
directory = get_csv_files(directory)
structure_csv_files(directory)

MetaWear_2019-10-23T12.31.18.992 pm_C5013CAC38C1_Accelerometer.csv
MetaWear_2019-10-23T12.31.18.992 pm_C5013CAC38C1_Gyroscope.csv
S3_2019-10-23T12.31.18.992 pm_D4B85CC9CCD2_Accelerometer.csv
S3_2019-10-23T12.31.18.992 pm_D4B85CC9CCD2_Gyroscope.csv
MetaWear_2019-10-21T2.50.03.179 pm_C5013CAC38C1_Accelerometer.csv
MetaWear_2019-10-21T2.50.03.179 pm_C5013CAC38C1_Gyroscope.csv
MetaWear_2019-10-21T3.02.35.260 pm_C5013CAC38C1_Accelerometer.csv
MetaWear_2019-10-21T3.02.35.260 pm_C5013CAC38C1_Gyroscope.csv
S4_2019-10-23T12.00.07.369 pm_EB54418CDA46_Accelerometer.csv
S4_2019-10-23T12.00.07.369 pm_EB54418CDA46_Gyroscope.csv
tap lvl1 try1_S4_2019-10-25T13.46.40.964_EB54418CDA46_Accelerometer_1.4.4-checkpoint.csv
tap lvl1 try1_S4_2019-10-25T13.46.40.964_EB54418CDA46_Gyroscope_1.4.4-checkpoint.csv
tap lvl1 try1_S4_2019-10-25T13.46.40.964_EB54418CDA46_Accelerometer_1.4.4.csv
tap lvl1 try1_S4_2019-10-25T13.46.40.964_EB54418CDA46_Gyroscope_1.4.4.csv
tap lvl1 try2_S4_2019-10-25T13.48.31.829_EB54418CDA4

Unnamed: 0,name,path,time,test
0,MetaWear_2019-10-23T12.31.18.992 pm_C5013CAC38...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-23T12.31.18.992 pm,Accelerometer.csv
1,MetaWear_2019-10-23T12.31.18.992 pm_C5013CAC38...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-23T12.31.18.992 pm,Gyroscope.csv
2,S3_2019-10-23T12.31.18.992 pm_D4B85CC9CCD2_Acc...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-23T12.31.18.992 pm,Accelerometer.csv
3,S3_2019-10-23T12.31.18.992 pm_D4B85CC9CCD2_Gyr...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-23T12.31.18.992 pm,Gyroscope.csv
4,MetaWear_2019-10-21T2.50.03.179 pm_C5013CAC38C...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-21T2.50.03.179 pm,Accelerometer.csv
5,MetaWear_2019-10-21T2.50.03.179 pm_C5013CAC38C...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-21T2.50.03.179 pm,Gyroscope.csv
6,MetaWear_2019-10-21T3.02.35.260 pm_C5013CAC38C...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-21T3.02.35.260 pm,Accelerometer.csv
7,MetaWear_2019-10-21T3.02.35.260 pm_C5013CAC38C...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-21T3.02.35.260 pm,Gyroscope.csv
8,S4_2019-10-23T12.00.07.369 pm_EB54418CDA46_Acc...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-23T12.00.07.369 pm,Accelerometer.csv
9,S4_2019-10-23T12.00.07.369 pm_EB54418CDA46_Gyr...,C:\Users\liong\Jupyter Notebook\191020_Fourth ...,2019-10-23T12.00.07.369 pm,Gyroscope.csv


In [2]:
# =====
# File readers
# =====
import re
import pandas as pd

def is_file(path, extension):
    """Check that the file at path ends with extension.

    The pattern is searched anywhere in path, so it has to carry its own
    end anchor (e.g. r'\\.csv$') to test the end of the path only.

    Parameters
    =====
    path: str
    extension: str
        Regex

    Return
    =====
    ?: boolean
        True if the regex matches a non-empty part of path, False otherwise
        (including when there is no match at all)
    """
    found = re.search(extension, path)

    # re.search returns None when nothing matches; calling .group() on that
    # would raise AttributeError instead of answering False. An empty match
    # still counts as "no", as it did before.
    return found is not None and bool(found.group())
    
def read_csv(path):
    """Load the csv found at path with pandas

    Parameters
    =====
    path: str
        The path to the desired file to be read

    Returns
    =====
    frame: dataframe
        Whatever pd.read_csv produces for path with its default options
    """
    frame = pd.read_csv(path)
    return frame

In [3]:
# =====
# Plotting functions
# =====
import matplotlib.pyplot as plt

# Available axes
axes = list('xyz')

# Mapping for the axis to the column index in the dataframe
axes_map = dict(x = 3, y = 4, z = 5)

# Mapping for the axis colors
axes_color = dict(x = 'r', y = 'g', z = 'b')

# Axis labels shared by the plots
time_axis_label = "Time (s)"
accel_axis_label = "Acceleration (g)"

# Column index of the time values in the dataframe
time_index = 2

def plot_in_3d(df):
    """Placeholder for a 3D plot of the combined x-, y- and z-axes.

    Currently being reworked: df is ignored and nothing is drawn.

    Returns
    =====
    None
    """
    return

def plot_time(df, var = 'x', ttl = ''):
    """Plots the desired parameter against time.

    Columns are picked by position: time_index for the time values and
    axes_map[var] for the plotted values.

    Parameters
    =====
    df: dataframe
    var: str
        The variable to be plotted, one of the entries of axes.
        Any other value silently falls back to 'x'.
    ttl: str
        Title of the graph

    Returns
    =====
    fig: figure
        A new figure holding a single subplot with the curve
    """
    # Check that var is 'x', 'y', or 'z'
    # Defaults to 'x' if not used
    if var not in axes:
        var = 'x'

    # Get values (time column taken from the shared time_index constant,
    # as plot_time_3_axes does, instead of a repeated literal)
    time = df.iloc[:, time_index]
    data = df.iloc[:, axes_map[var]]

    # Plot
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set(xlabel = time_axis_label, ylabel = var, title = ttl)
    # The switch is passed positionally: its keyword was renamed from 'b' to
    # 'visible' in Matplotlib 3.5 and 'b' is rejected by current releases,
    # while the positional form works on old and new versions alike.
    ax.grid(True, which = 'both')
    ax.plot(time, data)

    return fig
    
def plot_time_3_axes(df, t = ''):
    """Plots all axes into a single plot in the time domain

    Columns are picked by position: time_index for the time values and the
    entries of axes_map for the x, y and z values. Each curve is drawn with
    the color given in axes_color and labelled with its axis name.

    Parameters
    =====
    df: dataframe
    t: str
        Title

    Returns
    =====
    fig: figure
        A new figure holding a single subplot with the three curves and a legend
    """
    # Create figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set(
        xlabel = time_axis_label,
        ylabel = accel_axis_label,
        title = t
    )
    # The switch is passed positionally: its keyword was renamed from 'b' to
    # 'visible' in Matplotlib 3.5 and 'b' is rejected by current releases,
    # while the positional form works on old and new versions alike.
    ax.grid(True, which = 'both')

    # Plot all 3 axes against the same time column
    time = df.iloc[:, time_index]
    for axis, column in axes_map.items():
        ax.plot(
            time,
            df.iloc[:, column],
            axes_color[axis],
            label = axis
        )

    # Legend
    ax.legend()

    return fig