# Initialise Notebook

In [4]:
from shutil import copyfile
import io, pytz, os, time, datetime
from os.path import dirname, join, abspath
from os import getcwd, pardir
import yaml
import pandas as pd
import numpy as np
import markdown
from dateutil import parser

from IPython.display import display, Markdown, FileLink, FileLinks, clear_output, HTML
from IPython.core.display import HTML
from IPython.display import display, clear_output
from plotly.widgets import GraphWidget
    
import ipywidgets as widgets
from ipywidgets import interact
import csv

## Root directory: the parent of the current working directory
# (abspath already normalises the trailing pardir, one call is enough)
rootDirectory = abspath(join(getcwd(), pardir))

# Module-level holder for a test description; starts out empty
yaml1 = dict()

class test_object(object):
    """Build the description of a sensor test and process its raw data files.

    The description is kept in ``self.yaml``, a nested dict rooted at the
    ``'test'`` key. Devices are registered with ``add_device``,
    ``device_files`` and ``add_reference``; ``process_files`` then copies the
    raw files, writes one processed csv per device and dumps the description
    to ``test_description.yaml``.
    """

    def __init__(self, ID):
        """Create an empty description for the test identified by *ID*."""
        self.ID = ID
        self.yaml = {
            'test': {
                'id': ID,
                'devices': {'kits': {}},
            },
        }

    def add_details(self, project = 'smartcitizen', commit = '', author = '', type_test = '', report = '', comment = ''):
        """Store the general details of the test.

        *comment* is rendered with ``markdown.markdown`` before being stored.
        Failures are reported on stdout instead of being raised (best effort).
        """
        try:
            details = self.yaml['test']
            details['project'] = project
            details['commit'] = commit
            details['author'] = author
            details['type_test'] = type_test
            details['report'] = report
            details['comment'] = markdown.markdown(comment)
            print('Add details OK')
        except Exception:
            # Best effort, as before; the message used to say "device" here
            print('Add details NOK')

    def add_device(self, device, device_type = 'KIT', sck_version = '2.0', pm_sensor = '', alphasense = None, location = 'Europe/Madrid'):
        """Register (or reset) the kit *device* in the description.

        *alphasense* is only stored when it is non-empty. *location* is the
        timezone name the kit index is converted to in ``process_files``.
        Failures are reported on stdout instead of being raised.
        """
        try:
            kit = {
                'type': device_type,
                'SCK': sck_version,
                'PM': pm_sensor,
                'location': location,
            }
            #### Alphasense
            if alphasense:
                kit['alphasense'] = alphasense

            self.yaml['test']['devices']['kits'][device] = kit
            print('Add device OK')
        except Exception:
            print('Add device NOK')

    def device_files(self, device, fileNameRaw = '', fileNameInfo = '', frequency = '1Min', type_file = 'csv_new'):
        """Attach file information to a kit previously added with ``add_device``.

        The processed file name is derived as ``<test id>_<kit type>_<device>.csv``.
        Failures (e.g. an unknown *device*) are reported on stdout instead of
        being raised.
        """
        try:
            kit = self.yaml['test']['devices']['kits'][device]
            kit['fileNameRaw'] = fileNameRaw
            kit['fileNameInfo'] = fileNameInfo
            kit['fileNameProc'] = self.yaml['test']['id'] + '_' + kit['type'] + '_' + str(device) + '.csv'
            kit['frequency'] = frequency
            kit['type_file'] = type_file
            print('Add device files OK')
        except Exception:
            print('Add device files NOK')

    def add_reference(self, reference, fileNameRaw = '', index = None, channels = None, location = ''):
        """Register the reference *reference* in the description.

        *index* is expected to carry the ``'name'`` and ``'frequency'`` keys
        read by ``process_files``. Omitted *index* / *channels* get a fresh
        empty dict each, so references never share one object.

        NOTE(review): *location* is accepted but not stored anywhere - confirm
        whether references are meant to carry a location.
        """
        # Must be 'references': that is the key process_files reads. The old
        # code created 'reference' and then failed with KeyError on 'references'.
        references = self.yaml['test']['devices'].setdefault('references', {})
        references[reference] = {
            'fileNameRaw': fileNameRaw,
            'index': {} if index is None else index,
            'channels': {} if channels is None else channels,
        }

    def process_files(self, _rootDirectory, _newpath):
        """Copy the raw files and write the processed files of the test.

        Raw files are read from ``<_rootDirectory>/data/RAW_DATA`` and copied
        to ``<_newpath>/RAW_DATA``. Processed csv files and
        ``test_description.yaml`` are written to *_newpath*. Nothing is
        processed when the raw files cannot be copied.
        """
        # Define Paths
        raw_src_path = join(_rootDirectory, 'data', 'RAW_DATA')
        raw_dst_path = join(_newpath, 'RAW_DATA')

        # Create Paths
        if not os.path.exists(raw_dst_path):
            os.makedirs(raw_dst_path)

        # Copy raw files; without them there is nothing to process
        if not self._copy_raw_files(raw_src_path, raw_dst_path, self._raw_file_names()):
            return

        devices = self.yaml['test']['devices']

        # Process references
        for reference in devices.get('references', {}):
            self._process_reference(reference, raw_src_path, _newpath)

        # Process kits
        for kit in devices.get('kits', {}):
            print('Processing device: {}'.format(kit))
            self._process_kit(kit, raw_src_path, _newpath)

        # Create yaml with test description
        with open(join(_newpath, 'test_description.yaml'), 'w') as yaml_file:
            yaml.dump(self.yaml, yaml_file)

        print('Test Creation Finished')

    def _raw_file_names(self):
        """Return the raw file names of all kits, then of all references."""
        devices = self.yaml['test']['devices']
        names = [kit['fileNameRaw'] for kit in devices.get('kits', {}).values()]
        names.extend(ref['fileNameRaw'] for ref in devices.get('references', {}).values())
        return names

    @staticmethod
    def _copy_raw_files(raw_src_path, raw_dst_path, raw_file_names):
        """Copy every raw file to the destination; return False on any failure."""
        try:
            for file_name in raw_file_names:
                copyfile(join(raw_src_path, file_name), join(raw_dst_path, file_name))
        except Exception as error:
            # Still best effort, but say why nothing gets processed
            print('Raw files could not be copied, test not processed: {}'.format(error))
            return False
        return True

    @staticmethod
    def _date_parser(s, a):
        """Parse *s* and take the microseconds from the last 3 characters of *a* (as ms)."""
        return parser.parse(s).replace(microsecond=int(a[-3:])*1000)

    @staticmethod
    def _open_text(path):
        """Open *path* for text-like reading on both Python 2 and Python 3."""
        if str is bytes:
            # Python 2: csv and str methods work on byte strings
            return open(path, 'rb')
        # Python 3: csv needs text with newline translation disabled
        return open(path, 'r', newline='', encoding='utf-8')

    def _process_reference(self, reference, raw_src_path, _newpath):
        """Resample one reference file to its configured frequency and export it."""
        config = self.yaml['test']['devices']['references'][reference]
        src_path = join(raw_src_path, config['fileNameRaw'])
        dst_path = join(_newpath, self.yaml['test']['id'] + '_' + str(reference) + '_REF.csv')

        # Time Name
        time_name = config['index']['name']

        # Load Dataframe (the row right after the header is skipped)
        df = pd.read_csv(src_path, skiprows=[1]).set_index(time_name)
        df.index = pd.to_datetime(df.index)
        df.sort_index(inplace=True)
        # pd.Grouper(freq=...) replaces pd.TimeGrouper, which newer pandas no longer has
        df = df.groupby(pd.Grouper(freq=config['index']['frequency'])).mean()

        # Remove Duplicates and drop unnamed columns
        df = df[~df.index.duplicated(keep='first')]
        df = df.drop([i for i in df.columns if 'Unnamed' in i], axis=1)

        # Non-numeric values become NaN.
        # NOTE(review): the original called df.fillna(0) and discarded the
        # result, so NaN has always been exported as-is; that is kept.
        df = df.apply(pd.to_numeric, errors='coerce')

        # Export to csv in destination path
        df.to_csv(dst_path, sep=",")

    def _process_kit(self, kit, raw_src_path, _newpath):
        """Export one kit file with a localised time index and collect its metadata."""
        config = self.yaml['test']['devices']['kits'][kit]
        src_path = join(raw_src_path, config['fileNameRaw'])
        dst_path = join(_newpath, self.yaml['test']['id'] + '_' + config['type'] + '_' + str(kit) + '.csv')

        # Read file csv
        type_file = config['type_file']
        if type_file == 'csv_new':
            # Rows 1-3 (right after the header) are not data
            index_name = 'TIME'
            df = pd.read_csv(src_path, skiprows=[1, 2, 3], delimiter = ',', encoding = 'utf-8')
        elif type_file == 'csv_old':
            index_name = 'Time'
            df = pd.read_csv(src_path, encoding = 'utf-8')
        elif type_file == 'csv_ms':
            index_name = 'Time'
            df = pd.read_csv(src_path, encoding = 'utf-8', parse_dates=[[0,1]], date_parser=self._date_parser)
        else:
            raise ValueError('Unknown type_file {!r} for device {}'.format(type_file, kit))

        # Find name in case of extra weird characters; the last match wins
        matches = [column for column in df.columns if index_name in column]
        if not matches:
            raise ValueError('No column containing {!r} in {}'.format(index_name, src_path))
        df.set_index(matches[-1], inplace = True)

        # Naive timestamps are taken as UTC; already-aware ones are only converted
        index = pd.to_datetime(df.index)
        if index.tz is None:
            index = index.tz_localize('UTC')
        df.index = index.tz_convert(config['location'])
        df.sort_index(inplace=True)

        # Drop unnamed columns (duplicated timestamps are kept for kits)
        df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)

        # Non-numeric values become NaN.
        # NOTE(review): the original called df.fillna(0) and discarded the
        # result, so NaN has always been exported as-is; that is kept.
        df = df.apply(pd.to_numeric, errors='coerce')

        df.to_csv(dst_path, sep=",")

        ## Import units and ids
        config['metadata'] = self._read_kit_metadata(src_path)

        ## Load txt info
        if config['fileNameInfo'] != '':
            config['info'] = self._read_kit_info(join(raw_src_path, config['fileNameInfo']))

    def _read_kit_metadata(self, src_path):
        """Map each header name (first column excluded) to its unit and id.

        The first three rows are read as header, units and ids.
        NOTE(review): this runs for every type_file - confirm that files other
        than 'csv_new' really carry unit and id rows.
        """
        with self._open_text(src_path) as csvfile:
            rows = csv.reader(csvfile, delimiter = ',')
            header = next(rows)[1:]
            unit = next(rows)[1:]
            ids = next(rows)[1:]

        metadata = dict()
        for position, key in enumerate(header):
            # A repeated header keeps the values of its first occurrence
            if key in metadata:
                continue
            metadata[key] = {'unit': unit[position], 'id': ids[position]}
        return metadata

    def _read_kit_info(self, src_path_info):
        """Parse the ``key: value`` lines of a kit info file into a dict."""
        info = dict()
        with self._open_text(src_path_info) as infofile:
            for line in infofile:
                key, separator, value = line.strip('\r\n').partition(':')
                if not separator:
                    # No colon (e.g. blank line): nothing to split
                    continue
                info[key] = value[1:] # Accounting for the space
        return info
            

# Input Data

In [5]:
# Identification of the test: <year>-<month>_<who>_<name>
date = pd.to_datetime('2019-01-10')
who = 'EXT'
name = 'TBOO_CHECK'

# Free-text comment stored with the test details
comment = '\n**Comment:** \nCHECK TBoo Data since he cannot upload\n'

test_id = '_'.join([date.strftime('%Y-%m'), who, name])

# Create test object
test = test_object(test_id)

# Add General test details
test.add_details(project = 'iscape',
                 commit = 'various',
                 author = 'Tboo',
                 type_test = 'outdoor',
                 report = '',
                 comment = comment)

# Add Devices (as many as needed): (device id, raw file name)
for device_id, raw_file in (('SCS21001', 'Log_Concat_SCS21001.csv'),
                            ('SCS21002', 'Log_Concat_SCS21002.csv')):
    test.add_device(device_id,
                    device_type = 'KIT',
                    sck_version = '2.0',
                    pm_sensor = 'none',
                    location = 'Europe/Madrid')

    test.device_files(device_id,
                      fileNameRaw = raw_file,
                      fileNameInfo = '',
                      frequency = '',
                      type_file = 'csv_new')


# Add References (as many as needed) if none, just comment it
# test.add_reference('ARPAE', 
#                   fileNameRaw = 'ARPAE.csv', 
#                   index = {'name' : 'Time',
#                            'format' : '%Y-%m-%dT%H%M%S',
#                            'frequency' : '1Min'}, 
#                   channels = {'pollutants' : ('CO', 'O3', 'NO2', 'NO', 'NOX'), 
#                               'units' : ('mg/m3', 'ug/m3', 'ug/m3', 'ug/m3', 'ug/m3'),
#                               'names' : ('CO mg/m3', 'O3 ug/m3', 'NO2 ug/m3', 'NO ug/m3', 'NOX ug/m3')
#                              })


# Create folder structure under data subdir: data/<year>/<month>/<test id>
newpath = join(rootDirectory, 'data', date.strftime('%Y'), date.strftime('%m'), test_id)

if not os.path.exists(newpath):
    os.makedirs(newpath)

# Copy the raw files, write the processed files and the test description
test.process_files(rootDirectory, newpath)

Add details OK
Add device OK
Add device files OK
Add device OK
Add device files OK
Processing device: SCS21002
Processing device: SCS21001
Test Creation Finished


## Update Test (TODO)

In [None]:
# Test to reload; its id starts with 'YYYY-MM', which also gives the
# data/<year>/<month>/<test id> folder it lives in
test_name = '2018-10_INT_TEST_TEMP_PM_CHARGE_SD'
test_year = test_name[0:4]
test_month = test_name[5:7]
newpath = join(rootDirectory, 'data', test_year, test_month, test_name)
# Function-call form prints the same on Python 2 and also parses on Python 3
print(newpath)

# NOTE(security): yaml.Loader can build arbitrary Python objects, so only load
# descriptions produced by this notebook. The loader is now passed explicitly
# (it was the implicit default before) because recent PyYAML requires it.
# yaml.safe_load would be preferable if the stored files contain plain YAML only.
with open(join(newpath, 'test_description.yaml'), 'r') as yaml_file:
    yaml1 = yaml.load(yaml_file, Loader=yaml.Loader)

display(yaml1)