In [1]:
import csv
import os
import posixpath
import shutil

import pandas as pd

# Top-level folder that is walked recursively (os.walk) to find datalogger files.
# NOTE(review): absolute, machine-specific path -- adjust before running on
# another computer.
root_dir = 'C:/Users/Julia/Desktop/toughbook4_loggernet_2015_05_11/'

In [48]:
def get_attrs(header_file, attrs):
    '''Augment attributes by inspecting the header of the .dat file.

    Parameters
    ----------
    header_file : str
        Path of the data file. Only its first four lines are inspected.
    attrs : dict
        Attributes collected so far. Updated in place with the keys
        'format', 'logger', 'program', 'datafile' and 'source'.

    Returns
    -------
    attrs : dict
        The same dict object that was passed in, after the update.
    local_attrs : dict
        One entry per column name found on the 2nd line, each of the form
        {'units': <3rd-line cell>, 'comment': <4th-line cell>}.

    Raises
    ------
    ValueError
        If the first line holds fewer than the eight fields used here.
    '''
    # metadata needs to be CF-1.6 compliant

    # in the TOA5 datafile headers, some metadata are written in the 1st row.
    # The row is read verbatim with the csv module rather than as a pandas
    # header: pandas renames repeated fields ('X' -> 'X.1') and blank fields
    # ('Unnamed: N'), which would corrupt the values stored below.
    # errors='replace' keeps a stray non-UTF-8 byte further down the file from
    # aborting the read of this single line.
    with open(header_file, newline='', encoding='utf-8',
              errors='replace') as header:
        meta_list = next(csv.reader(header), [])
    if len(meta_list) < 8:
        raise ValueError('%s: expected at least 8 fields in the first line, '
                         'found %d' % (header_file, len(meta_list)))

    # the program field may look like '<drive>:<name>'; keep the part after
    # the first colon, or the whole field when there is no colon at all
    program_parts = meta_list[5].split(':')
    program = program_parts[1] if len(program_parts) > 1 else program_parts[0]

    attrs.update({'format': meta_list[0],
                  'logger': meta_list[1],
                  'program': program,
                  'datafile': meta_list[7]})
    source_info = (attrs['logger'], attrs['datafile'], attrs['program'],
                   meta_list[6])
    source = 'Flux tower sensor data %s_%s.dat, %s, %s' % source_info
    attrs.update({'source': source})

    # the local attributes are in the 2nd, 3rd, and 4th rows:
    # row 2 supplies the column names, rows 3 and 4 the two attribute values
    df_names = pd.read_csv(header_file, skiprows=[0], nrows=2, dtype='str')
    df_names = df_names.astype('str')
    df_names.index = ['units', 'comment']
    local_attrs = df_names.to_dict()

    return attrs, local_attrs

In [74]:
# Files at or above this size (in bytes) are catalogued from their header
# only; their data are not loaded into memory.
LARGE_FILE_BYTES = 100e6

TOA5_files = []     # one dict per file that parsed as TOA5
unknown_files = []  # files that could not be parsed, with the reason
for path, dirs, files in os.walk(root_dir):
    for f in files:
        input_file = posixpath.join(path, f)
        try:
            size_bytes = os.stat(input_file).st_size
            if size_bytes >= LARGE_FILE_BYTES:
                attrs, local_attrs = get_attrs(input_file, {})
                TOA5_files.append({'path': path,
                                   'filename': f,
                                   'local_attrs': local_attrs,
                                   'attrs': attrs,
                                   'size': size_bytes/1e6  # megabytes
                                  })
                continue
            # Read from the walked location (path + f): os.walk descends into
            # subfolders, so root_dir + f would miss every file below the top
            # level. Rows 0, 2 and 3 are header rows; row 1 holds the column
            # names.
            df = pd.read_csv(input_file, skiprows=[0, 2, 3], parse_dates=True,
                             index_col=0, low_memory=False)
            df = df.drop_duplicates()
            # order the columns by name; DataFrame.sort() no longer exists in
            # current pandas, sort_index(axis=1) does the same thing
            df = df.sort_index(axis=1)
            df.index.name = 'time'
            attrs, local_attrs = get_attrs(input_file, {})
            TOA5_files.append({'path': path,
                               'filename': f,
                               'df': df,
                               'local_attrs': local_attrs,
                               'attrs': attrs
                              })
        except Exception as err:
            # anything that does not parse is set aside, together with the
            # reason, instead of stopping the walk
            unknown_files.append({'path': path,
                                  'filename': f,
                                  'error': repr(err)})

In [75]:
def regroup_files(info, files=None):
    '''Move data files into <path>/<project>/<program>/ subfolders.

    Parameters
    ----------
    info : dict
        Grouping rule with two keys: 'project' (name of the folder created
        next to each file) and 'programs' (list of substrings looked for in
        each file's attrs['program']).
    files : list of dict, optional
        Records with the keys 'path', 'filename' and 'attrs'. Defaults to the
        module-level TOA5_files list.

    Returns
    -------
    None
        Files are moved on disk. The records themselves are not modified, so
        their 'path' still points at the original folder afterwards.
    '''
    if files is None:
        files = TOA5_files
    for f in files:
        program = f['attrs']['program']
        for p in info['programs']:
            if p not in program:
                continue
            output_path = posixpath.join(f['path'], info['project'], p)
            source = posixpath.join(f['path'], f['filename'])
            target = posixpath.join(output_path, f['filename'])
            try:
                if not os.path.isdir(output_path):
                    os.makedirs(output_path)
                shutil.move(source, target)
            except (OSError, shutil.Error):
                # best effort: a file that cannot be moved (already moved,
                # locked, missing, ...) is skipped; try the next program name
                continue
            else:
                # the file now lives in its new folder; testing further
                # program names would only create empty folders
                break

In [76]:
# Grouping rule for the 'Tower' project: the folder name to create and the
# program-name substrings that select its files.
tower = dict(project='Tower', programs=['MainTower', 'Manifold'])

In [77]:
# Grouping rule for the 'KLEE' project: the folder name to create and the
# program-name substrings that select its files.
klee = dict(project='KLEE', programs=['Rainfall', 'KLEEMet'])

In [79]:
# Apply the Tower rule: files whose program name matches are moved on disk
# into a Tower/<program>/ subfolder next to their current location.
regroup_files(tower)