## Fixing timezone and splitting files

In [1]:
import os
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mpld3

# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
# Turn on mpld3's notebook integration for the figures drawn below.
mpld3.enable_notebook()

In [2]:
# Directory holding the raw LoggerNet table files (relative to the working
# directory).  Alternate location kept for reference:
#   'C:/Users/Julia/work/LoggerNet/'
path = '../../CloudStation/LoggerNet/'

# Names of the logger table files found under ``path``.
datafiles = [
    'CR1000_Table1.dat',
    'CR1000_Table2.dat',
    'CR5000_onemin.dat',
    'CR5000_flux.dat',
    'CL06_CR1000_IP_Table1.dat',
    'Wash_Strm_CR200_IP_Table1.dat',
    'Upper WS CR200_Table1.dat',
]

In [117]:
def read_campbellsci(path, datafile):
    """Read a logger table file and its units/method header rows.

    The file is expected to begin with four header lines.  The second line
    supplies the column names; the third and fourth lines are returned as
    the ``units`` and ``method`` lists.

    Parameters
    ----------
    path : str
        Directory prefix, concatenated directly with ``datafile`` (so it
        must already end with a path separator).
    datafile : str
        File name of the table to read.

    Returns
    -------
    df : pandas.DataFrame
        Data indexed by the first column, parsed as dates.  "NAN", "INF",
        "-INF", 7999 and -7999 are read as missing values.
    units : list of str
        Fields of the third header line, minus the first (index) field.
    method : list of str
        Fields of the fourth header line, minus the first (index) field.
    """
    filename = path + datafile
    df = pd.read_csv(filename, skiprows=[0, 2, 3], index_col=0, parse_dates=True,
                     na_values=["NAN", "INF", "-INF", 7999, -7999], low_memory=False)

    # Only the first four lines are needed, so read just those instead of
    # loading the whole file a second time; ``with`` guarantees the handle is
    # closed even if reading fails.
    with open(filename, 'r') as f:
        header = [f.readline() for _ in range(4)]

    units_line = header[2].strip('\r\n')
    method_line = header[3].strip('\r\n')

    # Drop the quoting and the leading field, which belongs to the index column.
    units = units_line.replace('"', '').split(',')[1:]
    method = method_line.replace('"', '').split(',')[1:]
    return df, units, method

In [121]:
# Load the CL06 table from `path`, keeping its units and method header fields.
df, units, method = read_campbellsci(path, 'CL06_CR1000_IP_Table1.dat')

In [122]:
# Display the column labels of the table just loaded.
df.columns

Index([u'RECORD', u'AirTC_Avg', u'RH', u'WS_ms_S_WVT', u'WindDir_D1_WVT',
       u'WindDir_SD1_WVT', u'Lvl_cm_Avg', u'Temp_C_Avg', u'CTDM_Avg',
       u'BattV_Min'],
      dtype='object')

In [123]:
# Keep only the columns named in df.columns, in that order.
df_all = df_all[df.columns]

In [124]:
# Add a trailing 'Min' entry to the methods list.
# NOTE(review): presumably the method header row was one field short for the
# last column -- confirm against the raw file.
methods.append('Min')

In [125]:
# Display the methods list to check its contents.
methods

['', 'Avg', 'Smp', 'WVc', 'WVc', 'WVc', 'Avg', 'Avg', 'Avg', 'Min']

In [120]:
# Read every CL06_CR1000_IP_Table1*.dat file under `path` and stack them
# into one frame sorted by timestamp.
frames = []
for f in os.listdir(path):
    if 'CL06_CR1000_IP_Table1' in f and f.endswith('.dat'):
        # units/methods are overwritten on each pass, so the values from the
        # last matching file are the ones kept.
        df, units, methods = read_campbellsci(path, datafile=f)
        frames.append(df)
# Concatenate once at the end rather than re-copying the growing frame on
# every iteration (and rather than seeding pd.concat with None).
# pd.concat raises ValueError here when no file matched.
df_all = pd.concat(frames).sort_index()

In [126]:
# This is for all the Washington sites.
# Rows stamped up to 2015-10-14 14:00 are moved forward by 4 hours; rows from
# 2015-10-14 15:00 onward keep their timestamps.
# NOTE(review): a row stamped between those two bounds would land in neither
# slice and make set_index fail on a length mismatch -- confirm none exist.
new = df_all[:'2015-10-14 14:00'].index + pd.DateOffset(hours=4)
new = new.append(df_all['2015-10-14 15:00':].index)
df_all = df_all.set_index(new)
# Build a three-level header (name, units, method) for the columns.
columns = pd.MultiIndex.from_tuples(list(zip(df_all.columns, units, methods)))
# Assign directly: set_axis(1, columns) depends on the legacy argument order
# and on in-place mutation, neither of which newer pandas provides.
df_all.columns = columns
# Write the timestamps as an ordinary first column, since the export is done
# with index=False.
df_all.insert(0, 'TIMESTAMP', df_all.index)

In [132]:
# Write the rows stamped in 2016 to the yearly file.  .loc makes the row
# selection explicit instead of relying on [] falling back from column lookup
# to a date-string row lookup.
df_all.loc['2016'].to_csv('../../data/CL06_CR1000_IP_Table1_2016.dat', index=False)

In [135]:
# Copy every ../../data/Wash* file into the Washington web datafiles folder,
# overwriting existing copies; the cell shows the shell exit status.
os.system("cp -f ../../data/Wash* ../../web/washington/datafiles/")

0

In [67]:
# This is for Butler
# Each entry is (first timestamp, last timestamp, hours to add); None leaves
# that end of the slice open.
shifts = [
    (None, '2013-03-26 07:00', 5),
    ('2013-03-26 08:00', '2014-01-28', 4),
    ('2014-01-29', '2014-06-27 05:00', 5),
    ('2014-06-27 06:00', '2014-12-01', 4),
    ('2014-12-02', '2015-05-09 04:00', 5),
    ('2015-05-09 05:00', '2015-10-14 14:00', 4),
]
new = None
for start, stop, hours in shifts:
    shifted = df_all[start:stop].index + pd.DateOffset(hours=hours)
    new = shifted if new is None else new.append(shifted)
# Everything from 2015-10-14 15:00 onward keeps its original timestamps.
new = new.append(df_all['2015-10-14 15:00':].index)

In [68]:
# Replace the frame's index with the adjusted timestamps held in `new`.
df_all = df_all.set_index(new)

(24611, 24611)

In [183]:
# Build a three-level header (name, units, method) for the columns.
columns = pd.MultiIndex.from_tuples(list(zip(df_all.columns, units, methods)))
# Assign directly: set_axis(1, columns) depends on the legacy argument order
# and on in-place mutation, neither of which newer pandas provides.
df_all.columns = columns
# Write the timestamps as an ordinary first column, since the export is done
# with index=False.
df_all.insert(0, 'TIMESTAMP', df_all.index)

In [185]:
# Write the rows stamped in 2015 to the yearly file.  .loc makes the row
# selection explicit instead of relying on [] falling back from column lookup
# to a date-string row lookup.
df_all.loc['2015'].to_csv('../../data/CR5000_flux_2015.dat', index=False)

In [166]:
# Copy every ../../data/CR1000_Table1* file into the Butler web datafiles
# folder, overwriting existing copies; the cell shows the shell exit status.
os.system("cp -f ../../data/CR1000_Table1* ../../web/butler/datafiles/")

0

In [134]:
# Load the 2016 CR1000_Table1 file with the first column as a parsed date
# index; file rows 1-3 (0-based) are skipped.
df_2016 = pd.read_csv('../../data/'+'CR1000_Table1_2016.dat', index_col=0, parse_dates=True, 
                 low_memory=False, skiprows=[1,2,3])

In [211]:
def update_file(datafile, year):
    """Append newly logged rows to one year's data file.

    Reads ``../../data/<datafile>_<year>.dat`` and the raw logger file
    ``../../CloudStation/LoggerNet/<datafile>.dat``, appends the raw rows
    starting at the first timestamp missing from the yearly file, and
    rewrites the yearly file with a three-level column header
    (name, units, method) and a leading TIMESTAMP column.

    Parameters
    ----------
    datafile : str
        Table name without the ``.dat`` extension.
    year : str
        Year suffix of the yearly file.

    Returns
    -------
    None
        The yearly file is left untouched when it already ends at the newest
        raw timestamp or when the raw file holds no new timestamps.
    """
    year_file = '../../data/{d}_{year}.dat'.format(d=datafile, year=year)
    df_year = pd.read_csv(year_file, index_col=0, parse_dates=True,
                          low_memory=False, skiprows=[1, 2, 3])
    df, units, method = read_campbellsci('../../CloudStation/LoggerNet/',
                                         '{d}.dat'.format(d=datafile))

    # Already up to date: both files end on the same timestamp.
    if df.index[-1] == df_year.index[-1]:
        return
    # Guard the [0] lookup below: with no unseen timestamps there is nothing
    # to append.
    new_stamps = df.index.difference(df_year.index)
    if len(new_stamps) == 0:
        return

    df_all = pd.concat((df_year, df[new_stamps[0]:]))
    # Use the units/method rows of the file just read (the local `method`),
    # not whatever a global `methods` happens to hold from another table.
    columns = pd.MultiIndex.from_tuples(list(zip(df_all.columns, units, method)))
    # Direct assignment instead of the legacy set_axis(1, columns) call.
    df_all.columns = columns
    df_all.insert(0, 'TIMESTAMP', df_all.index)
    df_all.to_csv(year_file, index=False)

In [212]:
# Bring the 2016 CR5000_flux yearly file up to date.
update_file('CR5000_flux', '2016')

In [191]:
# Table name and year used by the step-by-step cell that follows.
datafile = 'CR1000_Table1'
year = '2016'

In [195]:
# Step-by-step version of update_file's body, driven by the `datafile` and
# `year` variables.
df_year = pd.read_csv('../../data/'+'{d}_{year}.dat'.format(d=datafile, year=year),
                      index_col=0, parse_dates=True, low_memory=False, skiprows=[1,2,3])
df, units, method = read_campbellsci('../../CloudStation/LoggerNet/','{d}.dat'.format(d=datafile))

if df.index[-1] == df_year.index[-1]:
    # Already up to date: just show the shared final timestamp.
    print(df.index[-1])
else:
    # Append raw rows starting at the first timestamp the yearly file lacks.
    df_all = pd.concat((df_year,df[df.index.difference(df_year.index)[0]:]))
    # Header rows come from the file just read (`method`), not a stale global.
    columns = pd.MultiIndex.from_tuples(list(zip(df_all.columns, units, method)))
    # Direct assignment instead of the legacy set_axis(1, columns) call.
    df_all.columns = columns
    df_all.insert(0, 'TIMESTAMP', df_all.index)
    # NOTE(review): this writes under `path`, whereas update_file writes
    # under '../../data/' -- confirm which location is intended.
    df_all.to_csv(path+'{d}_{year}.dat'.format(d=datafile, year=year), index=False)

In [217]:
# Display the current value of `path`.
path

'../../CloudStation/LoggerNet/'

In [218]:
# Copy the 2016 CR5000_flux yearly file into the ECS web datafiles folder,
# overwriting any existing copy; the cell shows the shell exit status.
os.system("cp -f ../../data/{d}_{year}.dat ../../web/ecs/datafiles/".format(d='CR5000_flux', year='2016'))

0

In [None]:
# Display the last index value of df_all.
df_all.index[-1]

In [219]:
def update_file(datafile, year):
    """Append newly logged rows to one year's data file.

    Reads ``../../data/<datafile>_<year>.dat`` and the raw logger file
    ``../../CloudStation/LoggerNet/<datafile>.dat``, appends the raw rows
    starting at the first timestamp missing from the yearly file, and
    rewrites the yearly file with a three-level column header
    (name, units, method) and a leading TIMESTAMP column.

    Parameters
    ----------
    datafile : str
        Table name without the ``.dat`` extension.
    year : str
        Year suffix of the yearly file.

    Returns
    -------
    None
        The yearly file is left untouched when it already ends at the newest
        raw timestamp or when the raw file holds no new timestamps.
    """
    year_file = '../../data/{d}_{year}.dat'.format(d=datafile, year=year)
    df_year = pd.read_csv(year_file, index_col=0, parse_dates=True,
                          low_memory=False, skiprows=[1, 2, 3])
    df, units, method = read_campbellsci('../../CloudStation/LoggerNet/',
                                         '{d}.dat'.format(d=datafile))

    # Already up to date: both files end on the same timestamp.
    if df.index[-1] == df_year.index[-1]:
        return
    # Guard the [0] lookup below: with no unseen timestamps there is nothing
    # to append.
    new_stamps = df.index.difference(df_year.index)
    if len(new_stamps) == 0:
        return

    df_all = pd.concat((df_year, df[new_stamps[0]:]))
    # Use the units/method rows of the file just read (the local `method`),
    # not whatever a global `methods` happens to hold from another table.
    columns = pd.MultiIndex.from_tuples(list(zip(df_all.columns, units, method)))
    # Direct assignment instead of the legacy set_axis(1, columns) call.
    df_all.columns = columns
    df_all.insert(0, 'TIMESTAMP', df_all.index)
    df_all.to_csv(year_file, index=False)

In [222]:
# out_path and params are assigned here but not used by the remaining
# statements in this cell.
out_path = './output/ecs/'
# `path` is repointed at the yearly-data directory for the copy command below.
path = '../../data/'
params = ['TCroof_Avg', 'RHCroof', 'DnTot_Avg', 'UpTot_Avg', 'Rain_mm_3_Tot', 'WindSpd_ms']
# Refresh the 2016 CR5000_flux yearly file, then copy it into
# ../../web/ecs/datafiles/ (the cell shows the shell exit status).
update_file(datafile='CR5000_flux', year='2016')
os.system("cp -f {p}{d}_{year}.dat ../../web/ecs/datafiles/".format(p=path, d='CR5000_flux', year='2016'))

0