In [2]:
import pytimber
# Logging-database client; the retrieval cell further down calls its get() and getScaled() methods.
ldb = pytimber.LoggingDB()

Specify the output file and the times you want to download. Timber and pyTimber conflict with regard to the times, probably because of winter and summer time. If you want to have data stored in Timber from 00:00 to 01:00, you might need to request either from 01:00 to 02:00 or even 02:00 to 03:00. We have to account for this shift later on.

In [118]:
# CSV file that existing data is read from (when it exists) and that the merged result is written back to.
filename = '../Data_Raw/Nov2018.csv'

In [119]:
import pandas as pd
from datetime import datetime
import pytz

def _utc_string_to_local(timestamp_text, fmt='%Y-%m-%d %H:%M:%S.%f'):
    """Parse a UTC timestamp string and return it as an aware datetime in the local time zone."""
    naive = datetime.strptime(timestamp_text, fmt)
    return pytz.utc.localize(naive).astimezone(tz=None)


# Start and end of the requested time window, written as UTC strings.
t1 = _utc_string_to_local('2018-11-01 00:00:00.000')
t2 = _utc_string_to_local('2018-12-01 00:00:00.000')

# replace_file:   start from an empty frame instead of loading the existing CSV.
# replace_column: overwrite a column that is already stored instead of skipping it.
replace_file = False
replace_column = True

In [120]:
# %load ../ionsrcopt/source_features.py
class SourceFeatures:
    """Central list of the column / variable names used by this notebook.

    ``TIMESTAMP`` names the time column (and index) of the stored CSV. The
    other attributes are the variable names that can be selected for download.
    """

    # Name of the time column in the data frame and CSV file.
    TIMESTAMP = 'UTC_TIME'

    # Variable names that can be requested from the logging database.
    BIASDISCAQNV = 'IP.NSRCGEN:BIASDISCAQNV'
    GASAQN = 'IP.NSRCGEN:GASAQN'
    GASSASAQN = 'IP.NSRCGEN:GASSASAQN'
    SOLINJ_CURRENT = 'IP.SOLINJ.ACQUISITION:CURRENT'
    SOLCEN_CURRENT = 'IP.SOLCEN.ACQUISITION:CURRENT'
    SOLEXT_CURRENT = 'IP.SOLEXT.ACQUISITION:CURRENT'
    OVEN1AQNP = 'IP.NSRCGEN:OVEN1AQNP'
    OVEN2AQNP = 'IP.NSRCGEN:OVEN2AQNP'
    SOURCEHTAQNI = 'IP.NSRCGEN:SOURCEHTAQNI'
    SAIREM2_FORWARDPOWER = 'IP.SAIREM2:FORWARDPOWER'
    BCT05_CURRENT = 'ITL.BCT05:CURRENT'
    BCT25_CURRENT = 'ITF.BCT25:CURRENT'

Now select all parameters you are interested in.

In [121]:
# Name of the timestamp column; used as the index of every data frame below.
time = SourceFeatures.TIMESTAMP

# Variables fetched without aggregation via ldb.get(). Uncomment the ones to download;
# when the list is empty no raw request is made.
parameters_raw = [
#        SourceFeatures.BIASDISCAQNV, 
#        SourceFeatures.GASAQN, 
#        SourceFeatures.OVEN1AQNP,
#        SourceFeatures.SOLINJ_CURRENT,
#        SourceFeatures.SOLCEN_CURRENT,
#        SourceFeatures.SOLEXT_CURRENT,
#        SourceFeatures.SOURCEHTAQNI,
#        SourceFeatures.BCT25_CURRENT
]

# Variables fetched via ldb.getScaled(). Each entry maps a variable name to the settings
# passed on as scaleAlgorithm ('scale'), scaleInterval ('interval') and scaleSize ('size').
parameters_scaled = {
        #SourceFeatures.SAIREM2_FORWARDPOWER : {'scale' : 'AVG', 'interval' : 'SECOND', 'size' : '10'},
        SourceFeatures.BCT05_CURRENT : {'scale' : 'AVG', 'interval' : 'MINUTE', 'size' : '2'}
}

In [122]:
# All raw variables are requested in a single call; with nothing selected we start from an empty dict.
result = ldb.get(parameters_raw, t1, t2, unixtime=True) if parameters_raw else {}

# Scaled variables are requested one at a time, each with its own aggregation settings,
# and merged into the same result dict.
for variable, scaling in parameters_scaled.items():
    scaled = ldb.getScaled(
        variable,
        t1,
        t2,
        scaleAlgorithm=scaling['scale'],
        scaleInterval=scaling['interval'],
        scaleSize=scaling['size'],
        unixtime=True,
    )
    result.update(scaled)



In [123]:
import pandas as pd
from os import path

def load_existing_data(filename):
    """Return the data already stored in ``filename`` as a DataFrame.

    If the file exists it is read with ``pd.read_csv`` and a message stating
    whether existing columns will be replaced (per the global
    ``replace_column``) is printed. If it does not exist, an empty frame with
    only the timestamp column (global ``time``) is returned.
    """
    if path.exists(filename):
        print("Loading data from {}.".format(filename))
        mode_message = (
            "We will replace columns that already exist"
            if replace_column
            else "We will only append new columns"
        )
        print(mode_message)
        return pd.read_csv(filename)

    print("The file {} does not yet exist, we will create a new one".format(filename))
    return pd.DataFrame(columns=[time])

# Start from an empty frame when the file is to be replaced, otherwise from the stored CSV.
df = pd.DataFrame(columns=[time]) if replace_file else load_existing_data(filename)

# Index by the timestamp column and mark the parsed timestamps as UTC.
df = df.set_index(time)
df.index = pd.to_datetime(df.index).tz_localize('UTC')

Loading data from ../Data_Raw/Nov2018.csv.
We will replace columns that already exist


In [124]:
def check_duplicate_times(time_series):
    """Print a warning if any value of ``time_series`` occurs more than once.

    Nothing is returned and the series is not modified.
    """
    duplicate_mask = time_series.duplicated()
    if duplicate_mask.any():
        print("Time duplicates exist!")

# Merge every downloaded variable into `df`, one column at a time.
# For each entry of `result`, values[0] holds the unix timestamps and values[1] the readings.
for parameter, values in result.items():
    print("For column {} {} datapoints exist.".format(parameter, len(values[1])))
    
    # A column of the same name may already be stored; either keep it (skip) or replace it.
    if parameter in df.columns:
        print("Parameter {} is already in the data frame. There it has {} values. In the newly retrieved dataset it has {} values.".format(parameter, df[parameter].count(), len(values[1])))
        if not replace_column:
            print("Skipping.")
            continue
        else:
            print("Removing old column.")
            df = df.drop(parameter, axis=1)
            # Rows that only carried the removed column are now entirely NaN; drop them.
            df = df.dropna(axis=0, how='all')
    
    # Build a two-column frame (timestamp, value) for this variable.
    df_column = pd.DataFrame(columns=[time, parameter])
    # Convert the unix timestamps to timezone-aware UTC datetimes.
    df_column[time] = pd.Series([datetime.fromtimestamp(timestamp, tz=pytz.utc) for timestamp in values[0]])
    # Only warns; duplicated timestamps are not removed here.
    check_duplicate_times(df_column[time])
    df_column[parameter] = values[1]
    
    df_column.set_index(time, inplace = True)
    # Discard timestamps for which no value was returned.
    df_column.dropna(inplace=True)
    #if len(df_column.index) > 0:
    #    df_column.index -= timeshift_to_timber

    # Outer join keeps the timestamps of both the stored data and the new column.
    df = df.join(df_column, how='outer')
    
# Order the columns alphabetically.
df = df.reindex(sorted(df.columns), axis=1)
# Format the index as strings with millisecond precision (the last three microsecond digits are cut off).
# NOTE(review): after this line the index holds strings rather than datetimes, so this cell
# presumably cannot be re-run without first re-running the cell that loads `df` - confirm.
df.index = df.index.strftime('%Y-%m-%d %H:%M:%S.%f').str[:-3]
df.index.name = time

For column ITL.BCT05:CURRENT 21600 datapoints exist.


If the time index is duplicated, we will only keep the first occurrence.

In [125]:
# Flag every repeated index value except its first appearance, then keep only the unflagged rows.
is_repeated = df.index.duplicated(keep='first')
df = df.loc[~is_repeated].copy()
df

Unnamed: 0_level_0,IP.NSRCGEN:BIASDISCAQNV,IP.NSRCGEN:GASAQN,IP.NSRCGEN:OVEN1AQNP,IP.NSRCGEN:OVEN2AQNP,IP.NSRCGEN:SOURCEHTAQNI,IP.SAIREM2:FORWARDPOWER,IP.SOLCEN.ACQUISITION:CURRENT,IP.SOLEXT.ACQUISITION:CURRENT,IP.SOLINJ.ACQUISITION:CURRENT,ITF.BCT25:CURRENT,ITL.BCT05:CURRENT
UTC_TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-11-01 00:00:00.000,,,,,,,,,,,0.000180
2018-11-01 00:00:25.655,,10.0,,,,,,,,-0.0009,
2018-11-01 00:02:00.000,,,,,,,,,,,0.000220
2018-11-01 00:02:25.655,,,,0.003,,,,,,,
2018-11-01 00:03:02.855,-1.0,,,,,,,,,,
2018-11-01 00:03:11.255,,,,,,,,-0.42,,,
2018-11-01 00:03:31.655,,,,,-0.0254,,,,,,
2018-11-01 00:04:00.000,,,,,,,,,,,-0.000220
2018-11-01 00:04:28.055,,,0.002,,,,,,,,
2018-11-01 00:05:26.855,,,,,,,,,,-0.0006,


And save the output to the file.

In [126]:
# Write the merged frame to `filename`; the index is written as the first column under the name set on df.index.
df.to_csv(filename)
print("Saved result to {}".format(filename))

Saved result to ../Data_Raw/Nov2018.csv
