In [1]:
import pytimber
# Shared logging-database client; the download cell queries it through
# ldb.get and ldb.getScaled.
ldb = pytimber.LoggingDB()

In [None]:
# %load ../ionsrcopt/source_features.py
from enum import Enum

class SourceFeatures(Enum):
    """Symbolic names for the variable identifiers requested in this notebook.

    Every member's value is the identifier as a plain string; use
    ``member.value`` wherever a ``str`` is required.

    NOTE(review): the values look like logging-database variable names in
    ``DEVICE:FIELD`` form -- confirm against the database before adding more.
    """
    # Has no "DEVICE:FIELD" shape; presumably the timestamp field name -- TODO confirm.
    Timestamp = 'TIMESTAMP'
    # Fields of the IP.NSRCGEN device.
    BIASDISCAQNV = 'IP.NSRCGEN:BIASDISCAQNV'
    GASAQN = 'IP.NSRCGEN:GASAQN'
    GASSASAQN = 'IP.NSRCGEN:GASSASAQN'
    # CURRENT field of the three IP.SOL*.ACQUISITION devices.
    SOLINJ_CURRENT = 'IP.SOLINJ.ACQUISITION:CURRENT'
    SOLCEN_CURRENT = 'IP.SOLCEN.ACQUISITION:CURRENT'
    SOLEXT_CURRENT = 'IP.SOLEXT.ACQUISITION:CURRENT'
    # Further IP.NSRCGEN fields.
    OVEN1AQNP = 'IP.NSRCGEN:OVEN1AQNP'
    OVEN2AQNP = 'IP.NSRCGEN:OVEN2AQNP'
    SOURCEHTAQNI = 'IP.NSRCGEN:SOURCEHTAQNI'
    # The only variable this notebook requests through the scaled (averaged) query.
    SAIREM2_FORWARDPOWER = 'IP.SAIREM2:FORWARDPOWER'
    BCT25_CURRENT = 'ITF.BCT25:CURRENT'

Specify the output file and the time range you want to download. Timber and pyTimber disagree about the times, probably because of winter and summer time (daylight saving). If you want the data that Timber stores for 00:00 to 01:00, you might have to request either 01:00 to 02:00 or even 02:00 to 03:00. We have to account for this shift later on.

In [96]:
# CSV path: read by load_existing_data when replace_file is False, and
# written by the final df.to_csv call.
filename = '../Data_Raw/Jan2018.csv'

In [97]:
import pandas as pd
from datetime import datetime
import pytz

def _utc_text_to_local(utc_text):
    """Parse a UTC timestamp string and return the same instant as a
    timezone-aware datetime expressed in this machine's local timezone."""
    naive = datetime.strptime(utc_text, '%Y-%m-%d %H:%M:%S.%f')
    return pytz.utc.localize(naive).astimezone(tz=None)


# Download window: start and end, written as UTC wall-clock text.
window_start_utc = '2018-01-01 00:00:00.000'
window_end_utc = '2018-02-01 00:00:00.000'

t1 = _utc_text_to_local(window_start_utc)
t2 = _utc_text_to_local(window_end_utc)

# Offset between pyTimber and Timber timestamps; zero means no correction.
timeshift_to_timber = pd.Timedelta(hours=0)

# replace_file:   start from an empty table instead of loading `filename`.
# replace_column: overwrite a column that already exists instead of skipping it.
replace_file = True
replace_column = True

Now select all parameters you are interested in.

In [98]:
# Name of the timestamp column / index in the output table.
time = 'Timestamp (UTC_TIME)'

# Variables downloaded exactly as logged.
parameters_raw = [
    SourceFeatures.BIASDISCAQNV,
    SourceFeatures.GASAQN,
    SourceFeatures.OVEN1AQNP,
    SourceFeatures.SOLINJ_CURRENT,
    SourceFeatures.SOLCEN_CURRENT,
    SourceFeatures.SOLEXT_CURRENT,
    SourceFeatures.SOURCEHTAQNI,
    SourceFeatures.BCT25_CURRENT,
]

# Variables downloaded through the scaled query; each entry carries the
# algorithm, interval unit and interval size handed to ldb.getScaled.
parameters_scaled = {
    SourceFeatures.SAIREM2_FORWARDPOWER: dict(scale='AVG', interval='SECOND', size='10'),
}

In [99]:
def _variable_name(parameter):
    """Return the plain variable-name string for `parameter`.

    Accepts either an Enum member (its ``.value`` is returned) or a string
    (returned unchanged), so the selection cell may use either form.
    """
    return getattr(parameter, 'value', parameter)


# The selection lists hold SourceFeatures Enum members, while the result
# keys printed later are plain variable-name strings. Hand the query the
# strings rather than the Enum objects.
# NOTE(review): pytimber is expected to take variable names as str -- confirm.
raw_names = [_variable_name(parameter) for parameter in parameters_raw]
result = ldb.get(raw_names, t1, t2, unixtime=True)

# Scaled variables are fetched one by one with their own settings and merged
# into the same result mapping.
for scaled_parameter, scaling in parameters_scaled.items():
    scaled_data = ldb.getScaled(
        _variable_name(scaled_parameter), t1, t2,
        scaleAlgorithm=scaling['scale'],
        scaleInterval=scaling['interval'],
        scaleSize=scaling['size'],
        unixtime=True)
    result.update(scaled_data)



In [100]:
import pandas as pd
from os import path

def load_existing_data(filename):
    """Return the table stored in `filename`, or an empty one if it is missing.

    When the file exists it is read with ``pd.read_csv`` after printing which
    column policy (taken from the global ``replace_column``) will be applied.
    When it does not exist, an empty frame holding only the timestamp column
    (global ``time``) is returned.
    """
    if path.exists(filename):
        print("Loading data from {}.".format(filename))
        policy_message = (
            "We will replace columns that already exist"
            if replace_column
            else "We will only append new columns"
        )
        print(policy_message)
        return pd.read_csv(filename)

    print("The file {} does not yet exist, we will create a new one".format(filename))
    return pd.DataFrame(columns=[time])

# Start either from an empty table or from the CSV written by an earlier run.
if replace_file:
    df = pd.DataFrame(columns=[time])
else:
    df = load_existing_data(filename)

df = df.set_index(time)
# A CSV round-trip leaves the timestamps as text. Parse them back into
# timezone-aware UTC datetimes so that rows loaded from the file line up
# with the newly downloaded, UTC-indexed columns in the outer join, and so
# that the later df.index.strftime call has a DatetimeIndex to work on.
df.index = pd.to_datetime(df.index, utc=True)

In [101]:
def check_duplicate_times(time_series):
    """Print a warning if any value of `time_series` occurs more than once.

    Nothing is removed or returned; the check is purely informational.
    """
    repeated = time_series.duplicated()
    if repeated.any():
        print("Time duplicates exist!")

# Merge every downloaded variable into `df`, one column per variable.
# Each result entry is indexed as values[0] (unix timestamps) and
# values[1] (the samples belonging to them).
for parameter, values in result.items():
    timestamps, samples = values[0], values[1]
    print("For column {} {} datapoints exist.".format(parameter, len(samples)))

    if parameter in df.columns:
        print("Parameter {} is already in the data frame. There it has {} values. In the newly retrieved dataset it has {} values.".format(parameter, df[parameter].count(), len(samples)))
        if not replace_column:
            print("Skipping.")
            continue
        print("Removing old column.")
        # Rows that only carried the removed column are now empty; drop them.
        df = df.drop(parameter, axis=1).dropna(axis=0, how='all')

    # Two-column frame (timestamp, value) for this variable.
    df_column = pd.DataFrame(columns=[time, parameter])
    df_column[time] = pd.Series([datetime.fromtimestamp(stamp, tz=pytz.utc) for stamp in timestamps])
    check_duplicate_times(df_column[time])
    df_column[parameter] = samples

    # Index by timestamp and discard rows without a value. No time shift is
    # applied here; timeshift_to_timber is not used by this cell.
    df_column = df_column.set_index(time).dropna()

    # Outer join keeps every timestamp seen in any variable.
    df = df.join(df_column, how='outer')

# Alphabetical column order, then render the index as text with
# millisecond resolution (the last three microsecond digits are cut off).
ordered_columns = sorted(df.columns)
df = df.reindex(ordered_columns, axis=1)
df.index = df.index.strftime('%Y-%m-%d %H:%M:%S.%f').str[:-3]
df.index.name = time

For column IP.NSRCGEN:BIASDISCAQNV 3870 datapoints exist.
For column IP.NSRCGEN:GASAQN 3919 datapoints exist.
For column IP.NSRCGEN:OVEN1AQNP 3927 datapoints exist.
For column IP.NSRCGEN:OVEN2AQNP 3927 datapoints exist.
For column IP.NSRCGEN:SOURCEHTAQNI 3878 datapoints exist.
For column IP.SOLCEN.ACQUISITION:CURRENT 1477 datapoints exist.
For column IP.SOLEXT.ACQUISITION:CURRENT 1477 datapoints exist.
For column IP.SOLINJ.ACQUISITION:CURRENT 1477 datapoints exist.
For column ITF.BCT25:CURRENT 273 datapoints exist.
For column IP.SAIREM2:FORWARDPOWER 267840 datapoints exist.


If a timestamp occurs more than once in the time index, we keep only its first occurrence.

In [102]:
# Flag every repeat of an index label after its first appearance,
# then keep only the unflagged rows.
is_repeat = df.index.duplicated(keep='first')
df = df.loc[~is_repeat].copy()
df

Unnamed: 0_level_0,IP.NSRCGEN:BIASDISCAQNV,IP.NSRCGEN:GASAQN,IP.NSRCGEN:OVEN1AQNP,IP.NSRCGEN:OVEN2AQNP,IP.NSRCGEN:SOURCEHTAQNI,IP.SAIREM2:FORWARDPOWER,IP.SOLCEN.ACQUISITION:CURRENT,IP.SOLEXT.ACQUISITION:CURRENT,IP.SOLINJ.ACQUISITION:CURRENT,ITF.BCT25:CURRENT
Timestamp (UTC_TIME),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-01 00:14:48.455,,,,,,,,-0.42,,
2018-01-01 00:14:59.255,,,,,,,0.18,,-0.29,
2018-01-01 00:45:02.855,,,,,,,,-0.42,,
2018-01-01 00:45:13.655,,,,,,,0.18,,-0.29,
2018-01-01 01:15:17.255,,,,,,,,-0.42,,
2018-01-01 01:15:28.055,,,,,,,0.18,,-0.29,
2018-01-01 01:45:31.655,,,,,,,,-0.42,,
2018-01-01 01:45:42.455,,,,,,,0.18,,-0.29,
2018-01-01 02:15:46.055,,,,,,,,-0.42,,
2018-01-01 02:15:56.855,,,,,,,0.18,,-0.29,


And save the output to the file.

In [103]:
# Write the merged table (index included) to the configured CSV path.
df.to_csv(filename)
saved_message = "Saved result to {}".format(filename)
print(saved_message)

Saved result to ../Data_Raw/Jan2018.csv
