# Adjust Waimangu Inferno water level data for input to FITS, and output it

**This is for data from the 'existing' logger**

* Read a block of data containing the start and end records for overflow periods
* Ensure that the first record has the time of the start of the data period being adjusted, and that the last record has the time of the end of that period. These two records have to be added manually, as they are not created by the notebook that finds the overflow intervals

In [None]:
%%javascript
// Override the notebook output area's _should_scroll hook so it always returns false.
// NOTE(review): presumably this keeps long outputs (e.g. the large figures below)
// from being collapsed into a scrolling box - confirm against the notebook front end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
%matplotlib inline

In [None]:
# Input files, resolved relative to the notebook's working directory.
datafile = "overflow_intervals.csv"  # start/end records of the overflow periods
alldatafile = "alldata.csv"  # the 'raw' observations that get corrected

In [None]:
# Load the overflow records: only the 'Datetime' and 'ifwaterlevel' columns are
# read, and the parsed 'Datetime' values become the index.
df = pd.read_csv(
    datafile,
    index_col='Datetime',
    parse_dates=True,
    usecols=['Datetime', 'ifwaterlevel'],
)

In [None]:
# Preview the first rows of the overflow records just loaded.
df.head()

In [None]:
# Plot the water level at each overflow record against its timestamp (the index)
# and keep a copy of the figure on disk.
fig, ax1 = plt.subplots(figsize=(25, 10))
ax1.plot(df['ifwaterlevel'], marker='o')
fig.savefig('overflow_variations.png')

**The correction values**

In [None]:
# Print the last few overflow records, then display the latest timestamp in the index.
print(df.tail())
df.index.max()

In [None]:
# Build the corrections table: one row per overflow record, with 'Datetime' as
# an ordinary column and 'correction' holding the negated water level.
# `df` itself is left untouched.
corr = df.reset_index().rename(columns={'ifwaterlevel': 'correction'})
corr['correction'] = -corr['correction']  # the correction is the negated reading

In [None]:
# Preview the first rows of the corrections table.
corr.head()

In [None]:
#read the 'raw' observation data (the data to be corrected) so that we can get the start time of the real data
# names = ['Datetime', 'iftemp', 'ifwaterlevel', 'outlettemp', 'outletflow', 'calibval', 'overflowraw']
# alldata = pd.read_csv(alldatafile, names=names, parse_dates=['Datetime'])

In [None]:
#get range of times for corrections
dt = pd.date_range(start=corr['Datetime'].min(), end=corr['Datetime'].max(), freq = '15T') #every 15 mins between first and last times
dt

In [None]:
dfdt = pd.DataFrame(dt, columns = ['Datetime'])
dfdt.head()

In [None]:
# Attach the known correction values to the 15-minute grid; grid times without
# a matching record get NaN in 'correction'.
# NOTE(review): this is a left merge on exact 'Datetime' equality, so a
# correction record whose time is not on the 15-minute grid would be dropped -
# confirm the overflow records always fall on the grid.
dfall = dfdt.merge(corr, how='left', on='Datetime')

# Fill the gaps by linear interpolation between the known correction values,
# giving a value for every time in the period to be corrected.
dfcorrect = dfall.interpolate(method='linear')
dfcorrect.head()


In [None]:
# Plot the interpolated correction against time.
fig, ax1 = plt.subplots(figsize=(25, 10))
ax1.plot(dfcorrect['Datetime'], dfcorrect['correction'], marker='o')

In [None]:
#range of times for all data, one timestamp every 15 minutes
# Spans the first to the last timestamp in the overflow records. '15min' is used
# instead of the 'T' minute alias, which recent pandas releases deprecate.
dtall = pd.date_range(start=df.index.min(), end=df.index.max(), freq='15min')
dfdtall = pd.DataFrame(dtall, columns=['Datetime'])

In [None]:
# Preview the first timestamps of the full 15-minute grid.
dfdtall.head()

In [None]:
#read the 'raw' observation data, the data to be corrected
# The file is read with the explicit column names below. The timestamp column is
# parsed under its file name 'dt' and then renamed to 'Datetime' so it can be
# merged with the other frames. The list form of `parse_dates` plus a rename
# replaces the dict form ({"Datetime": ['dt']}), which recent pandas releases
# deprecate; the resulting columns are the same.
names = ['dt', 'iftemp', 'ifwaterlevel', 'outlettemp', 'outletflow', 'calibval', 'overflowraw']
alldata = pd.read_csv(alldatafile, names=names, parse_dates=['dt']).rename(
    columns={'dt': 'Datetime'}
)

In [None]:
# Preview the first rows of the raw observation data.
alldata.head()

In [None]:
#merge these
# Join everything onto the full 15-minute grid: first the interpolated
# corrections, then the raw observations. Both are left merges on 'Datetime',
# so every grid time is kept and unmatched times get NaN.
dfa = dfdtall.merge(dfcorrect, how='left', on='Datetime')
dfz = dfa.merge(alldata, how='left', on='Datetime')

In [None]:
# Show the last rows of the merged data.
dfz.tail()

In [None]:
# dfz holds the grid times, the corrections and the raw observations.
# Finally make the correction: add it to the observed water level.
# The column is overwritten, so re-running this cell applies the correction again.
dfz['ifwaterlevel'] = dfz['ifwaterlevel'] + dfz['correction']

In [None]:
# Show the last rows of the merged data again.
dfz.tail()

In [None]:
# Add the two extra columns written to the output file.
# NOTE(review): the format ends in a literal 'Z', which labels the times as UTC -
# confirm the logger timestamps really are UTC.
timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

# constant zero error for every observation
dfz['error'] = 0.0

# timestamp rendered as a string in the required format
dfz['dt'] = dfz['Datetime'].dt.strftime(timestamp_format)

In [None]:
# Preview the first rows, now including the 'error' and 'dt' columns.
dfz.head()

In [None]:
# Plot the corrected water level against time.
fig, ax1 = plt.subplots(figsize=(25, 10))
ax1.plot(dfz['Datetime'], dfz['ifwaterlevel'], marker='o')

In [None]:
# Summary statistics of the corrected water levels that are above zero.
dfz.loc[dfz['ifwaterlevel'] > 0, 'ifwaterlevel'].describe()

In [None]:
#nasty spike associated with moving the sensor in late 2017; remove it by discarding all high positive observations
# dfz = dfz[dfz.ifwaterlevel < 0.2]

In [None]:
# Plot the water level against time once more, ahead of writing the output file.
fig, ax1 = plt.subplots(figsize=(25, 10))
ax1.plot(dfz['Datetime'], dfz['ifwaterlevel'], marker='o')

In [None]:
# Write the file for uploading to FITS: formatted timestamp, corrected water
# level and error, in that order, without the row index.
dfz.to_csv(
    'OT001_z.csv',
    columns=['dt', 'ifwaterlevel', 'error'],
    index=False,
)

In [None]:
# Summary statistics of the water levels above zero in the data just written.
dfz.loc[dfz['ifwaterlevel'] > 0, 'ifwaterlevel'].describe()