# Visualize Inferno Crater data for pseudo-production monitoring purposes

This notebook is used because the old, experimental data logger has been turned off and the LDRCP pilot is being used instead. Data are stored in a development S3 bucket at AWS. Data retrieval from AWS requires MFA, so full automation of this process is not possible.

_Application_

This will be used until the LDRCP is production-ised, and an end-user data access mechanism is available.

## NB - TEMPORARY

Spike test data are used for Inferno Crater overflow observations, as those sensors are not yet 'connected' to the LDRCP pilot logger. This means that we artificially focus on the period when we have those data.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import boto3
import os
import glob
import shutil
from datetime import datetime

In [None]:
def remove_duplines(tmpfile, completefile):
    """Copy tmpfile to completefile, keeping only the first copy of each line.

    Lines are compared as exact strings, including the line ending. This
    strips header lines that repeat when daily files are concatenated.
    NOTE: any data line that is an exact repeat of an earlier line is
    dropped as well.

    Parameters
    ----------
    tmpfile : str
        Path of the text file to read.
    completefile : str
        Path of the text file to write; overwritten if it exists.

    Raises
    ------
    OSError
        If tmpfile cannot be read or completefile cannot be written. The
        input is opened first, so a missing input does not leave an empty
        output file behind.
    """
    lines_seen = set() # holds lines already seen, a set gives fast lookup
    # 'with' closes both files even if reading or writing fails part way
    with open(tmpfile, 'r') as infile, open(completefile, 'w') as outfile:
        for line in infile:
            if line not in lines_seen: # not a duplicate
                outfile.write(line)
                lines_seen.add(line)

## Get LDRCP pilot data from AWS

In [None]:
#authentication for S3
# The STS client is only used to exchange an MFA code for temporary
# credentials; presumably it picks up the user's locally configured AWS
# credentials - confirm against the local AWS setup.
sts_client = boto3.client('sts')

# The one-time MFA code is typed in by hand each time this cell is run.
mfa_TOTP = input("Enter the MFA code: ")

# Call the assume_role method of the STS client and pass the role ARN, a role
# session name, the serial number of the MFA device and the MFA code.
# NOTE(review): IAM role ARNs normally look like
# arn:aws:iam::<account>:role/<name>; the RoleArn below uses the "sts"
# service prefix - confirm this is intended.
assumed_role_object=sts_client.assume_role(
    RoleArn="arn:aws:sts::615890063537:role/S3UserRole",
    RoleSessionName="DataLoggerRole",
    SerialNumber="arn:aws:iam::582058524534:mfa/sherburn",
    TokenCode=mfa_TOTP
)

# From the response that contains the assumed role, get the temporary
# credentials that can be used to make subsequent API calls
credentials=assumed_role_object['Credentials']

# S3 resource that authenticates with the temporary credentials; it is the
# object used to download files from the bucket.
s3=boto3.resource('s3',
    aws_access_key_id=credentials['AccessKeyId'],
    aws_secret_access_key=credentials['SecretAccessKey'],
    aws_session_token=credentials['SessionToken'],
)

In [None]:
#name of the S3 bucket that holds the logger data
bucket = 'dev-data-logger-lake.geonet.org.nz'

#name of the logger whose data are downloaded
logger = 'infernocratertest'

#top folder where the final CSV files are saved
dlsav = '/home/sherburn/GeoNet/datalogger/inferno_spike'
#'tmp' sub-folder of the top folder, for the downloaded daily CSV files
dltmp = dlsav + '/tmp'

#temporary file in the top folder, concatenated but still with daily headers
tmpfile = os.path.join(dlsav, 'tmpfile.csv')

In [None]:
#date range for data
date1 = '20190718'
date2 = '20190805'

In [None]:
#construct and format the range of dates
dr = pd.date_range(date1, date2, freq='D', )
dates = dr.map(lambda x: x.strftime('%Y/%m/%d'))

In [None]:
os.makedirs(dltmp, exist_ok=True) #make tmp directory for downloaded files
#loop for each date
for date in dates:
    #date without separators, as used in the file name; kept in its own name
    #so that the date2 end-of-range setting is not overwritten
    datestamp = (datetime.strptime(date, '%Y/%m/%d')).strftime('%Y%m%d')
    savefile = 'logger-'+logger+'_Table1'+'_'+datestamp+'.csv'
    s3file = date+'/'+savefile
    #print (s3file)
    try:
        s3.Bucket(bucket).download_file(s3file, os.path.join(dltmp, savefile))
    except Exception as err:
        #a day without data is expected, so report the reason and carry on;
        #catching Exception (not everything) leaves Ctrl-C able to stop the loop
        print ('fail to download '+s3file+' ('+str(err)+')')

#concat all files for the logger
concatfile = tmpfile
files = glob.glob(os.path.join(dltmp, '*.csv'))
files.sort() #to get data in time order
with open(concatfile, 'w') as outfile:
    for file in files:
        with open(file, 'r') as readfile:
            shutil.copyfileobj(readfile, outfile)

shutil.rmtree(dltmp)#remove tmp directory for downloaded files

#remove unwanted header lines from temporary file
completefile = os.path.join(dlsav, logger, logger+'_Table1.csv')
#the per-logger folder may not exist yet, and opening the output file would fail
os.makedirs(os.path.dirname(completefile), exist_ok=True)
remove_duplines(tmpfile, completefile)
#remove temporary file
os.remove(tmpfile)

### LDRCP pilot data into a dataframe

In [None]:
#logger column name -> descriptive name
#columns are renamed by name, not by position, because read_csv returns the
#usecols columns in the order they appear in the file
pilot_columns = {'Depth_USGS_OTT_meters': 'crater_water_level',
                 'Temp_thermocouple1_degC': 'crater_water_temperature'}
pilot = (pd.read_csv(completefile,
        usecols=['Time'] + list(pilot_columns),
        parse_dates=True,
        index_col='Time')
        .rename(columns=pilot_columns))
#fixed column order, whatever the order in the file
pilot = pilot[['crater_water_level', 'crater_water_temperature']]
pilot.head()

In [None]:
#make datetime index timezone naive to match spike dataframe
idx = pilot.index.tz_localize(None)
pilot.set_index(idx, inplace=True)
pilot.head()

## Get spike data

In [None]:
#logger column name -> descriptive name
#columns are renamed by name, not by position, because read_csv returns the
#usecols columns in the order they appear in the file
spike_columns = {'RadarLevel_Meters': 'overflow_water_level',
                 'Temp_thermocouple1_degC': 'overflow_water_temperature'}
spike = (pd.read_csv('CR6 4114 InfernoSpikeTest USB_Table1.dat',
        skiprows=[0,2,3], #keep only the second line of the file header, the column names
        usecols=['TIMESTAMP'] + list(spike_columns),
        parse_dates=True,
        index_col = 'TIMESTAMP',
        na_values='NAN')
        .rename(columns=spike_columns))
#fixed column order, whatever the order in the file
spike = spike[['overflow_water_temperature', 'overflow_water_level']]
spike.tail()

In [None]:
#show the first rows of the spike data as a visual check
spike.head()

## Merge LDRCP pilot and spike dataframes

In [None]:
#join the two dataframes on their datetime indexes; with the default join type
#only times present in both are kept
data = pd.merge(pilot, spike, left_index=True, right_index=True)

In [None]:
#show the last rows of the merged data as a visual check
data.tail()

In [None]:
#for this test dataset, trim to remove some rubbish at start
#copy() makes the trimmed data an independent dataframe, so that adding columns
#to it does not trigger pandas' SettingWithCopyWarning
data = data.loc['2019-07-19 02:40:00':].copy()

### Calculate outflow flowrate and do some other cleaning

- If water level in outflow channel is measured as negative, set overflow to zero
- Adjust water level in crater so that at overflow it is ~10 cm above. Measure in metres below overflow, which is a positive number

In [None]:
def flow(x):
    """Return the outflow rate for water level(s) x in the overflow channel.

    The rate is (1.056 * x**1.538) * 1000, and zero where x is negative.
    NOTE(review): presumably x is in metres and the result in L/s, the
    factor 1000 converting from cubic metres - confirm against the
    source of the rating formula.

    Parameters
    ----------
    x : float or array-like of float
        Water level(s). NaN gives NaN.

    Returns
    -------
    numpy.ndarray
        Float array of flow rates with the same shape as x. The result is
        always float, so values are never truncated to integers when the
        first level is negative, and empty input gives an empty array.
    """
    level = np.asarray(x, dtype=float)
    #negative levels are clamped to zero, which gives zero flow; NaN stays NaN
    clamped = np.maximum(level, 0.0)
    return np.asarray((1.056*clamped**1.538)*1000)

In [None]:
#outflow rate, from the water level in the overflow channel
data['overflow_rate'] = flow(data['overflow_water_level'])

In [None]:
#value subtracted from the measured crater water level before the sign is flipped
#NOTE(review): presumably the logger reading when the crater is at overflow level - confirm
crater_level_datum = 10.1
data['crater_level_reloverflow'] = -(data['crater_water_level'] - crater_level_datum)

### Visualize something

In [None]:
#four stacked panels, one per observation
fig,(ax0,ax1,ax2,ax3) = plt.subplots(4, 1, figsize=(20,15))
plt.subplots_adjust(wspace=0.5, hspace=0.5)

#one entry per panel, top to bottom; optional keys:
#  title       - figure title, drawn above the panel
#  hide_xlabel - blank the x-axis label
#  ylim_bottom - lower limit of the y-axis
#  invert_y    - flip the y-axis so that larger values plot lower
panels = [
    {'column': 'crater_water_temperature',
     'label': 'crater water temperature',
     'ylabel': 'Temperature (deg C)',
     'title': 'Inferno Crater Observations',
     'hide_xlabel': True,
     'ylim_bottom': 40},
    {'column': 'crater_level_reloverflow',
     'label': 'crater water depth\nbelow overflow',
     'ylabel': 'Depth below overflow (m)',
     'hide_xlabel': True,
     'invert_y': True},
    {'column': 'overflow_water_temperature',
     'label': 'overflow channel temperature',
     'ylabel': 'Temperature (deg C)'},
    {'column': 'overflow_rate',
     'label': 'overflow channel\nflow rate',
     'ylabel': 'Flow (L/s)'},
]

for axis, panel in zip((ax0, ax1, ax2, ax3), panels):
    plot_options = {'ax': axis, 'fontsize': 12, 'label': panel['label']}
    if 'title' in panel:
        plot_options['title'] = panel['title']
    data[panel['column']].plot(**plot_options)

    axis.title.set_size(20)
    axis.grid()
    axis.set_ylabel(panel['ylabel'])
    if panel.get('hide_xlabel', False):
        axis.set_xlabel('')
    if 'ylim_bottom' in panel:
        axis.set_ylim(bottom=panel['ylim_bottom'])
    if panel.get('invert_y', False):
        axis.invert_yaxis()
    axis.legend(loc='best')

#saving the figure is currently disabled
#     fig.savefig(os.path.join(base, logger, logger+'_field_logger.png'), dpi=100, bbox_inches='tight')