In [1]:
# Imports
#---------
import sys
import pandas as pd
import numpy as np
import feather
import os
import gc
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pandas.plotting import register_matplotlib_converters
import matplotlib.ticker as ticker
from matplotlib.dates import DateFormatter
import matplotlib as mpl
from datetime import timezone

# Register pandas' date/time formatters and converters with matplotlib
# (the function is imported from pandas.plotting above).
register_matplotlib_converters()

# Directories used by this notebook (relative to the working directory)
#-----------------------------------------------------------------------
# pngs      : directory path for gap-filling PNG output
# the_gases : directory holding the gap-filled feather files that are read and written below
pngs = './oxaria/data/pngs/gap_filling/'
the_gases = './oxaria/data/raw/1oxaria/json/gap_filling/'


In [2]:
# Read the gap-filled status table and index it by (tag, rec)
#-------------------------------------------------------------
oxaria1_status = pd.read_feather(the_gases + 'oxaria1_status_gf.ftr')
oxaria1_status = oxaria1_status.set_index(['tag', 'rec'])

# Keep the columns at positions 12..27 and 29 (counted after the index has been set),
# then release the full frame.
oxaria1_status_lt = oxaria1_status.iloc[:, [*range(12, 28), 29]]
del oxaria1_status

In [3]:
# Inspect the column dtypes and memory footprint of the trimmed frame
#---------------------------------------------------------------------
print('\nLoading Oxaria1 status...\n')
oxaria1_status_lt.info()



Loading Oxaria1 status...

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4137083 entries, ('scs-bgx-536', Timestamp('2020-01-04 09:46:25+0000', tz='UTC')) to ('scs-bgx-543', Timestamp('2021-02-28 23:59:50+0000', tz='UTC'))
Data columns (total 17 columns):
 #   Column             Dtype  
---  ------             -----  
 0   val.up.load.av15   float32
 1   val.up.load.av1    float32
 2   val.up.load.av5    float32
 3   val.up.period      object 
 4   val.up.users       float32
 5   val.psu.prot-batt  float32
 6   val.psu.rst        float32
 7   val.psu.chg        float32
 8   val.psu.standby    object 
 9   val.psu.pwr-in     float32
 10  val.psu.host-3v3   float32
 11  val.psu.batt-flt   object 
 12  val.tz.name        object 
 13  val.tz.utc-offset  object 
 14  val.tmp.brd        float32
 15  val.psu.in         object 
 16  name               object 
dtypes: float32(10), object(7)
memory usage: 589.4+ MB


In [4]:
# Some cleaning in case anything has been missed
#------------------------------------------------
# 1. Drop rows whose (tag, rec) index entry is repeated, keeping the last occurrence.
oxaria1_status_lt = oxaria1_status_lt[~oxaria1_status_lt.index.duplicated(keep='last')]

# 2. Drop rows with a missing 'tag', then restore the (tag, rec) index.
# 3. Sort by the index. sort_index() returns a new frame rather than sorting in
#    place, so its result has to be assigned for the sort to take effect.
oxaria1_status_lt = (
    oxaria1_status_lt
    .reset_index()
    .dropna(axis=0, subset=['tag'])
    .set_index(['tag', 'rec'])
    .sort_index()
)
print('\n Cleaning duplicates & NAs in index (if any)...\n')


 Cleaning duplicates & NAs in index (if any)...



In [5]:
# Collect the distinct sensor tags from the first index level
#-------------------------------------------------------------
tag_level = oxaria1_status_lt.index.get_level_values(level=0)
tags = tag_level.unique()
print('\n Unique tags loaded...\n \n', tags)


 Unique tags loaded...
 
 Index(['scs-bgx-536', 'scs-bgx-537', 'scs-bgx-538', 'scs-bgx-539',
       'scs-bgx-540', 'scs-bgx-541', 'scs-bgx-542', 'scs-bgx-543'],
      dtype='object', name='tag')


In [6]:
# Define the start dates for stable operation
# ---------------------------------------------
# Each sensor tag is paired with its start date explicitly. Pairing by position
# against the tags found in the data would silently assign the wrong date to a
# sensor if the tags were loaded in a different order or one of them was absent.
start_dates_by_tag = {
    'scs-bgx-536': '2020-09-25T00:00:00',
    'scs-bgx-537': '2020-08-01T00:00:00',
    'scs-bgx-538': '2020-06-05T00:00:00',
    'scs-bgx-539': '2020-01-25T00:00:00',
    'scs-bgx-540': '2020-05-01T00:00:00',
    'scs-bgx-541': '2020-03-05T00:00:00',
    'scs-bgx-542': '2020-02-06T00:00:00',
    'scs-bgx-543': '2020-12-07T00:00:00',
}

# Parse the ISO strings and mark them as UTC so they are timezone-aware datetimes.
dates_dict = {
    tag: dt.datetime.fromisoformat(date).replace(tzinfo=timezone.utc)
    for tag, date in start_dates_by_tag.items()
}

# Kept as plain lists (same order as the mapping) for any cell that still uses them.
start_dates = list(start_dates_by_tag.values())
dates_list = list(dates_dict.values())
print('\n Applying variable start date filters of each sensor...\n \n'+str(dates_dict))


 Applying variable start date filters of each sensor...
 
{'scs-bgx-536': datetime.datetime(2020, 9, 25, 0, 0, tzinfo=datetime.timezone.utc), 'scs-bgx-537': datetime.datetime(2020, 8, 1, 0, 0, tzinfo=datetime.timezone.utc), 'scs-bgx-538': datetime.datetime(2020, 6, 5, 0, 0, tzinfo=datetime.timezone.utc), 'scs-bgx-539': datetime.datetime(2020, 1, 25, 0, 0, tzinfo=datetime.timezone.utc), 'scs-bgx-540': datetime.datetime(2020, 5, 1, 0, 0, tzinfo=datetime.timezone.utc), 'scs-bgx-541': datetime.datetime(2020, 3, 5, 0, 0, tzinfo=datetime.timezone.utc), 'scs-bgx-542': datetime.datetime(2020, 2, 6, 0, 0, tzinfo=datetime.timezone.utc), 'scs-bgx-543': datetime.datetime(2020, 12, 7, 0, 0, tzinfo=datetime.timezone.utc)}


In [7]:
# Select periods of stable operation from the status df
# -------------------------------------------------------
print('\n Applying start date filters...\n')

# For every sensor keep only the rows recorded on or after its start date.
# k (tag) and v (start datetime) are referenced inside the query string via '@'.
tmp = []

for k, v in dates_dict.items():
    df = oxaria1_status_lt.query('tag == @k & rec >= @v')
    tmp.append(df)
tmpdf = pd.concat(tmp)

# Show non-null counts per column. Newer pandas names this option 'show_counts'
# and no longer accepts 'null_counts'; older pandas only knows 'null_counts'.
# An unknown keyword raises TypeError before anything is printed, so falling
# back is safe.
try:
    tmpdf.info(show_counts=True)
except TypeError:
    tmpdf.info(null_counts=True)

# Save to feather
# -----------------
# Build the destination once so the logged path is the path actually written.
# NOTE(review): this resolves to a file named '.ftr' inside the directory
# 'oxaria1_status_stable_536_2feb21'. It is kept unchanged because other code
# may read from this exact location - confirm whether that layout is intended.
out_path = the_gases+'oxaria1_status_stable_536_2feb21/.ftr'
# to_feather does not create missing parent directories.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
print('\n Writing to  '+out_path+'\n')
tmpdf.reset_index().to_feather(out_path)
print('All done! \U0001F600')



 Applying start date filters...

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 3114028 entries, ('scs-bgx-536', Timestamp('2020-09-25 00:00:19+0000', tz='UTC')) to ('scs-bgx-543', Timestamp('2021-02-28 23:59:50+0000', tz='UTC'))
Data columns (total 17 columns):
 #   Column             Non-Null Count    Dtype  
---  ------             --------------    -----  
 0   val.up.load.av15   3114028 non-null  float32
 1   val.up.load.av1    3114028 non-null  float32
 2   val.up.load.av5    3114028 non-null  float32
 3   val.up.period      3114028 non-null  object 
 4   val.up.users       3114028 non-null  float32
 5   val.psu.prot-batt  3114027 non-null  float32
 6   val.psu.rst        3114027 non-null  float32
 7   val.psu.chg        3114027 non-null  float32
 8   val.psu.standby    3114027 non-null  object 
 9   val.psu.pwr-in     3114027 non-null  float32
 10  val.psu.host-3v3   3114027 non-null  float32
 11  val.psu.batt-flt   3114027 non-null  object 
 12  val.tz.name        3114028 n