In [14]:
# Imports
# ---------
import sys
import pandas as pd
import numpy as np
import feather
import os
import gc
import datetime as dt

# Data folders: folder0 receives the combined outputs, folder1 / folder2 hold
# the per-network (oxaria1 / oxaria2) feather files read and written below.
# NOTE(review): these are absolute paths on one machine; the notebook will not
# run elsewhere without editing them.
folder0 = '/home/tonyb/Gdrive/MinicondaProjects/oxaria/data/raw/0oxaria/gap_filling/jun_to_sept_2021/'
folder1 = '/home/tonyb/Gdrive/MinicondaProjects/oxaria/data/raw/1oxaria/json/gap_filling/jun_to_sept_2021/'
folder2 = '/home/tonyb/Gdrive/MinicondaProjects/oxaria/data/raw/2oxaria/json/gap_filling/jun_to_sept_2021/'

# List of good Oxaria1 location names to zip with tags
# must be in numerical order of tags
# NOTE(review): add_names_1oxaria (further down) hard-codes its own copy of
# this list, spelling the fifth site 'Marsten' instead of 'New Marston'; in the
# visible cells this module-level list is never used.
# ------------------------------------------------------
ox1_location_names = ['High St', 'South Parks Rd', 'St Ebbes', 'Jesus College',
                      'New Marston', 'The Plain', 'Worcester College', 'John Radcliffe']

# List of good Oxaria2 location names to zip with tags
# must be in numerical order of tags
# NOTE(review): add_names_2oxaria (further down) hard-codes its own copy of
# this list, spelling the fifth site 'Jahlul Bayt Mosque' instead of
# 'Ahlul Bayt Mosque'; in the visible cells this module-level list is never used.
# ------------------------------------------------------
ox2_location_names = ['Windmill School1', 'Said Business School', 'County Hall',
                      'Divinity Road', 'Ahlul Bayt Mosque', 'Windmill School2', 'St Giles',
                      'Warneford Hospital', 'Spare', 'Speedwell St']

In [15]:
# Functions to load stable Oxaria feathers & aggregate them to 15-minute statistics
# ----------------------------------------------------------------------

def resample_15min(in_ftr, out_ftr):
    """Average a feather file into 15-minute bins per sensor tag and save it.

    Parameters
    ----------
    in_ftr : str or path-like
        Feather file to read. It must contain a 'tag' column and a datetime
        'rec' column; every other numeric column is averaged.
    out_ftr : str or path-like
        Feather file the 15-minute means are written to.

    Returns
    -------
    pandas.DataFrame
        Flat frame (default integer index) with 'tag', 'rec' and one mean per
        numeric input column. 'rec' is the right-hand edge of each bin as a
        UTC timestamp; 64-bit numeric columns are narrowed to 32-bit.
    """
    df = pd.read_feather(in_ftr).set_index(['tag', 'rec'])
    # Bins are labelled with their right edge but use the pandas default
    # closing side for a '15min' frequency.
    # NOTE(review): pollutant_transients / climate_transients pass
    # closed='right' explicitly, so a sample exactly on a bin edge may land in
    # different bins in the two outputs - confirm this is intended.
    bins = [pd.Grouper(level='tag'),
            pd.Grouper(level='rec',
                       freq='15min',
                       label='right',
                       convention='end',
                       origin='epoch')]
    # numeric_only=True keeps the historical behaviour of silently dropping
    # non-numeric columns (e.g. a location name); without it recent pandas
    # raises TypeError when asked for the mean of a string column.
    df = df.groupby(bins).mean(numeric_only=True)
    # Halve the memory footprint: int64 -> int32, float64 -> float32.
    narrow = {col: 'int32' for col in df.select_dtypes('int64').columns}
    narrow.update({col: np.float32 for col in df.select_dtypes('float64').columns})
    df = df.astype(narrow).reset_index()
    # Normalise the bin labels to UTC directly. The previous string round trip
    # (strftime -> to_datetime with unit='ns' and errors='coerce') depended on
    # version-specific parsing and could turn every timestamp into NaT.
    df['rec'] = pd.to_datetime(df['rec'], utc=True)
    df.to_feather(out_ftr)
    return df

def pollutant_transients(df):
    """Summarise humidity / temperature rates of change in 15-minute bins.

    For every tag the change of 'val.sht.hmd' and 'val.sht.tmp' over two rows
    is divided by 20 and then summarised per 15-minute bin (bins closed and
    labelled on the right).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a ('tag', 'rec') MultiIndex, 'rec' being datetime-like,
        with columns 'val.sht.hmd' and 'val.sht.tmp'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('tag', 'rec') with columns, in this order:
        'mag_hmd_s20', 'mag_tmp_s20'  - the bin extreme (max or min) with the
        larger absolute value, sign kept;
        'mean_hmd_s20', 'mean_tmp_s20' - the bin mean.
        Bins without any valid rate are present and hold NaN.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``df`` contains no tags at all.
    """
    sources = {'hmd_s20': 'val.sht.hmd', 'tmp_s20': 'val.sht.tmp'}
    per_tag = []
    for tag, dat in df.groupby('tag'):
        # Rate of change over two rows, scaled by 20.
        # NOTE(review): presumably rows are 10 s apart so two rows span 20 s;
        # diff() is positional, so gaps in the record are not accounted for.
        # Built as a fresh frame rather than assigned onto the groupby slice,
        # which triggered SettingWithCopyWarning.
        rates = pd.DataFrame({new: dat[src].diff(2).div(20)
                              for new, src in sources.items()})
        # '15min' replaces the deprecated '15T' alias (same bin width).
        binned = rates.resample('15min', closed='right', label='right', level=1)
        bin_mean, bin_max, bin_min = binned.mean(), binned.max(), binned.min()

        stats = pd.DataFrame(index=bin_mean.index)
        for col in sources:
            # Keep whichever extreme is further from zero; ties and NaN
            # comparisons fall through to the minimum, as before.
            stats['mag_' + col] = np.where(bin_max[col].abs() > bin_min[col].abs(),
                                           bin_max[col], bin_min[col])
        for col in sources:
            stats['mean_' + col] = bin_mean[col]
        stats['tag'] = tag
        per_tag.append(stats.reset_index().set_index(['tag', 'rec']).sort_index())
    return pd.concat(per_tag)

def climate_transients(df):
    """Summarise climate humidity / temperature rates of change in 15-minute bins.

    For every tag the change of 'val.hmd' and 'val.tmp' over two rows is
    divided by 20 and then summarised per 15-minute bin (bins closed and
    labelled on the right).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a ('tag', 'rec') MultiIndex, 'rec' being datetime-like,
        with columns 'val.hmd' and 'val.tmp'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ('tag', 'rec') with columns, in this order:
        'mag_hmd_s20c', 'mag_tmp_s20c'  - the bin extreme (max or min) with the
        larger absolute value, sign kept;
        'mean_hmd_s20c', 'mean_tmp_s20c' - the bin mean;
        'max_hmd_s20c' - the bin maximum of the humidity rate.
        Bins without any valid rate are present and hold NaN.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``df`` contains no tags at all.
    """
    sources = {'hmd_s20c': 'val.hmd', 'tmp_s20c': 'val.tmp'}
    per_tag = []
    for tag, dat in df.groupby('tag'):
        # Rate of change over two rows, scaled by 20.
        # NOTE(review): presumably rows are 10 s apart so two rows span 20 s;
        # diff() is positional, so gaps in the record are not accounted for.
        # Built as a fresh frame rather than assigned onto the groupby slice,
        # which triggered SettingWithCopyWarning.
        rates = pd.DataFrame({new: dat[src].diff(2).div(20)
                              for new, src in sources.items()})
        # '15min' replaces the deprecated '15T' alias (same bin width).
        binned = rates.resample('15min', closed='right', label='right', level=1)
        bin_mean, bin_max, bin_min = binned.mean(), binned.max(), binned.min()

        stats = pd.DataFrame(index=bin_mean.index)
        for col in sources:
            # Keep whichever extreme is further from zero; ties and NaN
            # comparisons fall through to the minimum, as before.
            stats['mag_' + col] = np.where(bin_max[col].abs() > bin_min[col].abs(),
                                           bin_max[col], bin_min[col])
        for col in sources:
            stats['mean_' + col] = bin_mean[col]
        # Reuse the maximum already computed instead of resampling a fourth time.
        stats['max_hmd_s20c'] = bin_max['hmd_s20c']
        stats['tag'] = tag
        per_tag.append(stats.reset_index().set_index(['tag', 'rec']).sort_index())
    return pd.concat(per_tag)


In [16]:
# Calc 15-min averages for space &n comparison with auto data - oxaria1
# -----------------------------------------------------------------------

#Apply resample function
# Each call reads one stable feather from folder1, writes its 15-minute means
# back to folder1 under a '_15min' file name and keeps the result in memory.
oxaria1_climate_stable_15min = resample_15min(in_ftr=folder1+'oxaria1_climate_stable_oct21.ftr',
                                              out_ftr=folder1+'oxaria1_climate_stable_oct21_15min.ftr')

oxaria1_status_stable_15min = resample_15min(in_ftr=folder1+'oxaria1_status_stable_oct21.ftr',
                                             out_ftr=folder1+'oxaria1_status_stable_oct21_15min.ftr')

oxaria1_gases_stable_15min = resample_15min(in_ftr=folder1+'oxaria1_gases_stable_oct21.ftr',
                                            out_ftr=folder1+'oxaria1_gases_stable_oct21_15min.ftr')

oxaria1_pm_stable_15min = resample_15min(in_ftr=folder1+'oxaria1_pm_stable_oct21.ftr',
                                         out_ftr=folder1+'oxaria1_pm_stable_oct21_15min.ftr')

#Special function for transients
# The transient helpers work on the un-aggregated rows, so the climate, pm and
# gases feathers are read a second time here, indexed by (tag, rec).
# `df` is a scratch name that is overwritten for each sensor family.
df = pd.read_feather(folder1+'oxaria1_climate_stable_oct21.ftr').set_index(['tag','rec'])
oxaria1_climate_stable_15min_transients = climate_transients(df=df)
df = pd.read_feather(folder1+'oxaria1_pm_stable_oct21.ftr').set_index(['tag','rec'])
oxaria1_pm_stable_15min_transients = pollutant_transients(df=df)
df = pd.read_feather(folder1+'oxaria1_gases_stable_oct21.ftr').set_index(['tag','rec'])
oxaria1_gases_stable_15min_transients = pollutant_transients(df=df)

# Print a summary of each frame built above (the status frame is not summarised).
oxaria1_climate_stable_15min.info()
oxaria1_climate_stable_15min_transients.info()
oxaria1_gases_stable_15min.info()
oxaria1_gases_stable_15min_transients.info()
oxaria1_pm_stable_15min.info()
oxaria1_pm_stable_15min_transients.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79234 entries, 0 to 79233
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype              
---  ------   --------------  -----              
 0   tag      79234 non-null  object             
 1   rec      79234 non-null  datetime64[ns, UTC]
 2   val.hmd  79234 non-null  float32            
 3   val.tmp  79234 non-null  float32            
dtypes: datetime64[ns, UTC](1), float32(2), object(1)
memory usage: 1.8+ MB
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 92121 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-543', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   mag_hmd_s20c   79234 non-null  float32
 1   mag_tmp_s20c   79234 non-null  float32
 2   mean_hmd_s20c  79234 non-null  float32
 3   mean_tmp_s20c  79234 non-null  float32
 4   max_hmd_s20c   79

In [20]:
# Calc 15-min averages for space &n comparison with auto data - oxaria2
# -----------------------------------------------------------------------

# Apply resample function
# Each call reads one stable feather from folder2, writes its 15-minute means
# back to folder2 under a '_15min' file name and keeps the result in memory.
oxaria2_climate_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_climate_stable_oct21.ftr',
                                              out_ftr=folder2+'oxaria2_climate_stable_oct21_15min.ftr')

oxaria2_status_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_status_stable_oct21.ftr',
                                              out_ftr=folder2+'oxaria2_status_stable_oct21_15min.ftr')

oxaria2_gases_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_gases_stable_oct21.ftr',
                                            out_ftr=folder2+'oxaria2_gases_stable_oct21_15min.ftr')

oxaria2_pm_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_pm_stable_oct21.ftr',
                                          out_ftr=folder2+'oxaria2_pm_stable_oct21_15min.ftr')

# Special function for transients
# The transient helpers work on the un-aggregated rows, so the climate, pm and
# gases feathers are read a second time here, indexed by (tag, rec).
# `df` is a scratch name that is overwritten for each sensor family.
df = pd.read_feather(folder2+'oxaria2_climate_stable_oct21.ftr').set_index(['tag','rec'])
oxaria2_climate_stable_15min_transients = climate_transients(df=df)
df = pd.read_feather(folder2+'oxaria2_pm_stable_oct21.ftr').set_index(['tag','rec'])
oxaria2_pm_stable_15min_transients = pollutant_transients(df=df)
df = pd.read_feather(folder2+'oxaria2_gases_stable_oct21.ftr').set_index(['tag','rec'])
oxaria2_gases_stable_15min_transients = pollutant_transients(df=df)

# Print a summary of each frame built above (the status frame is not summarised).
oxaria2_climate_stable_15min.info()
oxaria2_climate_stable_15min_transients.info()
oxaria2_gases_stable_15min.info()
oxaria2_gases_stable_15min_transients.info()
oxaria2_pm_stable_15min.info()
oxaria2_pm_stable_15min_transients.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107068 entries, 0 to 107067
Data columns (total 4 columns):
 #   Column   Non-Null Count   Dtype              
---  ------   --------------   -----              
 0   tag      107068 non-null  object             
 1   rec      107068 non-null  datetime64[ns, UTC]
 2   val.hmd  107068 non-null  float32            
 3   val.tmp  107068 non-null  float32            
dtypes: datetime64[ns, UTC](1), float32(2), object(1)
memory usage: 2.5+ MB
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 115160 entries, ('scs-bgx-550', Timestamp('2021-06-21 10:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 5 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   mag_hmd_s20c   107066 non-null  float32
 1   mag_tmp_s20c   107066 non-null  float32
 2   mean_hmd_s20c  107066 non-null  float32
 3   mean_tmp_s20c  107066 non-null  float32
 4   ma

In [22]:
# Calc 15-min averages for space &n comparison with auto data - oxaria2
# -----------------------------------------------------------------------
# NOTE(review): this cell repeats the previous oxaria2 cell statement for
# statement and rebinds the same variables. The only functional difference is
# the gases output file name, which carries an 'xxx' prefix here - presumably
# to avoid overwriting the earlier file; confirm whether this cell (and the
# extra file it writes) is still wanted.

# Apply resample function
oxaria2_climate_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_climate_stable_oct21.ftr',
                                              out_ftr=folder2+'oxaria2_climate_stable_oct21_15min.ftr')

oxaria2_status_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_status_stable_oct21.ftr',
                                             out_ftr=folder2+'oxaria2_status_stable_oct21_15min.ftr')

# Written to 'xxxoxaria2_gases_stable_oct21_15min.ftr', not the un-prefixed name.
oxaria2_gases_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_gases_stable_oct21.ftr',
                                            out_ftr=folder2+'xxxoxaria2_gases_stable_oct21_15min.ftr')

oxaria2_pm_stable_15min = resample_15min(in_ftr=folder2+'oxaria2_pm_stable_oct21.ftr',
                                         out_ftr=folder2+'oxaria2_pm_stable_oct21_15min.ftr')

# Special function for transients
# The un-aggregated feathers are read again, indexed by (tag, rec); `df` is a
# scratch name overwritten for each sensor family.
df = pd.read_feather(folder2+'oxaria2_climate_stable_oct21.ftr').set_index(['tag','rec'])
oxaria2_climate_stable_15min_transients = climate_transients(df=df)
df = pd.read_feather(folder2+'oxaria2_pm_stable_oct21.ftr').set_index(['tag','rec'])
oxaria2_pm_stable_15min_transients = pollutant_transients(df=df)
df = pd.read_feather(folder2+'oxaria2_gases_stable_oct21.ftr').set_index(['tag','rec'])
oxaria2_gases_stable_15min_transients = pollutant_transients(df=df)

# Print a summary of each frame built above (the status frame is not summarised).
oxaria2_climate_stable_15min.info()
oxaria2_climate_stable_15min_transients.info()
oxaria2_gases_stable_15min.info()
oxaria2_gases_stable_15min_transients.info()
oxaria2_pm_stable_15min.info()
oxaria2_pm_stable_15min_transients.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107068 entries, 0 to 107067
Data columns (total 4 columns):
 #   Column   Non-Null Count   Dtype              
---  ------   --------------   -----              
 0   tag      107068 non-null  object             
 1   rec      107068 non-null  datetime64[ns, UTC]
 2   val.hmd  107068 non-null  float32            
 3   val.tmp  107068 non-null  float32            
dtypes: datetime64[ns, UTC](1), float32(2), object(1)
memory usage: 2.5+ MB
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 115160 entries, ('scs-bgx-550', Timestamp('2021-06-21 10:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 5 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   mag_hmd_s20c   107066 non-null  float32
 1   mag_tmp_s20c   107066 non-null  float32
 2   mean_hmd_s20c  107066 non-null  float32
 3   mean_tmp_s20c  107066 non-null  float32
 4   ma

In [23]:
# Add names back on to the dfs, they get lost by resample
# ---------------------------------------------------------

# functions for oxaria1
def add_names_1oxaria(df, location_names=None):
    """Attach an Oxaria1 location 'name' column to a frame keyed by sensor tag.

    Tags are sorted and paired one-to-one with ``location_names``, so the names
    must be listed in the sort order of the tags.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'tag' and 'rec' either as columns or as index levels.
        It is not modified.
    location_names : sequence of str, optional
        Names to pair with the sorted tags. Defaults to the eight Oxaria1
        names this function has always used.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` indexed by ('tag', 'rec') with a 'name' column. Tags
        without a matching name get NaN. A helper 'index' column left behind
        by ``reset_index`` is removed.

    Warns
    -----
    UserWarning
        When the number of distinct tags differs from the number of names,
        because the positional pairing may then label sites wrongly.
    """
    import warnings  # local import keeps this cell self-contained

    if location_names is None:
        # NOTE(review): 'Marsten' differs from 'New Marston' in the
        # module-level ox1_location_names list; kept unchanged because files
        # already written may rely on this spelling.
        location_names = ['High St', 'South Parks Rd', 'St Ebbes', 'Jesus College',
                          'Marsten', 'The Plain', 'Worcester College', 'John Radcliffe']

    flat = df.reset_index()
    # Sort explicitly: unique() returns tags in order of appearance, which only
    # matches the name order when the rows already happen to be sorted by tag.
    devices = sorted(flat['tag'].dropna().unique())
    if len(devices) != len(location_names):
        warnings.warn(
            'add_names_1oxaria: found %d distinct tags but %d location names; '
            'names are paired by position and may be misassigned'
            % (len(devices), len(location_names)))
    tag_to_name = dict(zip(devices, location_names))

    # Mapping (rather than merging) overwrites an existing 'name' column, so
    # running this twice does not create name_x / name_y columns.
    flat['name'] = flat['tag'].map(tag_to_name)
    dfout = flat.set_index(['tag', 'rec'])
    # 'index' only exists when the input had a plain integer index.
    return dfout.drop(columns=['index'], errors='ignore')

# functions for oxaria2


def add_names_2oxaria(df, location_names=None):
    """Attach an Oxaria2 location 'name' column to a frame keyed by sensor tag.

    Tags are sorted and paired one-to-one with ``location_names``, so the names
    must be listed in the sort order of the tags.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'tag' and 'rec' either as columns or as index levels.
        It is not modified.
    location_names : sequence of str, optional
        Names to pair with the sorted tags. Defaults to the ten Oxaria2
        names this function has always used.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` indexed by ('tag', 'rec') with a 'name' column. Tags
        without a matching name get NaN. A helper 'index' column left behind
        by ``reset_index`` is removed.

    Warns
    -----
    UserWarning
        When the number of distinct tags differs from the number of names,
        because the positional pairing may then label sites wrongly.
    """
    import warnings  # local import keeps this cell self-contained

    if location_names is None:
        # NOTE(review): 'Jahlul Bayt Mosque' differs from 'Ahlul Bayt Mosque'
        # in the module-level ox2_location_names list; kept unchanged because
        # files already written may rely on this spelling.
        location_names = ['Windmill School1', 'Said Business School', 'County Hall', 'Divinity Road',
                          'Jahlul Bayt Mosque', 'Windmill School2', 'St Giles', 'Warneford Hospital',
                          'Spare', 'Speedwell St']

    flat = df.reset_index()
    # Sort explicitly: unique() returns tags in order of appearance, which only
    # matches the name order when the rows already happen to be sorted by tag.
    devices = sorted(flat['tag'].dropna().unique())
    if len(devices) != len(location_names):
        warnings.warn(
            'add_names_2oxaria: found %d distinct tags but %d location names; '
            'names are paired by position and may be misassigned'
            % (len(devices), len(location_names)))
    tag_to_name = dict(zip(devices, location_names))

    # Mapping (rather than merging) overwrites an existing 'name' column, so
    # running this twice does not create name_x / name_y columns.
    flat['name'] = flat['tag'].map(tag_to_name)
    dfout = flat.set_index(['tag', 'rec'])
    # 'index' only exists when the input had a plain integer index.
    return dfout.drop(columns=['index'], errors='ignore')


# Applying the functions to the dfs
# -----------------------------------
# Every frame below is replaced, under the same variable name, by the frame
# returned from add_names_*oxaria: indexed by (tag, rec) and carrying a
# location 'name' column.
# non-transients oxaria1
oxaria1_gases_stable_15min = add_names_1oxaria(oxaria1_gases_stable_15min)
oxaria1_pm_stable_15min = add_names_1oxaria(oxaria1_pm_stable_15min)
oxaria1_climate_stable_15min = add_names_1oxaria(oxaria1_climate_stable_15min)
# transients oxaria1
oxaria1_gases_stable_15min_transients = add_names_1oxaria(
    oxaria1_gases_stable_15min_transients)
oxaria1_pm_stable_15min_transients = add_names_1oxaria(
    oxaria1_pm_stable_15min_transients)
oxaria1_climate_stable_15min_transients = add_names_1oxaria(
    oxaria1_climate_stable_15min_transients)
# status
oxaria1_status_stable_15min = add_names_1oxaria(oxaria1_status_stable_15min)

# non-transients oxaria2
oxaria2_gases_stable_15min = add_names_2oxaria(oxaria2_gases_stable_15min)
oxaria2_pm_stable_15min = add_names_2oxaria(oxaria2_pm_stable_15min)
oxaria2_climate_stable_15min = add_names_2oxaria(oxaria2_climate_stable_15min)
# transients oxaria2
oxaria2_gases_stable_15min_transients = add_names_2oxaria(
    oxaria2_gases_stable_15min_transients)
oxaria2_pm_stable_15min_transients = add_names_2oxaria(
    oxaria2_pm_stable_15min_transients)
oxaria2_climate_stable_15min_transients = add_names_2oxaria(
    oxaria2_climate_stable_15min_transients)
# status
oxaria2_status_stable_15min = add_names_2oxaria(oxaria2_status_stable_15min)

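# Housekeeping, no longer needed: the stray 'index' column appears when add_names_*oxaria
# calls reset_index() on the flat frames returned by resample_15min, and it is already
# dropped inside those functions.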
# oxaria2_climate_stable_15min.drop(columns=['index'],inplace=True)

In [24]:
# Check that the oxaria1 climate transients now carry the 'name' column.
oxaria1_climate_stable_15min_transients.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 92121 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-543', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   mag_hmd_s20c   79234 non-null  float32
 1   mag_tmp_s20c   79234 non-null  float32
 2   mean_hmd_s20c  79234 non-null  float32
 3   mean_tmp_s20c  79234 non-null  float32
 4   max_hmd_s20c   79234 non-null  float32
 5   name           92121 non-null  object 
dtypes: float32(5), object(1)
memory usage: 3.1+ MB


In [25]:
# NOTE(review): in the visible notebook oxaria_gases_stbl15_transients is first
# assigned in the "Combine oxaria 1 & 2 gases" cell further down, so on a fresh
# kernel run top to bottom this line raises NameError. Move it below that cell
# or remove it.
oxaria_gases_stbl15_transients.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 207285 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 4 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   mag_hmd_s20   185926 non-null  float32
 1   mag_tmp_s20   185926 non-null  float32
 2   mean_hmd_s20  185926 non-null  float32
 3   mean_tmp_s20  185926 non-null  float32
dtypes: float32(4)
memory usage: 4.1+ MB


In [27]:
# Combine oxaria 1 & 2 gases + some checks to show all is the same
#------------------------------------------------------------------
# Stack the 15-minute means of both networks (already indexed by tag, rec).
oxaria_gases_stbl15 = pd.concat([oxaria1_gases_stable_15min,
                                 oxaria2_gases_stable_15min])#.set_index(['tag','rec'])

# Stack the matching transient statistics of both networks.
oxaria_gases_stbl15_transients = pd.concat([oxaria1_gases_stable_15min_transients,
                                            oxaria2_gases_stable_15min_transients])#.set_index(['tag','rec'])

# Join means and transients on the shared (tag, rec) index. 'name' is dropped
# from the means first because the transients frame brings its own 'name'
# column; the inner join keeps only index values present in both frames.
oxaria_gases_stbl15_t = oxaria_gases_stbl15.drop('name', axis=1).merge(
    oxaria_gases_stbl15_transients, left_index=True, right_index=True, how='inner')

# Keep the last row for any repeated (tag, rec) and sort. Note this rebinds
# oxaria_gases_stbl15 from the stacked means to the merged frame.
oxaria_gases_stbl15 = oxaria_gases_stbl15_t[~oxaria_gases_stbl15_t.index.duplicated(
    keep='last')].sort_index()

# Equal row counts in the two summaries mean no duplicates were removed.
oxaria_gases_stbl15.info()
oxaria_gases_stbl15_t.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 187111 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 13 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   val.no2.wev       187111 non-null  float32
 1   val.no2.cnc       185927 non-null  float32
 2   val.no2.aev       187111 non-null  float32
 3   val.no2.wec       185927 non-null  float32
 4   val.sht.hmd       185927 non-null  float32
 5   val.sht.tmp       185927 non-null  float32
 6   exg.vb20.no2.cnc  187111 non-null  float32
 7   val.no2.cnc_1     182598 non-null  float32
 8   mag_hmd_s20       185926 non-null  float32
 9   mag_tmp_s20       185926 non-null  float32
 10  mean_hmd_s20      185926 non-null  float32
 11  mean_tmp_s20      185926 non-null  float32
 12  name              187111 non-null  object 
dtypes: float32(12), object(1)
memory usage: 10.

In [28]:
# Combine oxaria 1 & 2 pm + some checks to show all is the same
#------------------------------------------------------------------
# Stack the 15-minute means of both networks (already indexed by tag, rec).
oxaria_pm_stbl15 = pd.concat([oxaria1_pm_stable_15min,
                                 oxaria2_pm_stable_15min])

# Stack the matching transient statistics of both networks.
oxaria_pm_stbl15_transients = pd.concat([oxaria1_pm_stable_15min_transients,
                                            oxaria2_pm_stable_15min_transients])

# Join means and transients on the shared (tag, rec) index. 'name' is dropped
# from the means first because the transients frame brings its own 'name'
# column; the inner join keeps only index values present in both frames.
oxaria_pm_stbl15_t = oxaria_pm_stbl15.drop('name', axis=1).merge(
    oxaria_pm_stbl15_transients, left_index=True, right_index=True, how='inner')

# Keep the last row for any repeated (tag, rec) and sort. Note this rebinds
# oxaria_pm_stbl15 from the stacked means to the merged frame.
oxaria_pm_stbl15 = oxaria_pm_stbl15_t[~oxaria_pm_stbl15_t.index.duplicated(
    keep='last')].sort_index()

# Equal row counts in the two summaries mean no duplicates were removed.
oxaria_pm_stbl15.info()
oxaria_pm_stbl15_t.info()


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 177577 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 22 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   val.mtf1        177577 non-null  float32
 1   val.pm1         177577 non-null  float32
 2   val.mtf5        177577 non-null  float32
 3   val.pm2p5       177577 non-null  float32
 4   val.mtf3        177577 non-null  float32
 5   val.pm10        177577 non-null  float32
 6   val.mtf7        177577 non-null  float32
 7   val.per         177577 non-null  float32
 8   val.sfr         177577 non-null  float32
 9   val.sht.hmd     177577 non-null  float32
 10  val.sht.tmp     177577 non-null  float32
 11  exg.rn20.pm10   177577 non-null  float32
 12  exg.rn20.pm1    177577 non-null  float32
 13  exg.rn20.pm2p5  177577 non-null  float32
 14  val.pm10_1      124105 non-

In [29]:
# Combine oxaria 1 & 2 climate + some checks to show all is the same
#--------------------------------------------------------------------
# Stack the 15-minute means of both networks (already indexed by tag, rec).
oxaria_climate_stbl15 = pd.concat([oxaria1_climate_stable_15min,
                                 oxaria2_climate_stable_15min])

# Stack the matching transient statistics of both networks.
oxaria_climate_stbl15_transients = pd.concat([oxaria1_climate_stable_15min_transients,
                                            oxaria2_climate_stable_15min_transients])

# Join means and transients on the shared (tag, rec) index. 'name' is dropped
# from the means first because the transients frame brings its own 'name'
# column; the inner join keeps only index values present in both frames.
oxaria_climate_stbl15_t = oxaria_climate_stbl15.drop('name', axis=1).merge(
    oxaria_climate_stbl15_transients, left_index=True, right_index=True, how='inner')

# Keep the last row for any repeated (tag, rec) and sort. Note this rebinds
# oxaria_climate_stbl15 from the stacked means to the merged frame.
oxaria_climate_stbl15 = oxaria_climate_stbl15_t[~oxaria_climate_stbl15_t.index.duplicated(
    keep='last')].sort_index()

# Equal row counts in the two summaries mean no duplicates were removed.
oxaria_climate_stbl15.info()
oxaria_climate_stbl15_t.info()


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 186302 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 8 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   val.hmd        186302 non-null  float32
 1   val.tmp        186302 non-null  float32
 2   mag_hmd_s20c   186300 non-null  float32
 3   mag_tmp_s20c   186300 non-null  float32
 4   mean_hmd_s20c  186300 non-null  float32
 5   mean_tmp_s20c  186300 non-null  float32
 6   max_hmd_s20c   186300 non-null  float32
 7   name           186302 non-null  object 
dtypes: float32(7), object(1)
memory usage: 7.3+ MB
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 186302 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 8 columns):
 #   Column         Non-Null Count  

In [30]:
# Combine oxaria 1 & 2 status + some checks to show all is the same
#--------------------------------------------------------------------
# Status has no transient statistics, so the two networks are only stacked.
oxaria_status_stbl15 = pd.concat([oxaria1_status_stable_15min,
                                 oxaria2_status_stable_15min])
oxaria_status_stbl15.info()


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 188108 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 15 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   val.gps.pos:0      185099 non-null  float32
 1   val.gps.pos:1      185099 non-null  float32
 2   val.gps.elv        185099 non-null  float32
 3   val.gps.qual       183573 non-null  float32
 4   val.up.load.av15   188108 non-null  float32
 5   val.up.load.av1    188108 non-null  float32
 6   val.up.load.av5    188108 non-null  float32
 7   val.up.users       188108 non-null  float32
 8   val.psu.prot-batt  179201 non-null  float32
 9   val.psu.standby    80969 non-null   float32
 10  val.psu.in         80969 non-null   float32
 11  val.psu.pwr-in     179201 non-null  float32
 12  val.psu.host-3v3   179201 non-null  float32
 13  val.psu.batt-flt   80969 non

In [31]:
# Write some ftr files
#----------------------
# The (tag, rec) index is turned back into columns with reset_index() before
# each frame is written to folder0.
# NOTE(review): the status file name also ends in '_transients' although the
# status frame is a plain concat with no transient columns merged in.
oxaria_gases_stbl15.reset_index().to_feather(folder0 + 'oxaria_gases_stable15_oct21_transients.ftr')
oxaria_pm_stbl15.reset_index().to_feather(folder0 + 'oxaria_pm_stable15_oct21_transients.ftr')
oxaria_climate_stbl15.reset_index().to_feather(folder0 + 'oxaria_climate_stable15_oct21_transients.ftr')
oxaria_status_stbl15.reset_index().to_feather(folder0 + 'oxaria_status_stable15_oct21_transients.ftr')

# Summaries of the frames that were just written.
oxaria_gases_stbl15.info()
oxaria_pm_stbl15.info()
oxaria_climate_stbl15.info()
oxaria_status_stbl15.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 187111 entries, ('scs-bgx-536', Timestamp('2021-06-01 00:15:00+0000', tz='UTC')) to ('scs-bgx-559', Timestamp('2021-10-01 00:00:00+0000', tz='UTC'))
Data columns (total 13 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   val.no2.wev       187111 non-null  float32
 1   val.no2.cnc       185927 non-null  float32
 2   val.no2.aev       187111 non-null  float32
 3   val.no2.wec       185927 non-null  float32
 4   val.sht.hmd       185927 non-null  float32
 5   val.sht.tmp       185927 non-null  float32
 6   exg.vb20.no2.cnc  187111 non-null  float32
 7   val.no2.cnc_1     182598 non-null  float32
 8   mag_hmd_s20       185926 non-null  float32
 9   mag_tmp_s20       185926 non-null  float32
 10  mean_hmd_s20      185926 non-null  float32
 11  mean_tmp_s20      185926 non-null  float32
 12  name              187111 non-null  object 
dtypes: float32(12), object(1)
memory usage: 10.