# Experiment with MDOAS data uncertainty and what we mean by reliable

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

%matplotlib inline

In [None]:
%%javascript
// Replace OutputArea.prototype._should_scroll with a function that returns
// false for every `lines` argument.
// NOTE(review): presumably this stops long cell outputs from being put in a
// scroll box; it relies on a global `IPython` object being present in the
// front end - confirm it still applies where this notebook is run.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# Inject a CSS rule that sets the `.container` element to 100% width.
# `display` and `HTML` come from the public IPython.display module; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Analysis window as YYYYMMDD strings; the frames are filtered against these below.
start, end = '20150101', '20151231'

In [None]:
# Column names applied to the downloaded CSVs.
names = ['dt', 'obs', 'err']

# One FITS observation query per (method, site) pair; only the method suffix
# (ah / ch) and the site ID differ between the four URLs.
_fits_query = 'https://fits.geonet.org.nz/observation?typeID=SO2-flux-a&methodID=mdoas-{method}&siteID={site}'
urlah1 = _fits_query.format(method='ah', site='WI301')
urlah2 = _fits_query.format(method='ah', site='WI302')
urlch1 = _fits_query.format(method='ch', site='WI301')
urlch2 = _fits_query.format(method='ch', site='WI302')

In [None]:
def _read_flux(url, column):
    """Download one observation series and return it indexed by time.

    The first row of the CSV is skipped and only its first two columns are
    kept (named 'dt' and 'obs' via ``names``). 'dt' is parsed as datetimes and
    becomes the index, labelled 'Datetime'; 'obs' is renamed to ``column``.

    Parameters
    ----------
    url : str
        Address of the CSV to read.
    column : str
        Name given to the observation column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Single-column frame with a datetime index named 'Datetime'.
    """
    # parse_dates as a plain list: the dict form used to rename the parsed
    # column is deprecated in recent pandas, so the index is named afterwards.
    frame = pd.read_csv(url, names=names, usecols=[0, 1], skiprows=1,
                        parse_dates=['dt'], index_col='dt')
    frame.index.name = 'Datetime'
    return frame.rename(columns={'obs': column})


#site 301
dfah1 = _read_flux(urlah1, 'ah')
dfch1 = _read_flux(urlch1, 'ch')
#join so that ah and ch obs at same time on same row
df1 = dfah1.join(dfch1, how='outer')

#site 302
dfah2 = _read_flux(urlah2, 'ah')
dfch2 = _read_flux(urlch2, 'ch')
#same time-aligned outer join as for site 301
df2 = dfah2.join(dfch2, how='outer')

In [None]:
# Keep only observations inside the analysis window [start, end].
# `end` is a bare date, so comparing raw timestamps against it would stop at
# midnight and drop everything recorded during the final day. Normalising each
# timestamp to midnight before the upper-bound test keeps the whole end day.
df1 = df1[(df1.index >= start) & (df1.index.normalize() <= end)]
df2 = df2[(df2.index >= start) & (df2.index.normalize() <= end)]

## All data

In [None]:
# WI301: draw both series as small unconnected dots on one axes,
# 'ah' in blue and 'ch' in red, with the y-axis limited to 0-50.
dot_style = dict(marker='o', markersize=2, linestyle='None')
pl1 = df1['ah'].plot(color='blue', figsize=(10, 3), ylim=[0, 50], **dot_style)
df1['ch'].plot(ax=pl1, color='red', **dot_style)
pl1.set_ylabel('Flux (kg/s)')
pl1.set_title('WI301, North East Point')
pl1.legend(loc='best')

In [None]:
# WI302: same layout as the WI301 figure - 'ah' in blue, 'ch' in red,
# small unconnected dots, y-axis limited to 0-50.
dot_style = dict(marker='o', markersize=2, linestyle='None')
pl2 = df2['ah'].plot(color='blue', figsize=(10, 3), ylim=[0, 50], **dot_style)
df2['ch'].plot(ax=pl2, color='red', **dot_style)
pl2.set_ylabel('Flux (kg/s)')
pl2.set_title('WI302, South Rim')
pl2.legend(loc='best')

## A single day with many observations

The number of observations per day can be checked with `df1.groupby(pd.Grouper(freq='d')).count()`. A day with many observations is 2015-12-05.

In [None]:
# Half-open window [selects, selecte): everything from the start of
# 2015-12-05 up to, but not including, the start of 2015-12-06.
selects, selecte = '20151205', '20151206'

in_day1 = (df1.index >= selects) & (df1.index < selecte)
in_day2 = (df2.index >= selects) & (df2.index < selecte)

select1 = df1[in_day1]
select2 = df2[in_day2]

In [None]:
# Give each site's two columns unique labels so the frames can be joined
# side by side without name clashes.
for frame, labels in ((select1, ['neah', 'nech']), (select2, ['srah', 'srch'])):
    frame.columns = labels

In [None]:
# Outer join on the index: one row per timestamp present in either frame,
# with all four relabelled columns side by side.
selall = select1.join(other=select2, how='outer')

In [None]:
# Summary statistics for each column of the joined single-day frame.
selall.describe()

In [None]:
# Box plot of every column, with the mean marked and the whiskers drawn at
# the 5th and 95th percentiles.
whisker_percentiles = [5, 95]
bp = selall.boxplot(showmeans=True, whis=whisker_percentiles)
bp.set_ylabel('Flux (kg/s)')
bp.set_title('2015-12-05(5-95 whiskers)')

In [None]:
# Violin plot of each column of the selected day; cut=0 is passed so the
# violins are not extended past the data.
fig, ax = plt.subplots()
sns.violinplot(data=selall, ax=ax, cut=0, palette='muted')
# The selection is 2015-12-05 (see `selects`); the title previously said 2010.
ax.set_title('2015-12-05')
ax.set_xlabel('observations')
ax.set_ylabel('flux (kg/s)')

In [None]:
# Strip plot showing the individual observations in each column of the
# selected day.
fig, ax = plt.subplots()
sns.stripplot(data=selall, ax=ax, palette='muted')
# The selection is 2015-12-05 (see `selects`); the title previously said 2010.
ax.set_title('2015-12-05')
ax.set_xlabel('observations')
ax.set_ylabel('flux (kg/s)')

In [None]:
# Bar-style categorical plot with one bar per column of the selected day.
sns.catplot(
    kind='bar',
    data=selall,
)

In [None]:
# Load seaborn's "fmri" example dataset and draw signal against timepoint as
# a line plot.
# NOTE(review): this cell uses none of the frames built in this notebook; it
# looks like a leftover line-plot experiment - confirm whether it can go.
fmri = sns.load_dataset("fmri")
sns.relplot(x="timepoint", y="signal", kind="line", data=fmri);

In [None]:
#site 301, reloaded with the timestamps kept as ordinary columns
#(dtah / dtch) rather than as the index
dfah1 = (
    pd.read_csv(urlah1, names=names, usecols=[0, 1], skiprows=1, parse_dates=['dt'])
    .rename(columns={"dt": "dtah", "obs": "ah"})
)
dfch1 = (
    pd.read_csv(urlch1, names=names, usecols=[0, 1], skiprows=1, parse_dates=['dt'])
    .rename(columns={"dt": "dtch", "obs": "ch"})
)

# Restrict each series to the analysis window. `end` is a bare date, so the
# timestamps are normalised to midnight for the upper-bound test; otherwise
# everything recorded during the final day would be dropped.
dfah1 = dfah1[(dfah1['dtah'] >= start) & (dfah1['dtah'].dt.normalize() <= end)]
dfch1 = dfch1[(dfch1['dtch'] >= start) & (dfch1['dtch'].dt.normalize() <= end)]

# Put the two series side by side. NOTE: this join matches on the frames'
# integer row labels (each row's position in its own download), not on the
# timestamps, so 'ah' and 'ch' on one row share a time only if both downloads
# list the same times in the same order. Each series keeps its own time
# column (dtah / dtch) for that reason.
df1 = dfah1.join(dfch1, how='outer')

In [None]:
# Preview the first rows of the re-joined site 301 frame.
df1.head()

In [None]:
# Summary statistics of the re-joined site 301 frame.
df1.describe()

In [None]:
import datetime as dt

# For each series, add a 'day<suffix>' column holding the calendar date taken
# from the matching 'dt<suffix>' timestamp column (dayah from dtah, daych
# from dtch).
for suffix in ('ah', 'ch'):
    df1['day' + suffix] = df1['dt' + suffix].dt.date

In [None]:
# Calendar of every day in 2015, stored twice under the same column names
# used for the per-series day columns (dayah / daych).
alldays = pd.date_range(start='20150101', end='20151231')
days = pd.DataFrame({'dayah': alldays, 'daych': alldays})

In [None]:
# Preview the first rows of the daily calendar frame.
days.head()

In [None]:
# Add empty (all-NaN) 'ah' and 'ch' columns to the calendar frame.
for flux_column in ('ah', 'ch'):
    days[flux_column] = np.nan

In [None]:
# Preview the calendar frame again, now with the NaN-filled ah / ch columns.
days.head()

In [None]:
# Preview the last rows of the site 301 frame.
df1.tail()

In [None]:
# Bare attribute access: this only displays the bound DataFrame.join method,
# nothing is called or joined.
# NOTE(review): looks like an unfinished step - confirm what was meant to be
# joined here.
df1.join

In [None]:
# Line plot of the 'ah' values against their day column for site 301.
sns.relplot(
    data=df1,
    x='dayah',
    y='ah',
    kind='line',
    height=5,
    aspect=3,
)

In [None]:
# Line plot of the 'ch' values against their day column for site 301.
sns.relplot(
    data=df1,
    x='daych',
    y='ch',
    kind='line',
    height=5,
    aspect=3,
)