In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 1

In [2]:
import numpy as np
import pandas as pd
from astropy.time import Time
from datetime import timezone

import bokeh
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure

import matplotlib as mpl
import matplotlib.pyplot as plt

import statsmodels.api as sm

import tarfile
from pathlib import Path
import hashlib
import sqlite3

output_notebook()

In [3]:
clouds = pd.read_hdf('cloud_quarters.h5', 'clouds').rename(columns={'start_date': 'date', 'eighths': 'clouds'}).reset_index().set_index(['date', 'quarter'])
clouds.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,sday,eday,clouds,source,start,end,night_duration,start_mjd,end_mjd,center_mjd
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1975-01-01,1,1975,1,1,2,0,ctio,1975-01-02 01:00:24.798846240+00:00,1975-01-02 02:53:32.841448192+00:00,0.314261,42414.041954,42414.120519,42414.081236
1975-01-01,2,1975,1,1,2,0,ctio,1975-01-02 02:53:32.841448192+00:00,1975-01-02 04:46:40.884050144+00:00,0.314261,42414.120519,42414.199084,42414.159802
1975-01-01,3,1975,1,1,2,0,ctio,1975-01-02 04:46:40.884050144+00:00,1975-01-02 06:39:48.926692320+00:00,0.314261,42414.199084,42414.27765,42414.238367
1975-01-01,4,1975,1,1,2,0,ctio,1975-01-02 06:39:48.926692320+00:00,1975-01-02 08:32:56.969294240+00:00,0.314261,42414.27765,42414.356215,42414.316932
1975-01-02,1,1975,1,2,3,0,ctio,1975-01-03 01:00:32.232803104+00:00,1975-01-03 02:53:50.694660544+00:00,0.314744,42415.04204,42415.120726,42415.081383


In [4]:
clouds['consecutive_block'] = clouds['clouds'].diff().ne(0).cumsum()
cloud_blocks = clouds.reset_index().groupby('consecutive_block').agg({'year': 'min', 'date': 'min', 'clouds': 'min', 'quarter': 'count'})

In [5]:
def to_url(block):
    iso_time = cloud_blocks.loc[block, 'date']
    t = Time(iso_time)
    year = t.datetime.year
    month = t.datetime.month
    day = t.datetime.day
    url = f'http://www.ctio.noirlab.edu/noao/night-report-sky-conditions?field_nr_date_value%5Bvalue%5D%5Bdate%5D={month:02d}%2F{day:02d}%2F{year}'
    return url

In [6]:
# Table 1 of FERMILAB-FN-1002-AE-CD
frac_clear_mapping = {
    -1: 1.0,
    0: 1.0,
    1: 0.77,
    2: 0.75,
    3: 0.69,
    4: 0.67,
    5: 0.36,
    6: 0.25,
    7: 0.0,
    8: 0.0,
    9: np.nan,
}

# Table 2 of FERMILAB-FN-1002-AE-CD, averaging 3 and 4
frac_clear_mapping = {
    -1: 1.0,
    0: 1.0,
    1: 0.68,
    2: 0.66,
    3: 0.57,
    4: 0.57,
    5: 0.29,
    6: 0.20,
    7: 0.0,
    8: 0.0,
    9: np.nan,
}

# Do what opsim does
cloud_limit = 0.3
frac_clear_mapping = {-1: 1.0, 9: np.nan}
for eighths in np.arange(9):
    if (eighths/8.0) < cloud_limit:
        frac_clear_mapping[eighths] = 1.0
    else:
        frac_clear_mapping[eighths] = 0.0

clouds["frac_clear"] = clouds.clouds.map(frac_clear_mapping)
display(frac_clear_mapping)
clouds

{-1: 1.0,
 9: nan,
 0: 1.0,
 1: 1.0,
 2: 1.0,
 3: 0.0,
 4: 0.0,
 5: 0.0,
 6: 0.0,
 7: 0.0,
 8: 0.0}

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,sday,eday,clouds,source,start,end,night_duration,start_mjd,end_mjd,center_mjd,consecutive_block,frac_clear
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1975-01-01,1,1975,1,1,2,0,ctio,1975-01-02 01:00:24.798846240+00:00,1975-01-02 02:53:32.841448192+00:00,0.314261,42414.041954,42414.120519,42414.081236,1,1.0
1975-01-01,2,1975,1,1,2,0,ctio,1975-01-02 02:53:32.841448192+00:00,1975-01-02 04:46:40.884050144+00:00,0.314261,42414.120519,42414.199084,42414.159802,1,1.0
1975-01-01,3,1975,1,1,2,0,ctio,1975-01-02 04:46:40.884050144+00:00,1975-01-02 06:39:48.926692320+00:00,0.314261,42414.199084,42414.277650,42414.238367,1,1.0
1975-01-01,4,1975,1,1,2,0,ctio,1975-01-02 06:39:48.926692320+00:00,1975-01-02 08:32:56.969294240+00:00,0.314261,42414.277650,42414.356215,42414.316932,1,1.0
1975-01-02,1,1975,1,2,3,0,ctio,1975-01-03 01:00:32.232803104+00:00,1975-01-03 02:53:50.694660544+00:00,0.314744,42415.042040,42415.120726,42415.081383,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-03,4,2022,9,3,4,0,blanco,2022-09-04 07:17:47.810972160+00:00,2022-09-04 09:53:40.548976896+00:00,0.432997,59826.304026,59826.412275,59826.358150,21832,1.0
2022-09-04,1,2022,9,4,5,0,blanco,2022-09-04 23:30:40.158288128+00:00,2022-09-05 02:06:07.604255744+00:00,0.431826,59826.979631,59827.087588,59827.033610,21832,1.0
2022-09-04,2,2022,9,4,5,0,blanco,2022-09-05 02:06:07.604255744+00:00,2022-09-05 04:41:35.050263296+00:00,0.431826,59827.087588,59827.195545,59827.141566,21832,1.0
2022-09-04,3,2022,9,4,5,0,blanco,2022-09-05 04:41:35.050263296+00:00,2022-09-05 07:17:02.496230912+00:00,0.431826,59827.195545,59827.303501,59827.249523,21832,1.0


In [7]:
days_covered = (clouds.end_mjd.max()-clouds.end_mjd.min())
years_covered = days_covered/365.24
years_covered

47.67629760887712

In [8]:
clouds['lowess_clear'] = sm.nonparametric.lowess(clouds.reset_index()['date'].values, clouds.frac_clear.values, frac=1/(12*years_covered), return_sorted=False)

  res, _ = _lowess(y, x, x, np.ones_like(x),


In [9]:
clear_quarters = (
    clouds.reset_index()
    .assign(missing = clouds.reset_index().clouds.isna())
    .groupby(["year", "month"])
    .agg({"frac_clear": "mean", "missing": "sum", "date": "min"})
    .reset_index()
    .rename(columns={"frac_clear": "clear_quarters"})
)

month_median_quarters = (
    clear_quarters.groupby("month")["clear_quarters"].median()
)
month_mean_quarters = (
    clear_quarters.groupby("month")["clear_quarters"].mean()
)
clear_quarters.set_index("month", inplace=True)
clear_quarters["month_median"] = month_median_quarters
clear_quarters["month_mean"] = month_mean_quarters
clear_quarters.reset_index()

Unnamed: 0,month,year,clear_quarters,missing,date,month_median,month_mean
0,1,1975,0.758065,0,1975-01-01,0.923387,0.901271
1,2,1975,0.937500,0,1975-02-01,0.946429,0.920733
2,3,1975,0.887097,0,1975-03-01,0.903226,0.875336
3,4,1975,0.816667,0,1975-04-01,0.775000,0.756250
4,5,1975,0.588710,0,1975-05-01,0.584677,0.577957
...,...,...,...,...,...,...,...
568,5,2022,0.661290,0,2022-05-01,0.584677,0.577957
569,6,2022,0.475000,0,2022-06-01,0.550000,0.570486
570,7,2022,0.677419,0,2022-07-01,0.568548,0.579805
571,8,2022,0.782258,0,2022-08-01,0.653226,0.640457


In [10]:
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

fig.step(
    "date",
    "clear_quarters",
    mode="after",
    color="blue",
    source=clear_quarters,
)

fig.step(
    "date",
    "month_median",
    mode="after",
    color="orange",
    source=clear_quarters,
)

fig.step(
    "date",
    "month_mean",
    mode="after",
    color="red",
    source=clear_quarters,
)

fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [11]:
len(clear_quarters)

573

In [12]:
smooth_months = 4
clear_quarters['lowess4_clear'] = sm.nonparametric.lowess(clear_quarters.clear_quarters.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)
clear_quarters['lowess4_month_median'] = sm.nonparametric.lowess(clear_quarters.month_median.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)
clear_quarters['lowess4_month_mean'] = sm.nonparametric.lowess(clear_quarters.month_mean.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)

In [13]:
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

fig.line(
    "date",
    "lowess4_clear",
    color="blue",
    source=clear_quarters,
)

fig.line(
    "date",
    "lowess4_month_median",
    color="orange",
    source=clear_quarters,
)

fig.line(
    "date",
    "lowess4_month_mean",
    color="red",
    source=clear_quarters,
)

fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [14]:
clear_quarters['clear_diff'] = clear_quarters['clear_quarters'] - clear_quarters['month_mean']
clear_quarters['lowess6_clear_diff'] = sm.nonparametric.lowess(clear_quarters.clear_diff.values, clear_quarters.reset_index()['date'].values, frac=6/len(clear_quarters), return_sorted=False)

In [15]:
fig = bokeh.plotting.figure(title="Difference in fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Difference in fraction clear"

fig.line(
    "date",
    "lowess6_clear_diff",
    color="blue",
    source=clear_quarters,
)

fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [16]:
clouds.source.unique()

array(['ctio', 'stochastic_matrix', 'blanco', 'satellite'], dtype=object)

In [17]:
clouds['nsim'] = 0
clouds.loc[clouds.source == 'stochastic_matrix', 'nsim'] = 1
clouds.groupby('year')['nsim'].sum()

year
1975     13
1976      2
1977      7
1978      4
1979      4
1980      4
1981     16
1982     16
1983     12
1984     28
1985     12
1986     20
1987      4
1988      8
1989      4
1990    240
1991     12
1992    124
1993    128
1994      9
1995     25
1996    132
1997      8
1998      4
1999     12
2000      4
2001    128
2002     12
2003      4
2004     57
2005     92
2006      8
2007     12
2008      4
2009      8
2010     16
2011      4
2012     52
2013     40
2014    197
2015    149
2016    133
2017    164
2018    216
2019     60
2020    828
2021      8
2022     45
Name: nsim, dtype: int64

In [18]:
clouds.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,sday,eday,clouds,source,start,end,night_duration,start_mjd,end_mjd,center_mjd,consecutive_block,frac_clear,lowess_clear,nsim
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1975-01-01,1,1975,1,1,2,0,ctio,1975-01-02 01:00:24.798846240+00:00,1975-01-02 02:53:32.841448192+00:00,0.314261,42414.041954,42414.120519,42414.081236,1,1.0,1.211501e+18,0
1975-01-01,2,1975,1,1,2,0,ctio,1975-01-02 02:53:32.841448192+00:00,1975-01-02 04:46:40.884050144+00:00,0.314261,42414.120519,42414.199084,42414.159802,1,1.0,1.211501e+18,0
1975-01-01,3,1975,1,1,2,0,ctio,1975-01-02 04:46:40.884050144+00:00,1975-01-02 06:39:48.926692320+00:00,0.314261,42414.199084,42414.27765,42414.238367,1,1.0,1.211501e+18,0
1975-01-01,4,1975,1,1,2,0,ctio,1975-01-02 06:39:48.926692320+00:00,1975-01-02 08:32:56.969294240+00:00,0.314261,42414.27765,42414.356215,42414.316932,1,1.0,1.211501e+18,0
1975-01-02,1,1975,1,2,3,0,ctio,1975-01-03 01:00:32.232803104+00:00,1975-01-03 02:53:50.694660544+00:00,0.314744,42415.04204,42415.120726,42415.081383,1,1.0,1.211501e+18,0


# Compare with old rubin_sim cloud database

Extract the old clouds data from its tar file.

In [19]:
OLD_DATA_PATH = Path('old_data')
SITE_MODELS_TARFILE = '/sdf/group/rubin/web_data/sim-data/rubin_sim_data/site_models_may_2021.tgz'


if not OLD_DATA_PATH.exists():
    OLD_DATA_PATH.mkdir()
    
old_clouds_file = OLD_DATA_PATH.joinpath('site_models', 'cloud.db')
if not old_clouds_file.exists():
    tf = tarfile.open(SITE_MODELS_TARFILE, mode='r')
    tf.extract('site_models/cloud.db', path=OLD_DATA_PATH)
    tf.close()
    
assert hashlib.md5(open(old_clouds_file,'rb').read()).hexdigest() == '821853c746134b43f8d6f8e19db6f759'

In [20]:
!find old_data

old_data
old_data/site_models
old_data/site_models/cloud.db


In [21]:
with sqlite3.connect(old_clouds_file) as connection:
    old_clouds = pd.read_sql_query('SELECT cloudId, c_date, CAST(ROUND(cloud*8) AS INT) AS cloud FROM cloud', connection, index_col='cloudId')
    old_clouds['old_cloud_date'] = pd.Timestamp('1975-01-01T00:00:00Z') + pd.to_timedelta(old_clouds.c_date, unit='s')
old_clouds

Unnamed: 0_level_0,c_date,cloud,old_cloud_date
cloudId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,1975-01-01 00:00:00+00:00
1,9971,0,1975-01-01 02:46:11+00:00
2,14759,0,1975-01-01 04:05:59+00:00
3,19547,0,1975-01-01 05:25:47+00:00
4,24335,0,1975-01-01 06:45:35+00:00
...,...,...,...
29196,630571434,0,1994-12-25 06:43:54+00:00
29197,630643555,0,1994-12-26 02:45:55+00:00
29198,630648330,0,1994-12-26 04:05:30+00:00
29199,630653104,0,1994-12-26 05:25:04+00:00


Match clouds from old database to quarters.

In [22]:
matching_dates = (old_clouds.old_cloud_date.values[:, None] > clouds.start.values) & (old_clouds.old_cloud_date.values[:, None] <= clouds.end.values)

old_cloud_i, cloud_i = np.where(matching_dates)

matched_clouds = clouds.iloc[cloud_i, :].copy()
matched_clouds['old_date'] = old_clouds.iloc[old_cloud_i, :]['old_cloud_date'].values
matched_clouds['old_clouds'] = old_clouds.iloc[old_cloud_i, :]['cloud'].values
matched_clouds['old_date'] = matched_clouds['old_date'].dt.tz_localize(timezone.utc)
clouds['old_date'] = matched_clouds['old_date']
clouds['old_clouds'] = matched_clouds['old_clouds'].astype(int)
del matching_dates
del matched_clouds
clouds["old_frac_clear"] = clouds.old_clouds.map(frac_clear_mapping)
clouds.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,sday,eday,clouds,source,start,end,night_duration,start_mjd,end_mjd,center_mjd,consecutive_block,frac_clear,lowess_clear,nsim,old_date,old_clouds,old_frac_clear
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1975-01-01,1,1975,1,1,2,0,ctio,1975-01-02 01:00:24.798846240+00:00,1975-01-02 02:53:32.841448192+00:00,0.314261,42414.041954,42414.120519,42414.081236,1,1.0,1.211501e+18,0,1975-01-02 02:46:22+00:00,0.0,1.0
1975-01-01,2,1975,1,1,2,0,ctio,1975-01-02 02:53:32.841448192+00:00,1975-01-02 04:46:40.884050144+00:00,0.314261,42414.120519,42414.199084,42414.159802,1,1.0,1.211501e+18,0,1975-01-02 04:06:22+00:00,0.0,1.0
1975-01-01,3,1975,1,1,2,0,ctio,1975-01-02 04:46:40.884050144+00:00,1975-01-02 06:39:48.926692320+00:00,0.314261,42414.199084,42414.27765,42414.238367,1,1.0,1.211501e+18,0,1975-01-02 05:26:23+00:00,0.0,1.0
1975-01-01,4,1975,1,1,2,0,ctio,1975-01-02 06:39:48.926692320+00:00,1975-01-02 08:32:56.969294240+00:00,0.314261,42414.27765,42414.356215,42414.316932,1,1.0,1.211501e+18,0,1975-01-02 06:46:23+00:00,0.0,1.0
1975-01-02,1,1975,1,2,3,0,ctio,1975-01-03 01:00:32.232803104+00:00,1975-01-03 02:53:50.694660544+00:00,0.314744,42415.04204,42415.120726,42415.081383,1,1.0,1.211501e+18,0,1975-01-03 02:46:31+00:00,0.0,1.0


In [23]:
clouds.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,sday,eday,clouds,source,start,end,night_duration,start_mjd,end_mjd,center_mjd,consecutive_block,frac_clear,lowess_clear,nsim,old_date,old_clouds,old_frac_clear
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2022-09-03,4,2022,9,3,4,0,blanco,2022-09-04 07:17:47.810972160+00:00,2022-09-04 09:53:40.548976896+00:00,0.432997,59826.304026,59826.412275,59826.35815,21832,1.0,1.211501e+18,0,NaT,,
2022-09-04,1,2022,9,4,5,0,blanco,2022-09-04 23:30:40.158288128+00:00,2022-09-05 02:06:07.604255744+00:00,0.431826,59826.979631,59827.087588,59827.03361,21832,1.0,1.211501e+18,0,NaT,,
2022-09-04,2,2022,9,4,5,0,blanco,2022-09-05 02:06:07.604255744+00:00,2022-09-05 04:41:35.050263296+00:00,0.431826,59827.087588,59827.195545,59827.141566,21832,1.0,1.211501e+18,0,NaT,,
2022-09-04,3,2022,9,4,5,0,blanco,2022-09-05 04:41:35.050263296+00:00,2022-09-05 07:17:02.496230912+00:00,0.431826,59827.195545,59827.303501,59827.249523,21832,1.0,1.211501e+18,0,NaT,,
2022-09-04,4,2022,9,4,5,0,blanco,2022-09-05 07:17:02.496230912+00:00,2022-09-05 09:52:29.942198272+00:00,0.431826,59827.303501,59827.411458,59827.357479,21832,1.0,1.211501e+18,0,NaT,,


In [24]:
old_clear_quarters = (
    clouds.reset_index()
    .assign(old_missing = clouds.reset_index().old_clouds.isna())
    .groupby(["year", "month"])
    .agg({"old_frac_clear": "mean", "old_missing": "sum", "old_date": "min"})
    .reset_index()
    .rename(columns={"old_frac_clear": "old_clear_quarters"})
)

old_month_median_quarters = (
    old_clear_quarters.groupby("month")["old_clear_quarters"].median()
)
old_month_mean_quarters = (
    old_clear_quarters.groupby("month")["old_clear_quarters"].mean()
)
clear_quarters["old_month_median"] = old_month_median_quarters
clear_quarters["old_month_mean"] = old_month_mean_quarters
clear_quarters = clear_quarters.reset_index().set_index(['month', 'year'])
clear_quarters["old_clear_quarters"] = old_clear_quarters.reset_index().set_index(['month', 'year'])["old_clear_quarters"]
clear_quarters = clear_quarters.reset_index().set_index("month")
clear_quarters

Unnamed: 0_level_0,year,clear_quarters,missing,date,month_median,month_mean,lowess4_clear,lowess4_month_median,lowess4_month_mean,clear_diff,lowess6_clear_diff,old_month_median,old_month_mean,old_clear_quarters
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,1975,0.758065,0,1975-01-01,0.923387,0.901271,0.781017,0.930560,0.907616,-0.143207,-0.102026,0.955645,0.933065,0.959677
2,1975,0.937500,0,1975-02-01,0.946429,0.920733,0.868528,0.927055,0.901072,0.016767,-0.042674,0.924107,0.895074,1.000000
3,1975,0.887097,0,1975-03-01,0.903226,0.875336,0.883351,0.881143,0.856162,0.011761,0.007955,0.862903,0.832258,0.943548
4,1975,0.816667,0,1975-04-01,0.775000,0.756250,0.791961,0.758678,0.756250,0.060417,0.039968,0.758333,0.736250,0.775000
5,1975,0.588710,0,1975-05-01,0.584677,0.577957,0.778094,0.639941,0.664891,0.010753,0.071217,0.528226,0.521774,0.709677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,2022,0.661290,0,2022-05-01,0.584677,0.577957,0.680082,0.639941,0.664891,0.083333,0.096696,0.528226,0.521774,
6,2022,0.475000,0,2022-06-01,0.550000,0.570486,0.628350,0.563127,0.570486,-0.095486,0.092381,0.454167,0.479167,
7,2022,0.677419,0,2022-07-01,0.568548,0.579805,0.656253,0.587051,0.594755,0.097614,0.109758,0.584677,0.591129,
8,2022,0.782258,0,2022-08-01,0.653226,0.640457,0.814670,0.632694,0.631657,0.141801,0.194038,0.604839,0.611694,


In [25]:
smooth_months = 4
clear_quarters['old_lowess4_clear'] = sm.nonparametric.lowess(clear_quarters.old_clear_quarters.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters.query('old_clear_quarters.notna()')), return_sorted=False)
clear_quarters['old_lowess4_month_median'] = sm.nonparametric.lowess(clear_quarters.old_month_median.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)
clear_quarters['old_lowess4_month_mean'] = sm.nonparametric.lowess(clear_quarters.old_month_mean.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)

In [26]:
clear_quarters

Unnamed: 0_level_0,year,clear_quarters,missing,date,month_median,month_mean,lowess4_clear,lowess4_month_median,lowess4_month_mean,clear_diff,lowess6_clear_diff,old_month_median,old_month_mean,old_clear_quarters,old_lowess4_clear,old_lowess4_month_median,old_lowess4_month_mean
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,1975,0.758065,0,1975-01-01,0.923387,0.901271,0.781017,0.930560,0.907616,-0.143207,-0.102026,0.955645,0.933065,0.959677,0.970300,0.959429,0.936419
2,1975,0.937500,0,1975-02-01,0.946429,0.920733,0.868528,0.927055,0.901072,0.016767,-0.042674,0.924107,0.895074,1.000000,0.971877,0.914205,0.886481
3,1975,0.887097,0,1975-03-01,0.903226,0.875336,0.883351,0.881143,0.856162,0.011761,0.007955,0.862903,0.832258,0.943548,0.914716,0.854182,0.825343
4,1975,0.816667,0,1975-04-01,0.775000,0.756250,0.791961,0.758678,0.756250,0.060417,0.039968,0.758333,0.736250,0.775000,0.803144,0.717350,0.703738
5,1975,0.588710,0,1975-05-01,0.584677,0.577957,0.778094,0.639941,0.664891,0.010753,0.071217,0.528226,0.521774,0.709677,0.709677,0.528226,0.578259
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,2022,0.661290,0,2022-05-01,0.584677,0.577957,0.680082,0.639941,0.664891,0.083333,0.096696,0.528226,0.521774,,,0.528226,0.578259
6,2022,0.475000,0,2022-06-01,0.550000,0.570486,0.628350,0.563127,0.570486,-0.095486,0.092381,0.454167,0.479167,,,0.556914,0.523576
7,2022,0.677419,0,2022-07-01,0.568548,0.579805,0.656253,0.587051,0.594755,0.097614,0.109758,0.584677,0.591129,,,0.584677,0.565335
8,2022,0.782258,0,2022-08-01,0.653226,0.640457,0.814670,0.632694,0.631657,0.141801,0.194038,0.604839,0.611694,,,0.592054,0.603181


In [27]:
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

fig.line(
    "date",
    "lowess4_clear",
    color="blue",
    source=clear_quarters,
)

fig.line(
    "date",
    "old_lowess4_clear",
    color="green",
    source=clear_quarters,
)

fig.line(
    "date",
    "old_lowess4_month_mean",
    color="red",
    source=clear_quarters,
)

fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [28]:
clear_quarters['lowess4_diff'] = clear_quarters['lowess4_clear'] - clear_quarters['lowess4_month_mean']
clear_quarters['lowess4_old_diff'] = clear_quarters['old_lowess4_clear'] - clear_quarters['lowess4_month_mean']

In [29]:
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

fig.line(
    "date",
    "lowess4_diff",
    color="blue",
    source=clear_quarters,
)

fig.line(
    "date",
    "lowess4_old_diff",
    color="red",
    source=clear_quarters,
)


fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [30]:
smooth_months = 12
clear_quarters['lowess12_clear'] = sm.nonparametric.lowess(clear_quarters.clear_quarters.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)
clear_quarters['lowess12_month_mean'] = sm.nonparametric.lowess(clear_quarters.month_mean.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)
clear_quarters['old_lowess12_clear'] = sm.nonparametric.lowess(clear_quarters.old_clear_quarters.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters.query('old_clear_quarters.notna()')), return_sorted=False)
clear_quarters['old_lowess12_month_median'] = sm.nonparametric.lowess(clear_quarters.old_month_median.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)
clear_quarters['old_lowess12_month_mean'] = sm.nonparametric.lowess(clear_quarters.old_month_mean.values, clear_quarters.reset_index()['date'].values, frac=smooth_months/len(clear_quarters), return_sorted=False)

In [31]:
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

fig.line(
    "date",
    "lowess12_clear",
    color="blue",
    source=clear_quarters,
)

fig.line(
    "date",
    "old_lowess12_clear",
    color="green",
    source=clear_quarters,
)

fig.line(
    "date",
    "old_lowess12_month_mean",
    color="red",
    source=clear_quarters,
)

fig.line(
    "date",
    "lowess12_month_mean",
    color="orange",
    source=clear_quarters,
)

fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [32]:
clear_quarters['lowess12_diff'] = clear_quarters['lowess12_clear'] - clear_quarters['lowess12_month_mean']
clear_quarters['lowess12_old_diff'] = clear_quarters['old_lowess12_clear'] - clear_quarters['lowess12_month_mean']

In [33]:
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

fig.line(
    "date",
    "lowess12_diff",
    color="blue",
    source=clear_quarters,
)

fig.line(
    "date",
    "lowess12_old_diff",
    color="red",
    source=clear_quarters,
)


fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)