In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): autoreload mode 1 only reloads modules registered with
# %aimport, and no %aimport appears in the visible cells -- confirm whether
# mode 2 (reload all modules) was intended.
%load_ext autoreload
%autoreload 1

In [2]:
import numpy as np
import pandas as pd
from astropy.time import Time

import bokeh
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure

import matplotlib as mpl
import matplotlib.pyplot as plt

import statsmodels.api as sm

# Direct Bokeh output to the notebook so the bokeh.io.show() calls below
# render their figures inline.
output_notebook()

In [3]:
# Load the per-quarter cloud records and index them by (date, quarter).
# The stored 'start_date' and 'eighths' columns are exposed as 'date' and
# 'clouds' for the rest of the notebook.
column_renames = {'start_date': 'date', 'eighths': 'clouds'}
clouds = (
    pd.read_hdf('cloud_quarters.h5', 'clouds')
    .rename(columns=column_renames)
    .reset_index()
    .set_index(['date', 'quarter'])
)
clouds.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,sday,eday,clouds,source,start,end,night_duration,start_mjd,end_mjd,center_mjd
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1975-01-01,1,1975,1,1,2,0,ctio,1975-01-02 01:00:24.798846240+00:00,1975-01-02 02:53:32.841448192+00:00,0.314261,42414.041954,42414.120519,42414.081236
1975-01-01,2,1975,1,1,2,0,ctio,1975-01-02 02:53:32.841448192+00:00,1975-01-02 04:46:40.884050144+00:00,0.314261,42414.120519,42414.199084,42414.159802
1975-01-01,3,1975,1,1,2,0,ctio,1975-01-02 04:46:40.884050144+00:00,1975-01-02 06:39:48.926692320+00:00,0.314261,42414.199084,42414.27765,42414.238367
1975-01-01,4,1975,1,1,2,0,ctio,1975-01-02 06:39:48.926692320+00:00,1975-01-02 08:32:56.969294240+00:00,0.314261,42414.27765,42414.356215,42414.316932
1975-01-02,1,1975,1,2,3,0,ctio,1975-01-03 01:00:32.232803104+00:00,1975-01-03 02:53:50.694660544+00:00,0.314744,42415.04204,42415.120726,42415.081383


In [4]:
# Number the runs of consecutive quarters that share a cloud value: a new run
# begins wherever the difference from the previous row is not zero (the first
# row, whose difference is NaN, opens run 1).
starts_new_block = clouds['clouds'].diff().ne(0)
clouds['consecutive_block'] = starts_new_block.cumsum()

# One row per run: earliest year and date, the cloud value, and the number of
# quarters the run spans.
block_summary = {'year': 'min', 'date': 'min', 'clouds': 'min', 'quarter': 'count'}
cloud_blocks = (
    clouds.reset_index()
    .groupby('consecutive_block')
    .agg(block_summary)
)

In [5]:
def to_url(block):
    """Build the CTIO night-report URL for a block of consecutive quarters.

    Looks up the 'date' recorded for `block` in the module-level
    `cloud_blocks` table and places its month, day and year in the query
    string of the sky-conditions night-report page.

    Parameters
    ----------
    block
        Index label of the row in `cloud_blocks`.

    Returns
    -------
    str
        The URL with the date encoded as MM%2FDD%2FYYYY.
    """
    block_date = Time(cloud_blocks.loc[block, 'date']).datetime
    year, month, day = block_date.year, block_date.month, block_date.day
    return f'http://www.ctio.noirlab.edu/noao/night-report-sky-conditions?field_nr_date_value%5Bvalue%5D%5Bdate%5D={month:02d}%2F{day:02d}%2F{year}'

In [6]:
# Candidate mappings from the recorded cloud value to the fraction of the
# quarter treated as clear. Both tables are kept under their own names so the
# one that is not applied stays available for comparison instead of being
# silently overwritten by a second assignment to the same name.

# Table 1 of FERMILAB-FN-1002-AE-CD
frac_clear_table1 = {
    -1: 1.0,
    0: 1.0,
    1: 0.77,
    2: 0.75,
    3: 0.69,
    4: 0.67,
    5: 0.36,
    6: 0.25,
    7: 0.0,
    8: 0.0,
    9: np.nan,
}

# Table 2 of FERMILAB-FN-1002-AE-CD, averaging 3 and 4
frac_clear_table2 = {
    -1: 1.0,
    0: 1.0,
    1: 0.68,
    2: 0.66,
    3: 0.57,
    4: 0.57,
    5: 0.29,
    6: 0.20,
    7: 0.0,
    8: 0.0,
    9: np.nan,
}

# Table 2 is the mapping used by the rest of the notebook.
frac_clear_mapping = frac_clear_table2

# Translate every quarter's cloud value into its clear fraction through
# frac_clear_mapping; values absent from the mapping become NaN.
clouds["frac_clear"] = clouds["clouds"].map(frac_clear_mapping)
clouds

Unnamed: 0_level_0,Unnamed: 1_level_0,year,month,sday,eday,clouds,source,start,end,night_duration,start_mjd,end_mjd,center_mjd,consecutive_block,frac_clear
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1975-01-01,1,1975,1,1,2,0,ctio,1975-01-02 01:00:24.798846240+00:00,1975-01-02 02:53:32.841448192+00:00,0.314261,42414.041954,42414.120519,42414.081236,1,1.0
1975-01-01,2,1975,1,1,2,0,ctio,1975-01-02 02:53:32.841448192+00:00,1975-01-02 04:46:40.884050144+00:00,0.314261,42414.120519,42414.199084,42414.159802,1,1.0
1975-01-01,3,1975,1,1,2,0,ctio,1975-01-02 04:46:40.884050144+00:00,1975-01-02 06:39:48.926692320+00:00,0.314261,42414.199084,42414.277650,42414.238367,1,1.0
1975-01-01,4,1975,1,1,2,0,ctio,1975-01-02 06:39:48.926692320+00:00,1975-01-02 08:32:56.969294240+00:00,0.314261,42414.277650,42414.356215,42414.316932,1,1.0
1975-01-02,1,1975,1,2,3,0,ctio,1975-01-03 01:00:32.232803104+00:00,1975-01-03 02:53:50.694660544+00:00,0.314744,42415.042040,42415.120726,42415.081383,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-03,4,2022,9,3,4,0,blanco,2022-09-04 07:17:47.810972160+00:00,2022-09-04 09:53:40.548976896+00:00,0.432997,59826.304026,59826.412275,59826.358150,21832,1.0
2022-09-04,1,2022,9,4,5,0,blanco,2022-09-04 23:30:40.158288128+00:00,2022-09-05 02:06:07.604255744+00:00,0.431826,59826.979631,59827.087588,59827.033610,21832,1.0
2022-09-04,2,2022,9,4,5,0,blanco,2022-09-05 02:06:07.604255744+00:00,2022-09-05 04:41:35.050263296+00:00,0.431826,59827.087588,59827.195545,59827.141566,21832,1.0
2022-09-04,3,2022,9,4,5,0,blanco,2022-09-05 04:41:35.050263296+00:00,2022-09-05 07:17:02.496230912+00:00,0.431826,59827.195545,59827.303501,59827.249523,21832,1.0


In [7]:
# Length of the record, taken between the smallest and largest 'end_mjd'
# values (in days) and converted to years of 365.24 days.
quarter_end_mjd = clouds["end_mjd"]
days_covered = quarter_end_mjd.max() - quarter_end_mjd.min()
years_covered = days_covered / 365.24
years_covered

47.67629760887712

In [8]:
# LOWESS-smooth the per-quarter clear fraction as a function of date.
# statsmodels' signature is lowess(endog, exog, ...): the response (y) comes
# first and the regressor (x) second. The clear fraction is therefore passed
# first and the date second, matching the monthly smoothing cells further down.
# The window is 1/(12*years_covered) of the rows, i.e. one month's share of
# the record.
clouds['lowess_clear'] = sm.nonparametric.lowess(
    clouds.frac_clear.values,
    clouds.reset_index()['date'].values,
    frac=1/(12*years_covered),
    return_sorted=False,
)

  res, _ = _lowess(y, x, x, np.ones_like(x),


In [9]:
# Flatten the (date, quarter) index and flag rows whose cloud value is NaN.
# NOTE(review): 'missing' counts NaN in the raw 'clouds' column -- confirm
# that this is how missing reports are encoded.
quarter_rows = clouds.reset_index()
quarter_rows["missing"] = quarter_rows["clouds"].isna()

# One row per (year, month): mean clear fraction, number of flagged quarters,
# and the earliest date in that month.
monthly_clear = (
    quarter_rows
    .groupby(["year", "month"])
    .agg({"frac_clear": "mean", "missing": "sum", "date": "min"})
    .reset_index()
    .rename(columns={"frac_clear": "clear_quarters"})
)

# Median and mean of the monthly values for each calendar month, over all years.
by_calendar_month = monthly_clear.groupby("month")["clear_quarters"]
month_median_quarters = by_calendar_month.median()
month_mean_quarters = by_calendar_month.mean()

# Index by month so the per-calendar-month statistics align on assignment.
clear_quarters = monthly_clear.set_index("month")
clear_quarters["month_median"] = month_median_quarters
clear_quarters["month_mean"] = month_mean_quarters
clear_quarters.reset_index()

Unnamed: 0,month,year,clear_quarters,missing,date,month_median,month_mean
0,1,1975,0.809435,0,1975-01-01,0.917460,0.901059
1,2,1975,0.949018,0,1975-02-01,0.925276,0.913451
2,3,1975,0.912419,0,1975-03-01,0.901331,0.876358
3,4,1975,0.802417,0,1975-04-01,0.784750,0.759467
4,5,1975,0.610645,0,1975-05-01,0.607823,0.598224
...,...,...,...,...,...,...,...
568,5,2022,0.643548,0,2022-05-01,0.607823,0.598224
569,6,2022,0.522833,0,2022-06-01,0.584542,0.599323
570,7,2022,0.653145,0,2022-07-01,0.619153,0.607379
571,8,2022,0.801210,0,2022-08-01,0.674718,0.655138


In [10]:
# Step plot of the monthly clear fraction together with the median and mean
# for the same calendar month.
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

# (column, colour, legend label) for each series, drawn in this order. The
# legend labels are what let a reader tell the three traces apart.
step_series = [
    ("clear_quarters", "blue", "Monthly mean clear fraction"),
    ("month_median", "orange", "Calendar-month median"),
    ("month_mean", "red", "Calendar-month mean"),
]
for column, color, label in step_series:
    fig.step(
        "date",
        column,
        mode="after",
        color=color,
        legend_label=label,
        source=clear_quarters,
    )

# Clicking a legend entry toggles that series, which helps where they overlap.
fig.legend.click_policy = "hide"

# One tick per year start; Bokeh datetime axes are in milliseconds since the
# epoch, hence the conversion from pandas' nanoseconds.
fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [11]:
# Number of monthly rows; the smoothing fractions used later divide by this.
clear_quarters.shape[0]

573

In [12]:
# LOWESS-smooth each monthly series against date, using a window of
# `smooth_months` rows expressed as a fraction of the table length.
smooth_months = 4
lowess4_frac = smooth_months / len(clear_quarters)

# Output column -> input column, in the order the columns are added.
lowess4_targets = {
    'lowess4_clear': 'clear_quarters',
    'lowess4_month_median': 'month_median',
    'lowess4_month_mean': 'month_mean',
}
for smoothed_column, raw_column in lowess4_targets.items():
    clear_quarters[smoothed_column] = sm.nonparametric.lowess(
        clear_quarters[raw_column].values,
        clear_quarters.reset_index()['date'].values,
        frac=lowess4_frac,
        return_sorted=False,
    )

In [13]:
# Line plot of the LOWESS-smoothed monthly clear fraction together with the
# smoothed calendar-month median and mean.
fig = bokeh.plotting.figure(title="Fraction clear", width=1200, height=500)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Fraction clear"

# (column, colour, legend label) for each series, drawn in this order. The
# legend labels are what let a reader tell the three traces apart.
line_series = [
    ("lowess4_clear", "blue", "Smoothed monthly clear fraction"),
    ("lowess4_month_median", "orange", "Smoothed calendar-month median"),
    ("lowess4_month_mean", "red", "Smoothed calendar-month mean"),
]
for column, color, label in line_series:
    fig.line(
        "date",
        column,
        color=color,
        legend_label=label,
        source=clear_quarters,
    )

# Clicking a legend entry toggles that series, which helps where they overlap.
fig.legend.click_policy = "hide"

# One tick per year start; Bokeh datetime axes are in milliseconds since the
# epoch, hence the conversion from pandas' nanoseconds.
fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = (
    pd.date_range("1975-01-01", periods=50, freq="YS").astype(int) * 10**-6
)
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)

In [14]:
# Departure of each month's clear fraction from the mean for that calendar
# month, then LOWESS-smoothed against date with a window of 6 rows expressed
# as a fraction of the table length.
clear_quarters['clear_diff'] = clear_quarters['clear_quarters'].sub(clear_quarters['month_mean'])

lowess6_frac = 6/len(clear_quarters)
clear_quarters['lowess6_clear_diff'] = sm.nonparametric.lowess(
    clear_quarters['clear_diff'].values,
    clear_quarters.reset_index()['date'].values,
    frac=lowess6_frac,
    return_sorted=False,
)

In [15]:
# Line plot of the smoothed difference between each month's clear fraction
# and the mean for that calendar month.
fig = bokeh.plotting.figure(
    title="Difference in fraction clear",
    width=1200,
    height=500,
)
fig.xaxis.axis_label = "Date"
fig.yaxis.axis_label = "Difference in fraction clear"

fig.line("date", "lowess6_clear_diff", color="blue", source=clear_quarters)

# One tick per year start; Bokeh datetime axes are in milliseconds since the
# epoch, hence the conversion from pandas' nanoseconds.
year_starts = pd.date_range("1975-01-01", periods=50, freq="YS")
fig.xaxis.formatter = bokeh.models.DatetimeTickFormatter(years=["%Y-%m-%d"])
fig.xaxis.ticker = year_starts.astype(int) * 10**-6
fig.xaxis.major_label_orientation = "vertical"

bokeh.io.show(fig)