# Combine clouds

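This notebook combines the cloud estimates from CTIO night reports into a single tab-separated text file (wide format, one row per date) and an HDF5 file (long format, one row per date and quarter).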

## Prepare the notebook

In [1]:
from glob import glob
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

## Read and merge the files

In [2]:
# Collect the per-period cloud tables. glob() returns names in arbitrary
# (filesystem-dependent) order, so sort them to make the merge reproducible.
cloud_fnames = sorted(glob('clouds_????-??-??_to_????-??-??.txt'))
if not cloud_fnames:
    # Fail with an explicit message instead of pd.concat's
    # "No objects to concatenate".
    raise FileNotFoundError(
        "No files matching 'clouds_????-??-??_to_????-??-??.txt' in the working directory"
    )

clouds_wide_dfs = []
for fname in cloud_fnames:
    # Use a distinct name for the per-file frame so it does not shadow the
    # merged `clouds_wide` result built below.
    clouds_wide_part = pd.read_csv(fname, sep="\t")

    # pd.to_datetime assembles dates from year/month/day columns; the day
    # component comes from the "sday" column.
    clouds_wide_part["date"] = pd.to_datetime(
        clouds_wide_part.rename(columns={"year": "Year", "month": "Month", "sday": "Day"})[
            ["Year", "Month", "Day"]
        ]
    )
    clouds_wide_dfs.append(clouds_wide_part)

# A stable sort keeps rows with equal dates in concatenation (file name) order,
# so drop_duplicates (keep="first") deterministically retains the row from the
# first file, in sorted name order, that contains a given date.
clouds_wide = (
    pd.concat(clouds_wide_dfs)
    .sort_values('date', kind='mergesort')
    .drop_duplicates(subset=['date'])
    .set_index("date")
)
clouds_wide

Unnamed: 0_level_0,sday,eday,month,year,q1,q2,q3,q4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1975-01-01,1,2,1,1975,0,0,0,0
1975-01-02,2,3,1,1975,0,2,3,4
1975-01-03,3,4,1,1975,0,3,4,0
1975-01-04,4,5,1,1975,0,2,3,4
1975-01-05,5,6,1,1975,3,2,1,1
...,...,...,...,...,...,...,...,...
2022-08-31,31,1,8,2022,0,0,0,0
2022-09-01,1,2,9,2022,0,0,0,0
2022-09-02,2,3,9,2022,0,0,0,0
2022-09-03,3,4,9,2022,0,0,0,0


In [3]:
# Save the merged wide-format table as tab-separated text. index=False means
# the "date" index is not written; the sday/eday/month/year columns are.
wide_table_fname = 'clouds_ctio_blanco.txt'
clouds_wide.to_csv(wide_table_fname, sep='\t', index=False)

## Convert to "long" format

In [4]:
# Reshape the wide table (one row per date, columns q1..q4) into long format:
# one row per (date, quarter) with the value in a single "clouds" column.
clouds = (
    pd.wide_to_long(
        clouds_wide.reset_index(),
        stubnames=["q"],
        i=["date", "sday", "eday", "month", "year"],
        j="quarter",
    )
    .rename(columns={"q": "clouds"})
    # Flatten the id/quarter index back into columns so that only date and
    # quarter can be chosen as the final index.
    .reset_index()
    # `drop` is a boolean flag (default True), not a column name, so it is
    # left at its default: date and quarter move from the columns into the index.
    .set_index(["date", "quarter"])
)
clouds

Unnamed: 0_level_0,Unnamed: 1_level_0,sday,eday,month,year,clouds
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1975-01-01,1,1,2,1,1975,0
1975-01-01,2,1,2,1,1975,0
1975-01-01,3,1,2,1,1975,0
1975-01-01,4,1,2,1,1975,0
1975-01-02,1,2,3,1,1975,0
...,...,...,...,...,...,...
2022-09-03,4,3,4,9,2022,0
2022-09-04,1,4,5,9,2022,0
2022-09-04,2,4,5,9,2022,0
2022-09-04,3,4,5,9,2022,0


Replace "-1" values with 0:

In [5]:
# Overwrite every cloud value equal to -1 with 0 (column accessed by key rather
# than by attribute, since the frame and the column share the name "clouds").
is_minus_one = clouds['clouds'].eq(-1)
clouds.loc[is_minus_one, 'clouds'] = 0

Replace "9" values where known by other means:

In [6]:
# Display the rows dated strictly between 2015-08-01 and 2016-02-01 whose
# cloud value is 9. Nothing is modified here; this cell only inspects the data.
clouds.query("('2016-02-01' > date > '2015-08-01') and (clouds==9)")

Unnamed: 0_level_0,Unnamed: 1_level_0,sday,eday,month,year,clouds
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-03,1,3,4,8,2015,9
2015-08-03,2,3,4,8,2015,9
2015-08-03,3,3,4,8,2015,9
2015-08-03,4,3,4,8,2015,9
2015-08-08,1,8,9,8,2015,9
2015-08-08,2,8,9,8,2015,9
2015-08-08,3,8,9,8,2015,9
2015-08-08,4,8,9,8,2015,9
2015-08-10,1,10,11,8,2015,9
2015-08-10,2,10,11,8,2015,9


In [7]:
# Look at the cloud values of one date: select date '2015-08-03' on the first
# index level and the label range 1..5 on the quarter level.
date_and_quarters = ('2015-08-03', slice(1, 5))
clouds.loc[date_and_quarters, 'clouds']

date        quarter
2015-08-03  1          9
            2          9
            3          9
            4          9
Name: clouds, dtype: int64

### Track cloud value source

In [8]:
# Label each row with the origin of its cloud value: rows dated before
# 2014-03-30 get 'ctio', rows dated on or after it get 'blanco'.
source_switch_date = pd.to_datetime('2014-03-30')
row_dates = clouds.index.get_level_values('date')

clouds.loc[row_dates < source_switch_date, 'source'] = 'ctio'
clouds.loc[row_dates >= source_switch_date, 'source'] = 'blanco'
clouds

Unnamed: 0_level_0,Unnamed: 1_level_0,sday,eday,month,year,clouds,source
date,quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1975-01-01,1,1,2,1,1975,0,ctio
1975-01-01,2,1,2,1,1975,0,ctio
1975-01-01,3,1,2,1,1975,0,ctio
1975-01-01,4,1,2,1,1975,0,ctio
1975-01-02,1,2,3,1,1975,0,ctio
...,...,...,...,...,...,...,...
2022-09-03,4,3,4,9,2022,0,blanco
2022-09-04,1,4,5,9,2022,0,blanco
2022-09-04,2,4,5,9,2022,0,blanco
2022-09-04,3,4,5,9,2022,0,blanco


In [9]:
# Flatten the (date, quarter) index into ordinary columns before saving.
# The guard makes this cell safe to re-run: an unconditional reset_index on an
# already-flat frame would add a spurious "index" column to the saved table.
if isinstance(clouds.index, pd.MultiIndex):
    clouds = clouds.reset_index()

# Pass `key` by keyword: positional arguments after the path are deprecated
# in DataFrame.to_hdf.
clouds.to_hdf('clouds_ctio_blanco.h5', key='clouds')