This file generates and displays PCA plots for various data subsets.

In [None]:
# parameters
# Forwarded as the second argument to clustering.clustering() further down.
# NOTE(review): presumably the number of worker processes - confirm in the clustering module.
n_processes = 6
# Path given to unpickle_data(). The name `sscodes` is rebound to the loaded
# object in the processing cell, so re-running that cell without re-running
# this one passes the loaded data (not the path) to unpickle_data().
sscodes = 'data/formatted_sscodes.pkl'

In [None]:
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from IPython.display import display
import clustering
from tools.io import unpickle_data
from tools.pandas_mask import build_mask
%matplotlib inline

In [None]:
# Replace the path stored in `sscodes` with the object loaded from that pickle.
sscodes = unpickle_data(sscodes, False)

# Rule tuple shared by both masks: one hour either side of the closest sunset.
# NOTE(review): '[)' presumably denotes a half-open interval and the first
# element a column name - confirm against tools.pandas_mask.build_mask.
_sunset_window = ("hours_from_closest_sunset", '[)', timedelta(hours=-1), timedelta(hours=1))
_brisbane = ZoneInfo("Australia/Brisbane")

# (mask name, mask specification) pairs; each pair yields one clustering run.
masks = [
    ('sunset', {'&': {'sunset_rule': [_sunset_window]}}),
    (
        'sunset_date',
        {
            '&': {
                'sunset_rule': [_sunset_window],
                '&': [
                    ('datetime', '>=', datetime(2023, 4, 1, tzinfo=_brisbane)),
                    ('datetime', '<=', datetime(2023, 4, 3, tzinfo=_brisbane)),
                ],
            },
        },
    ),
]


def _select_rows(frame, mask_spec):
    """Return the rows of *frame* selected by *mask_spec*.

    A falsy *mask_spec* returns *frame* itself (no copy); otherwise the
    frame is indexed with ``build_mask(frame, mask_spec).mask`` and copied.
    """
    if not mask_spec:
        return frame
    return frame[build_mask(frame, mask_spec).mask].copy()


# Clustering result for each mask, keyed by mask name.
output_figs = {}
for mask_name, mask in masks:
    # Keys are prefixed with the mask name; every entry gets a fresh index.
    new_data = {
        f"{mask_name}_{name}": _select_rows(data, mask).reset_index()
        for name, data in sscodes.items()
    }

    # The return value is ignored, so this presumably converts `new_data`
    # in place - confirm in the clustering module.
    clustering.convert_sountrap_strings_to_int(new_data)
    output_figs[mask_name] = clustering.clustering(new_data, n_processes)

In [None]:
# Show every stored figure, grouped by the mask that produced it.
# Each element of a mask's result is unpacked as a (band, fig) pair.
for mask, band_figs in output_figs.items():
    for band, fig in band_figs:
        print(f"PCA plot for {band} band with {mask} mask")
        display(fig)