In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os

In [None]:
import sys
# Add the parent directory to the module search path; presumably the
# project-local modules imported in the next cell live there (verify).
sys.path.append('..')

In [None]:
import mysecrets
import config
import fetch_from_cluster
import catalogmanager
import sqlite_db_utils

In [None]:
# Load the Sentinel-2 satellite catalog through the cluster helper.
# `overwrite=True` is passed on every run (presumably forcing a fresh copy
# rather than reusing a local one; confirm against fetch_from_cluster).
s2_satellite_catalog_gdf = fetch_from_cluster.load_satellite_sentinel2_catalog(
    sshcreds=mysecrets.SSH_UMD,
    satellite_folderpath=config.FOLDERPATH_SATELLITE,
    overwrite=True,
)

In [None]:
# Dimensions of the loaded catalog, shown as the cell output.
s2_satellite_catalog_gdf.shape

In [None]:
# Column labels available in the catalog.
s2_satellite_catalog_gdf.columns

In [None]:
# Inspect the raw `last_update` values before converting or plotting them.
s2_satellite_catalog_gdf['last_update']

In [None]:
# Try the `ts_to_str` helper on the first catalog entry.
# `.iloc[0]` selects by position, so this works whatever the index labels are
# (plain `[0]` is a label lookup and fails when no row is labelled 0).
sqlite_db_utils.ts_to_str(s2_satellite_catalog_gdf['last_update'].iloc[0])

In [None]:
# Histogram of `last_update` for catalog entries at or after the cutoff date.
scale = 5
aspect_ratio = 2
fig, ax = plt.subplots(figsize=(scale*aspect_ratio, scale))

# Convert the cutoff date once, using the same helper the original filter used.
last_update_cutoff = catalogmanager.dt2ts(dt=datetime.datetime(2025, 2, 1))

g = sns.histplot(
    data = s2_satellite_catalog_gdf[
        s2_satellite_catalog_gdf['last_update'] >= last_update_cutoff
    ],
    x = 'last_update',
    bins = 100,
    ax = ax,  # draw on the axes created above instead of the implicit current axes
)
# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title('Catalog entries by last_update (since 2025-02-01)');

### Convert the GeoJSON catalog to a SQLite database

In [None]:
# Translate the remote catalog path into its local counterpart by calling
# `remotepath_to_localpath` with the remote and local satellite root folders.
catalog_filepath = fetch_from_cluster.remotepath_to_localpath(
    remotepath=fetch_from_cluster.FILEPATH_SATELLITE_SENTINEL2_CATALOG,
    remote_root_path=fetch_from_cluster.FOLDERPATH_SATELLITE,
    local_root_path=config.FOLDERPATH_SATELLITE,
)
catalog_filepath

In [None]:
# Derive the SQLite path by swapping the catalog's file extension for `.db`.
# `os.path.splitext` only touches the final extension, whereas
# `str.replace('.geojson', '.db')` rewrites every occurrence in the path and
# returns the catalog path unchanged when the suffix is absent.
db_path = os.path.splitext(catalog_filepath)[0] + '.db'
# The database file at `db_path` is deleted before being recreated further
# down, so it must never point at the catalog file itself.
assert db_path != catalog_filepath, 'db_path must differ from the catalog file'
db_path

In [None]:
# Column name -> SQLite column declaration; the same layout is used for both
# catalog tables.
col_type_dict = dict([
    ('id', 'TEXT UNIQUE'),
    ('satellite', 'TEXT'),
    ('timestamp', 'TEXT'),
    ('s3url', 'TEXT'),
    ('local_folderpath', 'TEXT'),
    ('files', 'TEXT'),
    ('last_update', 'TEXT'),
    ('cloud_cover', 'REAL'),
    ('geometry', 'TEXT'),
])

# Table names; rows are routed to a table by comparing the catalog's
# `satellite` value against these strings.
s2l2a_table, s2l1c_table = 'sentinel-2-l2a', 'sentinel-2-l1c'

In [None]:
# Start from a clean slate: drop any database file left by a previous run.
if os.path.exists(db_path):
    os.remove(db_path)

# Create the database with one table per product, all sharing `col_type_dict`.
sqlite_db_utils.create_db(
    database=db_path,
    db_structure={
        table_name: col_type_dict
        for table_name in (s2l2a_table, s2l1c_table)
    },
    id_col='id',
)

In [None]:
# Populate each table with the catalog rows whose `satellite` value equals the
# table name.
for table in [s2l2a_table, s2l1c_table]:
    # Select the matching rows with a boolean mask first, so `iterrows` only
    # builds row objects for entries that are actually inserted (previously the
    # whole catalog was walked row by row once per table).
    _table_gdf = s2_satellite_catalog_gdf[
        s2_satellite_catalog_gdf['satellite'] == table
    ]
    _rows = [
        sqlite_db_utils.gpd_to_sql_row(row_dict=row)
        for _, row in _table_gdf.iterrows()
    ]

    sqlite_db_utils.insert_rows_to_db(
        database = db_path,
        table = table,
        data_dicts = _rows,
    )

    # Release the per-table buffers before handling the next table.
    del _rows, _table_gdf

In [None]:
# Read the L2A table back from the database and display it.
s2l2a_df = sqlite_db_utils.fetch_rows_from_db(database=db_path, table=s2l2a_table)
s2l2a_df

In [None]:
import numpy as np

In [None]:
# Flatten the comma-separated `files` strings into one list of file names.
files = [
    file_name
    for _files in s2l2a_df['files']
    for file_name in _files.split(',')
]

# Tally how often each distinct file name occurs.
unique_names, name_counts = np.unique(files, return_counts=True)
dict(zip(unique_names, name_counts))

In [None]:
# Rows whose `files` string does not contain 'B01.jp2'.
lacks_b01 = s2l2a_df['files'].apply(lambda files_str: 'B01.jp2' not in files_str)
s2l2a_df[lacks_b01]

In [None]:
# Display the contents of the L1C table.
sqlite_db_utils.fetch_rows_from_db(database=db_path, table=s2l1c_table)

In [None]:
# Keep the L1C table in `db_gdf` for the checks that follow.
db_gdf = sqlite_db_utils.fetch_rows_from_db(
    database=db_path,
    table=s2l1c_table,
    # columns = ['id', 'geometry']
)

In [None]:
# Count rows per `satellite` value in the fetched table.
db_gdf['satellite'].value_counts()

In [None]:
# Pick the first catalog entry as the product to download.
# `.iloc[0]` selects by position, so it does not depend on a row being
# labelled 0 in the index. (This cell previously appeared twice, verbatim.)
selected_id = s2_satellite_catalog_gdf['id'].iloc[0]
selected_id

In [None]:
# Download the selected Sentinel-2 product into the local satellite folder.
satellite_filepaths = fetch_from_cluster.download_sentinel2_satellite(
    id=selected_id,
    sshcreds=mysecrets.SSH_UMD,
    satellite_folderpath=config.FOLDERPATH_SATELLITE,
)

In [None]:
# Show what the download call returned.
satellite_filepaths