In [None]:
## Enable IPython's autoreload extension in mode 2, so imported modules are
## reloaded automatically and edits to them are picked up without a kernel restart
%load_ext autoreload
%autoreload 2

In [None]:
from shared.config import SHARED_SETTINGS
from shared.db import get_db_uri, get_engine, get_session_pool

In [None]:
from shared.domain.weatherapi.weather.current import (CurrentWeatherIn, CurrentWeatherOut, CurrentWeatherRepository, CurrentWeatherModel, CurrentWeatherJSONIn, CurrentWeatherJSONOut, CurrentWeatherJSONModel, CurrentWeatherJSONRepository)

from shared.domain.weatherapi.location import (LocationIn, LocationJSONModel, LocationModel, LocationOut, LocationRepository)

from shared.domain.weatherapi.weather.forecast import ForecastJSONIn, ForecastJSONModel, ForecastJSONOut, ForecastJSONRepository

In [None]:
from config import SETTINGS, DB_SETTINGS

In [None]:
import pandas as pd

In [None]:
# Show the connection settings as a quick sanity check, but never render the
# password itself: cell outputs are stored in the notebook file and are easy to
# share or commit by accident. Only whether a password is set is revealed.
_db_settings_preview = {
    key: DB_SETTINGS.get(key)
    for key in ("DB_DRIVERNAME", "DB_USERNAME", "DB_HOST", "DB_PORT", "DB_DATABASE")
}
_db_settings_preview["DB_PASSWORD"] = "***" if DB_SETTINGS.get("DB_PASSWORD") else None
display(_db_settings_preview)

In [None]:
# Collect the individual connection settings first, then hand them to
# get_db_uri as keyword arguments to build the database URI.
_db_uri_kwargs = {
    "drivername": DB_SETTINGS.get("DB_DRIVERNAME"),
    "username": DB_SETTINGS.get("DB_USERNAME"),
    "password": DB_SETTINGS.get("DB_PASSWORD"),
    "host": DB_SETTINGS.get("DB_HOST"),
    "port": DB_SETTINGS.get("DB_PORT"),
    "database": DB_SETTINGS.get("DB_DATABASE"),
}
db_url = get_db_uri(**_db_uri_kwargs)

In [None]:
# Create the engine for db_url; the echo flag is read from the "db_echo" setting
# and falls back to False when that key is absent.
# NOTE(review): this key is lowercase while every other DB_SETTINGS lookup uses an
# upper-case "DB_*" name — confirm this is the intended key for the settings object.
db_engine = get_engine(url=db_url, echo=DB_SETTINGS.get("db_echo", False))

In [None]:
# Build the session factory from db_engine; later cells call SessionLocal() and
# use the result as a context manager.
SessionLocal = get_session_pool(engine=db_engine)

---

In [None]:
# Fetch everything the repository's list() returns and keep it in a notebook-level
# variable so the following cells can read it after the `with` block has exited.
with SessionLocal() as session:
    current_weather_json_repo = CurrentWeatherJSONRepository(session)
    
    all_current_weather_json_db_entities: list[CurrentWeatherJSONModel] = current_weather_json_repo.list()

In [None]:
# Show the container type and, when at least one row came back, the element type.
# Indexing [0] on an empty result would raise IndexError, so guard it.
display(type(all_current_weather_json_db_entities))
if all_current_weather_json_db_entities:
    display(type(all_current_weather_json_db_entities[0]))
else:
    display("No current weather JSON entities were returned from the database.")

In [None]:
# Peek at the attribute dict of the first entity; skip it when the result is
# empty, because indexing [0] would raise IndexError.
if all_current_weather_json_db_entities:
    display(all_current_weather_json_db_entities[0].__dict__)
else:
    display("No current weather JSON entities were returned from the database.")

In [None]:
# Reduce each entity to a plain dict with only the three fields used below.
all_current_weather_dicts = [
    {
        "id": entity.id,
        "current_weather_json": entity.current_weather_json,
        "created_at": entity.created_at,
    }
    for entity in all_current_weather_json_db_entities
]

display(len(all_current_weather_dicts))


In [None]:
# Flatten the list of dicts (including the nested weather JSON) into a DataFrame.
df = pd.json_normalize(data=all_current_weather_dicts)

# Coerce 'created_at' to datetime BEFORE sorting, so the order is chronological
# even if the values arrive as strings or mixed types (which would otherwise be
# ordered lexicographically).
df['created_at'] = pd.to_datetime(df['created_at'])
df = df.sort_values('created_at')

display(df.shape[0])
display(df.head(10))

In [None]:
# Work on a copy so the 'time_diff' helper column added in the next cell does not
# end up on df.
_delete_df = df.copy()

In [None]:
# Gap between each row's 'created_at' and the row before it (NaT for the first row).
_delete_df = _delete_df.assign(time_diff=_delete_df['created_at'].diff())

In [None]:
# Keep the full list of timestamps for later use, but only render a count and a
# short preview: dumping every timestamp floods the cell output as the table grows.
created_at_times = _delete_df['created_at'].tolist()
display(len(created_at_times))
display(created_at_times[:10])

In [None]:
## Flag a row for deletion when it was created within the window after the previous row.
## The first row has a NaT gap, compares as False, and is therefore always kept.
_duplicate_window = pd.Timedelta(seconds=30)
delete_mask = _delete_df['time_diff'].le(_duplicate_window)

# Select the flagged rows and drop the helper column in one step.
records_to_delete = _delete_df.loc[delete_mask].drop(columns=['time_diff'])

In [None]:
# Number of flagged rows, followed by a preview of the first ten.
display(len(records_to_delete), records_to_delete.head(10))

In [None]:
# IDs of the rows flagged for deletion.
delete_ids = records_to_delete['id'].tolist()
display(len(delete_ids))

# A slice never over-runs, so this shows the whole list when it holds ten or
# fewer IDs and only the first ten otherwise.
display(delete_ids[:10])

In [None]:
# Delete the flagged rows one ID at a time so that a single failure does not
# abort the remaining deletions. Results are collected in two notebook-level lists:
#   errors         - models whose deletion raised an exception
#   deleted_models - models whose deletion completed without raising
with SessionLocal() as session:
    repo = CurrentWeatherJSONRepository(session)

    errors = []
    deleted_models = []

    for _id in delete_ids:
        _model = repo.get(_id)

        if not _model:
            display(f"Could not find database entity with ID: {_id}")
            continue

        try:
            repo.delete(_model)
        except Exception as exc:
            # Report the ID that was looked up rather than reading _model.id: after a
            # failed delete the instance may be expired, and touching its attributes
            # could raise a second exception that hides the original one.
            display(f"Error deleting database entity with ID: {_id} : ({type(exc)}) {exc}")
            errors.append(_model)
            # Put the session back into a usable state so the next repo.get() in
            # this loop does not fail on the already-broken transaction.
            session.rollback()
        else:
            deleted_models.append(_model)

display(f"Deleted: {len(deleted_models)}, errors: {len(errors)}")

In [None]:
# Show the attribute dict of every model that failed to delete; an empty
# `errors` list simply produces no output.
for _e in errors:
    display(f"Errored on model:\n{_e.__dict__}")

In [None]:
# Bare expression: the notebook renders the list of models collected by the deletion loop.
deleted_models


---

In [None]:
# Re-fetch everything the repository's list() returns, overwriting the earlier
# result, so the cells below reflect the table after the deletion cell has run.
with SessionLocal() as session:
    current_weather_json_repo = CurrentWeatherJSONRepository(session)
    
    all_current_weather_json_db_entities: list[CurrentWeatherJSONModel] = current_weather_json_repo.list()

In [None]:
# Peek at the attribute dict of the first remaining entity; skip it when the
# result is empty, because indexing [0] would raise IndexError.
if all_current_weather_json_db_entities:
    display(all_current_weather_json_db_entities[0].__dict__)
else:
    display("No current weather JSON entities were returned from the database.")

In [None]:
# Reduce each re-fetched entity to a plain dict with only the three fields used below.
all_current_weather_dicts = [
    {
        "id": entity.id,
        "current_weather_json": entity.current_weather_json,
        "created_at": entity.created_at,
    }
    for entity in all_current_weather_json_db_entities
]

display(len(all_current_weather_dicts))

In [None]:
# Flatten the re-fetched dicts (including the nested weather JSON) into a DataFrame.
df = pd.json_normalize(data=all_current_weather_dicts)

# Coerce 'created_at' to datetime BEFORE sorting, so the order is chronological
# even if the values arrive as strings or mixed types (which would otherwise be
# ordered lexicographically).
df['created_at'] = pd.to_datetime(df['created_at'])
df = df.sort_values('created_at')

display(df.shape[0])
display(df.head(10))