In [1]:
## Set notebook to auto reload updated modules
%load_ext autoreload
%autoreload 2

In [2]:
from shared.config import SHARED_SETTINGS
from shared.db import get_db_uri, get_engine, get_session_pool

In [3]:
from shared.domain.weatherapi.weather.current import (CurrentWeatherIn, CurrentWeatherOut, CurrentWeatherRepository, CurrentWeatherModel, CurrentWeatherJSONIn, CurrentWeatherJSONOut, CurrentWeatherJSONModel, CurrentWeatherJSONRepository)

from shared.domain.weatherapi.location import (LocationIn, LocationJSONModel, LocationModel, LocationOut, LocationRepository)

from shared.domain.weatherapi.weather.forecast import ForecastJSONIn, ForecastJSONModel, ForecastJSONOut, ForecastJSONRepository

In [4]:
from config import SETTINGS, DB_SETTINGS

In [5]:
import pandas as pd
import duckdb

In [6]:
display(DB_SETTINGS)

<Box: {'db_password': '', 'db_type': 'sqlite', 'db_drivername': 'sqlite+pysqlite', 'db_username': '', 'db_host': '', 'db_port': '', 'db_database': '/home/jack/git/Python/Projects/theweather/collectors/weatherapi-collector/.db/weatherapi-collector.db', 'db_echo': False}>

In [7]:
db_url = get_db_uri(
    drivername=DB_SETTINGS.get("DB_DRIVERNAME"),
    username=DB_SETTINGS.get("DB_USERNAME"),
    password=DB_SETTINGS.get("DB_PASSWORD"),
    host=DB_SETTINGS.get("DB_HOST"),
    port=DB_SETTINGS.get("DB_PORT"),
    database=DB_SETTINGS.get("DB_DATABASE")
)

In [8]:
db_engine = get_engine(url=db_url, echo=DB_SETTINGS.get("db_echo", False))

In [9]:
SessionLocal = get_session_pool(engine=db_engine)

---

In [10]:
weatherapi_current_json_df = pd.read_sql_table("current_weather_response", con=db_engine)
forecast_response_df = pd.read_sql_table("forecast_response", con=db_engine)
location_response_df = pd.read_sql_table("location_response", con=db_engine)

In [11]:
duck = duckdb.connect(database=":memory:")

In [12]:
duck.register("weatherapi_current_json", weatherapi_current_json_df)
duck.register("forecast_response", forecast_response_df)
# duck.register("location_response", location_response_df)

<_duckdb.DuckDBPyConnection at 0x7f61307accb0>

In [13]:
tables_res = duck.execute(f"""
SELECT table_name 
FROM information_schema.tables 
WHERE table_schema = 'main'
""")
display(tables_res.fetch_df())

Unnamed: 0,table_name
0,forecast_response
1,weatherapi_current_json


---

In [14]:
current_res = duck.execute(f"""
SELECT *
FROM weatherapi_current_json                       
""")
display(current_res.fetch_df())

Unnamed: 0,id,created_at,current_weather_json,retain
0,1,2025-10-06 04:45:00.492640,"{'location': {'name': 'Cleveland', 'region': '...",True
1,2,2025-10-06 05:01:30.139975,"{'location': {'name': 'Cleveland', 'region': '...",True
2,3,2025-10-06 05:15:00.524541,"{'location': {'name': 'Cleveland', 'region': '...",True
3,4,2025-10-06 05:30:00.385128,"{'location': {'name': 'Cleveland', 'region': '...",True
4,5,2025-10-06 05:45:00.646472,"{'location': {'name': 'Cleveland', 'region': '...",True
...,...,...,...,...
464,465,2025-10-11 01:15:00.440463,"{'location': {'name': 'Cleveland', 'region': '...",True
465,466,2025-10-11 01:30:00.494683,"{'location': {'name': 'Cleveland', 'region': '...",True
466,467,2025-10-11 01:45:00.759453,"{'location': {'name': 'Cleveland', 'region': '...",True
467,468,2025-10-11 02:00:05.621156,"{'location': {'name': 'Cleveland', 'region': '...",True


In [15]:
retain_current_res = duck.execute(f"""
SELECT *
FROM weatherapi_current_json
WHERE retain == True                           
""")

retain_current_df = retain_current_res.fetch_df()
display(retain_current_df.shape[0])
display(retain_current_df)

469

Unnamed: 0,id,created_at,current_weather_json,retain
0,1,2025-10-06 04:45:00.492640,"{'location': {'name': 'Cleveland', 'region': '...",True
1,2,2025-10-06 05:01:30.139975,"{'location': {'name': 'Cleveland', 'region': '...",True
2,3,2025-10-06 05:15:00.524541,"{'location': {'name': 'Cleveland', 'region': '...",True
3,4,2025-10-06 05:30:00.385128,"{'location': {'name': 'Cleveland', 'region': '...",True
4,5,2025-10-06 05:45:00.646472,"{'location': {'name': 'Cleveland', 'region': '...",True
...,...,...,...,...
464,465,2025-10-11 01:15:00.440463,"{'location': {'name': 'Cleveland', 'region': '...",True
465,466,2025-10-11 01:30:00.494683,"{'location': {'name': 'Cleveland', 'region': '...",True
466,467,2025-10-11 01:45:00.759453,"{'location': {'name': 'Cleveland', 'region': '...",True
467,468,2025-10-11 02:00:05.621156,"{'location': {'name': 'Cleveland', 'region': '...",True


---

In [16]:
forecast_res = duck.execute(f"""
SELECT *
FROM forecast_response                      
""")

forecast_df = forecast_res.fetch_df()
display(forecast_df.shape[0])
display(forecast_df)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

1592

Unnamed: 0,id,created_at,forecast_json,retain
0,1,2025-09-23 05:45:01.095975,"{'location': {'name': 'Cleveland', 'region': '...",False
1,2,2025-09-23 06:00:00.735109,"{'location': {'name': 'Cleveland', 'region': '...",False
2,3,2025-09-23 06:15:01.399307,"{'location': {'name': 'Cleveland', 'region': '...",False
3,4,2025-09-23 06:30:01.090009,"{'location': {'name': 'Cleveland', 'region': '...",False
4,5,2025-09-23 06:45:01.825144,"{'location': {'name': 'Cleveland', 'region': '...",False
...,...,...,...,...
1587,1588,2025-10-11 01:15:00.285681,"{'location': {'name': 'Cleveland', 'region': '...",False
1588,1589,2025-10-11 01:30:00.479879,"{'location': {'name': 'Cleveland', 'region': '...",False
1589,1590,2025-10-11 01:45:00.351557,"{'location': {'name': 'Cleveland', 'region': '...",False
1590,1591,2025-10-11 02:00:05.622377,"{'location': {'name': 'Cleveland', 'region': '...",True


In [17]:
retain_forecast_true_res = duck.execute(f"""
SELECT *
FROM weatherapi_current_json
WHERE retain == True                           
""")

retain_forecast_df = retain_forecast_true_res.fetch_df()
display(retain_forecast_df.shape[0])
display(retain_forecast_df)

469

Unnamed: 0,id,created_at,current_weather_json,retain
0,1,2025-10-06 04:45:00.492640,"{'location': {'name': 'Cleveland', 'region': '...",True
1,2,2025-10-06 05:01:30.139975,"{'location': {'name': 'Cleveland', 'region': '...",True
2,3,2025-10-06 05:15:00.524541,"{'location': {'name': 'Cleveland', 'region': '...",True
3,4,2025-10-06 05:30:00.385128,"{'location': {'name': 'Cleveland', 'region': '...",True
4,5,2025-10-06 05:45:00.646472,"{'location': {'name': 'Cleveland', 'region': '...",True
...,...,...,...,...
464,465,2025-10-11 01:15:00.440463,"{'location': {'name': 'Cleveland', 'region': '...",True
465,466,2025-10-11 01:30:00.494683,"{'location': {'name': 'Cleveland', 'region': '...",True
466,467,2025-10-11 01:45:00.759453,"{'location': {'name': 'Cleveland', 'region': '...",True
467,468,2025-10-11 02:00:05.621156,"{'location': {'name': 'Cleveland', 'region': '...",True
