### Installing packages

In [1]:
# we use natsort package to sort those missing leading zero files 
!pip install natsort



### Defining ANSI codes for colored text prints 

In [2]:
# ANSI escape codes, to make log prints nicer
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
BOLD = "\033[1m"
ITALIC = "\x1B[3m"
UNDERLINED = "\033[4m"
RESET = "\033[0m"
WHITE_BG    = "\x1b[47m\033[30m" # adding \033[30m makes text black
GREEN_BG    = "\x1b[102m\033[30m" # adding \033[30m makes text black

# https://jakob-bagterp.github.io/colorist-for-python/ansi-escape-codes/standard-16-colors/#bright-colors_2

### Imports

In [3]:
import pandas as pd
import os
from natsort import os_sorted
from datetime import datetime, timedelta

import time
import json
import requests
import zipfile
import warnings 
import urllib3

from dotenv import dotenv_values
from sqlalchemy import create_engine, types, text
from sqlalchemy.dialects.postgresql import JSON as postgres_json

# we'll suppress the "missing SSL certificate" warnings while downloading files
warnings.simplefilter("ignore", urllib3.exceptions.InsecureRequestWarning) 

In [26]:
from dotenv import dotenv_values

config = dotenv_values()

# define variables for the login
pg_user = config['POSTGRES_USER']  # align the key label with your .env file !
pg_host = config['POSTGRES_HOST']
pg_port = config['POSTGRES_PORT']
pg_db = config['POSTGRES_DB']
pg_schema = config['POSTGRES_SCHEMA']
pg_pass = config['POSTGRES_PASS']

# next steps daily wheather

In [44]:
airport_staids = {'TPA': 72211,'TLH': 72214,'ATL': 72219,'CLT': 72314}
airport_staids

{'TPA': 72211, 'TLH': 72214, 'ATL': 72219, 'CLT': 72314}

In [46]:
config = dotenv_values()

api_key = config['x-rapidapi-key'] # align the key label with your .env file

In [52]:
period_start = "2024-08-01"
period_end = "2024-10-31"

In [53]:
weather_dict = {'extracted_at':[], 
                'airport_code':[], 
                'station_id':[], 
                'extracted_data':[]
               }

# API CALL daily (station) - for the syntax: see the rapidapi interface

url = "https://meteostat.p.rapidapi.com/stations/daily"

headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
}

# for-loop for the querystrings
for airport in airport_staids:
   
    querystring = {
        "station":airport_staids[airport]
        ,"start":period_start
        ,"end":period_end
        ,"model":"true"
    }
    
    # making one call with the current querystring
    response = requests.get(url, headers=headers, params=querystring)
                
    # appending data to the dictionary:
    weather_dict['extracted_at'].append(datetime.now())                # timestamp, 
    weather_dict['airport_code'].append(airport)                       # airport code    
    weather_dict['station_id'].append(airport_staids[airport])         # weater Station ID
    weather_dict['extracted_data'].append(json.loads(response.text))   # JSON string

In [54]:
weather_daily_df = pd.DataFrame(weather_dict)
weather_daily_df

Unnamed: 0,extracted_at,airport_code,station_id,extracted_data
0,2026-02-27 15:36:31.135614,TPA,72211,"{'meta': {'generated': '2026-02-27 14:36:28'},..."
1,2026-02-27 15:36:31.605644,TLH,72214,"{'meta': {'generated': '2026-02-27 14:36:29'},..."
2,2026-02-27 15:36:32.109429,ATL,72219,"{'meta': {'generated': '2026-02-27 14:36:29'},..."
3,2026-02-27 15:36:32.661835,CLT,72314,"{'meta': {'generated': '2026-02-27 14:36:30'},..."


In [59]:
url = f'postgresql://{pg_user}:{pg_pass}@{pg_host}:{pg_port}/{pg_db}'
url

'postgresql://alinakhomich:HmHCUg2AIEiIgiIU@data-analytics-course-2.c8g8r1deus2v.eu-central-1.rds.amazonaws.com:5432/nf_da_onl_en_081225'

In [64]:
engine = create_engine(
    url,
    connect_args={"options": "-csearch_path=below_zero"},
    echo=True
)

In [65]:
dtype_dict = {
    'extracted_at':types.DateTime,
    'airport_code': types.String,
    'station_id': types.Integer,
    'extracted_data':postgres_json
             }

In [67]:
# writing dataframe to DB
weather_daily_df.to_sql(name = 'weather_daily_raw', 
                       con = engine, 
                       schema = "below_zero", # pandas is allowing to specify, in which schema the table shall be created
                       if_exists='replace', 
                       dtype=dtype_dict,
                       index=False
                      )

2026-02-27 15:41:03,580 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2026-02-27 15:41:03,581 INFO sqlalchemy.engine.Engine [raw sql] {}
2026-02-27 15:41:03,645 INFO sqlalchemy.engine.Engine select current_schema()
2026-02-27 15:41:03,645 INFO sqlalchemy.engine.Engine [raw sql] {}
2026-02-27 15:41:03,706 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2026-02-27 15:41:03,706 INFO sqlalchemy.engine.Engine [raw sql] {}
2026-02-27 15:41:03,759 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2026-02-27 15:41:03,765 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_namespace.nspname = %(nspname_1)s
2026-02-27 15:41:03,766 INFO sqlalchemy.engine.Engine [g

4