In [2]:
import dlt
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.paginators import PageNumberPaginator

In [None]:
# Load the RapidAPI key from the local .env file (Alternative 1: dotenv_values()).
# dotenv_values() parses .env into a dict and leaves os.environ untouched.

from dotenv import dotenv_values
config = dotenv_values()

# A missing .env file yields an empty mapping and a key without a value yields
# None, so check both here instead of failing later inside the HTTP request.
api_key = config.get('X-RapidAPI-Key')
if not api_key:
    raise KeyError(
        "X-RapidAPI-Key is missing or empty in .env; "
        "add a line like X-RapidAPI-Key=<your key> to the .env file"
    )

The Postgres connection details are stored in `.dlt/secrets.toml`:

```
[destination.postgres.credentials]

database = "..." # give the database name
username = " " # give user name
password = " " # replace with your password
host = "..." # replace with your host connection
port = 5432 # normally for postgres it is 5432
connect_timeout = 15
```

In [None]:
# Weather resource: daily Meteostat observations for a single station
@dlt.resource(name="weather")   # the resource name is also used as the table name
def meteostat():
    """Yield every response page of daily data for station 74486.

    The request window is fixed to 2024-01-01 .. 2024-03-31 and is sent to the
    "/stations/daily" endpoint through RapidAPI, authenticated with the
    module-level ``api_key``.
    """
    # Query-string parameters for the request
    query = {
        "station": 74486,
        "start": "2024-01-01",
        "end": "2024-03-31",
        "model": "true"
    }

    # RapidAPI authentication / routing headers
    rapidapi_headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
    }
    api = RESTClient(
        base_url="https://meteostat.p.rapidapi.com",
        headers=rapidapi_headers,
    )

    # Hand pages over one at a time instead of collecting them in memory
    yield from api.paginate("/stations/daily", params=query)

# Create a pipeline that loads into the Postgres destination
pipeline = dlt.pipeline(
    destination="postgres",
)

# Execute the load with the "replace" write disposition, then show the load summary
load_info = pipeline.run(
    meteostat,
    write_disposition="replace",
)
print(load_info)

# Incremental

In [3]:
# Define the API resource for weather data (incremental variant)
@dlt.resource(name="weather")   # The name of the resource (used as the table name)
def meteostat(
    cursor_date=dlt.sources.incremental(
        "date",   # <--- field to track in each row
        # Same day as the "start" request parameter below.
        # NOTE(review): assumes the API returns "date" as an ISO-style string
        # beginning with YYYY-MM-DD, so this value sorts at or before the
        # first row -- confirm against a real response.
        initial_value="2024-01-01",
    )
):
    """Yield pages of daily weather data for station 74486, tracked incrementally.

    Args:
        cursor_date: dlt incremental cursor on the "date" field. It has to be
            declared as an argument default: an incremental object that is
            merely created inside the function body is never bound to the
            resource, so no cursor state would be tracked or stored.

    Yields:
        One response page per iteration from the "/stations/daily" endpoint.

    The request window itself is fixed (2024-01-01 .. 2024-05-15); rows at or
    before the stored cursor position are dropped by dlt after they are fetched.
    """
    client = RESTClient(  # Initialize REST client without params
        base_url="https://meteostat.p.rapidapi.com",
        headers={
            "X-RapidAPI-Key": api_key,
            "X-RapidAPI-Host": "meteostat.p.rapidapi.com"
        }
    )

    # Define API parameters
    params = {
        "station": 74486,
        "start": "2024-01-01",
        "end": "2024-05-15",
        "model": "true"
    }

    # Iterate over paginated responses
    for page in client.paginate("/stations/daily", params=params):  # Pass params here
        yield page  # Yield data to manage memory

# Define new dlt pipeline
pipeline = dlt.pipeline(pipeline_name="meteostat_incremental", destination="postgres", dataset_name="meteostat_data")

# Run the pipeline with the new resource.
# "append" keeps rows from earlier runs; "replace" would reset the incremental
# state and rewrite the table on every run, defeating incremental loading.
# NOTE: if the table already holds rows from an earlier "replace" load, clear it
# before the first append run, otherwise those rows are loaded a second time.
load_info = pipeline.run(meteostat, write_disposition="append")
print(load_info)



Pipeline meteostat_incremental load step completed in 1.31 seconds
1 load package(s) were loaded to destination postgres and into dataset meteostat_data
The postgres destination used postgresql://jugnuarora:***@data-analytics-course-2.c8g8r1deus2v.eu-central-1.rds.amazonaws.com:5432/hh_analytics_24_4 location to store data
Load package 1739477465.225763 is LOADED and contains no failed jobs
