## 1 - API / Data-Collection

In [1]:
import requests
import pandas as pd
import uuid
from datetime import datetime

class ThurgauAPIClient:
    """Client that downloads records from the Thurgau Open Government Data API.

    The client only fetches data; it performs no cleaning of the returned records.
    """

    def __init__(
        self,
        base_url: str = "https://data.tg.ch/api/records/1.0/search/",
        timeout: float = 30.0,
    ):
        """Remember the endpoint and the request timeout.

        Args:
            base_url (str, optional): Search endpoint every request is sent to.
            timeout (float, optional): Seconds to wait for the server before the
                request is aborted. Defaults to 30.0.
        """
        self.base_url = base_url
        self.timeout = timeout

    def fetch_data(self, dataset: str, max_records: int = 1000) -> pd.DataFrame:
        """Fetches data from the Thurgau - Open Government Data API.

        Sends one GET request with the query parameters ``dataset`` and ``rows``
        and turns the ``fields`` mapping of every returned record into one row.
        A status line with the number of loaded rows is printed.

        Args:
            dataset (str): Unique dataset ID from the webpage's API description.
            max_records (int, optional): Max rows to fetch. Defaults to 1000.

        Returns:
            pd.DataFrame: Table with requested data; empty when the response
            contains no ``records`` entry.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If the server does not answer within ``timeout`` seconds.
            KeyError: If a returned record has no ``fields`` entry.
        """
        params = {"dataset": dataset, "rows": max_records}
        # requests has no default timeout: without one, a stalled connection
        # would block this call (and the notebook) indefinitely.
        response = requests.get(self.base_url, params=params, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()
        records = [r["fields"] for r in data.get("records", [])]
        df = pd.DataFrame(records)
        print(f"Es wurden {len(df)} Datensätze vom Dataset '{dataset}' geladen.")
        return df

class DataCleaner:
    """Stateless helpers that append bookkeeping columns to a DataFrame."""

    @staticmethod
    def add_uuid(df: pd.DataFrame, uuid_column: str = "uuid") -> pd.DataFrame:
        """Write a freshly generated random UUID string into every row.

        The column is written onto ``df`` itself (no copy is made) and the same
        object is returned.

        Args:
            df (pd.DataFrame): Frame that receives the new column.
            uuid_column (str, optional): Name of the column. Defaults to "uuid".

        Returns:
            pd.DataFrame: The frame that was passed in, now with ``uuid_column``.
        """
        row_ids = list(map(str, (uuid.uuid4() for _ in range(len(df)))))
        df[uuid_column] = row_ids
        return df

    @staticmethod
    def add_timestamp(df: pd.DataFrame, ts_column: str = "loaded_at") -> pd.DataFrame:
        """Stamp every row with the current local date as a ``YYYY-MM-DD`` string.

        The column is written onto ``df`` itself (no copy is made) and the same
        object is returned.

        Args:
            df (pd.DataFrame): Frame that receives the new column.
            ts_column (str, optional): Name of the column. Defaults to "loaded_at".

        Returns:
            pd.DataFrame: The frame that was passed in, now with ``ts_column``.
        """
        load_date = datetime.now().strftime("%Y-%m-%d")
        df[ts_column] = load_date
        return df


# Execution: download the dataset, then add the bookkeeping columns
client = ThurgauAPIClient()
cleaner = DataCleaner()

df_c02 = client.fetch_data(dataset="div-energie-8")
# UUID first, load date second -- this fixes the order of the two new columns
df_c02 = cleaner.add_timestamp(cleaner.add_uuid(df_c02))

df_c02


Es wurden 720 Datensätze vom Dataset 'div-energie-8' geladen.


Unnamed: 0,bfs_nr_gemeinde,erdoelbrennstoffe,gemeinde_name,jahr,energiebezugsflaeche,total,einwohner,erdgas,andere,uuid,loaded_at
0,4881,1686.933,Amlikon-Bissegg,2015,113791,1686.933,1320,,,1b8f0517-54c0-4ad4-8552-91b9cbfa5cb3,2025-07-29
1,4921,2217.241,Bussnang,2015,187360,3151.487,2262,932.375,1.871,b68324ae-119f-4cd4-9de2-337b26f3b487,2025-07-29
2,4751,2836.259,Rickenbach (TG),2015,227233,4585.831,2766,1745.491,4.081,e7dadbc2-470f-41e9-8023-9c5c0152e32e,2025-07-29
3,4756,867.692,Schönholzerswilen,2015,72638,867.692,804,,,e9f42b80-ff02-4f2c-9cde-7df168ba37f3,2025-07-29
4,4696,3593.640,Tägerwilen,2015,348780,6037.877,4377,2444.237,,b83d48f7-9702-4786-bd28-da39b08aed68,2025-07-29
...,...,...,...,...,...,...,...,...,...,...,...
715,4806,1092.829,Eschenz,2023,168717,2234.899,1893,1142.070,,c47a9776-f015-42c6-8a76-856caf06651d,2025-07-29
716,4724,2305.944,Eschlikon,2023,430312,5319.303,4864,3013.359,,b03d2841-d527-4f93-90e5-b92ebabfc63e,2025-07-29
717,4741,1227.117,Lommis,2023,103746,1227.117,1267,,,a424545b-2396-4905-a027-0e821eee5210,2025-07-29
718,4441,1192.134,Salmsach,2023,116236,1862.043,1578,669.910,,03be1b38-c87c-4140-8078-09d07f8502cc,2025-07-29


In [2]:
# Export locally as a semicolon-separated CSV without the row index.
# `index` is documented as a bool; index=False states the intent explicitly
# instead of relying on None being treated as falsy.
df_c02.to_csv("20250718_export.csv", sep=";", index=False)

## 2 - Analysis

In [None]:
import pandas as pd

class AnalyseC02Data:
    """Derive the reporting tables from the per-municipality energy DataFrame.

    All tables are built eagerly in ``__init__`` and stored in ``self.tables``
    under the keys 'kpi_pro_jahr', 'energiemix_pro_jahr', 'gemeindeentwicklung'
    and 'gemeindedetails' (in that order).
    """

    def __init__(self, df: pd.DataFrame):
        """Copy the input, clean it and build every table.

        Args:
            df (pd.DataFrame): Source data. The code reads the columns
                'bfs_nr_gemeinde', 'gemeinde_name', 'jahr', 'total',
                'einwohner', 'erdoelbrennstoffe', 'erdgas', 'andere' and
                'energiebezugsflaeche'.
        """
        self.df = df.copy()  # work on a copy so the caller's frame stays untouched
        self.tables = {}  # table name -> derived DataFrame

        # Data cleaning: missing 'andere' values are replaced with 0
        self.df['andere'] = self.df['andere'].fillna(0)

        # Build the analysis tables; the call order is the key order of self.tables
        builders = (
            self._prepare_kpis_pro_jahr,
            self._prepare_energiemix,
            self._prepare_gemeindeentwicklung,
            self._prepare_gemeindedetails,
        )
        for build in builders:
            build()

    def _sum_by_year(self, columns: list) -> pd.DataFrame:
        """Sum ``columns`` per 'jahr' and return 'jahr' as a regular column."""
        return self.df.groupby('jahr')[columns].sum().reset_index()

    def _with_energy_per_inhabitant(self) -> pd.DataFrame:
        """Return a copy of the cleaned data with 'total' / 'einwohner' appended."""
        per_capita = self.df['total'] / self.df['einwohner']
        return self.df.assign(energy_per_inhabitant=per_capita)

    def _prepare_kpis_pro_jahr(self):
        """Build 'kpi_pro_jahr': yearly totals, per-capita value and growth rates.

        Growth columns are the percentage change against the previous row (year);
        every column is rounded to two decimals.
        """
        yearly = self._sum_by_year(['total', 'einwohner'])
        yearly = yearly.assign(
            energy_per_inhabitant=lambda t: t['total'] / t['einwohner'],
            growth_total_energy_pct=lambda t: t['total'].pct_change() * 100,
            growth_energy_per_inhabitant_pct=(
                lambda t: t['energy_per_inhabitant'].pct_change() * 100
            ),
        )
        self.tables['kpi_pro_jahr'] = yearly.round(2)

    def _prepare_energiemix(self):
        """Build 'energiemix_pro_jahr': yearly sums of the three energy carriers."""
        carriers = ['erdoelbrennstoffe', 'erdgas', 'andere']
        self.tables['energiemix_pro_jahr'] = self._sum_by_year(carriers)

    def _prepare_gemeindeentwicklung(self):
        """Build 'gemeindeentwicklung': total and per-capita value per municipality and year."""
        wanted = ['bfs_nr_gemeinde', 'gemeinde_name', 'jahr', 'total', 'energy_per_inhabitant']
        self.tables['gemeindeentwicklung'] = self._with_energy_per_inhabitant()[wanted]

    def _prepare_gemeindedetails(self):
        """Build 'gemeindedetails': all detail columns plus the per-capita value."""
        wanted = [
            'bfs_nr_gemeinde', 'gemeinde_name', 'jahr',
            'total', 'erdoelbrennstoffe', 'erdgas', 'andere',
            'einwohner', 'energiebezugsflaeche', 'energy_per_inhabitant',
        ]
        self.tables['gemeindedetails'] = self._with_energy_per_inhabitant()[wanted]

    def get_table(self, name: str) -> pd.DataFrame:
        """Return the table stored under ``name``, or ``None`` if there is none."""
        return self.tables.get(name, None)

    def get_all_tables(self) -> dict:
        """Return the internal mapping of table name to DataFrame (not a copy)."""
        return self.tables

## 3 - BigQuery Upload

In [18]:
from google.oauth2 import service_account
from pandas_gbq import to_gbq
import os

class BigQueryUploader:
    """Upload pandas DataFrames to BigQuery using a service-account key file."""

    def __init__(self, project_id: str, credentials_path: str):
        """Load the service-account credentials from ``credentials_path``.

        Args:
            project_id (str): Project ID passed to every upload.
            credentials_path (str): Path to the service-account JSON key file.
        """
        self.project_id = project_id
        self.credentials_path = credentials_path
        self.credentials = service_account.Credentials.from_service_account_file(credentials_path)

    def upload_tables(self, tables: dict, if_exists: str = "replace"):
        """Upload every configured DataFrame, one table after the other.

        A failing upload is reported on stdout and does not stop the remaining
        uploads.

        Args:
            tables (dict): Mapping of the form {
                "name1": {
                    "dataframe": df1,
                    "dataset": "my_dataset",
                    "table": "my_table"
                },
                ...
            }
            if_exists (str, optional): Forwarded unchanged to ``to_gbq``.
                Defaults to "replace".
        """
        for name, config in tables.items():
            self._upload_one(name, config, if_exists)

    def _upload_one(self, name: str, config: dict, if_exists: str):
        """Upload a single configured table and print the outcome."""
        frame = config["dataframe"]
        destination = f"{config['dataset']}.{config['table']}"

        print(f"⬆️ Lade {name} hoch nach: {destination}...")

        try:
            request = {
                "dataframe": frame,
                "destination_table": destination,
                "project_id": self.project_id,
                "credentials": self.credentials,
                "if_exists": if_exists,
            }
            to_gbq(**request)
            print(f"✅ {name} erfolgreich hochgeladen.\n")
        except Exception as e:
            # Best effort: report the failure and let the caller continue
            print(f"❌ Fehler beim Hochladen von {name}: {e}\n")

In [None]:
# Prepare the analysis tables and the upload configuration
analyse = AnalyseC02Data(df_c02)
upload_dict = {}
for name, df in analyse.get_all_tables().items():
    # The BigQuery table carries the same name as the analysis table
    upload_dict[name] = dict(dataframe=df, dataset="energie_daten", table=name)

# Upload
credentials_path = os.path.join(os.pardir, "secrets", "bigquery-service-account-c02-tg.json")
uploader = BigQueryUploader(project_id="c02-tg", credentials_path=credentials_path)
uploader.upload_tables(upload_dict)

⬆️ Lade kpi_pro_jahr hoch nach: energie_daten.kpi_pro_jahr...
✅ kpi_pro_jahr erfolgreich hochgeladen.
⬆️ Lade energiemix_pro_jahr hoch nach: energie_daten.energiemix_pro_jahr...
✅ energiemix_pro_jahr erfolgreich hochgeladen.
⬆️ Lade gemeindeentwicklung hoch nach: energie_daten.gemeindeentwicklung...
✅ gemeindeentwicklung erfolgreich hochgeladen.
⬆️ Lade gemeindedetails hoch nach: energie_daten.gemeindedetails...
✅ gemeindedetails erfolgreich hochgeladen.
