### Question 2

In [1]:
!pip install requests requests-oauth2client polars




In [3]:
import json
import polars as pl
import requests
import tempfile

from pathlib import Path
from requests_oauth2client import OAuth2Client, OAuth2ClientCredentialsAuth
from typing import List

import os
import warnings

# ✅ Your credentials
# Read from the environment so the secret never lives in the notebook.
# Set CSIRO_CLIENT_ID and CSIRO_CLIENT_SECRET before starting the kernel.
# Any secret that was ever pasted into a notebook cell should be treated as
# leaked and rotated.
CLIENT_ID = os.environ.get("CSIRO_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("CSIRO_CLIENT_SECRET", "")
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        "CSIRO_CLIENT_ID / CSIRO_CLIENT_SECRET are not set; "
        "pass credentials explicitly or export them before running the download."
    )

class MyEmissionsData(requests.Session):
    """Authenticated HTTP session that downloads emissions streams to parquet.

    A token is obtained with the OAuth2 client-credentials flow from
    ``_auth_url`` and observations are queried from ``_senaps_url``.
    """

    _auth_url = "https://login.microsoftonline.com/a815c246-a01f-4d10-bc3e-eeb6a48ef48a/oauth2/v2.0/token"
    _senaps_url = "https://senaps.eratos.com/api/sensor/v2/observations"

    def __init__(self, client_id: str = CLIENT_ID, client_secret: str = CLIENT_SECRET):
        """Create the session and attach client-credentials authentication.

        Args:
            client_id: OAuth2 client id; also used to build the ``/.default`` scope.
            client_secret: OAuth2 client secret paired with ``client_id``.
        """
        super().__init__()
        oauth2client = OAuth2Client(self._auth_url, (client_id, client_secret))
        self.auth = OAuth2ClientCredentialsAuth(oauth2client, scope=f"{client_id}/.default")
        # Update instead of assigning a plain dict: assignment would throw away
        # the session's default headers (User-Agent, Accept-Encoding, ...) and
        # its case-insensitive header mapping.
        self.headers.update(
            {
                "accept": "*/*",
                "content-type": "application/json",
            }
        )

    def download_and_parse_data(self, *, write_path: Path, regions: List[str], start: str, end: str, limit: int = 99_999_999):
        """Download observations for ``regions`` and write them to a parquet file.

        Args:
            write_path: Destination parquet file; missing parent folders are created.
            regions: Region suffixes appended to the emissions stream id prefix.
            start: Start of the query window, sent to the API unchanged.
            end: End of the query window, sent to the API unchanged.
            limit: Value of the ``limit`` query parameter.

        Raises:
            ValueError: If ``regions`` is empty.
            requests.HTTPError: If the API responds with an error status.
        """
        if not regions:
            raise ValueError("`regions` list cannot be empty")
        # Accept plain strings as well as Path objects.
        write_path = Path(write_path)
        # The two parsers read differently shaped responses, so choose by stream count.
        parser = self._parse_single_stream if len(regions) == 1 else self._parse_multiple_streams

        streamid = ",".join(
            f"csiro.energy.dch.agshop.regional_global_emissions.{region}" for region in regions
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            fname = Path(tmpdir) / "response.json"
            # stream=True defers the body download, so iter_content really
            # spools it to disk piece by piece instead of buffering it all in
            # memory before the loop starts.
            with self.get(
                url=self._senaps_url,
                params=dict(streamid=streamid, start=start, end=end, limit=limit),
                stream=True,
            ) as response:
                response.raise_for_status()
                with open(fname, "wb") as fp:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        fp.write(chunk)

            write_path.parent.mkdir(parents=True, exist_ok=True)
            # Binary mode lets json detect the encoding itself instead of
            # depending on the platform's default text encoding.
            with open(fname, "rb") as fp:
                data = json.load(fp)
            parser(data, write_path)

    @staticmethod
    def _parse_single_stream(data, write_path):
        """Write a single-stream response as ``timestamp`` plus one value column.

        The value column is named after the stream id read from the response's
        embedded stream link; rows are sorted by timestamp before writing.
        """
        col_name = data["_embedded"]["stream"]["_links"]["self"]["id"]
        (
            pl.LazyFrame([
                {"timestamp": elem["t"], col_name: elem["v"]["v"]}
                for elem in data["results"]
            ])
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

    @staticmethod
    def _parse_multiple_streams(data, write_path):
        """Write a multi-stream response as ``timestamp`` plus one column per stream.

        Each result maps a timestamp string to a mapping of stream key to
        value object; the inner ``"v"`` values become the columns.
        """
        (
            pl.LazyFrame([
                {
                    "timestamp": key,
                    "struct": {k: v["v"] for k, v in val.items()}
                }
                for elem in data["results"]
                for key, val in elem.items()
            ])
            # Spread the per-stream struct into individual columns.
            .unnest("struct")
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )


In [None]:
# Fetch NSW and QLD emissions for 2024-01-01 through 2024-01-07 and store the
# result as a parquet file next to the notebook.
emissions = MyEmissionsData()

first_week_parquet = Path("./csiro_5min_emissions.parquet")
emissions.download_and_parse_data(
    write_path=first_week_parquet,
    regions=["nsw", "qld"],
    start="2024-01-01T00:00:00.000Z",
    end="2024-01-07T00:00:00.000Z",
)


In [None]:
# Re-load the parquet written by the download cell, mirror it to CSV in the
# working directory, and preview the first rows.
parquet_file = "csiro_5min_emissions.parquet"
csv_file = "csiro_5min_emissions.csv"

df = pl.read_parquet(parquet_file)
df.write_csv(csv_file)
df.head()


In [None]:
import polars as pl
import os

# Load the Parquet file
df = pl.read_parquet("csiro_5min_emissions.parquet")

# Use the same dataset_merge folder path
folder_path = os.path.expanduser("~/Desktop/dataset_merge")
output_path = os.path.join(folder_path, "csiro_5min_emissions.csv")

# write_csv does not create missing folders, so make sure the target exists
# (a no-op when the folder is already there).
os.makedirs(folder_path, exist_ok=True)

# Save the CSV file
df.write_csv(output_path)

print(f"✅ File saved to: {output_path}")


# Download the dataset for a single day of carbon emissions

In [None]:
import json
import polars as pl
import requests
import tempfile

from pathlib import Path
from requests_oauth2client import OAuth2Client, OAuth2ClientCredentialsAuth
from typing import List

import os
import warnings

# ✅ Your credentials
# Read from the environment so the secret never lives in the notebook.
# Set CSIRO_CLIENT_ID and CSIRO_CLIENT_SECRET before starting the kernel.
# Any secret that was ever pasted into a notebook cell should be treated as
# leaked and rotated.
CLIENT_ID = os.environ.get("CSIRO_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("CSIRO_CLIENT_SECRET", "")
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        "CSIRO_CLIENT_ID / CSIRO_CLIENT_SECRET are not set; "
        "pass credentials explicitly or export them before running the download."
    )

class MyEmissionsData(requests.Session):
    """Authenticated HTTP session that downloads emissions streams to parquet.

    A token is obtained with the OAuth2 client-credentials flow from
    ``_auth_url`` and observations are queried from ``_senaps_url``.
    """

    _auth_url = "https://login.microsoftonline.com/a815c246-a01f-4d10-bc3e-eeb6a48ef48a/oauth2/v2.0/token"
    _senaps_url = "https://senaps.eratos.com/api/sensor/v2/observations"

    def __init__(self, client_id: str = CLIENT_ID, client_secret: str = CLIENT_SECRET):
        """Create the session and attach client-credentials authentication.

        Args:
            client_id: OAuth2 client id; also used to build the ``/.default`` scope.
            client_secret: OAuth2 client secret paired with ``client_id``.
        """
        super().__init__()
        oauth2client = OAuth2Client(self._auth_url, (client_id, client_secret))
        self.auth = OAuth2ClientCredentialsAuth(oauth2client, scope=f"{client_id}/.default")
        # Update instead of assigning a plain dict: assignment would throw away
        # the session's default headers (User-Agent, Accept-Encoding, ...) and
        # its case-insensitive header mapping.
        self.headers.update(
            {
                "accept": "*/*",
                "content-type": "application/json",
            }
        )

    def download_and_parse_data(self, *, write_path: Path, regions: List[str], start: str, end: str, limit: int = 99_999_999):
        """Download observations for ``regions`` and write them to a parquet file.

        Args:
            write_path: Destination parquet file; missing parent folders are created.
            regions: Region suffixes appended to the emissions stream id prefix.
            start: Start of the query window, sent to the API unchanged.
            end: End of the query window, sent to the API unchanged.
            limit: Value of the ``limit`` query parameter.

        Raises:
            ValueError: If ``regions`` is empty.
            requests.HTTPError: If the API responds with an error status.
        """
        if not regions:
            raise ValueError("`regions` list cannot be empty")
        # Accept plain strings as well as Path objects.
        write_path = Path(write_path)
        # The two parsers read differently shaped responses, so choose by stream count.
        parser = self._parse_single_stream if len(regions) == 1 else self._parse_multiple_streams

        streamid = ",".join(
            f"csiro.energy.dch.agshop.regional_global_emissions.{region}" for region in regions
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            fname = Path(tmpdir) / "response.json"
            # stream=True defers the body download, so iter_content really
            # spools it to disk piece by piece instead of buffering it all in
            # memory before the loop starts.
            with self.get(
                url=self._senaps_url,
                params=dict(streamid=streamid, start=start, end=end, limit=limit),
                stream=True,
            ) as response:
                response.raise_for_status()
                with open(fname, "wb") as fp:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        fp.write(chunk)

            write_path.parent.mkdir(parents=True, exist_ok=True)
            # Binary mode lets json detect the encoding itself instead of
            # depending on the platform's default text encoding.
            with open(fname, "rb") as fp:
                data = json.load(fp)
            parser(data, write_path)

    @staticmethod
    def _parse_single_stream(data, write_path):
        """Write a single-stream response as ``timestamp`` plus one value column.

        The value column is named after the stream id read from the response's
        embedded stream link; rows are sorted by timestamp before writing.
        """
        col_name = data["_embedded"]["stream"]["_links"]["self"]["id"]
        (
            pl.LazyFrame([
                {"timestamp": elem["t"], col_name: elem["v"]["v"]}
                for elem in data["results"]
            ])
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

    @staticmethod
    def _parse_multiple_streams(data, write_path):
        """Write a multi-stream response as ``timestamp`` plus one column per stream.

        Each result maps a timestamp string to a mapping of stream key to
        value object; the inner ``"v"`` values become the columns.
        """
        (
            pl.LazyFrame([
                {
                    "timestamp": key,
                    "struct": {k: v["v"] for k, v in val.items()}
                }
                for elem in data["results"]
                for key, val in elem.items()
            ])
            # Spread the per-stream struct into individual columns.
            .unnest("struct")
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

# ✅ Run the extraction here
if __name__ == "__main__":
    # One day (2024-01-01 to 2024-01-02) for five regions, written to the Desktop.
    parquet_target = Path.home() / "Desktop" / "csiro_5min_emissions_full.parquet"
    query_window = dict(
        start="2024-01-01T00:00:00.000Z",
        end="2024-01-02T00:00:00.000Z",
    )

    e = MyEmissionsData()
    e.download_and_parse_data(
        write_path=parquet_target,
        regions=["nsw", "qld", "vic", "sa", "tas"],
        **query_window,
    )

    print("✅ CSIRO data with all 5 regions downloaded and saved to Desktop.")


# Save it as a CSV

In [None]:
import polars as pl
from pathlib import Path

# Load the .parquet file from Desktop
parquet_path = Path.home() / "Desktop" / "csiro_5min_emissions_full.parquet"
df = pl.read_parquet(parquet_path)

# Define the output path to your ~/Desktop/dataset_merge folder
output_path = Path.home() / "Desktop" / "dataset_merge" / "csiro_5min_emissions_full.csv"

# write_csv does not create missing folders, so make sure the target exists
# (a no-op when the folder is already there).
output_path.parent.mkdir(parents=True, exist_ok=True)

# Save as CSV
df.write_csv(output_path)

print(f"✅ CSV file saved to: {output_path}")


# Download and parse data for the whole year (2024)

In [2]:
import json
import polars as pl
import requests
import tempfile

from pathlib import Path
from requests_oauth2client import OAuth2Client, OAuth2ClientCredentialsAuth
from typing import List

import os
import warnings

# ✅ Your credentials
# Read from the environment so the secret never lives in the notebook.
# Set CSIRO_CLIENT_ID and CSIRO_CLIENT_SECRET before starting the kernel.
# Any secret that was ever pasted into a notebook cell should be treated as
# leaked and rotated.
CLIENT_ID = os.environ.get("CSIRO_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("CSIRO_CLIENT_SECRET", "")
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        "CSIRO_CLIENT_ID / CSIRO_CLIENT_SECRET are not set; "
        "pass credentials explicitly or export them before running the download."
    )

class MyEmissionsData(requests.Session):
    """Authenticated HTTP session that downloads emissions streams to parquet.

    A token is obtained with the OAuth2 client-credentials flow from
    ``_auth_url`` and observations are queried from ``_senaps_url``.
    """

    _auth_url = "https://login.microsoftonline.com/a815c246-a01f-4d10-bc3e-eeb6a48ef48a/oauth2/v2.0/token"
    _senaps_url = "https://senaps.eratos.com/api/sensor/v2/observations"

    def __init__(self, client_id: str = CLIENT_ID, client_secret: str = CLIENT_SECRET):
        """Create the session and attach client-credentials authentication.

        Args:
            client_id: OAuth2 client id; also used to build the ``/.default`` scope.
            client_secret: OAuth2 client secret paired with ``client_id``.
        """
        super().__init__()
        oauth2client = OAuth2Client(self._auth_url, (client_id, client_secret))
        self.auth = OAuth2ClientCredentialsAuth(oauth2client, scope=f"{client_id}/.default")
        # Update instead of assigning a plain dict: assignment would throw away
        # the session's default headers (User-Agent, Accept-Encoding, ...) and
        # its case-insensitive header mapping.
        self.headers.update(
            {
                "accept": "*/*",
                "content-type": "application/json",
            }
        )

    def download_and_parse_data(self, *, write_path: Path, regions: List[str], start: str, end: str, limit: int = 99_999_999):
        """Download observations for ``regions`` and write them to a parquet file.

        Args:
            write_path: Destination parquet file; missing parent folders are created.
            regions: Region suffixes appended to the emissions stream id prefix.
            start: Start of the query window, sent to the API unchanged.
            end: End of the query window, sent to the API unchanged.
            limit: Value of the ``limit`` query parameter.

        Raises:
            ValueError: If ``regions`` is empty.
            requests.HTTPError: If the API responds with an error status.
        """
        if not regions:
            raise ValueError("`regions` list cannot be empty")
        # Accept plain strings as well as Path objects.
        write_path = Path(write_path)
        # The two parsers read differently shaped responses, so choose by stream count.
        parser = self._parse_single_stream if len(regions) == 1 else self._parse_multiple_streams

        streamid = ",".join(
            f"csiro.energy.dch.agshop.regional_global_emissions.{region}" for region in regions
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            fname = Path(tmpdir) / "response.json"
            # stream=True defers the body download, so iter_content really
            # spools it to disk piece by piece instead of buffering it all in
            # memory before the loop starts.
            with self.get(
                url=self._senaps_url,
                params=dict(streamid=streamid, start=start, end=end, limit=limit),
                stream=True,
            ) as response:
                response.raise_for_status()
                with open(fname, "wb") as fp:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        fp.write(chunk)

            write_path.parent.mkdir(parents=True, exist_ok=True)
            # Binary mode lets json detect the encoding itself instead of
            # depending on the platform's default text encoding.
            with open(fname, "rb") as fp:
                data = json.load(fp)
            parser(data, write_path)

    @staticmethod
    def _parse_single_stream(data, write_path):
        """Write a single-stream response as ``timestamp`` plus one value column.

        The value column is named after the stream id read from the response's
        embedded stream link; rows are sorted by timestamp before writing.
        """
        col_name = data["_embedded"]["stream"]["_links"]["self"]["id"]
        (
            pl.LazyFrame([
                {"timestamp": elem["t"], col_name: elem["v"]["v"]}
                for elem in data["results"]
            ])
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

    @staticmethod
    def _parse_multiple_streams(data, write_path):
        """Write a multi-stream response as ``timestamp`` plus one column per stream.

        Each result maps a timestamp string to a mapping of stream key to
        value object; the inner ``"v"`` values become the columns.
        """
        (
            pl.LazyFrame([
                {
                    "timestamp": key,
                    "struct": {k: v["v"] for k, v in val.items()}
                }
                for elem in data["results"]
                for key, val in elem.items()
            ])
            # Spread the per-stream struct into individual columns.
            .unnest("struct")
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

# ✅ Run the extraction here
if __name__ == "__main__":
    # Calendar year 2024 (start of 2024 up to start of 2025) for five regions,
    # written to the Desktop.
    parquet_target = Path.home() / "Desktop" / "csiro_5min_emissions_full.parquet"
    query_window = dict(
        start="2024-01-01T00:00:00.000Z",
        end="2025-01-01T00:00:00.000Z",
    )

    e = MyEmissionsData()
    e.download_and_parse_data(
        write_path=parquet_target,
        regions=["nsw", "qld", "vic", "sa", "tas"],
        **query_window,
    )

    print("✅ CSIRO full-year data downloaded and saved to Desktop.")


✅ CSIRO full-year data downloaded and saved to Desktop.


# Convert .parquet to .csv 

In [3]:
import polars as pl
from pathlib import Path

# Load the .parquet file from Desktop
parquet_path = Path.home() / "Desktop" / "csiro_5min_emissions_full.parquet"
df = pl.read_parquet(parquet_path)

# Define the output path to your ~/Desktop/dataset_merge folder
output_path = Path.home() / "Desktop" / "dataset_merge" / "csiro_5min_emissions_full.csv"

# write_csv does not create missing folders, so make sure the target exists
# (a no-op when the folder is already there).
output_path.parent.mkdir(parents=True, exist_ok=True)

# Save as CSV
df.write_csv(output_path)

print(f"✅ CSV file saved to: {output_path}")


✅ CSV file saved to: /Users/nafis/Desktop/dataset_merge/csiro_5min_emissions_full.csv


## Downloading the data from 2019 to 2024

In [2]:
import json
import polars as pl
import requests
import tempfile

from pathlib import Path
from requests_oauth2client import OAuth2Client, OAuth2ClientCredentialsAuth
from typing import List

import os
import warnings

# ✅ Your credentials
# Read from the environment so the secret never lives in the notebook.
# Set CSIRO_CLIENT_ID and CSIRO_CLIENT_SECRET before starting the kernel.
# Any secret that was ever pasted into a notebook cell should be treated as
# leaked and rotated.
CLIENT_ID = os.environ.get("CSIRO_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("CSIRO_CLIENT_SECRET", "")
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        "CSIRO_CLIENT_ID / CSIRO_CLIENT_SECRET are not set; "
        "pass credentials explicitly or export them before running the download."
    )

class MyEmissionsData(requests.Session):
    """Authenticated HTTP session that downloads emissions streams to parquet.

    A token is obtained with the OAuth2 client-credentials flow from
    ``_auth_url`` and observations are queried from ``_senaps_url``.
    """

    _auth_url = "https://login.microsoftonline.com/a815c246-a01f-4d10-bc3e-eeb6a48ef48a/oauth2/v2.0/token"
    _senaps_url = "https://senaps.eratos.com/api/sensor/v2/observations"

    def __init__(self, client_id: str = CLIENT_ID, client_secret: str = CLIENT_SECRET):
        """Create the session and attach client-credentials authentication.

        Args:
            client_id: OAuth2 client id; also used to build the ``/.default`` scope.
            client_secret: OAuth2 client secret paired with ``client_id``.
        """
        super().__init__()
        oauth2client = OAuth2Client(self._auth_url, (client_id, client_secret))
        self.auth = OAuth2ClientCredentialsAuth(oauth2client, scope=f"{client_id}/.default")
        # Update instead of assigning a plain dict: assignment would throw away
        # the session's default headers (User-Agent, Accept-Encoding, ...) and
        # its case-insensitive header mapping.
        self.headers.update(
            {
                "accept": "*/*",
                "content-type": "application/json",
            }
        )

    def download_and_parse_data(self, *, write_path: Path, regions: List[str], start: str, end: str, limit: int = 99_999_999):
        """Download observations for ``regions`` and write them to a parquet file.

        Args:
            write_path: Destination parquet file; missing parent folders are created.
            regions: Region suffixes appended to the emissions stream id prefix.
            start: Start of the query window, sent to the API unchanged.
            end: End of the query window, sent to the API unchanged.
            limit: Value of the ``limit`` query parameter.

        Raises:
            ValueError: If ``regions`` is empty.
            requests.HTTPError: If the API responds with an error status.
        """
        if not regions:
            raise ValueError("`regions` list cannot be empty")
        # Accept plain strings as well as Path objects.
        write_path = Path(write_path)
        # The two parsers read differently shaped responses, so choose by stream count.
        parser = self._parse_single_stream if len(regions) == 1 else self._parse_multiple_streams

        streamid = ",".join(
            f"csiro.energy.dch.agshop.regional_global_emissions.{region}" for region in regions
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            fname = Path(tmpdir) / "response.json"
            # stream=True defers the body download, so iter_content really
            # spools it to disk piece by piece instead of buffering it all in
            # memory before the loop starts.
            with self.get(
                url=self._senaps_url,
                params=dict(streamid=streamid, start=start, end=end, limit=limit),
                stream=True,
            ) as response:
                response.raise_for_status()
                with open(fname, "wb") as fp:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        fp.write(chunk)

            write_path.parent.mkdir(parents=True, exist_ok=True)
            # Binary mode lets json detect the encoding itself instead of
            # depending on the platform's default text encoding.
            with open(fname, "rb") as fp:
                data = json.load(fp)
            parser(data, write_path)

    @staticmethod
    def _parse_single_stream(data, write_path):
        """Write a single-stream response as ``timestamp`` plus one value column.

        The value column is named after the stream id read from the response's
        embedded stream link; rows are sorted by timestamp before writing.
        """
        col_name = data["_embedded"]["stream"]["_links"]["self"]["id"]
        (
            pl.LazyFrame([
                {"timestamp": elem["t"], col_name: elem["v"]["v"]}
                for elem in data["results"]
            ])
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

    @staticmethod
    def _parse_multiple_streams(data, write_path):
        """Write a multi-stream response as ``timestamp`` plus one column per stream.

        Each result maps a timestamp string to a mapping of stream key to
        value object; the inner ``"v"`` values become the columns.
        """
        (
            pl.LazyFrame([
                {
                    "timestamp": key,
                    "struct": {k: v["v"] for k, v in val.items()}
                }
                for elem in data["results"]
                for key, val in elem.items()
            ])
            # Spread the per-stream struct into individual columns.
            .unnest("struct")
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

# ✅ Run the extraction here
if __name__ == "__main__":
    # Window runs from the start of 2019 to the last millisecond of 2024 for
    # five regions; the parquet is written to the Desktop.
    parquet_target = Path.home() / "Desktop" / "csiro_5y_emissions.parquet"
    query_window = dict(
        start="2019-01-01T00:00:00.000Z",
        end="2024-12-31T23:59:59.999Z",
    )

    e = MyEmissionsData()
    e.download_and_parse_data(
        write_path=parquet_target,
        regions=["nsw", "qld", "vic", "sa", "tas"],
        **query_window,
    )
    print("✅ 5-year CSIRO emissions data downloaded to Desktop.")


✅ 5-year CSIRO emissions data downloaded to Desktop.


## Converting to CSV 

In [4]:
import polars as pl
from pathlib import Path

# Load the .parquet file from Desktop
parquet_path = Path.home() / "Desktop" / "csiro_5y_emissions.parquet"
df = pl.read_parquet(parquet_path)

# Define output path inside Desktop/dataset_merge
output_path = Path.home() / "Desktop" / "dataset_merge" / "csiro_5y_emissions.csv"

# write_csv does not create missing folders, so make sure the target exists
# (a no-op when the folder is already there).
output_path.parent.mkdir(parents=True, exist_ok=True)

# Save to CSV
df.write_csv(output_path)

print(f"✅ CSV file saved to: {output_path}")


✅ CSV file saved to: /Users/nafis/Desktop/dataset_merge/csiro_5y_emissions.csv


In [1]:
import json
import polars as pl
import requests
import tempfile

from pathlib import Path
from requests_oauth2client import OAuth2Client, OAuth2ClientCredentialsAuth
from typing import List

import os
import warnings

# ✅ Your new credentials
# Read from the environment so the secret never lives in the notebook.
# Set CSIRO_CLIENT_ID and CSIRO_CLIENT_SECRET before starting the kernel.
# Any secret that was ever pasted into a notebook cell should be treated as
# leaked and rotated.
CLIENT_ID = os.environ.get("CSIRO_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("CSIRO_CLIENT_SECRET", "")
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        "CSIRO_CLIENT_ID / CSIRO_CLIENT_SECRET are not set; "
        "pass credentials explicitly or export them before running the download."
    )

class MyEmissionsData(requests.Session):
    """Authenticated HTTP session that downloads emissions streams to parquet.

    A token is obtained with the OAuth2 client-credentials flow from
    ``_auth_url`` and observations are queried from ``_senaps_url``.
    """

    _auth_url = "https://login.microsoftonline.com/a815c246-a01f-4d10-bc3e-eeb6a48ef48a/oauth2/v2.0/token"
    _senaps_url = "https://senaps.eratos.com/api/sensor/v2/observations"

    def __init__(self, client_id: str = CLIENT_ID, client_secret: str = CLIENT_SECRET):
        """Create the session and attach client-credentials authentication.

        Args:
            client_id: OAuth2 client id; also used to build the ``/.default`` scope.
            client_secret: OAuth2 client secret paired with ``client_id``.
        """
        super().__init__()
        oauth2client = OAuth2Client(self._auth_url, (client_id, client_secret))
        self.auth = OAuth2ClientCredentialsAuth(oauth2client, scope=f"{client_id}/.default")
        # Update instead of assigning a plain dict: assignment would throw away
        # the session's default headers (User-Agent, Accept-Encoding, ...) and
        # its case-insensitive header mapping.
        self.headers.update(
            {
                "accept": "*/*",
                "content-type": "application/json",
            }
        )

    def download_and_parse_data(self, *, write_path: Path, regions: List[str], start: str, end: str, limit: int = 99_999_999):
        """Download observations for ``regions`` and write them to a parquet file.

        Args:
            write_path: Destination parquet file; missing parent folders are created.
            regions: Region suffixes appended to the emissions stream id prefix.
            start: Start of the query window, sent to the API unchanged.
            end: End of the query window, sent to the API unchanged.
            limit: Value of the ``limit`` query parameter.

        Raises:
            ValueError: If ``regions`` is empty.
            requests.HTTPError: If the API responds with an error status.
        """
        if not regions:
            raise ValueError("`regions` list cannot be empty")
        # Accept plain strings as well as Path objects.
        write_path = Path(write_path)
        # The two parsers read differently shaped responses, so choose by stream count.
        parser = self._parse_single_stream if len(regions) == 1 else self._parse_multiple_streams

        streamid = ",".join(
            f"csiro.energy.dch.agshop.regional_global_emissions.{region}" for region in regions
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            fname = Path(tmpdir) / "response.json"
            # stream=True defers the body download, so iter_content really
            # spools it to disk piece by piece instead of buffering it all in
            # memory before the loop starts.
            with self.get(
                url=self._senaps_url,
                params=dict(streamid=streamid, start=start, end=end, limit=limit),
                stream=True,
            ) as response:
                response.raise_for_status()
                with open(fname, "wb") as fp:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        fp.write(chunk)

            write_path.parent.mkdir(parents=True, exist_ok=True)
            # Binary mode lets json detect the encoding itself instead of
            # depending on the platform's default text encoding.
            with open(fname, "rb") as fp:
                data = json.load(fp)
            parser(data, write_path)

    @staticmethod
    def _parse_single_stream(data, write_path):
        """Write a single-stream response as ``timestamp`` plus one value column.

        The value column is named after the stream id read from the response's
        embedded stream link; rows are sorted by timestamp before writing.
        """
        col_name = data["_embedded"]["stream"]["_links"]["self"]["id"]
        (
            pl.LazyFrame([
                {"timestamp": elem["t"], col_name: elem["v"]["v"]}
                for elem in data["results"]
            ])
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

    @staticmethod
    def _parse_multiple_streams(data, write_path):
        """Write a multi-stream response as ``timestamp`` plus one column per stream.

        Each result maps a timestamp string to a mapping of stream key to
        value object; the inner ``"v"`` values become the columns.
        """
        (
            pl.LazyFrame([
                {
                    "timestamp": key,
                    "struct": {k: v["v"] for k, v in val.items()}
                }
                for elem in data["results"]
                for key, val in elem.items()
            ])
            # Spread the per-stream struct into individual columns.
            .unnest("struct")
            .with_columns(
                # Parse the ISO-style string, then store it as millisecond UTC datetimes.
                pl.col("timestamp")
                .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%.fZ", strict=True)
                .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
            )
            .sort("timestamp")
            .sink_parquet(write_path)
        )

# ✅ Download data from 2019 to April 2025
if __name__ == "__main__":
    # The parquet goes into the "Question 2" folder under Desktop/dataset_merge.
    parquet_target = (
        Path.home() / "Desktop" / "dataset_merge" / "Question 2" / "csiro_2019_to_2025.parquet"
    )
    query_window = dict(
        start="2019-01-01T00:00:00.000Z",
        end="2025-04-30T23:59:59.999Z",
    )

    e = MyEmissionsData()
    e.download_and_parse_data(
        write_path=parquet_target,
        regions=["nsw", "qld", "vic", "sa", "tas"],
        **query_window,
    )
    print("✅ CSIRO data from 2019 to April 2025 downloaded successfully.")


✅ CSIRO data from 2019 to April 2025 downloaded successfully.


In [2]:
import polars as pl
from pathlib import Path

# Convert the 2019-to-2025 parquet download into a CSV stored in the same folder.
question2_dir = Path.home() / "Desktop" / "dataset_merge" / "Question 2"
parquet_path = question2_dir / "csiro_2019_to_2025.parquet"
output_path = question2_dir / "csiro_2019_to_2025.csv"

# Read the parquet, then write the same frame out as CSV.
df = pl.read_parquet(parquet_path)
df.write_csv(output_path)

print(f"✅ CSV file saved to: {output_path}")


✅ CSV file saved to: /Users/nafis/Desktop/dataset_merge/Question 2/csiro_2019_to_2025.csv
