24 changes: 24 additions & 0 deletions .github/workflows/linters.yaml
@@ -0,0 +1,24 @@
name: Lint Python

on: [push]

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.9]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pydocstyle
- name: Docstyle linting
run: |
pydocstyle --convention=google --add-ignore=D200,D210,D212,D415 nowcasting_dataset
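For reference, the docstring shape this workflow enforces looks like the sketch below. The function is hypothetical (not part of the repo); with D200, D210, D212 and D415 ignored, a second-line summary plus `Args:`/`Returns:` sections in the style used throughout this PR should pass pydocstyle's Google convention.

```python
def scale_irradiance(values: list, factor: float = 1.0) -> list:
    """
    Scale a list of irradiance values by a constant factor.

    Args:
        values: the raw irradiance readings
        factor: the multiplier applied to every reading

    Returns: a new list of scaled readings

    """
    return [value * factor for value in values]
```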
2 changes: 1 addition & 1 deletion .github/workflows/python-app.yml
@@ -1,7 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python application
name: Python Tests

on: [push, pull_request]

1 change: 1 addition & 0 deletions nowcasting_dataset/__init__.py
@@ -1 +1,2 @@
""" init file """
from nowcasting_dataset.square import Square
1 change: 1 addition & 0 deletions nowcasting_dataset/cloud/__init__.py
@@ -0,0 +1 @@
""" Cloud functions """
49 changes: 32 additions & 17 deletions nowcasting_dataset/cloud/aws.py
@@ -1,3 +1,4 @@
""" AWS functions """
import logging
from pathlib import Path
import os
@@ -13,14 +14,17 @@ def aws_upload_and_delete_local_files(
aws_path: str, local_path: Path, bucket: str = "solar-pv-nowcasting-data"
):
"""
Upload and delete files

1. Upload the files in a local path to a path in aws
2. Delete files in that local path
@param aws_path: the folder in the aws bucket that files will be saved too
@param local_path: the local path where fiels will be copied from
@param bucket: the aws bucket that files are saved too
@return:
"""

Args:
aws_path: the folder in the aws bucket that files will be saved to
local_path: the local path where files will be copied from
bucket: the aws bucket that files are saved to

"""
_LOG.info("Uploading to AWS!")

# create s3 resource
@@ -55,12 +59,14 @@ def aws_download_to_local(
):
"""
Download file from s3
@param remote_filename: the gcs file name, should start with gs://
@param local_filename:
@param s3_resource: s3 resource, means a new one doesnt have to be made everytime.
@param bucket: The s3 bucket name, from which to load the file from.
"""

Args:
remote_filename: the aws key of the file to download
local_filename: the local file name
s3_resource: s3 resource, so that a new one doesn't have to be made every time.
bucket: the s3 bucket name to load the file from.

"""
_LOG.debug(f"Downloading {remote_filename} from AWS to {local_filename}")

if s3_resource is None:
@@ -74,15 +80,19 @@


def upload_one_file(
remote_filename: str, local_filename: str, bucket: str = "solar-pv-nowcasting-data",
remote_filename: str,
local_filename: str,
bucket: str = "solar-pv-nowcasting-data",
):
"""
Upload one file to s3
@param remote_filename: the aws key name
@param local_filename: the local file name
@param bucket: the s3 bucket
"""

Args:
remote_filename: the aws key name
local_filename: the local file name
bucket: the s3 bucket

"""
# create s3 resource
s3 = boto3.client("s3")

@@ -96,8 +106,13 @@ def get_all_filenames_in_path_aws(
) -> List[str]:
"""
Get all the file names from one folder in aws
@param remote_path: the path that we should look in
@return: a list of strings, of files names

Args:
remote_path: the path that we should look in
bucket: the aws bucket

Returns: a list of strings of file names

"""
# get client
s3 = boto3.client("s3")
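Taken together, these aws.py helpers might be chained as in the sketch below. All key names and local paths are illustrative, and the keyword names are assumed from the signatures and docstrings shown above:

```python
from nowcasting_dataset.cloud.aws import (
    aws_download_to_local,
    get_all_filenames_in_path_aws,
    upload_one_file,
)

# list every file under a prefix in the default bucket
filenames = get_all_filenames_in_path_aws(remote_path="prepared_ML_training_data/v5/")

# download one of them; a fresh s3 resource is created since none is passed in
aws_download_to_local(
    remote_filename=filenames[0],
    local_filename="/tmp/batch_0.nc",
)

# push a processed file back up to the same bucket
upload_one_file(
    remote_filename="prepared_ML_training_data/v5/batch_0_processed.nc",
    local_filename="/tmp/batch_0_processed.nc",
)
```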
7 changes: 4 additions & 3 deletions nowcasting_dataset/cloud/gcp.py
@@ -1,3 +1,4 @@
""" GCP general functions """
import logging
from pathlib import Path
from typing import List, Union
@@ -34,14 +35,14 @@ def gcp_upload_and_delete_local_files(dst_path: str, local_path: Union[str, Path
def gcp_download_to_local(
remote_filename: str, local_filename: str, gcs: gcsfs.GCSFileSystem = None
):
"""Download file from gcs.
"""
Download file from gcs.

Args:
remote_filename: the gcs file name, should start with gs://
local_filename:
local_filename: the local filename
gcs: gcsfs.GCSFileSystem connection, so that a new one doesn't have to be made every time.
"""

_LOG.debug(f"Downloading from GCP {remote_filename} to {local_filename}")

if gcs is None:
6 changes: 2 additions & 4 deletions nowcasting_dataset/cloud/local.py
@@ -1,3 +1,4 @@
""" Functions for local files """
import glob
import os
import shutil
@@ -10,10 +11,7 @@


def delete_all_files_and_folder_in_temp_path(path: str):
"""
Delete all the files and folders in a temporary path
"""

""" Delete all the files and folders in a temporary path """
_LOG.info(f"Deleting files and folder from {path} .")

for files in os.listdir(path):
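A minimal sketch of the local helper, assuming it empties the directory but keeps the directory itself (which is what the os.listdir loop suggests):

```python
import tempfile
from pathlib import Path

from nowcasting_dataset.cloud.local import delete_all_files_and_folder_in_temp_path

# make a throwaway directory with one leftover file in it
temp_path = tempfile.mkdtemp()
(Path(temp_path) / "leftover.nc").touch()

# removes everything inside temp_path; the directory itself survives
delete_all_files_and_folder_in_temp_path(path=temp_path)
```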
14 changes: 8 additions & 6 deletions nowcasting_dataset/cloud/utils.py
@@ -1,3 +1,4 @@
""" General utils functions """
import logging
from pathlib import Path
import gcsfs
@@ -14,7 +15,6 @@ def upload_and_delete_local_files(dst_path: str, local_path: Path, cloud: str =
"""
Upload local files to either AWS or GCP, then delete the local copies
"""

assert cloud in ["gcp", "aws"]

if cloud == "gcp":
@@ -26,12 +26,14 @@ def gcp_to_aws(gcp_filename: str, gcs: gcsfs.GCSFileSystem, aws_filename: str, aws_bucket: str):
def gcp_to_aws(gcp_filename: str, gcs: gcsfs.GCSFileSystem, aws_filename: str, aws_bucket: str):
"""
Download a file from gcp and upload it to aws
@param gcp_filename: the gcp file name
@param gcs: the gcs file system (so it doesnt have to be made more than once)
@param aws_filename: the aws filename and path
@param aws_bucket: the asw bucket
"""

Args:
gcp_filename: the gcp file name
gcs: the gcs file system (so it doesn't have to be made more than once)
aws_filename: the aws filename and path
aws_bucket: the aws bucket

"""
# create temp file
with tempfile.NamedTemporaryFile() as fp:
local_filename = fp.name
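A hedged sketch of calling gcp_to_aws to mirror one file across clouds. The GCS path reuses a default from config/model.py below; the AWS key and bucket here are illustrative:

```python
import gcsfs

from nowcasting_dataset.cloud.utils import gcp_to_aws

# one GCS connection, reused across copies (the reason the gcs argument exists)
gcs = gcsfs.GCSFileSystem()

gcp_to_aws(
    gcp_filename="gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc",
    gcs=gcs,
    aws_filename="PV/PVOutput.org/UK_PV_timeseries_batch.nc",
    aws_bucket="solar-pv-nowcasting-data",
)
```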
1 change: 1 addition & 0 deletions nowcasting_dataset/config/__init__.py
@@ -0,0 +1 @@
""" Configuration of the dataset """
13 changes: 8 additions & 5 deletions nowcasting_dataset/config/load.py
@@ -1,3 +1,4 @@
""" Loading configuration functions """
import logging
import gcsfs
import os
@@ -14,11 +15,14 @@
def load_yaml_configuration(filename: Union[str, Pathy]) -> Configuration:
"""
Load a yaml file which has a configuration in it
filename: the file name that you want to load. Will load from local, AWS, or GCP
depending on the protocol suffix (e.g. 's3://bucket/config.yaml').
Returns: pydantic class
"""

Args:
filename: the file name that you want to load. Will load from local, AWS, or GCP
depending on the protocol suffix (e.g. 's3://bucket/config.yaml').

Returns: pydantic class

"""
# load the file to a dictionary
with fsspec.open(filename, mode="r") as stream:
configuration = yaml.safe_load(stream)
@@ -41,7 +45,6 @@ def load_configuration_from_gcs(

Returns: configuration class
"""

logger.info("Loading configuration from gcs")

bucket_and_dir = os.path.join(f"gs://{bucket}", gcp_dir)
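Since load_yaml_configuration opens the file through fsspec, the same call should work for local, s3:// and gs:// paths, as the docstring says. A sketch with made-up file names:

```python
from nowcasting_dataset.config.load import load_yaml_configuration

# local file: an absolute path is enough
config = load_yaml_configuration("/tmp/example_config.yaml")

# cloud files: the protocol prefix picks the backend via fsspec
config = load_yaml_configuration("s3://solar-pv-nowcasting-data/config/example.yaml")
config = load_yaml_configuration("gs://solar-pv-nowcasting-data/config/example.yaml")

# the result is the pydantic Configuration model
print(config.general.name)
```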
29 changes: 23 additions & 6 deletions nowcasting_dataset/config/model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
""" Configuration model for the dataset """
from pydantic import BaseModel, Field, validator

from pydantic import BaseModel, Field
@@ -12,6 +13,8 @@


class General(BaseModel):
""" General pydantic model """

name: str = Field("example", description="The name of this configuration file.")
description: str = Field(
"example configuration", description="Description of this confgiruation file"
@@ -27,6 +30,8 @@ class General(BaseModel):


class Git(BaseModel):
""" Git model """

hash: str = Field(..., description="The git hash for when a dataset is created.")
message: str = Field(..., description="The git message for when a dataset is created.")
committed_date: datetime = Field(
@@ -35,9 +40,13 @@ class Git(BaseModel):


class InputData(BaseModel):
# All paths must include the protocol prefix. For local files,
# it's sufficient to just start with a '/'. For aws, start with 's3://',
# for gcp start with 'gs://'.
"""
Input data model

All paths must include the protocol prefix. For local files,
it's sufficient to just start with a '/'. For aws, start with 's3://',
for gcp start with 'gs://'.
"""

solar_pv_data_filename: str = Field(
"gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc",
@@ -66,6 +75,8 @@ class InputData(BaseModel):


class OutputData(BaseModel):
""" Output data model """

filepath: str = Field(
"gs://solar-pv-nowcasting-data/prepared_ML_training_data/v5/",
description=(
@@ -76,6 +87,8 @@ class OutputData(BaseModel):


class Process(BaseModel):
""" Pydantic model of how the data is processed """

seed: int = Field(1234, description="Random seed, so experiments can be repeatable")
batch_size: int = Field(32, description="the number of examples per batch")
upload_every_n_batches: int = Field(
@@ -100,34 +113,38 @@ class Process(BaseModel):

@property
def seq_len_30_minutes(self):
""" How many steps are there in 30 minute datasets """
return int((self.history_minutes + self.forecast_minutes) / 30 + 1)

@property
def seq_len_5_minutes(self):
""" How many steps are there in 5 minute datasets """
return int((self.history_minutes + self.forecast_minutes) / 5 + 1)

@validator("history_minutes")
def history_minutes_divide_by_30(cls, v):
""" Validate 'history_minutes' """
assert v % 30 == 0 # this means it also divides by 5
return v

@validator("forecast_minutes")
def forecast_minutes_divide_by_30(cls, v):
""" Validate 'forecast_minutes' """
assert v % 30 == 0 # this means it also divides by 5
return v


class Configuration(BaseModel):
""" Configuration model for the dataset """

general: General = General()
input_data: InputData = InputData()
output_data: OutputData = OutputData()
process: Process = Process()
git: Optional[Git] = None

def set_base_path(self, base_path: str):
"""Append base_path to all paths.

Mostly used for testing."""
"""Append base_path to all paths. Mostly used for testing."""
base_path = Pathy(base_path)
path_attrs = [
"solar_pv_data_filename",
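The seq_len properties and divide-by-30 validators are easiest to check with concrete numbers. A sketch, assuming the Process fields truncated above all have defaults so only the two durations need passing:

```python
import pydantic

from nowcasting_dataset.config.model import Process

process = Process(history_minutes=60, forecast_minutes=60)

# (60 + 60) / 30 + 1 = 5 half-hourly steps (history, forecast, plus t0)
assert process.seq_len_30_minutes == 5

# (60 + 60) / 5 + 1 = 25 five-minutely steps
assert process.seq_len_5_minutes == 25

# the validators reject anything not divisible by 30
try:
    Process(history_minutes=45, forecast_minutes=60)
except pydantic.ValidationError:
    print("history_minutes=45 rejected: must divide by 30")
```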
2 changes: 1 addition & 1 deletion nowcasting_dataset/config/save.py
@@ -1,3 +1,4 @@
""" Save functions for the configuration model"""
import yaml
import logging
import fsspec
@@ -18,7 +19,6 @@ def save_yaml_configuration(

Will save to GCP, AWS, or local, depending on the protocol suffix of filepath.
"""

# make a dictionary from the configuration
d = configuration.dict()
if filename is None:
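Pairing this with the loader gives a round trip; a sketch under the assumption that save_yaml_configuration's second parameter is named filename (its signature is truncated above) and that default Configuration values survive a yaml round trip:

```python
from nowcasting_dataset.config.load import load_yaml_configuration
from nowcasting_dataset.config.model import Configuration
from nowcasting_dataset.config.save import save_yaml_configuration

# build a default configuration and write it to a local (or s3://, gs://) path
configuration = Configuration()
save_yaml_configuration(configuration, filename="/tmp/roundtrip.yaml")

# reading it back should reproduce the same pydantic object
reloaded = load_yaml_configuration("/tmp/roundtrip.yaml")
assert reloaded == configuration
```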
1 change: 1 addition & 0 deletions nowcasting_dataset/consts.py
@@ -1,3 +1,4 @@
""" Constants that can be imported when needed """
from typing import Union
import numpy as np
import xarray as xr
1 change: 1 addition & 0 deletions nowcasting_dataset/data_sources/__init__.py
@@ -1,3 +1,4 @@
""" Various DataSources """
from nowcasting_dataset.data_sources.data_source import DataSource
from nowcasting_dataset.data_sources.satellite_data_source import SatelliteDataSource
from nowcasting_dataset.data_sources.pv_data_source import PVDataSource
Expand Down