diff --git a/.github/workflows/linters.yaml b/.github/workflows/linters.yaml new file mode 100644 index 00000000..8d8027a1 --- /dev/null +++ b/.github/workflows/linters.yaml @@ -0,0 +1,24 @@ +name: Lint Python + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.9] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pydocstyle + - name: Docstyle linting + run: | + pydocstyle --convention=google --add-ignore=D200,D210,D212,D415 nowcasting_dataset diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index cc8ccd13..60a4ed1c 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Python application +name: Python Tests on: [push, pull_request] diff --git a/nowcasting_dataset/__init__.py b/nowcasting_dataset/__init__.py index 9172424d..01b773db 100644 --- a/nowcasting_dataset/__init__.py +++ b/nowcasting_dataset/__init__.py @@ -1 +1,2 @@ +""" init file """ from nowcasting_dataset.square import Square diff --git a/nowcasting_dataset/cloud/__init__.py b/nowcasting_dataset/cloud/__init__.py index e69de29b..e09f83e3 100644 --- a/nowcasting_dataset/cloud/__init__.py +++ b/nowcasting_dataset/cloud/__init__.py @@ -0,0 +1 @@ +""" Cloud functions """ diff --git a/nowcasting_dataset/cloud/aws.py b/nowcasting_dataset/cloud/aws.py index a062eb9f..128c92fa 100644 --- a/nowcasting_dataset/cloud/aws.py +++ b/nowcasting_dataset/cloud/aws.py @@ -1,3 +1,4 @@ +""" AWS functions """ import logging from pathlib import Path import os @@ -13,14 +14,17 @@ def aws_upload_and_delete_local_files( aws_path: str, local_path: Path, bucket: str = "solar-pv-nowcasting-data" ): """ + Upload and delete files + 1. Upload the files in a local path, to a path in aws 2. Delete files in that local path - @param aws_path: the folder in the aws bucket that files will be saved too - @param local_path: the local path where fiels will be copied from - @param bucket: the aws bucket that files are saved too - @return: - """ + Args: + aws_path: the folder in the aws bucket that files will be saved too + local_path: the local path where fiels will be copied from + bucket: the aws bucket that files are saved too + + """ _LOG.info("Uploading to AWS!") # create s3 resource @@ -55,12 +59,14 @@ def aws_download_to_local( ): """ Download file from gcs - @param remote_filename: the gcs file name, should start with gs:// - @param local_filename: - @param s3_resource: s3 resource, means a new one doesnt have to be made everytime. - @param bucket: The s3 bucket name, from which to load the file from. - """ + Args: + remote_filename: the gcs file name, should start with gs:// + local_filename: the local file name + s3_resource: s3 resource, means a new one doesnt have to be made everytime. + bucket: The s3 bucket name, from which to load the file from. 
+ + """ _LOG.debug(f"Downloading {remote_filename} from AWS to {local_filename}") if s3_resource is None: @@ -74,15 +80,19 @@ def aws_download_to_local( def upload_one_file( - remote_filename: str, local_filename: str, bucket: str = "solar-pv-nowcasting-data", + remote_filename: str, + local_filename: str, + bucket: str = "solar-pv-nowcasting-data", ): """ Upload one file to s3 - @param remote_filename: the aws key name - @param local_filename: the local file name - @param bucket: the s3 bucket - """ + Args: + remote_filename: the aws key name + local_filename: the local file name + bucket: the s3 bucket + + """ # create s3 resource s3 = boto3.client("s3") @@ -96,8 +106,13 @@ def get_all_filenames_in_path_aws( ) -> List[str]: """ Get all the files names from one folder in gcp - @param remote_path: the path that we should look in - @return: a list of strings, of files names + + Args: + remote_path:the path that we should look in + bucket: the aws bucket + + Returns: a list of strings, of files names + """ # get client s3 = boto3.client("s3") diff --git a/nowcasting_dataset/cloud/gcp.py b/nowcasting_dataset/cloud/gcp.py index 70c6b600..6b7777b3 100644 --- a/nowcasting_dataset/cloud/gcp.py +++ b/nowcasting_dataset/cloud/gcp.py @@ -1,3 +1,4 @@ +""" GCP general functions """ import logging from pathlib import Path from typing import List, Union @@ -34,14 +35,14 @@ def gcp_upload_and_delete_local_files(dst_path: str, local_path: Union[str, Path def gcp_download_to_local( remote_filename: str, local_filename: str, gcs: gcsfs.GCSFileSystem = None ): - """Download file from gcs. + """ + Download file from gcs. Args: remote_filename: the gcs file name, should start with gs:// - local_filename: + local_filename: the local filename gcs: gcsfs.GCSFileSystem connection, means a new one doesnt have to be made everytime. 
""" - _LOG.debug(f"Downloading from GCP {remote_filename} to {local_filename}") if gcs is None: diff --git a/nowcasting_dataset/cloud/local.py b/nowcasting_dataset/cloud/local.py index 6c2f402e..c0dcfc0a 100644 --- a/nowcasting_dataset/cloud/local.py +++ b/nowcasting_dataset/cloud/local.py @@ -1,3 +1,4 @@ +""" Functions for local files """ import glob import os import shutil @@ -10,10 +11,7 @@ def delete_all_files_and_folder_in_temp_path(path: str): - """ - Delete all the files and folders in a temporary path - """ - + """ Delete all the files and folders in a temporary path """ _LOG.info(f"Deleting files and folder from {path} .") for files in os.listdir(path): diff --git a/nowcasting_dataset/cloud/utils.py b/nowcasting_dataset/cloud/utils.py index 38b776c2..ebbe323e 100644 --- a/nowcasting_dataset/cloud/utils.py +++ b/nowcasting_dataset/cloud/utils.py @@ -1,3 +1,4 @@ +""" General utils functions """ import logging from pathlib import Path import gcsfs @@ -14,7 +15,6 @@ def upload_and_delete_local_files(dst_path: str, local_path: Path, cloud: str = """ Upload and delete local files to either AWS or GCP """ - assert cloud in ["gcp", "aws"] if cloud == "gcp": @@ -26,12 +26,14 @@ def upload_and_delete_local_files(dst_path: str, local_path: Path, cloud: str = def gcp_to_aws(gcp_filename: str, gcs: gcsfs.GCSFileSystem, aws_filename: str, aws_bucket: str): """ Download a file from gcp and upload it to aws - @param gcp_filename: the gcp file name - @param gcs: the gcs file system (so it doesnt have to be made more than once) - @param aws_filename: the aws filename and path - @param aws_bucket: the asw bucket - """ + Args: + gcp_filename: the gcp file name + gcs: the gcs file system (so it doesnt have to be made more than once) + aws_filename: the aws filename and path + aws_bucket: the aws bucket + + """ # create temp file with tempfile.NamedTemporaryFile() as fp: local_filename = fp.name diff --git a/nowcasting_dataset/config/__init__.py b/nowcasting_dataset/config/__init__.py index e69de29b..93de233c 100644 --- a/nowcasting_dataset/config/__init__.py +++ b/nowcasting_dataset/config/__init__.py @@ -0,0 +1 @@ +""" Configuration of the dataset """ diff --git a/nowcasting_dataset/config/load.py b/nowcasting_dataset/config/load.py index 977c3462..1b84fdf5 100644 --- a/nowcasting_dataset/config/load.py +++ b/nowcasting_dataset/config/load.py @@ -1,3 +1,4 @@ +""" Loading configuration functions """ import logging import gcsfs import os @@ -14,11 +15,14 @@ def load_yaml_configuration(filename: Union[str, Pathy]) -> Configuration: """ Load a yaml file which has a configuration in it - filename: the file name that you want to load. Will load from local, AWS, or GCP - depending on the protocol suffix (e.g. 's3://bucket/config.yaml'). - Returns: pydantic class - """ + Args: + filename: the file name that you want to load. Will load from local, AWS, or GCP + depending on the protocol suffix (e.g. 's3://bucket/config.yaml'). 
+ + Returns:pydantic class + + """ # load the file to a dictionary with fsspec.open(filename, mode="r") as stream: configuration = yaml.safe_load(stream) @@ -41,7 +45,6 @@ def load_configuration_from_gcs( Returns: configuration class """ - logger.info("Loading configuration from gcs") bucket_and_dir = os.path.join(f"gs://{bucket}", gcp_dir) diff --git a/nowcasting_dataset/config/model.py b/nowcasting_dataset/config/model.py index 64ceee55..298c82aa 100644 --- a/nowcasting_dataset/config/model.py +++ b/nowcasting_dataset/config/model.py @@ -1,3 +1,4 @@ +""" Configuration model for the dataset """ from pydantic import BaseModel, Field, validator from pydantic import BaseModel, Field @@ -12,6 +13,8 @@ class General(BaseModel): + """ General pydantic model """ + name: str = Field("example", description="The name of this configuration file.") description: str = Field( "example configuration", description="Description of this confgiruation file" @@ -27,6 +30,8 @@ class General(BaseModel): class Git(BaseModel): + """ Git model """ + hash: str = Field(..., description="The git hash has for when a dataset is created.") message: str = Field(..., description="The git message has for when a dataset is created.") committed_date: datetime = Field( @@ -35,9 +40,13 @@ class Git(BaseModel): class InputData(BaseModel): - # All paths must include the protocol prefix. For local files, - # it's sufficient to just start with a '/'. For aws, start with 's3://', - # for gcp start with 'gs://'. + """ + Input data model + + All paths must include the protocol prefix. For local files, + it's sufficient to just start with a '/'. For aws, start with 's3://', + for gcp start with 'gs://'. + """ solar_pv_data_filename: str = Field( "gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc", @@ -66,6 +75,8 @@ class InputData(BaseModel): class OutputData(BaseModel): + """ Output data model """ + filepath: str = Field( "gs://solar-pv-nowcasting-data/prepared_ML_training_data/v5/", description=( @@ -76,6 +87,8 @@ class OutputData(BaseModel): class Process(BaseModel): + """ Pydantic model of how the data is processed """ + seed: int = Field(1234, description="Random seed, so experiments can be repeatable") batch_size: int = Field(32, description="the number of examples per batch") upload_every_n_batches: int = Field( @@ -100,24 +113,30 @@ class Process(BaseModel): @property def seq_len_30_minutes(self): + """ How many steps are there in 30 minute datasets """ return int((self.history_minutes + self.forecast_minutes) / 30 + 1) @property def seq_len_5_minutes(self): + """ How many steps are there in 5 minute datasets """ return int((self.history_minutes + self.forecast_minutes) / 5 + 1) @validator("history_minutes") def history_minutes_divide_by_30(cls, v): + """ Validate 'history_minutes' """ assert v % 30 == 0 # this means it also divides by 5 return v @validator("forecast_minutes") def forecast_minutes_divide_by_30(cls, v): + """ Validate 'forecast_minutes' """ assert v % 30 == 0 # this means it also divides by 5 return v class Configuration(BaseModel): + """ Configuration model for the dataset """ + general: General = General() input_data: InputData = InputData() output_data: OutputData = OutputData() @@ -125,9 +144,7 @@ class Configuration(BaseModel): git: Optional[Git] = None def set_base_path(self, base_path: str): - """Append base_path to all paths. - - Mostly used for testing.""" + """Append base_path to all paths. 
Mostly used for testing.""" base_path = Pathy(base_path) path_attrs = [ "solar_pv_data_filename", diff --git a/nowcasting_dataset/config/save.py b/nowcasting_dataset/config/save.py index 9a8914d4..7eacf893 100644 --- a/nowcasting_dataset/config/save.py +++ b/nowcasting_dataset/config/save.py @@ -1,3 +1,4 @@ +""" Save functions for the configuration model""" import yaml import logging import fsspec @@ -18,7 +19,6 @@ def save_yaml_configuration( Will save to GCP, AWS, or local, depending on the protocol suffix of filepath. """ - # make a dictionary from the configuration d = configuration.dict() if filename is None: diff --git a/nowcasting_dataset/consts.py b/nowcasting_dataset/consts.py index 0bdd295b..f1237569 100644 --- a/nowcasting_dataset/consts.py +++ b/nowcasting_dataset/consts.py @@ -1,3 +1,4 @@ +""" Constants that can be imported when needed """ from typing import Union import numpy as np import xarray as xr diff --git a/nowcasting_dataset/data_sources/__init__.py b/nowcasting_dataset/data_sources/__init__.py index 3171f7a8..cd3bdf58 100644 --- a/nowcasting_dataset/data_sources/__init__.py +++ b/nowcasting_dataset/data_sources/__init__.py @@ -1,3 +1,4 @@ +""" Various DataSources """ from nowcasting_dataset.data_sources.data_source import DataSource from nowcasting_dataset.data_sources.satellite_data_source import SatelliteDataSource from nowcasting_dataset.data_sources.pv_data_source import PVDataSource diff --git a/nowcasting_dataset/data_sources/data_source.py b/nowcasting_dataset/data_sources/data_source.py index df8f20d5..e887e527 100644 --- a/nowcasting_dataset/data_sources/data_source.py +++ b/nowcasting_dataset/data_sources/data_source.py @@ -1,3 +1,4 @@ +""" General Data Source Class """ from numbers import Number import pandas as pd import numpy as np @@ -34,7 +35,7 @@ class DataSource: convert_to_numpy: bool def __post_init__(self): - + """ Post Init """ self.sample_period_minutes = self._get_sample_period_minutes() self.history_len = self.history_minutes // self.sample_period_minutes @@ -69,7 +70,9 @@ def _get_end_dt(self, t0_dt: pd.Timestamp) -> pd.Timestamp: # ************* METHODS THAT CAN BE OVERRIDDEN **************************** def _get_sample_period_minutes(self): """ - This is the default sample period in minutes. This functions may be overwritten if + This is the default sample period in minutes. + + This functions may be overwritten if the sample period of the data source is not 5 minutes """ logging.debug( @@ -96,8 +99,16 @@ def get_batch( y_locations: Iterable[Number], ) -> List[Example]: """ - Returns: - List of Examples with data converted to Numpy data structures. + Get Batch Data + + Args: + t0_datetimes: list of timestamps for the datetime of the batches. The batch will also include data + for historic and future depending on 'history_minutes' and 'future_minutes'. + x_locations: x center batch locations + y_locations: y center batch locations + + Returns: Batch data + """ examples = [] zipped = zip(t0_datetimes, x_locations, y_locations) @@ -148,14 +159,18 @@ def get_example( @dataclass class ImageDataSource(DataSource): """ + Image Data source + Args: image_size_pixels: Size of the width and height of the image crop - returned by get_sample(). """ + returned by get_sample(). 
+ """ image_size_pixels: InitVar[int] meters_per_pixel: InitVar[int] def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): + """ Post Init """ super().__post_init__() self._square = square.Square( size_pixels=image_size_pixels, meters_per_pixel=meters_per_pixel @@ -165,6 +180,8 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): @dataclass class ZarrDataSource(ImageDataSource): """ + A General Zarr Data source + Attributes: _data: xr.DataArray data, opened by open(). x is left-to-right. @@ -180,6 +197,7 @@ class ZarrDataSource(ImageDataSource): consolidated: bool = True def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): + """ Post init """ super().__post_init__(image_size_pixels, meters_per_pixel) self._data = None if self.n_timesteps_per_batch is None: @@ -187,6 +205,7 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): @property def data(self): + """ Data property """ if self._data is None: raise RuntimeError("Please run `open()` before accessing data!") return self._data @@ -194,6 +213,18 @@ def data(self): def get_example( self, t0_dt: pd.Timestamp, x_meters_center: Number, y_meters_center: Number ) -> Example: + """ + Get Example data + + Args: + t0_dt: list of timestamps for the datetime of the batches. The batch will also include data + for historic and future depending on 'history_minutes' and 'future_minutes'. + x_meters_center: x center batch locations + y_meters_center: y center batch locations + + Returns: Example Data + + """ selected_data = self._get_time_slice(t0_dt) bounding_box = self._square.bounding_box_centered_on( x_meters_center=x_meters_center, y_meters_center=y_meters_center @@ -225,8 +256,8 @@ def get_example( return self._put_data_into_example(selected_data) def geospatial_border(self) -> List[Tuple[Number, Number]]: - """Get 'corner' coordinates for a rectangle within the boundary of the - data. + """ + Get 'corner' coordinates for a rectangle within the boundary of the data. Returns List of 2-tuples of the x and y coordinates of each corner, in OSGB projection. @@ -247,10 +278,14 @@ def _post_process_example( # ****************** METHODS THAT MUST BE OVERRIDDEN ********************** # (in addition to the DataSource methods that must be overridden) def open(self) -> None: - # We don't want to _open_data() in __init__. - # If we did that, then we couldn't copy ZarrDataSource - # instances into separate processes. Instead, - # call open() _after_ creating separate processes. + """ + Open the data + + We don't want to _open_data() in __init__. + If we did that, then we couldn't copy ZarrDataSource + instances into separate processes. Instead, + call open() _after_ creating separate processes. 
+ """ raise NotImplementedError() def _open_data(self) -> xr.DataArray: diff --git a/nowcasting_dataset/data_sources/datetime_data_source.py b/nowcasting_dataset/data_sources/datetime_data_source.py index 35a75bd2..4583864a 100644 --- a/nowcasting_dataset/data_sources/datetime_data_source.py +++ b/nowcasting_dataset/data_sources/datetime_data_source.py @@ -1,3 +1,4 @@ +""" Datetime DataSource - add hour and year features """ from nowcasting_dataset.data_sources.data_source import DataSource from nowcasting_dataset.dataset.example import Example from nowcasting_dataset import time as nd_time @@ -9,14 +10,26 @@ @dataclass class DatetimeDataSource(DataSource): - """Add hour_of_day_{sin, cos} and day_of_year_{sin, cos} features.""" + """ Add hour_of_day_{sin, cos} and day_of_year_{sin, cos} features. """ def __post_init__(self): + """ Post init """ super().__post_init__() def get_example( self, t0_dt: pd.Timestamp, x_meters_center: Number, y_meters_center: Number ) -> Example: + """ + Get example data + + Args: + t0_dt: list of timestamps + x_meters_center: x center of patches - not needed + y_meters_center: y center of patches - not needed + + Returns: batch data of datetime features + + """ del x_meters_center, y_meters_center start_dt = self._get_start_dt(t0_dt) end_dt = self._get_end_dt(t0_dt) @@ -26,7 +39,9 @@ def get_example( def get_locations_for_batch( self, t0_datetimes: pd.DatetimeIndex ) -> Tuple[List[Number], List[Number]]: + """ This method is not needed for DatetimeDataSource """ raise NotImplementedError() def datetime_index(self) -> pd.DatetimeIndex: + """ This method is not needed for DatetimeDataSource """ raise NotImplementedError() diff --git a/nowcasting_dataset/data_sources/gsp/__init__.py b/nowcasting_dataset/data_sources/gsp/__init__.py index e69de29b..2edd3635 100644 --- a/nowcasting_dataset/data_sources/gsp/__init__.py +++ b/nowcasting_dataset/data_sources/gsp/__init__.py @@ -0,0 +1 @@ +""" GSP data sources and functions """ diff --git a/nowcasting_dataset/data_sources/gsp/eso.py b/nowcasting_dataset/data_sources/gsp/eso.py index 5cb6efdd..b5e6c11f 100644 --- a/nowcasting_dataset/data_sources/gsp/eso.py +++ b/nowcasting_dataset/data_sources/gsp/eso.py @@ -1,5 +1,6 @@ """ This file has a few functions that are used to get GSP (Grid Supply Point) information from National Grid ESO. + ESO - Electricity System Operator. General information can be found here - https://data.nationalgrideso.com/system/gis-boundaries-for-gb-grid-supply-points @@ -41,13 +42,13 @@ def get_gsp_metadata_from_eso(calculate_centroid: bool = True) -> pd.DataFrame: """ Get the metadata for the gsp, from ESO. + Args: calculate_centroid: Load the shape file also, and calculate the Centroid - Returns: + Returns: Dataframe of ESO Metadata """ - logger.debug("Getting GSP shape file") # call ESO website. 
There is a possibility that this API will be replaced and its unclear if this original API will @@ -83,6 +84,7 @@ def get_gsp_shape_from_eso( ) -> gpd.GeoDataFrame: """ Get the the gsp shape file from ESO (or a local file) + Args: join_duplicates: If True, any RegionIDs which have multiple entries, will be joined together to give one entry load_local_file: Load from a local file, not from ESO @@ -90,7 +92,6 @@ def get_gsp_shape_from_eso( Returns: Geo Pandas dataframe of GSP shape data """ - logger.debug("Loading GSP shape file") local_file = f"{os.path.dirname(os.path.realpath(__file__))}/gsp_shape" @@ -178,7 +179,6 @@ def get_list_of_gsp_ids(maximum_number_of_gsp: Optional[int] = None) -> List[int Returns: list of gsp ids """ - # get a lit of gsp ids metadata = get_gsp_metadata_from_eso(calculate_centroid=False) diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index 10325943..c62a01ae 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -1,3 +1,7 @@ +""" GSP Data Source. GSP - Grid Supply Points + +Read more https://data.nationalgrideso.com/system/gis-boundaries-for-gb-grid-supply-points +""" import logging import xarray as xr @@ -72,7 +76,6 @@ def load(self): """ Load the meta data and load the GSP power data """ - # load metadata self.metadata = get_gsp_metadata_from_eso() @@ -108,11 +111,16 @@ def get_locations_for_batch( ) -> Tuple[List[Number], List[Number]]: """ Get x and y locations for a batch. Assume that all data is available for all GSP. + Random GSP are taken, and the locations of them are returned. This is useful as other datasources need to know which x,y locations to get + + Args: + t0_datetimes: list of datetimes that the batches locations have data for + Returns: list of x and y locations - """ + """ logger.debug("Getting locations for the batch") # Pick a random GSP for each t0_datetime, and then grab @@ -230,6 +238,7 @@ def _get_central_gsp_id( ) -> int: """ Get the GSP id of the central GSP from coordinates + Args: x_meters_center: the location of the gsp (x) y_meters_center: the location of the gsp (y) @@ -237,7 +246,6 @@ def _get_central_gsp_id( Returns: GSP id """ - logger.debug("Getting Central GSP") # If x_meters_center and y_meters_center have been chosen @@ -279,6 +287,7 @@ def _get_gsp_ids_in_roi( ) -> pd.Int64Index: """ Find the GSP IDs for all the GSP within the geospatial region of interest, defined by self.square. + Args: x_meters_center: center of area of interest (x coords) y_meters_center: center of area of interest (y coords) @@ -287,7 +296,6 @@ def _get_gsp_ids_in_roi( Returns: list of GSP ids that are in area of interest """ - logger.debug("Getting all gsp in ROI") # creating bounding box @@ -311,13 +319,14 @@ def _get_gsp_ids_in_roi( def _get_time_slice(self, t0_dt: pd.Timestamp) -> [pd.DataFrame]: """ Get time slice of GSP power data for give time. + Note the time is extended backwards by history lenght and forward by prediction time + Args: t0_dt: timestamp of interest Returns: pandas data frame of GSP power data """ - logger.debug(f"Getting power slice for {t0_dt}") # get start and end datetime, takening into account history and forecast length. 
@@ -338,6 +347,7 @@ def _get_time_slice(self, t0_dt: pd.Timestamp) -> [pd.DataFrame]: def drop_gsp_by_threshold(gsp_power: pd.DataFrame, meta_data: pd.DataFrame, threshold_mw: int = 20): """ Drop GSP where the max power is below a certain threshold + Args: gsp_power: GSP power data meta_data: the GSP meta data @@ -372,10 +382,9 @@ def load_solar_gsp_data( start_dt: the start datetime, which to trim the data to end_dt: the end datetime, which to trim the data to - Returns:dataframe of pv data + Returns: dataframe of pv data """ - logger.debug(f"Loading Solar GSP Data from GCS {filename} from {start_dt} to {end_dt}") # Open data - it may be quicker to open byte file first, but decided just to keep it like this at the moment gsp_power = xr.open_dataset(filename, engine="zarr") diff --git a/nowcasting_dataset/data_sources/gsp/pvlive.py b/nowcasting_dataset/data_sources/gsp/pvlive.py index 7648f0af..112d2f61 100644 --- a/nowcasting_dataset/data_sources/gsp/pvlive.py +++ b/nowcasting_dataset/data_sources/gsp/pvlive.py @@ -1,3 +1,4 @@ +""" Functions used to query the PVlive api """ from datetime import datetime, timedelta import logging import pandas as pd @@ -15,6 +16,7 @@ def load_pv_gsp_raw_data_from_pvlive( ) -> pd.DataFrame: """ Load raw pv gsp data from pvlive. Note that each gsp is loaded separately. Also the data is loaded in 30 day chunks. + Args: start: the start date for gsp data to load end: the end date for gsp data to load @@ -23,7 +25,6 @@ def load_pv_gsp_raw_data_from_pvlive( Returns: Data frame of time series of gsp data. Shows PV data for each GSP from {start} to {end} """ - # get a lit of gsp ids gsp_ids = get_list_of_gsp_ids(maximum_number_of_gsp=number_of_gsp) diff --git a/nowcasting_dataset/data_sources/nwp_data_source.py b/nowcasting_dataset/data_sources/nwp_data_source.py index ca24782d..b2ff3eea 100644 --- a/nowcasting_dataset/data_sources/nwp_data_source.py +++ b/nowcasting_dataset/data_sources/nwp_data_source.py @@ -1,3 +1,4 @@ +""" NWP Data Source """ from nowcasting_dataset.data_sources.data_source import ZarrDataSource from nowcasting_dataset.dataset.example import Example, to_numpy from nowcasting_dataset import utils @@ -57,6 +58,8 @@ @dataclass class NWPDataSource(ZarrDataSource): """ + NWP Data Source (Numerical Weather Predictions) + Args (for init): filename: The base path in which we find '2018_1-6', etc. @@ -86,6 +89,14 @@ class NWPDataSource(ZarrDataSource): meters_per_pixel: InitVar[int] = 2_000 def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): + """ + Post init + + Args: + image_size_pixels: number of pixels in image + meters_per_pixel: how many meteres for each pixel + + """ super().__post_init__(image_size_pixels, meters_per_pixel) n_channels = len(self.channels) self._shape_of_example = ( @@ -96,10 +107,14 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): ) def open(self) -> None: - # We don't want to open_sat_data in __init__. - # If we did that, then we couldn't copy NWPDataSource - # instances into separate processes. Instead, - # call open() _after_ creating separate processes. + """ + Open NWP data + + We don't want to open_sat_data in __init__. + If we did that, then we couldn't copy NWPDataSource + instances into separate processes. Instead, + call open() _after_ creating separate processes. 
+ """ data = self._open_data() self._data = data["UKV"].sel(variable=list(self.channels)) @@ -109,7 +124,17 @@ def get_batch( self, t0_datetimes: pd.DatetimeIndex, x_locations: Iterable[Number], y_locations: Iterable[Number], ) -> List[Example]: + """ + Get batch data + + Args: + t0_datetimes: list of timestamps + x_locations: list of x locations, where the batch data is for + y_locations: list of y locations, where the batch data is for + + Returns: batch data + """ # Lazily select time slices. selections = [] for t0_dt in t0_datetimes[: self.n_timesteps_per_batch]: @@ -168,12 +193,20 @@ def _put_data_into_example(self, selected_data: xr.DataArray) -> Example: ) def _get_time_slice(self, t0_dt: pd.Timestamp) -> xr.DataArray: - """Select the numerical weather predictions for a single time slice. + """ + Select the numerical weather predictions for a single time slice. Note that this function does *not* resample from hourly to 5 minutely. Resampling would be very expensive if done on the whole geographical extent of the NWP data! So resampling is done in - _post_process_example().""" + _post_process_example(). + + Args: + t0_dt: the time slice is around t0_dt. + + Returns: Slice of data + + """ start_dt = self._get_start_dt(t0_dt) end_dt = self._get_end_dt(t0_dt) @@ -222,8 +255,14 @@ def datetime_index(self) -> pd.DatetimeIndex: def open_nwp(filename: str, consolidated: bool) -> xr.Dataset: """ + Open the NWP data + Args: - filename must start with 'gs://' if it's on GCP. + filename: filename must start with 'gs://' if it's on GCP. + consolidated: consolidate the zarr file? + + Returns: nwp data + """ _LOG.debug("Opening NWP data: %s", filename) utils.set_fsspec_for_multiprocess() diff --git a/nowcasting_dataset/data_sources/pv_data_source.py b/nowcasting_dataset/data_sources/pv_data_source.py index 32af2120..c25b1226 100644 --- a/nowcasting_dataset/data_sources/pv_data_source.py +++ b/nowcasting_dataset/data_sources/pv_data_source.py @@ -1,3 +1,4 @@ +""" PV Data Source """ from nowcasting_dataset.consts import ( PV_SYSTEM_ID, PV_SYSTEM_ROW_NUMBER, @@ -35,6 +36,8 @@ @dataclass class PVDataSource(ImageDataSource): + """ PV Data Source """ + filename: Union[str, Path] metadata_filename: Union[str, Path] start_dt: Optional[datetime.datetime] = None @@ -48,12 +51,16 @@ class PVDataSource(ImageDataSource): get_center: bool = True def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): + """ Post Init """ super().__post_init__(image_size_pixels, meters_per_pixel) seed = torch.initial_seed() self.rng = np.random.default_rng(seed=seed) self.load() def load(self): + """ + Load metadata and pv power + """ self._load_metadata() self._load_pv_power() if self.load_azimuth_and_elevation: @@ -180,9 +187,12 @@ def _get_all_pv_system_ids_in_roi( y_meters_center: Number, pv_system_ids_with_data_for_timeslice: pd.Int64Index, ) -> pd.Int64Index: - """Find the PV system IDs for all the PV systems within the geospatial - region of interest, defined by self.square.""" + """ + Find the PV system IDs for all the PV systems within the geospatial + region of interest, defined by self.square.
+ """ logger.debug(f"Getting PV example data for {x_meters_center} and {y_meters_center}") bounding_box = self._square.bounding_box_centered_on( @@ -205,7 +215,18 @@ def get_example( self, t0_dt: pd.Timestamp, x_meters_center: Number, y_meters_center: Number ) -> Example: + """ + Get Example data for PV data + + Args: + t0_dt: the t0 datetime of the example. The example will also include data + for historic and future depending on 'history_minutes' and 'future_minutes'. + x_meters_center: x center batch locations + y_meters_center: y center batch locations Returns: Example data + + """ logger.debug("Getting PV example data") ( @@ -288,7 +309,6 @@ def get_locations_for_batch( Returns: x_locations, y_locations. Each has one entry per t0_datetime. Locations are in OSGB coordinates. """ - # Set this up as a separate function, so we can cache the result! @functools.cache # functools.cache requires Python >= 3.9 def _get_pv_system_ids(t0_datetime: pd.Timestamp) -> pd.Int64Index: @@ -323,7 +343,6 @@ def _calculate_azimuth_and_elevation(self): """ Calculate the azimuth and elevation angles for each datestamp, for each pv system. """ - logger.debug("Calculating azimuth and elevation angles") self.pv_azimuth, self.pv_elevation = calculate_azimuth_and_elevation_all_pv_systems( @@ -336,8 +355,14 @@ def calculate_azimuth_and_elevation_all_pv_systems( ) -> (pd.Series, pd.Series): """ Calculate the azimuth and elevation angles for each datestamp, for each pv system. - """ + Args: + datestamps: list of timestamps for when to collect data for + pv_metadata: pv metadata, so we know where to collect data for + + Returns: Azimuth and Elevation data + + """ logger.debug( f"Will be calculating for {len(datestamps)} datestamps and {len(pv_metadata)} pv systems" ) @@ -395,12 +420,16 @@ def load_solar_pv_data_from_gcs( from_gcs: bool = True, ) -> pd.DataFrame: """ - Load solar pv data from gcs (althought there is an option to load from loca - for testing) - @param filename: filename of file to be loaded - @param start_dt: the start datetime, which to trim the data to - @param end_dt: the end datetime, which to trim the data to - @param from_gcs: option to laod from gcs, or form local file - @return: dataframe of pv data + Load solar pv data from gcs (although there is an option to load from local - for testing) + + Args: + filename: filename of file to be loaded + start_dt: the start datetime, which to trim the data to + end_dt: the end datetime, which to trim the data to + from_gcs: option to load from gcs, or from local file + + Returns: Solar PV data + """ gcs = gcsfs.GCSFileSystem(access="read_only") diff --git a/nowcasting_dataset/data_sources/satellite_data_source.py b/nowcasting_dataset/data_sources/satellite_data_source.py index e1910448..77650a52 100644 --- a/nowcasting_dataset/data_sources/satellite_data_source.py +++ b/nowcasting_dataset/data_sources/satellite_data_source.py @@ -1,3 +1,4 @@ +""" Satellite Data Source """ from nowcasting_dataset.data_sources.data_source import ZarrDataSource from nowcasting_dataset.dataset.example import Example, to_numpy from nowcasting_dataset import utils @@ -62,8 +63,9 @@ @dataclass class SatelliteDataSource(ZarrDataSource): """ - Args: - filename: Must start with 'gs://' if on GCP. + Satellite Data Source + + filename: Must start with 'gs://' if on GCP.
""" filename: str = None @@ -73,6 +75,7 @@ class SatelliteDataSource(ZarrDataSource): normalise: bool = True def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): + """ Post Init """ super().__post_init__(image_size_pixels, meters_per_pixel) self._cache = {} n_channels = len(self.channels) @@ -84,10 +87,14 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): ) def open(self) -> None: - # We don't want to open_sat_data in __init__. - # If we did that, then we couldn't copy SatelliteDataSource - # instances into separate processes. Instead, - # call open() _after_ creating separate processes. + """ + Open Satellite data + + We don't want to open_sat_data in __init__. + If we did that, then we couldn't copy SatelliteDataSource + instances into separate processes. Instead, + call open() _after_ creating separate processes. + """ self._data = self._open_data() self._data = self._data.sel(variable=list(self.channels)) @@ -100,6 +107,24 @@ def get_batch( x_locations: Iterable[Number], y_locations: Iterable[Number], ) -> List[Example]: + """ + Get batch data + + Load the first _n_timesteps_per_batch concurrently. This + loads the timesteps from disk concurrently, and fills the + cache. If we try loading all examples + concurrently, then SatelliteDataSource will try reading from + empty caches, and things are much slower! + + Args: + t0_datetimes: list of timestamps for the datetime of the batches. The batch will also include data + for historic and future depending on 'history_minutes' and 'future_minutes'. + x_locations: x center batch locations + y_locations: y center batch locations + + Returns: Batch data + + """ # Load the first _n_timesteps_per_batch concurrently. This # loads the timesteps from disk concurrently, and fills the # cache. 
If we try loading all examples diff --git a/nowcasting_dataset/data_sources/topographic_data_source.py b/nowcasting_dataset/data_sources/topographic_data_source.py index 39940943..20902e27 100644 --- a/nowcasting_dataset/data_sources/topographic_data_source.py +++ b/nowcasting_dataset/data_sources/topographic_data_source.py @@ -1,3 +1,4 @@ +""" Topological DataSource """ from nowcasting_dataset.data_sources.data_source import ImageDataSource from nowcasting_dataset.dataset.example import Example from nowcasting_dataset.consts import TOPOGRAPHIC_DATA @@ -41,6 +42,7 @@ class TopographicDataSource(ImageDataSource): normalize: bool = True def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): + """ Post init """ super().__post_init__(image_size_pixels, meters_per_pixel) self._shape_of_example = ( image_size_pixels, @@ -70,7 +72,6 @@ def get_example( Returns: Example containing topographic data for the selected area """ - bounding_box = self._square.bounding_box_centered_on( x_meters_center=x_meters_center, y_meters_center=y_meters_center ) @@ -126,8 +127,7 @@ def _post_process_example( self, selected_data: xr.DataArray, t0_dt: pd.Timestamp ) -> xr.DataArray: """ - Post process the topographical data, removing an extra dim and optionally - normalizing + Post process the topographical data, removing an extra dim and optionally normalizing Args: selected_data: DataArray containing the topographic data diff --git a/nowcasting_dataset/dataset/__init__.py b/nowcasting_dataset/dataset/__init__.py index e69de29b..5e8bd110 100644 --- a/nowcasting_dataset/dataset/__init__.py +++ b/nowcasting_dataset/dataset/__init__.py @@ -0,0 +1 @@ +""" Data objects """ diff --git a/nowcasting_dataset/dataset/batch.py b/nowcasting_dataset/dataset/batch.py index fcd47f7d..977d88e9 100644 --- a/nowcasting_dataset/dataset/batch.py +++ b/nowcasting_dataset/dataset/batch.py @@ -1,3 +1,4 @@ +""" batch functions """ from typing import List, Optional, Union import logging @@ -27,6 +28,7 @@ def write_batch_locally(batch: List[Example], batch_i: int, path: Path): """ Write a batch to a locally file + Args: batch: A batch of data batch_i: The number of the batch @@ -196,8 +198,13 @@ def batch_to_dataset(batch: List[Example]) -> xr.Dataset: def coord_to_range( da: xr.DataArray, dim: str, prefix: Optional[str], dtype=np.int32 ) -> xr.DataArray: - # TODO: Actually, I think this is over-complicated? I think we can - # just strip off the 'coord' from the dimension. + """ + TODO + + TODO: Actually, I think this is over-complicated? I think we can + just strip off the 'coord' from the dimension. 
+ + """ coord = da[dim] da[dim] = np.arange(len(coord), dtype=dtype) if prefix is not None: diff --git a/nowcasting_dataset/dataset/datamodule.py b/nowcasting_dataset/dataset/datamodule.py index 2b8efadd..0817f79b 100644 --- a/nowcasting_dataset/dataset/datamodule.py +++ b/nowcasting_dataset/dataset/datamodule.py @@ -1,3 +1,4 @@ +""" Data Modules """ from typing import Union, Optional, Iterable, Dict, Callable from pathlib import Path import pandas as pd @@ -24,6 +25,8 @@ @dataclass class NowcastingDataModule(pl.LightningDataModule): """ + Nowcasting Data Module, used to make batches + Attributes (additional to the dataclass attributes): pv_data_source: PVDataSource sat_data_source: SatelliteDataSource @@ -79,6 +82,7 @@ class NowcastingDataModule(pl.LightningDataModule): skip_n_test_batches: int = 0 # number of test batches to skip def __post_init__(self): + """ Post Init """ super().__init__() self.history_len_30_minutes = self.history_minutes // 30 @@ -97,7 +101,7 @@ def __post_init__(self): self.prefetch_factor = 2 # Set to default when not using multiprocessing. def prepare_data(self) -> None: - # Satellite data + """ Prepare all datasources """ n_timesteps_per_batch = self.batch_size // self.n_samples_per_timestep self.sat_data_source = data_sources.SatelliteDataSource( @@ -286,7 +290,6 @@ def _n_batches_per_epoch_per_worker(self, n_batches_per_epoch: int) -> int: def _split_data(self): """Sets self.train_t0_datetimes and self.val_t0_datetimes.""" - logger.debug("Going to split data") self._check_has_prepared_data() @@ -304,15 +307,19 @@ def _split_data(self): ) def train_dataloader(self) -> torch.utils.data.DataLoader: + """ Train dataloader """ return torch.utils.data.DataLoader(self.train_dataset, **self._common_dataloader_params()) def val_dataloader(self) -> torch.utils.data.DataLoader: + """ Validation dataloader """ return torch.utils.data.DataLoader(self.val_dataset, **self._common_dataloader_params()) def test_dataloader(self) -> torch.utils.data.DataLoader: + """ Test dataloader """ return torch.utils.data.DataLoader(self.test_dataset, **self._common_dataloader_params()) def contiguous_dataloader(self) -> torch.utils.data.DataLoader: + """ Get continours dataloader TODO this is not needed anymore?""" if self.contiguous_dataset is None: pv_data_source = deepcopy(self.pv_data_source) pv_data_source.random_pv_system_for_given_location = False @@ -351,7 +358,8 @@ def _common_dataloader_params(self) -> Dict: def _get_datetimes( self, interpolate_for_30_minute_data: bool = False, adjust_for_sequence_length: bool = True ) -> pd.DatetimeIndex: - """Compute the datetime index. + """ + Compute the datetime index. interpolate_for_30_minute_data: If True, 1. all datetimes from source will be interpolated to 5 min intervals, @@ -364,7 +372,8 @@ def _get_datetimes( This deals with a mixture of data sources that have 5 mins and 30 min datatime. Returns the intersection of the datetime indicies of all the - data_sources, filtered by daylight hours.""" + data_sources, filtered by daylight hours. 
+ """ logger.debug("Get the datetimes") self._check_has_prepared_data() diff --git a/nowcasting_dataset/dataset/datasets.py b/nowcasting_dataset/dataset/datasets.py index 8220233c..5c58ef32 100644 --- a/nowcasting_dataset/dataset/datasets.py +++ b/nowcasting_dataset/dataset/datasets.py @@ -1,3 +1,4 @@ +""" Dataset and functions""" import pandas as pd from numbers import Number from typing import List, Tuple, Callable, Union, Optional @@ -89,7 +90,9 @@ class NetCDFDataset(torch.utils.data.Dataset): - """Loads data saved by the `prepare_ml_training_data.py` script. + """ + Loads data saved by the `prepare_ml_training_data.py` script. + Moved from predict_pv_yield """ @@ -105,6 +108,7 @@ def __init__( forecast_minutes: Optional[int] = None, ): """ + Netcdf Dataset Args: n_batches: Number of batches available on disk. @@ -117,8 +121,8 @@ def __init__( history_minutes: How many past minutes of data to use, if subsetting the batch forecast_minutes: How many future minutes of data to use, if reducing the amount of forecast time configuration: configuration object + cloud: which cloud is used, can be "gcp", "aws" or "local". """ - self.n_batches = n_batches self.src_path = src_path self.tmp_path = tmp_path @@ -157,12 +161,14 @@ def __init__( os.mkdir(self.tmp_path) def per_worker_init(self, worker_id: int): + """ Function called by a worker """ if self.cloud == "gcp": self.gcs = gcsfs.GCSFileSystem() elif self.cloud == "aws": self.s3_resource = boto3.resource("s3") def __len__(self): + """ Length of dataset """ return self.n_batches def __getitem__(self, batch_idx: int) -> example.Example: @@ -248,6 +254,7 @@ class NowcastingDataset(torch.utils.data.IterableDataset): batch_index: int = 0 def __post_init__(self): + """ Post Init """ super().__init__() self._per_worker_init_has_run = False self._n_timesteps_per_batch = self.batch_size // self.n_samples_per_timestep @@ -266,8 +273,11 @@ def __post_init__(self): _LOG.warning(f"Will be skipping {self.skip_batch_index}, is this correct?") def per_worker_init(self, worker_id: int) -> None: - """Called by worker_init_fn on each copy of NowcastingDataset after - the worker process has been spawned.""" + """ + Called by worker_init_fn on each copy of NowcastingDataset + + This happens after the worker process has been spawned. + """ # Each worker must have a different seed for its random number gen. # Otherwise all the workers will output exactly the same data! 
self.worker_id = worker_id @@ -402,7 +412,6 @@ def subselect_data( Returns: Example with only data between [t0 - history_minutes, t0 + forecast_minutes] remaining """ - _LOG.debug( f"Select sub data with new historic minutes of {history_minutes} " f"and forecast minutes if {forecast_minutes}" diff --git a/nowcasting_dataset/dataset/example.py b/nowcasting_dataset/dataset/example.py index 0fee3032..b49bb7eb 100644 --- a/nowcasting_dataset/dataset/example.py +++ b/nowcasting_dataset/dataset/example.py @@ -1,3 +1,4 @@ +""" Example Data Class """ from typing import TypedDict, List import pandas as pd @@ -106,7 +107,6 @@ def xr_to_example(batch_xr: xr.core.dataset.Dataset, required_keys: List[str]) - Returns: Example object of the xarray data """ - batch = Example( sat_datetime_index=batch_xr.sat_time_coords, nwp_target_time=batch_xr.nwp_time_coords, @@ -121,6 +121,9 @@ def xr_to_example(batch_xr: xr.core.dataset.Dataset, required_keys: List[str]) - def to_numpy(example: Example) -> Example: + """ + Change items in Example to numpy objects + """ for key, value in example.items(): if isinstance(value, xr.DataArray): # TODO: Use to_numpy() or as_numpy(), introduced in xarray v0.19? diff --git a/nowcasting_dataset/dataset/split/method.py b/nowcasting_dataset/dataset/split/method.py index df8164b8..66eca857 100644 --- a/nowcasting_dataset/dataset/split/method.py +++ b/nowcasting_dataset/dataset/split/method.py @@ -1,3 +1,4 @@ +""" Methods for splitting data into train, validation and test """ from typing import List, Tuple import numpy as np @@ -45,7 +46,6 @@ def split_method( Returns: train, validation and test datetimes """ - # find all the unique periods (dates, weeks, e.t.c) datetimes_period = pd.to_datetime(datetimes.to_period(freq).to_timestamp()) unique_periods_in_dataset = datetimes_period.unique() diff --git a/nowcasting_dataset/dataset/split/model.py b/nowcasting_dataset/dataset/split/model.py index c633245c..1f319c23 100644 --- a/nowcasting_dataset/dataset/split/model.py +++ b/nowcasting_dataset/dataset/split/model.py @@ -1,15 +1,19 @@ +""" Model for splitting data """ from typing import List from pydantic import BaseModel, validator class TrainValidationTestSpecific(BaseModel): + """ Class on how to specifically split the data into train, validation and test. 
""" + train: List[str] validation: List[str] test: List[str] @validator("train") def train_validation_test(cls, v, values): + """ Make sure there is no overlap for the train data """ for vv in ["test", "validation"]: if vv in values.keys(): overlap = [period for period in v if period in values[vv]] @@ -20,6 +24,7 @@ def train_validation_test(cls, v, values): @validator("validation") def validation_overlap(cls, v, values): + """ Make sure there is no overlap for the validation data """ for vv in ["test", "train"]: if vv in values.keys(): overlap = [period for period in v if period in values[vv]] @@ -30,6 +35,7 @@ def validation_overlap(cls, v, values): @validator("test") def test_overlap(cls, v, values): + """ Make sure there is no overlap for the test data """ for vv in ["validation", "train"]: if vv in values.keys(): overlap = [period for period in v if period in values[vv]] diff --git a/nowcasting_dataset/dataset/split/split.py b/nowcasting_dataset/dataset/split/split.py index 1fc3f859..1252aadf 100644 --- a/nowcasting_dataset/dataset/split/split.py +++ b/nowcasting_dataset/dataset/split/split.py @@ -16,6 +16,8 @@ class SplitMethod(Enum): + """ Different split methods """ + DAY = "day" DAY_RANDOM = "day_random" DAY_SPECIFIC = "day_specific" @@ -47,7 +49,6 @@ def split_data( Returns: train, validation and test dataset """ - logger.info(f"Splitting data with method {method}") datetimes = pd.DatetimeIndex(datetimes) diff --git a/nowcasting_dataset/dataset/validate.py b/nowcasting_dataset/dataset/validate.py index 7d4c68b1..fde00c0f 100644 --- a/nowcasting_dataset/dataset/validate.py +++ b/nowcasting_dataset/dataset/validate.py @@ -1,6 +1,4 @@ -""" -A class to validate the prepare ml dataset -""" +""" A class to validate the prepare ml dataset """ from typing import Union import numpy as np @@ -51,7 +49,6 @@ def __init__( batches: Dataset that needs validating configuration: Configuration file """ - self.batches = batches self.configuration = configuration @@ -102,7 +99,13 @@ class FakeDataset(torch.utils.data.Dataset): """Fake dataset.""" def __init__(self, configuration: Configuration, length: int = 10): + """ + Init + Args: + configuration: configuration object + length: length of dataset + """ self.batch_size = configuration.process.batch_size self.seq_length_5 = ( configuration.process.seq_len_5_minutes @@ -117,13 +120,23 @@ def __init__(self, configuration: Configuration, length: int = 10): self.length = length def __len__(self): + """ Number of pieces of data """ return self.length def per_worker_init(self, worker_id: int): + """ Not needed """ pass def __getitem__(self, idx): + """ + Get item, use for iter and next method + + Args: + idx: batch index + + Returns: Dictionary of random data + """ x = { "sat_data": torch.randn( self.batch_size, @@ -216,6 +229,7 @@ def validate_example( ): """ Validate the size and shape of the data + Args: data: Typed dictionary of the data seq_len_30_minutes: the length of the sequence for 30 minutely data @@ -228,7 +242,6 @@ def validate_example( n_gsp_per_example: the number gsp systems with nan padding batch: if this example class is a batch or not """ - n_gsp_id = data[GSP_ID].shape[-1] assert ( n_gsp_id == n_gsp_per_example @@ -345,7 +358,6 @@ def validate_batch_from_configuration(data: Example, configuration: Configuratio configuration: confgiruation of the data """ - validate_example( data=data, seq_len_30_minutes=configuration.process.seq_len_30_minutes, diff --git a/nowcasting_dataset/geospatial.py b/nowcasting_dataset/geospatial.py index 
00745191..3955602a 100644 --- a/nowcasting_dataset/geospatial.py +++ b/nowcasting_dataset/geospatial.py @@ -1,3 +1,4 @@ +""" Geospatial functions """ import pandas as pd import pyproj from numbers import Number @@ -21,27 +22,35 @@ class Transformers: """ - Class to store transformation from one Grid to another. Its good to make this only once, but need the + Class to store transformation from one Grid to another. + + Its good to make this only once, but need the option of updating them, due to out of data grids. """ def __init__(self): - + """ Init """ self._osgb_to_lat_lon = None self._lat_lon_to_osgb = None self.make_transformers() def make_transformers(self): - # Nice to only make these once, as it makes calling the functions below quicker + """ + Make transformers + + Nice to only make these once, as it makes calling the functions below quicker + """ self._osgb_to_lat_lon = pyproj.Transformer.from_crs(crs_from=OSGB, crs_to=WGS84) self._lat_lon_to_osgb = pyproj.Transformer.from_crs(crs_from=WGS84, crs_to=OSGB) @property def osgb_to_lat_lon(self): + """ OSGB to lat-lon property """ return self._osgb_to_lat_lon @property def lat_lon_to_osgb(self): + """ lat-lon to OSGB property """ return self._lat_lon_to_osgb @@ -50,10 +59,7 @@ def lat_lon_to_osgb(self): def download_grids(): - """ - The transformer grid sometimes need updating - """ - + """ The transformer grid sometimes need updating """ pyproj.transformer.TransformerGroup(crs_from=OSGB, crs_to=WGS84).download_grids(verbose=True) pyproj.transformer.TransformerGroup(crs_from=WGS84, crs_to=OSGB).download_grids(verbose=True) @@ -61,20 +67,29 @@ def download_grids(): def osgb_to_lat_lon(x: Number, y: Number) -> Tuple[Number, Number]: - """Returns 2-tuple of latitude (north-south), longitude (east-west). + """ + Change OSGB coordinates to lat, lon Args: - x, y: Location in Ordnance Survey GB 1936, also known as - British National Grid, coordinates. + x: osgb east-west + y: osgb north-south + + Return: 2-tuple of latitude (north-south), longitude (east-west). + """ return transformers.osgb_to_lat_lon.transform(x, y) def lat_lon_to_osgb(lat: Number, lon: Number) -> Tuple[Number, Number]: - """Returns 2-tuple of x (east-west), y (north-south). + """ + Change lat, lon to a OSGB coordinates Args: - lat, lon: Location is WGS84 coordinates. + lat: latitude + lon: longitude + + Return: 2-tuple of x (east-west), y (north-south). + """ return transformers.lat_lon_to_osgb.transform(lat, lon) @@ -96,7 +111,6 @@ def calculate_azimuth_and_elevation_angle( have been calculate. """ - # get the solor position solpos = pvlib.solarposition.get_solarposition(datestamps, latitude, longitude) diff --git a/nowcasting_dataset/square.py b/nowcasting_dataset/square.py index b8c77fc0..2624261e 100644 --- a/nowcasting_dataset/square.py +++ b/nowcasting_dataset/square.py @@ -1,3 +1,4 @@ +""" Square objects """ from typing import NamedTuple, Union from numbers import Number @@ -5,6 +6,8 @@ class BoundingBox(NamedTuple): + """ Bounding box tuple """ + top: Union[Number, float] bottom: Union[Number, float] left: Union[Number, float] @@ -12,9 +15,16 @@ class BoundingBox(NamedTuple): class Square: - """"Class for computing bounding box for satellite imagery.""" + """ Class for computing bounding box for satellite imagery. 
""" def __init__(self, size_pixels: int, meters_per_pixel: Number): + """ + Init + + Args: + size_pixels: number of pixels + meters_per_pixel: how many meters for each pixel + """ self.size_pixels = size_pixels size_meters = size_pixels * meters_per_pixel self._half_size_meters = size_meters / 2 @@ -22,6 +32,16 @@ def __init__(self, size_pixels: int, meters_per_pixel: Number): def bounding_box_centered_on( self, x_meters_center: Number, y_meters_center: Number ) -> BoundingBox: + """ + Get bounding box from a centre + + Args: + x_meters_center: x center of the bounding box + y_meters_center: y center of the bounding box + + Returns: Bounding box + + """ return BoundingBox( top=y_meters_center + self._half_size_meters, bottom=y_meters_center - self._half_size_meters, @@ -33,6 +53,7 @@ def bounding_box_centered_on( def get_bounding_box_mask(bounding_box: BoundingBox, x: Array, y: Array) -> Array: """ Get boundary box mask from x and y locations. I.e are the x,y coords in the boundaring box + Args: bounding_box: Bounding box x: x coordinates diff --git a/nowcasting_dataset/time.py b/nowcasting_dataset/time.py index 29984e86..0d63ad7a 100644 --- a/nowcasting_dataset/time.py +++ b/nowcasting_dataset/time.py @@ -1,3 +1,4 @@ +""" Time functions """ import pandas as pd import numpy as np from typing import Iterable, Tuple, List @@ -18,16 +19,18 @@ def select_daylight_datetimes( datetimes: pd.DatetimeIndex, locations: Iterable[Tuple[float, float]], ghi_threshold: float = 10 ) -> pd.DatetimeIndex: - """Returns datetimes for which the global horizontal irradiance - (GHI) is above ghi_threshold across all locations. + """ + Select only the day time datetimes Args: - dt_index: DatetimeIndex to filter. - locations: List of Tuples of x, y coordinates in OSGB projection. + datetimes: DatetimeIndex to filter. + locations: List of Tuples of x, y coordinates in OSGB projection. For example, use the four corners of the satellite imagery. - ghi_threshold: Global horizontal irradiance threshold. + ghi_threshold: Global horizontal irradiance threshold. (Watts per square meter?) + Returns: datetimes for which the global horizontal irradiance (GHI) is above ghi_threshold across all locations. + """ ghi_for_all_locations = [] for x, y in locations: @@ -50,6 +53,7 @@ def select_daylight_datetimes( def intersection_of_datetimeindexes(indexes: List[pd.DatetimeIndex]) -> pd.DatetimeIndex: + """ Get intersections of datetime indexes """ assert len(indexes) > 0 intersection = indexes[0] for index in indexes[1:]: @@ -113,6 +117,7 @@ def get_t0_datetimes( ) -> pd.DatetimeIndex: """ Get datetimes for ML learning batches. T0 refers to the time 'now'. + Args: datetimes: list of datetimes when data is available total_seq_len: total sequence length of data for ml model @@ -123,7 +128,6 @@ def get_t0_datetimes( Returns: Datetimes that ml learning data can be built around. 
""" - logger.debug("Getting t0 datetimes") start_datetimes = get_start_datetimes( @@ -138,11 +142,21 @@ def get_t0_datetimes( def timesteps_to_duration(n_timesteps: int, minute_delta: int = 5) -> pd.Timedelta: + """ Change timesteps to a time duration """ assert n_timesteps >= 0 return pd.Timedelta(n_timesteps * minute_delta, unit="minutes") def datetime_features(index: pd.DatetimeIndex) -> pd.DataFrame: + """ + Make datetime features, hour_of_day and day_of_year + + Args: + index: index of datestamps + + Returns: Example data with datetime features + + """ features = {} features["hour_of_day"] = index.hour + (index.minute / 60) features["day_of_year"] = index.day_of_year @@ -150,6 +164,15 @@ def datetime_features(index: pd.DatetimeIndex) -> pd.DataFrame: def datetime_features_in_example(index: pd.DatetimeIndex) -> Example: + """ + Make datetime features with sin and cos + + Args: + index: index of datestamps + + Returns: Example data with datetime features + + """ dt_features = datetime_features(index) dt_features["hour_of_day"] /= 24 dt_features["day_of_year"] /= 365 @@ -164,7 +187,6 @@ def fill_30_minutes_timestamps_to_5_minutes(index: pd.DatetimeIndex) -> pd.Datet """ Fill a 30 minute index with 5 minute timestamps too. Note any gaps in 30 mins are not filled """ - # resample index to 5 mins index_5 = pd.Series(0, index=index).resample("5T") diff --git a/nowcasting_dataset/utils.py b/nowcasting_dataset/utils.py index f75b765a..bb0179af 100644 --- a/nowcasting_dataset/utils.py +++ b/nowcasting_dataset/utils.py @@ -1,3 +1,4 @@ +""" utils functions """ import logging import numpy as np import pandas as pd @@ -12,15 +13,20 @@ def set_fsspec_for_multiprocess() -> None: - """Clear reference to the loop and thread. This is necessary otherwise + """ + Clear reference to the loop and thread. + + This is necessary otherwise gcsfs hangs in the ML training loop. Only required for fsspec >= 0.9.0 See https://github.com/dask/gcsfs/issues/379#issuecomment-839929801 - TODO: Try deleting this two lines to make sure this is still relevant.""" + TODO: Try deleting this two lines to make sure this is still relevant. + """ fsspec.asyn.iothread[0] = None fsspec.asyn.loop[0] = None def is_monotonically_increasing(a: Array) -> bool: + """ Check the array is monotonically increasing """ # TODO: Can probably replace with pd.Index.is_monotonic_increasing() assert a is not None assert len(a) > 0 @@ -31,6 +37,7 @@ def is_monotonically_increasing(a: Array) -> bool: def is_unique(a: Array) -> bool: + """ Check array has unique values """ # TODO: Can probably replace with pd.Index.is_unique() return len(a) == len(np.unique(a)) @@ -45,7 +52,8 @@ def scale_to_0_to_1(a: Array) -> Array: def sin_and_cos(df: pd.DataFrame) -> pd.DataFrame: - """For every column in df, creates cols for sin and cos of that col. + """ + For every column in df, creates cols for sin and cos of that col. Args: df: Input DataFrame. The values must be in the range [0, 1]. @@ -56,7 +64,8 @@ def sin_and_cos(df: pd.DataFrame) -> pd.DataFrame: Returns: A new DataFrame, with twice the number of columns as the input df. For each col in df, the output DataFrame will have a _sin - and a _cos.""" + and a _cos. 
+ """ columns = [] for col_name in df.columns: columns.append(f"{col_name}_sin") @@ -96,6 +105,7 @@ def get_netcdf_filename(batch_idx: int, add_hash: bool = False) -> Path: def pad_nans(array, pad_width) -> np.ndarray: + """ Pad nans with nans""" array = array.astype(np.float32) return np.pad(array, pad_width, constant_values=np.NaN) @@ -119,7 +129,7 @@ def pad_data( one_dimensional_arrays: list of data items that should be padded by one dimension two_dimensional_arrays: list of data tiems that should be padded in the third dimension (and more) - Returns: + Returns: Example data """ # Pad (if necessary) so returned arrays are always of size diff --git a/setup.py b/setup.py index 566c6b42..50613c42 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +""" Usual setup file for package """ from setuptools import setup, find_packages # read the contents of your README file
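For reference, the new `linters.yaml` job runs `pydocstyle --convention=google --add-ignore=D200,D210,D212,D415 nowcasting_dataset` on every push, and the same command can be run locally before committing. The function below is a hypothetical sketch (not part of the package) showing the docstring shape used throughout this diff: a short summary line, then `Args:` and `Returns:` sections.

    """ Example module docstring. """
    import pandas as pd


    def get_start_dt(t0_dt: pd.Timestamp, history_minutes: int) -> pd.Timestamp:
        """
        Get the first timestamp of an example.

        Args:
            t0_dt: the t0 timestamp of the example
            history_minutes: how many minutes of history to include

        Returns: the timestamp history_minutes before t0_dt

        """
        # pd.Timedelta is used the same way as in nowcasting_dataset/time.py
        return t0_dt - pd.Timedelta(history_minutes, unit="minutes")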
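`load_yaml_configuration` in nowcasting_dataset/config/load.py opens the file with fsspec, so the same call works for local, 's3://' and 'gs://' paths. A minimal usage sketch, assuming a configuration file exists at the placeholder path taken from the docstring above:

    from nowcasting_dataset.config.load import load_yaml_configuration

    # 's3://bucket/config.yaml' is the placeholder path from the docstring; a local
    # path such as '/path/to/config.yaml' or a 'gs://' path works the same way.
    configuration = load_yaml_configuration("s3://bucket/config.yaml")
    print(configuration.process.batch_size)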
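The new `seq_len_30_minutes` and `seq_len_5_minutes` properties on `Process` make the sequence-length arithmetic explicit. A worked example with assumed values (both must be divisible by 30, as the new validators assert):

    # Example values only; mirrors the property formulas in config/model.py.
    history_minutes, forecast_minutes = 60, 30

    seq_len_30_minutes = int((history_minutes + forecast_minutes) / 30 + 1)  # 4 half-hourly steps
    seq_len_5_minutes = int((history_minutes + forecast_minutes) / 5 + 1)  # 19 five-minute steps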
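`Square.bounding_box_centered_on`, documented above, turns a pixel count and resolution into OSGB extents around a centre point. An illustrative sketch with made-up centre coordinates:

    from nowcasting_dataset.square import Square

    # 64 pixels at 2,000 m per pixel is a 128 km square, so each edge is
    # 64,000 m from the centre.
    square = Square(size_pixels=64, meters_per_pixel=2_000)
    bbox = square.bounding_box_centered_on(x_meters_center=200_000, y_meters_center=400_000)
    # bbox.left == 136_000, bbox.right == 264_000
    # bbox.bottom == 336_000, bbox.top == 464_000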