In [217]:
%load_ext jupyter_black

python standard library

- os
- sys
- pathlib

- dataclasses

In [2]:
import os
import sys
import shutil

import gzip
import zipfile
from pathlib import Path

text = f"""
os.path = {os.path.abspath(".")}
pathlib.Path.cwd() = {Path.cwd()}
sys.path = {sys.path}
"""
print(text)
dict(line.split("=", maxsplit=1) for line in text.split("\n") if line != "")


os.path = /home/leaver2000/afit/2022-09
pathlib.Path.cwd() = /home/leaver2000/afit/2022-09
sys.path = ['/home/leaver2000/afit/2022-09', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/opt/venv/lib/python3.10/site-packages', '/opt/venv/lib/python3.10/site-packages/IPython/extensions', '/home/leaver2000/.ipython']



{'os.path ': ' /home/leaver2000/afit/2022-09',
 'pathlib.Path.cwd() ': ' /home/leaver2000/afit/2022-09',
 'sys.path ': " ['/home/leaver2000/afit/2022-09', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/opt/venv/lib/python3.10/site-packages', '/opt/venv/lib/python3.10/site-packages/IPython/extensions', '/home/leaver2000/.ipython']"}

In [3]:
cwd = Path.cwd()
assert cwd.is_dir() and not cwd.is_file()
# glob (/ɡlɑːb/) patterns specify sets of filenames with wildcard characters
# so in our working directory match all of the files or navigate to the parent and look for files ending with .py
# the glob method yields a generator function
tuple(cwd.glob("*")), tuple(cwd.parent.glob("*.py"))

((PosixPath('/home/leaver2000/afit/2022-09/lesson-2.ipynb'),
  PosixPath('/home/leaver2000/afit/2022-09/lesson-3.ipynb'),
  PosixPath('/home/leaver2000/afit/2022-09/lesson-1.ipynb')),
 (PosixPath('/home/leaver2000/afit/app.py'),))

In [4]:
import json

for file in Path.cwd().glob("*"):
    # the path object
    assert file.is_file()
    if ".ipynb" in file.suffixes:
        print(f"found a notebook {file}")
        # using the with key word will open the file as a context manager
        with file.open("r") as f:
            lines = f.readlines()
            print(f"it had {len(lines)} lines")
    # if not using the with key word
    # need to remember to close the file
    # otherwise we are loading tons of
    # unused data into memory
    f = file.open()
    f.close()

found a notebook /home/leaver2000/afit/2022-09/lesson-2.ipynb
it had 349 lines
found a notebook /home/leaver2000/afit/2022-09/lesson-3.ipynb
it had 0 lines
found a notebook /home/leaver2000/afit/2022-09/lesson-1.ipynb
it had 993 lines


In [5]:
from dataclasses import dataclass
from typing import overload

import pandas as pd
import numpy as np
from numpy.typing import NDArray

In [6]:
@dataclass(frozen=True)
class Wind:
    """
    surface wind representation
    """

    direction: float
    speed: float
    gust: float = 0.0

    def to_dict(self) -> dict[str, float]:
        return self.__dict__

    def to_numpy(self) -> np.ndarray:
        return np.asanyarray(tuple(self), dtype=np.float32)

    def to_pandas(self) -> pd.Series:
        return pd.Series(self.__dict__, dtype=np.float32)

    def __gt__(self, other: "Wind") -> bool:
        return max(self.speed, self.gust) > max(other.speed, other.gust)

    def __sub__(self, other: "Wind") -> "Wind":
        wdir, wspd, wgst = self.to_numpy() - other.to_numpy()
        wdir = (wdir + 180) % 360 - 180
        return Wind(wdir, wspd, wgst)

    def __abs__(self) -> "Wind":
        return Wind(*np.abs(tuple(self)))

    def __str__(self) -> str:
        wdir, wspd, wgst = map(int, tuple(self))
        return f"{wdir:03d}{wspd:02d}{f'G{wgst:02d}' if wgst else ''}KT"

    def __iter__(self):
        yield from self.to_dict().values()


wind1 = Wind(10, 10, 15.0)
(
    wind1,
    wind1.to_dict(),
    wind1.to_dict().values(),
    wind1.to_dict().keys(),
    str(wind1),
    wind1.to_numpy(),
    wind1.to_pandas(),
)

(Wind(direction=10, speed=10, gust=15.0),
 {'direction': 10, 'speed': 10, 'gust': 15.0},
 dict_values([10, 10, 15.0]),
 dict_keys(['direction', 'speed', 'gust']),
 '01010G15KT',
 array([10., 10., 15.], dtype=float32),
 direction    10.0
 speed        10.0
 gust         15.0
 dtype: float32)

In [11]:
wind2 = Wind(350, 15, 25)
delta = wind1 - wind2
delta, abs(delta)

(Wind(direction=20.0, speed=-5.0, gust=-10.0),
 Wind(direction=20.0, speed=5.0, gust=10.0))

In [8]:
abs(delta), max([wind1, wind2]), min([wind1, wind2])

(Wind(direction=5.0, speed=5.0, gust=10.0),
 Wind(direction=15, speed=15, gust=25),
 Wind(direction=10, speed=10, gust=15.0))

In [9]:
print(
    f"the diff = {abs(delta)}\nthe max = {max([wind1,wind2])}\nthe min = {min([wind1,wind2])}"
)

the diff = 00505G10KT
the max = 01515G25KT
the min = 01010G15KT


# enums.Enums

In [11]:
from enum import Enum, auto
import pandas as pd
import urllib.parse
import requests

class URLEnum(str, Enum):
    """
    Enum with URL encoding support

    def urlencode(query, doseq=False, safe='', encoding=None, errors=None, quote_via=quote_plus):
    Encode a dict or sequence of two-element tuples into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The components of a query arg may each be either a string or a bytes type.

    The safe, encoding, and errors parameters are passed down to the function
    specified by quote_via (encoding and errors only if a component is a str).
    """

    __base_url__ = ""

    def _generate_next_value_(name: str, *_):
        return name

    @classmethod
    @property
    def __query_key__(cls):
        return cls.__name__

    @classmethod
    @property
    def __urlencoding__(cls) -> dict:
        """
        hello
        """
        return {}

    @classmethod
    @property
    def values(cls):
        return cls._member_names_

    @classmethod
    def select(cls, params: list[str]):
        return tuple((cls.__query_key__, cls[k].value) for k in params)

    @classmethod
    def items(cls):
        yield from ((cls.__query_key__, member.value) for member in cls)

    @classmethod
    def urlencode(cls, names: list[str] = None):
        return urllib.parse.urlencode(
            cls.select(names) if names else cls, **cls.__urlencoding__
        )

    @classmethod
    def url(cls, names: list[str] = None) -> str:
        return urllib.parse.quote(f"{cls.__base_url__}?{cls.urlencode(names)}")

    @classmethod
    def get(cls, names: list[str] = None):
        return requests.get(
            cls.__base_url__, params=cls.select(names) if names else None
        )


class Models(URLEnum):
    __base_url__ = os.getenv("SOME_URL", "http://www.example.com/models")
    __query_key__ = "model"

    GALWEM = auto()
    NAM = auto()
    GFS = auto()
    WRF_17K = "WRF-1.7k"




<Response [404]>

In [36]:
from enum import Enum, auto, DynamicClassAttribute
class URL2Enum(Enum):
    @DynamicClassAttribute
    def name(self) -> str:
        return self._name_
        
    GALWEM = "model", "GALWEM"
    NAM = "model", "NAM"

    @classmethod
    def items(cls):
        return (x.value for x in cls)

    @classmethod
    def filter(cls, names:list=...):
        for member in cls:
            if member._name_ in names:
                yield member.value


tuple(URL2Enum.filter(["GALWEM"]))


(('model', 'GALWEM'),)

In [198]:
tuple(Models.items())

(('model', 'GALWEM'),
 ('model', 'NAM'),
 ('model', 'GFS'),
 ('model', 'WRF-1.7k'))

In [200]:
Models.select(["GALWEM", "WRF_17K"])

(('model', 'GALWEM'), ('model', 'WRF-1.7k'))

In [203]:
Models.urlencode(), Models.urlencode(["GALWEM"])

('model=GALWEM&model=NAM&model=GFS&model=WRF-1.7k', 'model=GALWEM')

In [12]:
import numpy as np
import pandas as pd
import xarray as xr

xr, np, pd



(<module 'xarray' from '/opt/venv/lib/python3.10/site-packages/xarray/__init__.py'>,
 <module 'numpy' from '/opt/venv/lib/python3.10/site-packages/numpy/__init__.py'>,
 <module 'pandas' from '/opt/venv/lib/python3.10/site-packages/pandas/__init__.py'>)

In [15]:
a = np.arange(20)
a, a.reshape(5, 4)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19]),
 array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]]))

In [20]:
import requests

url = "https://www.ncei.noaa.gov/pub/data/normals/1981-2010/products/station/AQC00914000.normals.txt"
r = requests.get(url)
r

<Response [200]>

In [79]:
def notspace(line: str):
    return not line.isspace()


newset = True
_bucket = {}
for line in filter(notspace, r.text.split("\n")):
    if line.startswith("---"):
        continue
    try:
        key, *values = filter(notspace, line.split())
        _bucket[key] = values
    except ValueError:
        print(line)
    # print(line.split())
_bucket["Monthly"], _bucket["mly-prcp-normal"]
# pd.DataFrame([_bucket]).columns




(['JAN',
  'FEB',
  'MAR',
  'APR',
  'MAY',
  'JUN',
  'JUL',
  'AUG',
  'SEP',
  'OCT',
  'NOV',
  'DEC'],
 ['2116R',
  '2022S',
  '1840S',
  '1780R',
  '1820R',
  '1306R',
  '1274R',
  '1383R',
  '1570S',
  '2003S',
  '2036R',
  '2242R'])

In [82]:
import requests

year = 2021
url = f"https://www.ncei.noaa.gov/pub/data/ghcn/daily/by_year/{year}.csv.gz"
r = requests.get(url)
r

In [4]:
from pathlib import Path
import gzip
file = Path.cwd().parent / "ghcnd_all.tar.gz"
with gzip.GzipFile(file) as f:
    f.read()
    # print(gzip.f))
    