In [2]:
from __future__ import annotations
# %pip uninstall mediocreatbest
# Force-reinstall the `mediocreatbest` helper package straight from its gist
# every time this cell runs.
%pip install --upgrade --force-reinstall \
    mediocreatbest@git+https://gist.github.com/player1537/3457b026ed6ef6696d758517f55a58df.git
try:
    from mediocreatbest import auto
    # `opencage` is imported by the "Address to Lat/Lng" cell further down.
    %pip install opencage
except ImportError:
    # Import failed: upgrade pip, install the helper package again, and retry
    # the import.
    # NOTE(review): `opencage` is not installed on this path -- confirm whether
    # that is intentional.
    %pip install --quiet --upgrade pip
    %pip install --upgrade --force-reinstall \
        mediocreatbest@git+https://gist.github.com/player1537/3457b026ed6ef6696d758517f55a58df.git
    from mediocreatbest import auto

Defaulting to user installation because normal site-packages is not writeable
Collecting mediocreatbest@ git+https://gist.github.com/player1537/3457b026ed6ef6696d758517f55a58df.git
  Cloning https://gist.github.com/player1537/3457b026ed6ef6696d758517f55a58df.git to /tmp/pip-install-19ody0l_/mediocreatbest_c90e070350694c86a5a7c91a45d3ec24
  Running command git clone --filter=blob:none --quiet https://gist.github.com/player1537/3457b026ed6ef6696d758517f55a58df.git /tmp/pip-install-19ody0l_/mediocreatbest_c90e070350694c86a5a7c91a45d3ec24
  Resolved https://gist.github.com/player1537/3457b026ed6ef6696d758517f55a58df.git to commit e41d8d653f5889dd403fed60135cfcf0de585704
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: mediocreatbest
  Building wheel for mediocreatbest (pyproject.toml) ... [?25ldone
[?25h  Created wheel for medio

# Lib

## Config

In [4]:
#@title Config { display-mode: "form" }
# Notebook-wide settings are collected on a single mutable namespace; later
# cells attach their own sub-namespaces to it (for example ``config.app``).
config = auto.types.SimpleNamespace()

# Plain-call spelling of the IPython autocall
# ``/auto.pprint.pp config width=144``.
auto.pprint.pp(config, width=144)

namespace()


## Spatial

In [5]:
#@title Spatial
class Spatial:
    """Convert ``lat``/``lng``/``alt`` into Cartesian coordinates in kilometres.

    Calling ``Spatial(lat=..., lng=..., alt=...)`` does not create an
    instance: ``__new__`` returns an ``(x, y, z)`` tuple directly, so the class
    behaves like a keyword-only function that also namespaces its unit types.

    The Earth is treated as a sphere of radius 6,371 km, and the axes are
    arranged so that ``y`` (rather than ``z``) points at the north pole.
    """

    # Distinct float aliases so signatures document the expected unit.
    Degree = auto.typing.NewType('Degree', float)
    Radian = auto.typing.NewType('Radian', float)
    Meter = auto.typing.NewType('Meter', float)
    Kilometer = auto.typing.NewType('Kilometer', float)

    def __new__(
        cls,
        *,
        lat: Degree,
        lng: Degree,
        alt: Meter,
    ) -> tuple[Kilometer, Kilometer, Kilometer]:
        """Return ``(x, y, z)`` in kilometres for the given position.

        Args:
            lat: Latitude in degrees.
            lng: Longitude in degrees.
            alt: Altitude in metres, measured from the sphere's surface.

        Returns:
            ``(x, y, z)`` measured from the sphere's centre, with ``y`` along
            the polar axis.
        """
        # Spherical -> Cartesian conversion, thanks to
        # https://gis.stackexchange.com/a/4148 : latitude/longitude/altitude
        # are spherical coordinates, except that altitude is measured from the
        # surface instead of the centre, so the radius has to be added.
        lat_rad = auto.np.radians(lat)
        lng_rad = auto.np.radians(lng)

        # Globally-averaged Earth radius of 6,371 km, per
        # https://en.wikipedia.org/wiki/Earth_radius ; ``alt`` arrives in
        # metres, hence the division by 1000.
        radius_km = 6_371 + alt / 1000.0

        cos_lat = auto.np.cos(lat_rad)

        x = cos_lat * auto.np.cos(lng_rad) * radius_km
        # The referenced answer calls the next two values ``y`` and ``z`` (with
        # ``z`` as "up"); they are returned swapped so that ``y`` points at the
        # north pole.
        equatorial = cos_lat * auto.np.sin(lng_rad) * radius_km
        polar = auto.np.sin(lat_rad) * radius_km

        return x, polar, equatorial


## with_exit_stack

In [6]:
#@title with_exit_stack
def with_exit_stack(func):
    """Decorate ``func`` so each call receives a fresh ``ExitStack``.

    The wrapped function is invoked as ``func(*args, stack=stack, **kwargs)``
    inside a ``with ExitStack()`` block, so everything registered on ``stack``
    is cleaned up when the call returns or raises.

    Args:
        func: Callable accepting a ``stack`` keyword argument.

    Returns:
        A wrapper with the same call signature minus ``stack``. The wrapper
        keeps ``func``'s ``__name__``, ``__doc__`` and other metadata.
    """
    # Without ``wraps`` every decorated function would show up as "wrapper"
    # in tracebacks, ``help()`` output and doctest discovery.
    @auto.functools.wraps(func)
    def wrapper(*args, **kwargs):
        with auto.contextlib.ExitStack() as stack:
            return func(*args, stack=stack, **kwargs)
    return wrapper

## Location

In [7]:
#@title Location
@auto.dataclasses.dataclass(frozen=True, kw_only=True)
class Location:
    """Immutable, keyword-only latitude/longitude pair."""

    # presumably decimal degrees -- confirm against callers
    lat: float
    lng: float

    @classmethod
    @auto.mediocreatbest.doctest
    def parse(Location, s: str, /):
        r"""Build an instance from a ``"<lat>/<lng>"`` string.

        The class is received under the name ``Location`` so that the example
        below can pass ``dict`` in its place.

        >>> parse(dict, "35.4963185/-88.468932")
        {'lat': 35.4963185, 'lng': -88.468932}

        """
        # Unpacking into exactly two names rejects strings that do not
        # contain a single '/' separator.
        lat_text, lng_text = s.split('/')
        return Location(lat=float(lat_text), lng=float(lng_text))


## Unit

In [8]:
#@title Unit
@auto.functools.cache
def Unit():
    """Return the notebook's pint ``UnitRegistry``.

    The ``cache`` decorator means the registry is constructed on the first
    call only; every later call returns that same object.
    """
    return auto.pint.UnitRegistry()

# Construct the registry now so the cost is paid in this cell.
_ = Unit()

## Bounds

In [9]:
#@title Bounds
@auto.dataclasses.dataclass(frozen=True, kw_only=True)
class Bounds:
    """Immutable bounding box described by two corner locations."""

    sw: Location  # south-west corner
    ne: Location  # north-east corner

    def __post_init__(self):
        """Assert that ``sw`` lies neither north nor east of ``ne``."""
        south_west, north_east = self.sw, self.ne
        assert south_west.lat <= north_east.lat
        assert south_west.lng <= north_east.lng

## Address to Lat/Lng

In [10]:
from opencage.geocoder import OpenCageGeocode
def dms_to_decimal(dms_str):
    """Convert a degrees/minutes/seconds string to signed decimal degrees.

    The string must start with ``<deg>° <min>' <sec>'' <N|S|E|W>`` (single
    spaces, two apostrophes after the seconds).

    Args:
        dms_str: Text such as ``35° 26' 59.0'' N``.

    Returns:
        Decimal degrees as a float; negative for the ``S`` and ``W``
        hemispheres.

    Raises:
        ValueError: If ``dms_str`` does not match the expected layout.
    """
    parsed = auto.re.match(r"(\d+)° (\d+)' ([\d.]+)'' ([NSEW])", dms_str)
    if parsed is None:
        raise ValueError("Invalid DMS format")

    deg_text, min_text, sec_text, hemisphere = parsed.groups()

    # 60 minutes per degree, 3600 seconds per degree.
    magnitude = int(deg_text) + int(min_text) / 60 + float(sec_text) / 3600

    # South and west are the negative half of each axis.
    return -magnitude if hemisphere in ('S', 'W') else magnitude

# from opencage.geocoder import Op
def addr_to_coords(addr: str) -> tuple[float, float]:
    """Geocode a street address to decimal ``(lat, lng)`` via OpenCage.

    The first geocoder result is used. Its ``annotations.DMS`` strings are
    converted with ``dms_to_decimal``.

    Args:
        addr: Free-form address, e.g. ``"<street>, <city>, <state>"``.

    Returns:
        ``(latitude, longitude)`` in signed decimal degrees.

    Raises:
        IndexError: If the geocoder returns no result for ``addr``.
        ValueError: If a returned DMS string cannot be parsed.
    """
    # Function-scope import keeps this cell self-contained.
    import os

    # SECURITY: credentials should not live in the notebook. Set the
    # OPENCAGE_API_KEY environment variable to supply a key. The literal below
    # is kept only so existing runs keep working.
    # TODO: rotate this key and remove the fallback.
    api_key = os.environ.get('OPENCAGE_API_KEY') or '6cda2bcd35394349886014d2a5c6f497'
    geocoder = auto.opencage.geocoder.OpenCageGeocode(api_key)

    results = geocoder.geocode(addr)
    if not results:
        # Same exception type as the former bare ``[0]`` lookup, but with a
        # message that names the offending address.
        raise IndexError(f'OpenCage returned no results for address {addr!r}')

    dms = results[0]['annotations']['DMS']
    return (dms_to_decimal(dms['lat']), dms_to_decimal(dms['lng']))
    

# App

In [11]:
# Application settings hang off the shared ``config`` namespace.
config.app = auto.types.SimpleNamespace(state='TN')

# Dataset root; the assert stops the notebook early when the path is not
# present on this machine.
config.app.datadir = auto.pathlib.Path('/mnt/seenas2/data/model-america/data')
assert config.app.datadir.exists()

# Per-state buildings CSV, named after the configured state.
config.app.buildings = config.app.datadir.joinpath('MAv1_CSVS', f'{config.app.state}.csv')
assert config.app.buildings.exists()

# Providers

In [12]:
%%module --reuse app
#@title app.Providers { display-mode: "form" }
from __future__ import annotations
from mediocreatbest import auto
# import lib

<module 'app'>

In [13]:
def _download_providers_csv(
    *,
    href: str,
    tmp_path: auto.pathlib.Path,
    csv_path: auto.pathlib.Path,
    timeout: float | tuple[float, float],
) -> None:
    """Stream ``href`` into ``tmp_path``, then rename it to ``csv_path``."""
    with auto.contextlib.ExitStack() as stack:
        response = stack.enter_context( auto.requests.request(
            'GET',
            href,
            stream=True,
            # Without a timeout a stalled server would block the cell forever.
            timeout=timeout,
        ) )
        response.raise_for_status()

        # A missing Content-Length becomes ``None`` ("unknown length") rather
        # than a bogus total of 0.
        total = int(response.headers.get('Content-Length', 0)) or None
        pbar = stack.enter_context( auto.tqdm.auto.tqdm(
            total=total,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) )

        f = stack.enter_context( tmp_path.open('wb') )
        for chunk in response.iter_content(chunk_size=8192):
            pbar.update(len(chunk))
            f.write(chunk)

    # Rename only after the download finished, so a partial file never sits
    # at ``csv_path``.
    tmp_path.rename(csv_path)


def _read_providers_csv(csv_path: auto.pathlib.Path) -> auto.pd.DataFrame:
    """Read the provider CSV into a frame indexed and sorted by ``ProviderID``."""
    def numeric(s):
        # Empty strings and a lone '.' are read as missing values.
        return auto.np.nan if s in ('', '.') else auto.pd.to_numeric(s)

    numerics = [
        'Number of Certified Beds',
        'Average Number of Residents per Day',
        'Overall Rating',
        'Health Inspection Rating',
        'QM Rating',
        'Long-Stay QM Rating',
        'Short-Stay QM Rating',
        'Staffing Rating',
        'Reported Nurse Aide Staffing Hours per Resident per Day',
        'Reported LPN Staffing Hours per Resident per Day',
        'Reported RN Staffing Hours per Resident per Day',
        'Reported Licensed Staffing Hours per Resident per Day',
        'Reported Total Nurse Staffing Hours per Resident per Day',
        'Total number of nurse staff hours per resident per day on the weekend',
        'Registered Nurse hours per resident per day on the weekend',
        'Reported Physical Therapist Staffing Hours per Resident Per Day',
        'Total nursing staff turnover',
        'Registered Nurse turnover',
        'Number of administrators who have left the nursing home',
        'Nursing Case-Mix Index',
        'Nursing Case-Mix Index Ratio',
        'Case-Mix Nurse Aide Staffing Hours per Resident per Day',
        'Case-Mix LPN Staffing Hours per Resident per Day',
        'Case-Mix RN Staffing Hours per Resident per Day',
        'Case-Mix Total Nurse Staffing Hours per Resident per Day',
        'Case-Mix Weekend Total Nurse Staffing Hours per Resident per Day',
        'Adjusted Nurse Aide Staffing Hours per Resident per Day',
        'Adjusted LPN Staffing Hours per Resident per Day',
        'Adjusted RN Staffing Hours per Resident per Day',
        'Adjusted Total Nurse Staffing Hours per Resident per Day',
        'Adjusted Weekend Total Nurse Staffing Hours per Resident per Day',
        'Rating Cycle 1 Total Number of Health Deficiencies',
        'Rating Cycle 1 Number of Standard Health Deficiencies',
        'Rating Cycle 1 Number of Complaint Health Deficiencies',
        'Rating Cycle 1 Health Deficiency Score',
        'Rating Cycle 1 Number of Health Revisits',
        'Rating Cycle 1 Health Revisit Score',
        'Rating Cycle 1 Total Health Score',
        'Rating Cycle 2 Total Number of Health Deficiencies',
        'Rating Cycle 2 Number of Standard Health Deficiencies',
        'Rating Cycle 2 Number of Complaint Health Deficiencies',
        'Rating Cycle 2 Health Deficiency Score',
        'Rating Cycle 2 Number of Health Revisits',
        'Rating Cycle 2 Health Revisit Score',
        'Rating Cycle 2 Total Health Score',
        'Rating Cycle 3 Total Number of Health Deficiencies',
        'Rating Cycle 3 Number of Standard Health Deficiencies',
        'Rating Cycle 3 Number of Complaint Health Deficiencies',
        'Rating Cycle 3 Health Deficiency Score',
        'Rating Cycle 3 Number of Health Revisits',
        'Rating Cycle 3 Health Revisit Score',
        'Rating Cycle 3 Total Health Score',
        'Total Weighted Health Survey Score',
        'Number of Facility Reported Incidents',
        'Number of Substantiated Complaints',
        'Number of Citations from Infection Control Inspections',
        'Number of Fines',
        'Total Amount of Fines in Dollars',
        'Number of Payment Denials',
        'Total Number of Penalties',
        'Latitude',
        'Longitude',
    ]

    with auto.warnings.catch_warnings():
        # presumably raised because ``dtype`` and ``converters`` both cover
        # the numeric columns -- TODO confirm
        auto.warnings.simplefilter('ignore', auto.pd.errors.ParserWarning)

        df = auto.pd.read_csv(
            csv_path,
            dtype=str,
            na_filter=False,
            converters={
                k: numeric
                for k in numerics
            },
        )

    return (
        df
        .rename(columns={
            'CMS Certification Number (CCN)': 'ProviderID',
        })
        .set_index([
            'ProviderID',
        ])
        .sort_index()
    )


def _read_provider_geometry(geometry_path: auto.pathlib.Path):
    """Read the ``WKT`` column of ``geometry_path`` as a GeoSeries keyed by ``ProviderID``."""
    with geometry_path.open('r') as f:
        geometry = auto.pd.read_csv(
            f,
            dtype=str,
        )

    geometry = geometry.set_index([
        'ProviderID',
    ]).sort_index()

    return auto.geopandas.GeoSeries.from_wkt(geometry['WKT'])


def Providers(
    *,
    root: auto.pathlib.Path | auto.typing.Literal[...] = ...,

    csv_path: auto.pathlib.Path | auto.typing.Literal[...] = ...,
    csv_root: auto.pathlib.Path | auto.typing.Literal[...] = ...,
    csv_name: str = 'Providers.csv',
    # Previous release:
    # https://data.cms.gov/provider-data/sites/default/files/resources/f317fd60a3f5a039b35a50286697a2af_1723752312/NH_ProviderInfo_Aug2024.csv
    csv_href: str = 'https://data.cms.gov/provider-data/sites/default/files/resources/f72ba63402003123765dac9ef332260e_1726517112/NH_ProviderInfo_Sep2024.csv',

    tmp_path: auto.pathlib.Path | auto.typing.Literal[...] = ...,
    tmp_root: auto.pathlib.Path | auto.typing.Literal[...] = ...,
    tmp_name: str = 'Providers.tmp',

    geometry_path: auto.pathlib.Path | auto.typing.Literal[...] = ...,
    geometry_root: auto.pathlib.Path | auto.typing.Literal[...] = ...,
    geometry_name: str = 'Provider.geometry.csv',

    csv_timeout: float | tuple[float, float] = 60.0,
) -> auto.pd.DataFrame:
    """Load the provider table, downloading the CSV first if needed.

    Every ``*_path`` argument left at ``...`` is built as ``*_root / *_name``,
    and every ``*_root`` left at ``...`` falls back to ``root``, which itself
    defaults to the current working directory.

    Args:
        root: Default directory for all files.
        csv_path, csv_root, csv_name: Location of the cached provider CSV.
        csv_href: URL the CSV is downloaded from when it is not cached.
        tmp_path, tmp_root, tmp_name: Scratch file used during the download.
        geometry_path, geometry_root, geometry_name: Optional CSV with
            ``ProviderID`` and ``WKT`` columns.
        csv_timeout: Timeout in seconds passed to ``requests`` for the
            download.

    Returns:
        A DataFrame indexed and sorted by ``ProviderID``. The listed numeric
        columns are converted to numbers; everything else stays a string.
        When the geometry file exists the result is a ``GeoDataFrame`` with
        that geometry attached.

    Raises:
        requests.HTTPError: If the download responds with an error status.
        AssertionError: If the CSV is still missing after the download step.
    """
    if root is ...:
        root = auto.pathlib.Path.cwd()

    if csv_path is ...:
        if csv_root is ...:
            csv_root = root
        csv_path = csv_root / csv_name

    if not csv_path.exists():
        if tmp_path is ...:
            if tmp_root is ...:
                tmp_root = root
            tmp_path = tmp_root / tmp_name

        _download_providers_csv(
            href=csv_href,
            tmp_path=tmp_path,
            csv_path=csv_path,
            timeout=csv_timeout,
        )
    assert csv_path.exists(), csv_path

    df = _read_providers_csv(csv_path)

    if geometry_path is ...:
        if geometry_root is ...:
            geometry_root = root
        geometry_path = geometry_root / geometry_name

    # Geometry is optional: without the file the plain DataFrame is returned.
    if geometry_path.exists():
        df = auto.geopandas.GeoDataFrame(
            df,
            geometry=_read_provider_geometry(geometry_path),
        )

    return df

In [14]:
def scope():
    """Load the providers table and show a quick overview of it.

    Prints the frame's type, column names and dtypes, then displays the frame
    and its ``describe()`` summary with no column limit.

    Returns:
        The frame returned by ``Providers()``.
    """
    providers = Providers()

    print(type(providers))
    auto.pprint.pp(providers.columns.to_list())
    auto.pprint.pp(providers.dtypes.to_dict())

    # Lift the column cap so all columns are rendered.
    with auto.pd.option_context('display.max_columns', None):
        display(providers)
        display(providers.describe())

    return providers

providers = scope()

<class 'pandas.core.frame.DataFrame'>
['Provider Name',
 'Provider Address',
 'City/Town',
 'State',
 'ZIP Code',
 'Telephone Number',
 'Provider SSA County Code',
 'County/Parish',
 'Ownership Type',
 'Number of Certified Beds',
 'Average Number of Residents per Day',
 'Average Number of Residents per Day Footnote',
 'Provider Type',
 'Provider Resides in Hospital',
 'Legal Business Name',
 'Date First Approved to Provide Medicare and Medicaid Services',
 'Affiliated Entity Name',
 'Affiliated Entity ID',
 'Continuing Care Retirement Community',
 'Special Focus Status',
 'Abuse Icon',
 'Most Recent Health Inspection More Than 2 Years Ago',
 'Provider Changed Ownership in Last 12 Months',
 'With a Resident and Family Council',
 'Automatic Sprinkler Systems in All Required Areas',
 'Overall Rating',
 'Overall Rating Footnote',
 'Health Inspection Rating',
 'Health Inspection Rating Footnote',
 'QM Rating',
 'QM Rating Footnote',
 'Long-Stay QM Rating',
 'Long-Stay QM Rating Footnote',
 

Unnamed: 0_level_0,Provider Name,Provider Address,City/Town,State,ZIP Code,Telephone Number,Provider SSA County Code,County/Parish,Ownership Type,Number of Certified Beds,Average Number of Residents per Day,Average Number of Residents per Day Footnote,Provider Type,Provider Resides in Hospital,Legal Business Name,Date First Approved to Provide Medicare and Medicaid Services,Affiliated Entity Name,Affiliated Entity ID,Continuing Care Retirement Community,Special Focus Status,Abuse Icon,Most Recent Health Inspection More Than 2 Years Ago,Provider Changed Ownership in Last 12 Months,With a Resident and Family Council,Automatic Sprinkler Systems in All Required Areas,Overall Rating,Overall Rating Footnote,Health Inspection Rating,Health Inspection Rating Footnote,QM Rating,QM Rating Footnote,Long-Stay QM Rating,Long-Stay QM Rating Footnote,Short-Stay QM Rating,Short-Stay QM Rating Footnote,Staffing Rating,Staffing Rating Footnote,Reported Staffing Footnote,Physical Therapist Staffing Footnote,Reported Nurse Aide Staffing Hours per Resident per Day,Reported LPN Staffing Hours per Resident per Day,Reported RN Staffing Hours per Resident per Day,Reported Licensed Staffing Hours per Resident per Day,Reported Total Nurse Staffing Hours per Resident per Day,Total number of nurse staff hours per resident per day on the weekend,Registered Nurse hours per resident per day on the weekend,Reported Physical Therapist Staffing Hours per Resident Per Day,Total nursing staff turnover,Total nursing staff turnover footnote,Registered Nurse turnover,Registered Nurse turnover footnote,Number of administrators who have left the nursing home,Administrator turnover footnote,Nursing Case-Mix Index,Nursing Case-Mix Index Ratio,Case-Mix Nurse Aide Staffing Hours per Resident per Day,Case-Mix LPN Staffing Hours per Resident per Day,Case-Mix RN Staffing Hours per Resident per Day,Case-Mix Total Nurse Staffing Hours per Resident per Day,Case-Mix Weekend Total Nurse Staffing Hours per Resident per 
Day,Adjusted Nurse Aide Staffing Hours per Resident per Day,Adjusted LPN Staffing Hours per Resident per Day,Adjusted RN Staffing Hours per Resident per Day,Adjusted Total Nurse Staffing Hours per Resident per Day,Adjusted Weekend Total Nurse Staffing Hours per Resident per Day,Rating Cycle 1 Standard Survey Health Date,Rating Cycle 1 Total Number of Health Deficiencies,Rating Cycle 1 Number of Standard Health Deficiencies,Rating Cycle 1 Number of Complaint Health Deficiencies,Rating Cycle 1 Health Deficiency Score,Rating Cycle 1 Number of Health Revisits,Rating Cycle 1 Health Revisit Score,Rating Cycle 1 Total Health Score,Rating Cycle 2 Standard Health Survey Date,Rating Cycle 2 Total Number of Health Deficiencies,Rating Cycle 2 Number of Standard Health Deficiencies,Rating Cycle 2 Number of Complaint Health Deficiencies,Rating Cycle 2 Health Deficiency Score,Rating Cycle 2 Number of Health Revisits,Rating Cycle 2 Health Revisit Score,Rating Cycle 2 Total Health Score,Rating Cycle 3 Standard Health Survey Date,Rating Cycle 3 Total Number of Health Deficiencies,Rating Cycle 3 Number of Standard Health Deficiencies,Rating Cycle 3 Number of Complaint Health Deficiencies,Rating Cycle 3 Health Deficiency Score,Rating Cycle 3 Number of Health Revisits,Rating Cycle 3 Health Revisit Score,Rating Cycle 3 Total Health Score,Total Weighted Health Survey Score,Number of Facility Reported Incidents,Number of Substantiated Complaints,Number of Citations from Infection Control Inspections,Number of Fines,Total Amount of Fines in Dollars,Number of Payment Denials,Total Number of Penalties,Location,Latitude,Longitude,Geocoding Footnote,Processing Date
ProviderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1
015009,"BURNS NURSING HOME, INC.",701 MONROE STREET NW,RUSSELLVILLE,AL,35653,2563324110,290,Franklin,For profit - Corporation,57,50.0,,Medicare and Medicaid,N,"BURNS NURSING HOME, INC.",1969-09-01,,,N,,N,N,N,Resident,Yes,2.0,,2.0,,4.0,,4.0,,4.0,,4.0,,,,2.61906,0.42971,1.32259,1.75231,4.37137,3.40867,0.67566,0.01632,39.0,,18.2,,3.0,,1.36901,1.00095,2.26028,0.87774,0.66328,3.80131,3.338,2.60798,0.42789,1.317,4.35288,3.39425,2023-03-02,4.0,4.0,3.0,56.0,1.0,0.0,56.0,2019-08-21,2.0,2.0,0.0,8.0,1.0,0.0,8.0,2018-08-01,1.0,1.0,0.0,4.0,1.0,0.0,4.0,31.333,2,0,,1,23989.0,0,1,"701 MONROE STREET NW,RUSSELLVILLE,AL,35653",34.5149,-87.736,,2024-09-01
015010,COOSA VALLEY HEALTHCARE CENTER,260 WEST WALNUT STREET,SYLACAUGA,AL,35150,2562495604,600,Talladega,For profit - Corporation,85,76.9,,Medicare and Medicaid,N,COOSA VALLEY HEALTHCARE CENTER LLC,1967-01-01,PRIME HEALTH CARE ENTERPRISES,419,N,,N,Y,N,Resident,Yes,4.0,,4.0,,4.0,,2.0,,5.0,,3.0,,,,2.55324,0.82527,0.8733,1.69858,4.25182,3.37006,0.35982,0.01225,,6,,6,0.0,,1.33445,0.97568,2.20323,0.85559,0.64654,3.70535,3.25374,2.60828,0.84306,0.89213,4.34347,3.44271,2022-04-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019-06-13,1.0,1.0,0.0,4.0,1.0,0.0,4.0,2018-06-07,4.0,4.0,0.0,32.0,1.0,0.0,32.0,6.667,0,0,0.0,0,0.0,0,0,"260 WEST WALNUT STREET,SYLACAUGA,AL,35150",33.1637,-86.254,,2024-09-01
015012,HIGHLANDS HEALTH AND REHAB,380 WOODS COVE ROAD,SCOTTSBORO,AL,35768,2562183708,350,Jackson,Government - County,50,45.1,,Medicare and Medicaid,N,HH HEALTH SYSTEM - JACKSON LLC,1967-01-01,HUNTSVILLE HOSPITAL HEALTH SYSTEM,610,N,,N,Y,N,Resident,Yes,4.0,,4.0,,2.0,,1.0,,3.0,,3.0,,,,2.91853,0.59342,1.08177,1.67519,4.59373,4.0095,0.30854,0.24806,,6,,6,,6,1.35439,0.99026,2.23614,0.86837,0.6562,3.76071,3.30235,2.93756,0.59729,1.08882,4.62367,4.03564,2022-03-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019-06-06,2.0,2.0,0.0,20.0,1.0,0.0,20.0,2018-05-03,4.0,4.0,0.0,40.0,1.0,0.0,40.0,13.333,0,0,,0,0.0,0,0,"380 WOODS COVE ROAD,SCOTTSBORO,AL,35768",34.6611,-86.047,,2024-09-01
015014,EASTVIEW REHABILITATION & HEALTHCARE CENTER,7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206,2058330146,360,Jefferson,For profit - Individual,92,76.9,,Medicare and Medicaid,N,BALL HEALTHCARE EASTVIEW INC,1967-01-01,BALL HEALTHCARE SERVICES,66,N,,N,N,N,,Yes,2.0,,3.0,,3.0,,5.0,,2.0,,1.0,,,,1.65027,0.81038,0.55715,1.36754,3.01781,2.5921,0.47864,0.00847,,6,,6,,6,1.27246,0.93035,2.10087,0.81584,0.6165,3.53321,3.10258,1.76798,0.86819,0.59689,3.23306,2.77699,2023-09-13,5.0,4.0,1.0,36.0,1.0,0.0,36.0,2020-02-20,1.0,1.0,0.0,4.0,1.0,0.0,4.0,2019-01-24,2.0,2.0,0.0,20.0,1.0,0.0,20.0,22.667,1,0,0.0,0,0.0,0,0,"7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206",33.5595,-86.722,,2024-09-01
015015,PLANTATION MANOR NURSING HOME,6450 OLD TUSCALOOSA HIGHWAY,MC CALLA,AL,35111,2054776161,360,Jefferson,For profit - Individual,103,86.6,,Medicare and Medicaid,N,"C & G HEALTHCARE SERVICES, INC.",1971-07-01,,,N,,N,Y,N,Resident,Yes,2.0,,2.0,,2.0,,2.0,,2.0,,4.0,,,,2.17986,0.89774,0.41495,1.3127,3.49256,3.41406,0.17476,0.01506,27.6,,37.5,,2.0,,1.12652,0.82366,1.85994,0.72228,0.5458,3.12801,2.74677,2.63787,1.08637,0.50214,4.22637,4.13138,2020-03-05,2.0,2.0,0.0,20.0,1.0,0.0,20.0,2019-05-02,4.0,1.0,3.0,36.0,1.0,0.0,36.0,2018-04-19,7.0,7.0,0.0,28.0,1.0,0.0,28.0,26.667,0,1,,0,0.0,0,0,"6450 OLD TUSCALOOSA HIGHWAY,MC CALLA,AL,35111",33.3221,-87.034,,2024-09-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745022,WARE MEMORIAL CARE CENTER,1510 S. VAN BUREN ST.,AMARILLO,TX,79101,8063730471,860,Potter,For profit - Corporation,120,81.4,,Medicare and Medicaid,N,BAPTIST COMMUNITY SERVICES,2022-12-30,,,Y,,N,N,N,Resident,Yes,3.0,,3.0,,3.0,,3.0,,,2,4.0,,,,3.64601,1.74101,0.49008,2.23108,5.8771,5.32537,0.31404,0.01809,,6,,6,,6,1.23738,0.90471,2.04296,0.79335,0.59951,3.43582,3.01706,4.01679,1.91806,0.53992,6.47476,5.86692,2023-10-25,10.0,8.0,2.0,72.0,1.0,0.0,72.0,2022-08-18,1.0,1.0,0.0,16.0,1.0,0.0,16.0,,,,,,,,,49.6,0,0,,0,0.0,0,0,"1510 S. VAN BUREN ST.,AMARILLO,TX,79101",35.1987,-101.84,,2024-09-01
745038,TIERRA ESTE NURSING AND REHABILITATION CENTER,14300 PEBBLE HILLS BLVD,EL PASO,TX,79938,9159559998,480,El Paso,For profit - Corporation,120,80.3,,Medicare and Medicaid,N,EAST EL PASO HEALTHCARE OPERATIONS LLC,2023-06-06,,,N,,N,N,N,,Yes,,1,,1,,1,,1,,1,,1,,,1.80952,1.26518,0.16171,1.42689,3.2364,2.93501,0.17579,0.09541,,6,,6,,6,1.36251,0.9962,2.24956,0.87358,0.66013,3.78327,3.32217,,,,,,2023-05-19,,,,,,,,,,,,,,,,,,,,,,,,,2,5,,7,31752.27,0,7,"14300 PEBBLE HILLS BLVD,EL PASO,TX,79938",31.7822,-106.23,,2024-09-01
745039,MIDTOWNE MEADOWS HEALTH AND REHAB,110 DYLAN WAY,MIDLOTHIAN,TX,76065,0000001111,470,Ellis,For profit - Limited Liability company,121,51.0,,Medicare and Medicaid,N,HACO HEALTH MIDTOWNE LLC,2023-06-20,,,N,,N,N,N,Resident,Yes,,1,,1,,1,,1,,1,,1,6,6,,,,,,,,,,6,,6,,6,,,,,,,,,,,,,2023-06-20,,,,,,,,,,,,,,,,,,,,,,,,,0,0,,5,20131.64,0,5,"110 DYLAN WAY,MIDLOTHIAN,TX,76065",32.4783,-96.982,22,2024-09-01
745040,THE SARAH ROBERTS FRENCH HOME,1315 TEXAS AVE,SAN ANTONIO,TX,78201,2107364238,130,Bexar,Non profit - Corporation,60,24.9,,Medicare and Medicaid,N,SARAH ROBERTS FRENCH HOME,2022-10-13,,,N,,N,N,N,Resident,Yes,3.0,,4.0,,,2,,2,,2,1.0,12,6,6,,,,,,,,,,6,,6,,6,,,,,,,,,,,,,2023-10-13,6.0,6.0,6.0,28.0,1.0,0.0,28.0,2022-08-05,3.0,3.0,0.0,20.0,1.0,0.0,20.0,,,,,,,,,24.8,0,0,,7,35733.13,0,7,"1315 TEXAS AVE,SAN ANTONIO,TX,78201",29.4494,-98.534,,2024-09-01


Unnamed: 0,Provider Name,Provider Address,City/Town,State,ZIP Code,Telephone Number,Provider SSA County Code,County/Parish,Ownership Type,Number of Certified Beds,Average Number of Residents per Day,Average Number of Residents per Day Footnote,Provider Type,Provider Resides in Hospital,Legal Business Name,Date First Approved to Provide Medicare and Medicaid Services,Affiliated Entity Name,Affiliated Entity ID,Continuing Care Retirement Community,Special Focus Status,Abuse Icon,Most Recent Health Inspection More Than 2 Years Ago,Provider Changed Ownership in Last 12 Months,With a Resident and Family Council,Automatic Sprinkler Systems in All Required Areas,Overall Rating,Overall Rating Footnote,Health Inspection Rating,Health Inspection Rating Footnote,QM Rating,QM Rating Footnote,Long-Stay QM Rating,Long-Stay QM Rating Footnote,Short-Stay QM Rating,Short-Stay QM Rating Footnote,Staffing Rating,Staffing Rating Footnote,Reported Staffing Footnote,Physical Therapist Staffing Footnote,Reported Nurse Aide Staffing Hours per Resident per Day,Reported LPN Staffing Hours per Resident per Day,Reported RN Staffing Hours per Resident per Day,Reported Licensed Staffing Hours per Resident per Day,Reported Total Nurse Staffing Hours per Resident per Day,Total number of nurse staff hours per resident per day on the weekend,Registered Nurse hours per resident per day on the weekend,Reported Physical Therapist Staffing Hours per Resident Per Day,Total nursing staff turnover,Total nursing staff turnover footnote,Registered Nurse turnover,Registered Nurse turnover footnote,Number of administrators who have left the nursing home,Administrator turnover footnote,Nursing Case-Mix Index,Nursing Case-Mix Index Ratio,Case-Mix Nurse Aide Staffing Hours per Resident per Day,Case-Mix LPN Staffing Hours per Resident per Day,Case-Mix RN Staffing Hours per Resident per Day,Case-Mix Total Nurse Staffing Hours per Resident per Day,Case-Mix Weekend Total Nurse Staffing Hours per Resident per 
Day,Adjusted Nurse Aide Staffing Hours per Resident per Day,Adjusted LPN Staffing Hours per Resident per Day,Adjusted RN Staffing Hours per Resident per Day,Adjusted Total Nurse Staffing Hours per Resident per Day,Adjusted Weekend Total Nurse Staffing Hours per Resident per Day,Rating Cycle 1 Standard Survey Health Date,Rating Cycle 1 Total Number of Health Deficiencies,Rating Cycle 1 Number of Standard Health Deficiencies,Rating Cycle 1 Number of Complaint Health Deficiencies,Rating Cycle 1 Health Deficiency Score,Rating Cycle 1 Number of Health Revisits,Rating Cycle 1 Health Revisit Score,Rating Cycle 1 Total Health Score,Rating Cycle 2 Standard Health Survey Date,Rating Cycle 2 Total Number of Health Deficiencies,Rating Cycle 2 Number of Standard Health Deficiencies,Rating Cycle 2 Number of Complaint Health Deficiencies,Rating Cycle 2 Health Deficiency Score,Rating Cycle 2 Number of Health Revisits,Rating Cycle 2 Health Revisit Score,Rating Cycle 2 Total Health Score,Rating Cycle 3 Standard Health Survey Date,Rating Cycle 3 Total Number of Health Deficiencies,Rating Cycle 3 Number of Standard Health Deficiencies,Rating Cycle 3 Number of Complaint Health Deficiencies,Rating Cycle 3 Health Deficiency Score,Rating Cycle 3 Number of Health Revisits,Rating Cycle 3 Health Revisit Score,Rating Cycle 3 Total Health Score,Total Weighted Health Survey Score,Number of Facility Reported Incidents,Number of Substantiated Complaints,Number of Citations from Infection Control Inspections,Number of Fines,Total Amount of Fines in Dollars,Number of Payment Denials,Total Number of Penalties,Location,Latitude,Longitude,Geocoding Footnote,Processing Date
count,14817,14817,14817,14817,14817,14817,14817,14817,14817,14817,14767.0,14817.0,14817,14817,14817,14817,14817.0,14817.0,14817,14817.0,14817,14817,14817,14817,14817,14673.0,14817.0,14673.0,14817.0,14563.0,14817.0,14230.0,14817.0,12146.0,14817.0,14613.0,14817.0,14817.0,14817.0,14492.0,14492.0,14492.0,14492.0,14492.0,14492.0,14492.0,14492.0,13283.0,14817.0,12231.0,14817.0,12251.0,14817.0,14492.0,14492.0,14492.0,14492.0,14492.0,14492.0,14492.0,14466.0,14466.0,14466.0,14466.0,14466.0,14817,14760.0,14760.0,14760.0,14760.0,14760.0,14760.0,14760.0,14817,14760.0,14760.0,14760.0,14760.0,14760.0,14760.0,14760.0,14817.0,14672.0,14672.0,14672.0,14672.0,14672.0,14672.0,14672.0,14760.0,14817,14817,7830.0,14817,14817.0,14817,14817,14817,14817.0,14817.0,14817.0,14817
unique,14626,14789,5143,53,8973,14757,295,1687,13,389,2103.0,2.0,3,2,12036,6982,600.0,600.0,2,3.0,2,2,2,4,2,5.0,3.0,5.0,3.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,2.0,2.0,13864.0,13558.0,13450.0,13658.0,14031.0,13997.0,13158.0,9016.0,821.0,2.0,470.0,2.0,8.0,2.0,12795.0,12270.0,13430.0,12017.0,11387.0,13844.0,13769.0,13938.0,13557.0,13482.0,14097.0,14078.0,1109,68.0,54.0,53.0,515.0,5.0,191.0,575.0,1330,73.0,50.0,60.0,517.0,5.0,192.0,570.0,1439.0,67.0,46.0,54.0,452.0,5.0,149.0,490.0,1830.0,50,118,25.0,53,6348.0,6,51,14806,13914.0,10646.0,2.0,1
top,MILLER'S MERRY MANOR,16 VALLEY VIEW DRIVE,LOS ANGELES,TX,94541,4137868000,200,Los Angeles,For profit - Corporation,120,84.2,,Medicare and Medicaid,N,Legal Business Name Not Available,1967-01-01,,,N,,N,N,N,Resident,Yes,1.0,,2.0,,5.0,,5.0,,5.0,,1.0,,,,0.0,0.0,0.23696,1.41685,3.29255,3.04475,0.0,0.0,50.0,,50.0,,0.0,,1.314,0.96073,2.16946,0.86999,0.63663,3.64857,3.1897,0.0,0.0,0.44555,3.8901,3.16512,2023-11-16,3.0,3.0,0.0,0.0,1.0,0.0,0.0,2022-12-01,3.0,3.0,0.0,0.0,1.0,0.0,0.0,,4.0,3.0,0.0,0.0,1.0,0.0,0.0,22.667,0,0,0.0,0,0.0,0,0,"16 VALLEY VIEW DRIVE,COUNCIL BLUFFS,IA,51503",40.7702,-118.15,,2024-09-01
freq,12,3,78,1185,12,4,507,372,5742,1127,27.0,14767.0,13987,14317,539,485,4932.0,4932.0,13238,14290.0,13458,13256,14419,12087,14782,3423.0,14673.0,3671.0,14673.0,4135.0,14467.0,4609.0,14134.0,3714.0,12146.0,3462.0,13824.0,14492.0,14492.0,13.0,46.0,4.0,4.0,3.0,4.0,18.0,1070.0,221.0,13283.0,851.0,12231.0,6821.0,12251.0,5.0,5.0,5.0,5.0,7.0,4.0,4.0,13.0,46.0,4.0,3.0,4.0,126,1056.0,1301.0,5984.0,705.0,12924.0,14176.0,705.0,77,1137.0,1392.0,6292.0,878.0,13025.0,14172.0,878.0,145.0,1242.0,1452.0,7425.0,972.0,12879.0,14214.0,972.0,129.0,7904,4707,4764.0,5502,5502.0,12433,5368,3,7.0,23.0,14020.0,14817


## Associate rating with lat/lng for each provider in configured state

In [15]:
TARGET_VALUE = 'Overall Rating'

def scope():
    """Geocode every provider in the configured state and pair it with its rating.

    Providers are filtered by ``config.app.state``, so changing the configured
    state changes this cell as well.

    NOTE(review): the provider table already has ``Latitude``/``Longitude``
    columns; confirm whether re-geocoding through the API is still required.

    Returns:
        A list of ``{'coords': (lat, lng), 'rating': float}`` dicts, one per
        provider in the state. ``rating`` is the ``TARGET_VALUE`` column.
    """
    in_state_providers = providers[providers['State'] == config.app.state]

    # ``iterrows()`` is a generator with no length; passing ``total`` lets the
    # bar show real progress instead of "0it".
    it = auto.tqdm.auto.tqdm(
        in_state_providers.iterrows(),
        total=len(in_state_providers),
    )
    print(len(in_state_providers))

    rating_list = []
    for _, row in it:
        full_address = f"{row['Provider Address']}, {row['City/Town']}, {row['State']}"
        coords = addr_to_coords(full_address)
        rating_list.append({'coords': coords, 'rating': float(row[TARGET_VALUE])})

        # Pause between requests to avoid hammering the geocoding API.
        auto.time.sleep(0.75)

    return rating_list

values = scope()

0it [00:00, ?it/s]

304


In [16]:
# Keep the geocoded results under a descriptive name; the cell below builds
# its DataFrame from `provider_ratings`.
provider_ratings = values

In [17]:
def scope(save=False):
    data = auto.pandas.DataFrame(provider_ratings)
    data = data.dropna(subset='rating') # discard locations without a rating
    
    if save:
        data.to_csv(f'{config.app.state}-ratings.csv')
    return data

rating_df = scope(save=False)
/display rating_df

Unnamed: 0,coords,rating
0,"(35.449721, -86.808795)",2.0
1,"(36.084979000000004, -87.38126609999999)",5.0
2,"(35.050397999999994, -85.292158)",2.0
3,"(35.039666, -85.259345)",3.0
4,"(35.76736, -83.9239786)",1.0
...,...,...
294,"(35.228456, -88.849334)",4.0
296,"(35.621955, -86.17269800000001)",1.0
300,"(35.9650766, -83.18242140000001)",2.0
301,"(35.2272715, -88.2313102)",3.0


# Buildings

## Building

In [18]:
#@title Building
@auto.dataclasses.dataclass(frozen=True, kw_only=True)
class Building:
    """Immutable record describing one building row of the buildings CSV.

    Instances are normally created with :meth:`parse`. The ``osm`` and ``idf``
    accessors additionally need ``county`` to be set, because the model files
    are looked up through ``config.app.things[county]``.
    """

    identifier: str
    centroid: Location
    footprint: list[Location]
    state: str
    volume: auto.pint.Quantity
    area: auto.pint.Quantity
    height: auto.pint.Quantity
    floors: int
    kind: str
    standard: str
    # Optional: only rows that carry a 'City' column provide it. It used to be
    # missing entirely, which made `osm`, `idf` and the per-county filters on
    # `Buildings` fail with AttributeError on every call.
    county: str | None = None
    # center: Location

    @classmethod
    @auto.mediocreatbest.doctest
    def parse(Building, row: auto.df.Series, /) -> auto.typing.Self:
        # Builds a Building from one CSV row (anything supporting row[key] and
        # row.get(key), e.g. a pandas Series or a dict).
        #
        # The example below is disabled and kept as plain comments.
        # NOTE(review): confirm how `auto.mediocreatbest.doctest` treats a
        # docstring before turning it back into one.
        #
        # r"""

        # >>> parse(dict, {
        # ...     'ID': 6022005286564,
        # ...     'Centroid': '35.4963185/-88.468932',
        # ...     'Footprint2D': '35.49638/-88.468974_35.49638/-88.46889_35.496257/-88.46889_35.496257/-88.468974',
        # ...     'State_Abbr': 'TN',
        # ...     'Area': 5071.597932155534,
        # ...     'Area2D': 724.5139903079335,
        # ...     'Height': 20.68000066176,
        # ...     'NumFloors': 7.0,
        # ...     'WWR_surfaces': '.15_.15_.15_.15',
        # ...     'CZ': '3A',
        # ...     'BuildingType': 'MidriseApartment',
        # ...     'Standard': 'DOE-Ref-Pre-1980',
        # ...     'lat': 35.4963185,
        # ...     'lon': -88.468932,
        # ...     '.points': 'POINT (-88.468932 35.4963185)',
        # ...     'index_right': 0})
        # ... #doctest: +NORMALIZE_WHITESPACE
        # {'identifier': 6022005286564,
        # 'centroid': Location(lat=35.4963185, lng=-88.468932),
        # 'footprint': [Location(lat=35.49638, lng=-88.468974),
        #             Location(lat=35.49638, lng=-88.46889),
        #             Location(lat=35.496257, lng=-88.46889),
        #             Location(lat=35.496257, lng=-88.468974)],
        # 'state': 'TN',
        # 'volume': <Quantity(5071.59793, 'meter ** 3')>,
        # 'area': <Quantity(724.51399, 'meter ** 2')>,
        # 'height': <Quantity(20.6800007, 'meter')>,
        # 'floors': 7.0,
        # 'kind': 'MidriseApartment',
        # 'standard': 'DOE-Ref-Pre-1980',
        # }

        # """
        # 'center': Location(lat=35.4963185, lng=-88.468932),
        # ...     'City': 'Henderson'})

        unit = Unit()

        return Building(
            identifier=row['ID'],
            # 'Centroid' is a single location string; 'Footprint2D' is a
            # '_'-separated list of such strings.
            centroid=Location.parse(row['Centroid']),
            footprint=[Location.parse(p) for p in row['Footprint2D'].split('_')],
            state=row['State_Abbr'],
            # NOTE(review): in the sample row above Area == Area2D * NumFloors,
            # which suggests 'Area' is total floor area rather than a volume.
            # Confirm before relying on `volume`.
            volume=row['Area'] * unit.meter ** 3,
            area=row['Area2D'] * unit.meter ** 2,
            height=row['Height'] * unit.meter,
            # NOTE(review): the sample row has NumFloors as a float (7.0) while
            # the field is annotated `int`; the value is stored unchanged.
            floors=row['NumFloors'],
            kind=row['BuildingType'],
            standard=row['Standard'],
            # center=Location(lat=row['lat'], lng=row['lon']),
            # Absent column -> None, so rows without 'City' still parse.
            county=row.get('City'),
        )

    @auto.functools.cached_property
    def url(building: Building, /) -> str:
        """Google Maps satellite-view URL centred on the building's centroid."""
        return (
            f'http://maps.google.com/maps'
            f'?z=12'
            f'&t=k'
            f'&q=loc:{building.centroid.lat}+{building.centroid.lng}'
        )

    @auto.functools.cached_property
    def bounds(building: Building, /) -> Bounds:
        """Axis-aligned lat/lng bounding box of the footprint.

        With an empty footprint the corners keep their +/-inf sentinels.
        """
        n = e = float('-inf')
        s = w = float('inf')

        for location in building.footprint:
            n = max(n, location.lat)
            s = min(s, location.lat)
            w = min(w, location.lng)
            e = max(e, location.lng)

        return Bounds(
            sw=Location(lng=w, lat=s),
            ne=Location(lng=e, lat=n),
        )

    def _model_path(building: Building, suffix: str, /):
        """Path of this building's ``ModelsOrig/<id>/<id>.<suffix>`` model file.

        Raises:
            AttributeError: if the building has no county, since the root
                directory is looked up per county in ``config.app.things``.
        """
        if building.county is None:
            raise AttributeError(
                f'Building {building.identifier} has no county; '
                f'cannot locate its .{suffix} model'
            )
        root = config.app.things[building.county]
        return root / 'ModelsOrig' / f'{building.identifier}' / f'{building.identifier}.{suffix}'

    # @auto.functools.cached_property
    @property
    def osm(building: Building, /) -> dict[str, list[dict[str, list[str]]]]:
        """Parsed contents of the building's ``.osm`` model (re-read on each access)."""
        with building._model_path('osm').open('r') as f:
            return Thing.parse(f.read())

    # @auto.functools.cached_property
    @property
    def idf(building: Building, /) -> dict[str, list[dict[str, list[str]]]]:
        """Parsed contents of the building's ``.idf`` model (re-read on each access)."""
        with building._model_path('idf').open('r') as f:
            return Thing.parse(f.read())


## Buildings

In [19]:
#@title Buildings
class Buildings(auto.collections.UserList[Building]):
    """List of :class:`Building` records with a CSV loader and aggregate helpers.

    ``knox``, ``henderson`` and ``bounds`` are cached properties: they are
    computed on first access and are NOT refreshed if the list is modified
    afterwards.
    """

    @classmethod
    def parse(Buildings, fileobj, /, nrows=None):
        """Read a buildings CSV and parse every row with ``Building.parse``.

        Args:
            fileobj: Path or file object accepted by ``pandas.read_csv``.
            nrows: Optional cap on the number of rows read (``None`` = all).

        Returns:
            A new ``Buildings`` list in file order.
        """
        df = auto.pd.read_csv(
            fileobj,
            nrows=nrows,
        )

        buildings = Buildings()
        for _, row in auto.tqdm.notebook.tqdm(df.iterrows(), total=len(df)):
            buildings.append(Building.parse(row))

        return buildings

    def _in_county(buildings_, county, /):
        """Return a new ``Buildings`` holding the members whose county equals ``county``."""
        matches = Buildings()
        for building in buildings_:
            if building.county == county:
                matches.append(building)

        return matches

    @auto.functools.cached_property
    def knox(buildings_, /):
        """Buildings whose ``county`` is ``'Knox'``."""
        return buildings_._in_county('Knox')

    @auto.functools.cached_property
    def henderson(buildings_, /):
        """Buildings whose ``county`` is ``'Henderson'``."""
        return buildings_._in_county('Henderson')

    @auto.functools.cached_property
    def bounds(buildings: Buildings, /) -> Bounds:
        """Bounding box enclosing the bounds of every building in the list.

        With an empty list the corners keep their +/-inf sentinels.
        """
        n = e = float('-inf')
        s = w = float('inf')

        for building in buildings:
            # `Building.bounds` is a cached_property, so it is read as an
            # attribute; calling it would try to call the Bounds value itself.
            bounds = building.bounds
            n = max(n, bounds.ne.lat)
            s = min(s, bounds.sw.lat)
            w = min(w, bounds.sw.lng)
            e = max(e, bounds.ne.lng)

        return Bounds(
            sw=Location(lng=w, lat=s),
            ne=Location(lng=e, lat=n),
        )


## Testing

In [20]:
def scope():
    # Sanity-check the inputs: list the data directory, then the configured
    # buildings CSV. Both lines are IPython `!` shell escapes; `{...}`
    # interpolates the Python value into the command.
    !ls -l {config.app.datadir}

    !ls -l {config.app.buildings}

/scope

total 0
drwxrwxr-x 1 raustin9 raustin9 111436 Oct  2 23:35 Counties_IDF
drwxrwxr-x 1 raustin9 raustin9 111436 Oct  3 10:05 Counties_OSM
drwxrwxr-x 1 raustin9 raustin9    612 Oct  2 17:26 MAv1_CSVS


-rw-r--r-- 1 raustin9 raustin9 683147264 Oct  2 17:22 /mnt/seenas2/data/model-america/data/MAv1_CSVS/TN.csv


In [21]:
# Parse every row of the configured buildings CSV into Building records
# (the recorded run covered 2,548,858 rows), then report how many were loaded.
buildings = Buildings.parse(config.app.buildings)
/print len(buildings)

  0%|          | 0/2548858 [00:00<?, ?it/s]

2548858


In [105]:
# buildings = Buildings.parse(config.app.buildings, nrows=10)
# /print len(buildings)

### look

In [101]:
def scope():
    def scope():
        for building in buildings:
            print(building.centroid)

    scope()

/scope

Location(lat=35.324066199533256, lng=-90.01537539556219)
Location(lat=35.2859495, lng=-90.05371699999999)
Location(lat=35.313541783475046, lng=-90.02276104484608)
Location(lat=35.3065905, lng=-90.01488850000001)
Location(lat=35.302507718305606, lng=-90.006061812955)
Location(lat=35.2926525, lng=-90.03100650000002)
Location(lat=35.286177, lng=-90.04003599999999)
Location(lat=35.28657335062733, lng=-90.04082830458886)
Location(lat=35.28792820474406, lng=-90.04115539943659)
Location(lat=35.24837399999999, lng=-90.0620205)


In [22]:
#@title save
def scope():
    # path = auto.pathlib.Path('/mnt/data') / 'buildings.zip'
    # if path.exists():
    #     path.unlink()

    # with auto.zipfile.ZipFile(path, 'w', compression=auto.zipfile.ZIP_DEFLATED) as arc:
    #     root = auto.zipfile.Path(arc)

    root = auto.pathlib.Path('/mnt/seenas2/data') / 'ct-buildings' / f'{config.app.state}'
    # root = auto.pathlib.Path('/mnt/data') / 'buildings'
    root.mkdir(exist_ok=True)

    with auto.contextlib.ExitStack() as stack:
        pbar = stack.enter_context( auto.tqdm.auto.tqdm(unit='building') )
        ebar = stack.enter_context( auto.tqdm.auto.tqdm(unit='error') )

        pbar.reset(total=len(buildings))
        for building in buildings:
            pbar.update()

            # path = root / f'{building.identifier}' / 'osm.json.gz'
            # path.parent.mkdir(exist_ok=True)
            # if not path.exists():
            #     try:
            #         osm = building.osm
            #     except KeyboardInterrupt:
            #         raise
            #     except:
            #         ebar.update()
            #         pass
            #     else:
            #         with auto.gzip.open(path, 'wb') as f:
            #             with auto.io.TextIOWrapper(f, encoding='utf-8') as f:
            #                 auto.json.dump(osm.sections, f)

            # path = root / f'{building.identifier}' / 'idf.json.gz'
            # path.parent.mkdir(exist_ok=True)

            # if path.exists() and path.stat().st_size < 1000:
            #     path.unlink()

            # if not path.exists():
            #     try:
            #         idf = building.idf
            #     except KeyboardInterrupt:
            #         raise
            #     except:
            #         ebar.update()
            #         pass
            #     else:
            #         with auto.gzip.open(path, 'wb') as f:
            #             with auto.io.TextIOWrapper(f, encoding='utf-8') as f:
            #                 auto.json.dump(idf.sections, f)

            path = root / f'{building.identifier}' / 'building.json'
            path.parent.mkdir(exist_ok=True)
            if not path.exists():
                with path.open('w') as f:
                    f.write(auto.json.dumps({
                        'identifier': building.identifier,
                        'centroid': {
                            'lat': building.centroid.lat,
                            'lng': building.centroid.lng,
                        },
                        'footprint': [
                            {
                                'lat': p.lat,
                                'lng': p.lng,
                            }
                            for p in building.footprint
                        ],
                        'state': building.state,
                        'volume': building.volume.m_as(Unit().meter ** 3),
                        'area': building.area.m_as(Unit().meter ** 2),
                        'height': building.height.m_as(Unit().meter),
                        'floors': building.floors,
                        'kind': building.kind,
                        'standard': building.standard,
                        # 'center': {
                        #     'lat': building.center.lat,
                        #     'lng': building.center.lng,
                        # },
                        # 'county': building.county,
                    }))

/scope


0building [00:00, ?building/s]

0error [00:00, ?error/s]

### Graham Scan

In [103]:

# Function to compute polar angle relative to pivot point
def polar_angle(p0, p1):
    """Angle in radians of the vector ``p0 -> p1``, measured from the +x axis.

    Points are indexable as ``(x, y)``. The result is in ``[-pi, pi]``.
    """
    # Imported here so the helper does not depend on a notebook-level
    # `import math` having been executed.
    import math

    return math.atan2(p1[1] - p0[1], p1[0] - p0[0])

# Cross product of vectors OA and OB
# A positive cross product indicates a left turn, negative indicates a right turn
def cross_product(o, a, b):
    """Z component of ``(a - o) x (b - o)``; zero when the points are collinear."""
    return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])

def graham_scan(points):
    """Convex hull of 2-D points using the Graham scan.

    Args:
        points: Iterable of points indexable as ``(x, y)``.

    Returns:
        The hull vertices in counter-clockwise order, starting at the pivot
        (lowest y, then lowest x). Points lying on a hull edge and duplicates
        of the pivot are not included. Empty input gives ``[]``.
    """
    # Find the point with the lowest y-coordinate
    points = sorted(points, key=lambda p: (p[1], p[0]))
    if not points:
        return []
    pivot = points[0]

    def angle_then_distance(p):
        dx = p[0] - pivot[0]
        dy = p[1] - pivot[1]
        # Points sharing a polar angle must be visited nearest-first: the scan
        # pops the previous point on a zero cross product, so the farthest
        # one (the real hull vertex) has to arrive last to survive.
        return (polar_angle(pivot, p), dx * dx + dy * dy)

    # Sort points by polar angle with respect to pivot, ignoring copies of the
    # pivot itself (they have no defined direction).
    candidates = [p for p in points[1:] if p[0] != pivot[0] or p[1] != pivot[1]]
    candidates.sort(key=angle_then_distance)

    # Build the convex hull
    hull = [pivot]
    for point in candidates:
        while len(hull) > 1 and cross_product(hull[-2], hull[-1], point) <= 0:
            hull.pop()  # Remove point if it makes a right turn (or is collinear)
        hull.append(point)

    return hull

## JSON -> Geometry

In [24]:
# Take the JSON and get footprint

def scope():
    path = f'/mnt/seenas2/data/ct-buildings/{config.app.state}'
    dir_list = auto.os.listdir(path)

    total_items = len(dir_list)

    zpath = auto.pathlib.Path(f'/mnt/seenas2/data/model-america/gen/{config.app.state}.zip')
    if zpath.exists():
        zpath.unlink()

    # print(total_items)
    # return
    with auto.zipfile.ZipFile(f'/mnt/seenas2/data/model-america/gen/{config.app.state}.zip', 'x') as fout:
        for building in auto.tqdm.auto.tqdm(dir_list):
            points = []
            building_height = 0
            with open(f'{path}/{building}/building.json') as fp:
                data = auto.json.load(fp)

                # print(f'Building <{building}>')
                # print('\tFootprint:')
                for point in data["footprint"]:
                    # print(f'\t\t({point["lat"]}, {point["lng"]})')
                    x, y, z = Spatial(
                        lat=point["lat"],
                        lng=point["lng"],
                        alt=0
                    )
                    points.append(auto.numpy.array([x,y,z]))
                    # points.append(auto.numpy.array([x * 1000,y * 1000,z * 1000]))
                    # points.append((x,y,z))
                    # print(
                    #     f'\t\t> Spatial: ({x}, {y}, {z})'
                    # )
                # print(f'\tHeight: {data["height"]}')
                building_height = float(data["height"])
                # print()
            
            points = auto.numpy.array(points)
            # Get top face of building
            def normalize_vector(point):
                # Convert point to a numpy array for easy calculations
                vector = auto.numpy.array(point)
        
                # Calculate the magnitude of the vector
                magnitude = auto.numpy.linalg.norm(vector)
        
                # Check if the magnitude is not zero to avoid division by zero
                if magnitude == 0:
                    raise ValueError("Cannot normalize a zero vector.")
        
                # Normalize the vector
                normalized_vector = vector / magnitude
                return normalized_vector
            
            normal = normalize_vector(points[0])
            vertical = normal * building_height
            # print(f'Normal {normal}. Height: {building_height} -> Vertical: {vertical}')

            # find points for top of building
            roof_verts = []
            # print("Points")
            for p in points:
                # print(p)
                roof_verts.append(p + vertical)


            roof_verts = auto.numpy.array(roof_verts)

            faces = []
            num_vertices = len(points)
            for i in range(num_vertices):
                next_index = (i+1) % num_vertices
                faces.append([i, next_index, num_vertices + next_index])  # Triangle 1
                faces.append([i, num_vertices + next_index, num_vertices + i])  # Triangle 2

            vertices = auto.numpy.vstack((points, roof_verts))
            mesh = auto.trimesh.Trimesh(vertices=vertices, faces=faces)
            # mesh.export('building.obj')

            # print(mesh.vertices)
            verts = auto.numpy.array(mesh.vertices, dtype='f4').flatten()
            indices = auto.numpy.array(mesh.faces, dtype='u4').flatten()
            # for i in range(len(indices)):
            #     print(f'{indices[i]}  |  ', end='')
            #     if (i+1) % 3 == 0:
            #         print()
            # light = auto.trimesh.scene.lighting.DirectionalLight()
            # light.intensity = 10
            # scene = auto.trimesh.Scene([mesh], lights=[light])
            # scene.show(viewer='notebook')

            # all_vertices = auto.numpy.concatenate([all_vertices, auto.numpy.array(mesh.vertices).flatten()])
            # all_indices = auto.numpy.concatenate([all_indices, auto.numpy.array(mesh.faces).flatten()])

            pospath = auto.pathlib.Path(f'{building}.mesh.vec3f[].vertex.position.bin')
            indexpath = auto.pathlib.Path(f'{building}.mesh.vec3ui[].vertex.index.bin')
            pospath.write_bytes(verts.tobytes())
            indexpath.write_bytes(indices.tobytes())
            fout.write(pospath, f'{building}.mesh.vec3f[].vertex.position.bin')
            fout.write(indexpath, f'{building}.mesh.vec3ui[].vertex.index.bin')
            indexpath.unlink()
            pospath.unlink()
            # with open(f'/mnt/seenas2/data/ct-buildings/gen/{config.app.state}/{building}.mesh.vec3f[].vertex.position.bin', 'wb') as f:
            #     f.write(verts.tobytes())
            # with open(f'/mnt/seenas2/data/ct-buildings/gen/{config.app.state}/{building}.mesh.vec3ui[].vertex.index.bin', 'wb') as f:
            #     f.write(indices.tobytes())

        # break




/scope

  0%|          | 0/2548852 [00:00<?, ?it/s]

In [None]:
def scope():
        with auto.zipfile.ZipFile(f'/mnt/seenas2/data/model-america/gen/{config.app.state}.zip', 'r') as fin:
                index = 0
                for file in fin.namelist():
                        print(file)
                        # building_name = file.split('.')[0]
                        # print(building_name)
                        zdata = None
                        data = None
                        with fin.open(file) as zf, open(f'/mnt/seenas2/data/ct-buildings/gen/{config.app.state}/{file}', 'rb') as f:
                                p = auto.pathlib.Path(f'/mnt/seenas2/data/ct-buildings/gen/{config.app.state}/{file}')
                                size = p.stat().st_size
                                zip_data = zf.read()
                                zfile_info = fin.getinfo(file)
                                print(f'Zipfile {file}: {zfile_info.file_size} bytes')
                                print(f'Normal  {p}:    {size} bytes')

                                i = 0
                                while True:
                                        byte_z = zf.read(1)
                                        byte_n = f.read(1)

                                        if not byte_z and not byte_n:
                                                print(f'EOF: {i}')
                                                break
                                        elif byte_z != byte_n:
                                                print(f'Difference at {i}: {byte_z} /= {byte_n}')
                                        i += 1



                        if index == 5:
                                break
                        index += 1


/scope