In [60]:
# Dependencies and Setup
import os
import shutil
from zipfile import ZipFile

import matplotlib.pyplot as plt
import pandas as pd
import requests
import scipy.stats as st
from scipy.stats import linregress
from pathlib import Path

In [65]:
# Notebook configuration
verbose = True  # Only set this to true for debugging
# verbose = False

In [66]:
# The BLS files have all the county employment / wage info for our selected years and
# counties.

# Do not need anymore, keeping in case we missed something.
# nys_industycounty_path = "housing_data/NYS_Industry_by_County.csv"

# Study parameters

# Years 2012 - 2022
years = [2012, 2013, 2014, 2015, 2016, 2017, 2018,  # Before pandemic lockdown
         2019, 2020, 2021, 2022]  # During and after pandemic lockdown

# All NYC Boroughs and geographically adjacent counties
counties = [
    # NYC Boroughs
    "Bronx County, New York",
    "Kings County, New York",  # Brooklyn
    "New York County, New York",  # Manhattan
    "Queens County, New York",
    "Richmond County, New York",  # Staten Island

    # Counties adjacent to NYC Boroughs
    "Bergen County, New Jersey",
    "Nassau County, New York",
    "Hudson County, New Jersey",
    "Westchester County, New York",
    "Rockland County, New York",
    "Fairfield County, Connecticut",
]

In [69]:
# Download and/or extract BLS datasets from BLS site or file system (if pre-downloaded)
bls_csv_dir = "housing_data"
bls_csv_files = {}
for year in years:
    zip_fn = f"{year}_qtrly_by_area.zip"
    zip_url = f"https://data.bls.gov/cew/data/files/{year}/csv/{zip_fn}"
    zip_path = f"{bls_csv_dir}/{zip_fn}"

    # Download the zip archive with the data for each year if it's not in the file system.
    if not os.path.isfile(zip_path):
        head_response = requests.head(zip_url)
        file_size = float(head_response.headers['Content-Length'])
        if file_size >= 2**30:
            file_size, units = file_size / 2**30, 'GiB'
        elif file_size >= 2**20:
            file_size, units = file_size / 2**20, 'MiB'
        elif file_size >= 2**10:
            file_size, units = file_size / 2**10, 'kiB'
        else:
            units = 'B'
        file_size = f"{file_size:03.2f} {units}"
        
        print(f"Downloading {zip_url!r} to {zip_path!r} for year {year}.")
        print(f"Expected size: {file_size}")
        get_response = requests.get(zip_url)
        with open(zip_path, 'wb') as zip_file:
            zip_file.write(get_response.content)

    # Find the csv for each county within the archive
    # print(f"Listing files in {zip_path!r} zip archive.")
    with ZipFile(zip_path) as zip_archive:
        files = zip_archive.namelist()
        for file in files:
            for county in counties:
                if file.endswith(f'{county}.csv'):
                    # print(f"{file=!r}")
                    csv_key = (year, county)
                    csv_fn = os.path.basename(file)
                    csv_path = os.path.join(bls_csv_dir, csv_fn)
                    # print(f"{csv_fn=!r}")
                    bls_csv_files[csv_key] = csv_path
                    zip_archive.extract(file, bls_csv_dir)
                    shutil.move(f"{bls_csv_dir}/{file}", csv_path)

if verbose:
    for key, path in bls_csv_files.items():
        year, county = key
        print(f"BLS employment / wage info csv for {year=} and {county=!r}: {path=!r}")
print(f"Found {len(bls_csv_files)} BLS employment / wage info csv files (expected {len(counties) * len(years)}).")

BLS employment / wage info csv for year=2012 and county='Fairfield County, Connecticut': path='housing_data/2012.q1-q4 09001 Fairfield County, Connecticut.csv'
BLS employment / wage info csv for year=2012 and county='Bergen County, New Jersey': path='housing_data/2012.q1-q4 34003 Bergen County, New Jersey.csv'
BLS employment / wage info csv for year=2012 and county='Hudson County, New Jersey': path='housing_data/2012.q1-q4 34017 Hudson County, New Jersey.csv'
BLS employment / wage info csv for year=2012 and county='Bronx County, New York': path='housing_data/2012.q1-q4 36005 Bronx County, New York.csv'
BLS employment / wage info csv for year=2012 and county='Kings County, New York': path='housing_data/2012.q1-q4 36047 Kings County, New York.csv'
BLS employment / wage info csv for year=2012 and county='Nassau County, New York': path='housing_data/2012.q1-q4 36059 Nassau County, New York.csv'
BLS employment / wage info csv for year=2012 and county='New York County, New York': path='housi

In [68]:
printed_example = False
for key, path in bls_csv_files.items():
    year, county = key
    df = pd.read_csv(Path(path))
    df["Year"] = year
    df["County"] = county
    if not printed_example:
        print(df.head())
        printed_example = True

IsADirectoryError: [Errno 21] Is a directory: 'housing_data/2012.q1-q4 09001 Fairfield County, Connecticut.csv'

In [None]:
# Read the employment data and the nys industry results
first_quarteremployment = pd.read_csv(first_quarteremployment_path)
nys_industrycounty = pd.read_csv(nys_industrycounty_path)

# Combine the data into a single DataFrame
first_quarteremployment_nys_industrycounty = pd.merge(first_quarteremployment, nys_industrycounty,on=["County"])

# Display the data table for preview
first_quarteremployment_nys_industrycounty.head()

In [2]:
pwd = os.getcwd()  # Get current working directory
files = os.listdir()  # Get files in current working directory
print(f"{pwd=!r}")
for file in files:
    print(f"{file=!r}")

NameError: name 'os' is not defined

In [None]:
#import requests
#import json
#urllib.request

#urllib

In [None]:

#url = "https://api.bls.gov/publicAPI/v2/timeseries/data/ "

In [None]:
#response = requests.post(url).json()
#response_json = response.json()
#print(json.dumps(response, indent=4, sorts_keys=True))

In [3]:
#Reference NYC Housing Data

#Kaggle Datasets
#https://www.kaggle.com/code/shaqiavelli/nyc-geospatial-analysis
#https://www.kaggle.com/code/ashokmevada/house-price

#NYC Planning #????
#https://www.nyc.gov/site/planning/data-maps/open-data/dwn-housing-database.page#housingdevelopmentproject

#NYC.GOV Annual Housing Sales 2012-2022
#https://www.nyc.gov/site/finance/property/property-annualized-sales-update.page

#Reference Bureau of Labor Statistics (bls) API_Key Multiple Series and csv files

##bls.gov: https://www.bls.gov/developers/api_signature_v2.htm (API_Key)
##https://www.bls.gov/cew/downloadable-data-files.htm (csv files)
##https://www.bls.gov/cew/additional-resources/open-data/sample-code.htm (sample python code)

In [None]:
# Study data files
#nasdaqdatalink_apidata_path = "data/Mouse_metadata.csv"
#study_results_path = "data/Study_results.csv"

# Read the mouse data and the study results
#nasdaqdatalink_apidata = pd.read_csv(mouse_metadata_path)
#study_results = pd.read_csv(study_results_path)

# Combine the data into a single DataFrame
#mouse_study_results = pd.merge(mouse_metadata, study_results,on=["Mouse ID"])

# Display the data table for preview
#mouse_study_results.head()

In [16]:
d = {
    'potato': 'tomato',
    'pi': 3.14159,
    'name': 'Carlos',
}
d2 = dict(
    potato='tomato',
    pi=3.14159,
    name='Carlos',
)
class A:
    def __init__(self, , **kwargs):
        for param, arg in kwargs.items():
            setattr(self, param, arg)
    def __str__(self):
        return f"str version: {self.__class__.__name__}" \
            f"({",".join(f"{param}={arg}" for param, arg in sorted(self.__dict__.items()))})"
    def __repr__(self):
        return f"repr version: {self.__class__.__name__}" \
            f"({','.join(f'{param}={arg!r}' for param, arg in sorted(self.__dict__.items()))})"

a = A(potato='tomato', pi=3.1415926, name='Carlos', none=None)
print(f"default: {a}")
print(f"with_bang_s: {a!s}")
print(f"with_bang_r: {a!r}")

default: str version: A(name=Carlos,none=None,pi=3.1415926,potato=tomato)
with_bang_s: str version: A(name=Carlos,none=None,pi=3.1415926,potato=tomato)
with_bang_r: repr version: A(name='Carlos',none=None,pi=3.1415926,potato='tomato')


In [27]:
# `a.potato` is quivalent to `getattr(a, 'potato')`


'tomato'

In [20]:
repr('a')

"'a'"

In [21]:
A(name='Carlos',none=None,pi=3.1415926,potato='tomato')

repr version: A(name='Carlos',none=None,pi=3.1415926,potato='tomato')

In [22]:
def mysum(*values):
    s = 0
    for value in values:
        s += value
    return s

In [59]:
len(counties) *len(years)

121