In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import re
pd.set_option('display.max_columns', None)

In [2]:
# geostl nghbd codes:
    # gravois park = 19
    # benton park west = 30
    # dutchtown = 16
    # mount pleasant = 17
# https://dynamic.stlouis-mo.gov/citydata/newdesign/sqlsearch.cfm
# gravois_total_parcels = 1733
# benton_park_west_total_parcels = 1693
# dutchtown_total_parcels = 4850

In [3]:
csv_folder = 'stl_vacancy_data/'
path = "stl_vacancy_data/*.csv"
csv_list = []

gj_vacancy_cat_df = pd.DataFrame(columns=['Date', 'V_Indeterminate', 'V_Possible', 'V_Very_likely', 'V_Definite'])
gj_burden_cat_df = pd.DataFrame(columns=['Date', 'B_Zero', 'B_Low', 'B_Medium', 'B_High'])

lra_vacancy_cat_df = pd.DataFrame(columns=['Date', 'V_Indeterminate', 'V_Possible', 'V_Very_likely', 'V_Definite'])
lra_burden_cat_df = pd.DataFrame(columns=['Date', 'B_Zero', 'B_Low', 'B_Medium', 'B_High'])

non_lra_vacancy_cat_df = pd.DataFrame(columns=['Date', 'V_Indeterminate', 'V_Possible', 'V_Very_likely', 'V_Definite'])
non_lra_burden_cat_df = pd.DataFrame(columns=['Date', 'B_Zero', 'B_Low', 'B_Medium', 'B_High'])

for fname in glob.glob(path):
    csv_name = re.findall(r'stl_vacancy_data_\d\d\d\d-\d\d-\d\d.csv', fname)[0]
    csv_list.append(csv_name)

In [4]:
parcels = pd.read_csv('grav-jeff-parcels.csv')
nhd_num_list = [16, 17, 19, 30]
mask = parcels['NBRHD'].isin(nhd_num_list)
parcels_df = parcels[mask]

parcels_df['SITEADDR'] = parcels_df['SITEADDR'].replace(r'\s+', ' ', regex=True)

  parcels = pd.read_csv('grav-jeff-parcels.csv')


In [5]:
gj_vacancy_cat_df = pd.DataFrame(columns=['Date', 'V_Indeterminate', 'V_Possible', 'V_Very_likely', 'V_Definite'])
gj_burden_cat_df = pd.DataFrame(columns=['Date', 'B_Zero', 'B_Low', 'B_Medium', 'B_High'])

In [6]:
def combine_columns(row):
    return str(row['StAddrNum']) + ' ' + row['StNameFull']

In [7]:
class VacancyTransformer():
    def __init__(self, csv_name):
        self.date = re.findall(r'\d\d\d\d-\d\d-\d\d', csv_name)[0]
        self.csv_name = csv_name
        
    def load_raw_df(self):
        raw_df = pd.read_csv(csv_folder+self.csv_name)
        return raw_df

    def create_regional_df(self, raw_df, parcel_df):

        # nhd_names = ['Gravois Park', 'Benton Park West', 'Dutchtown', 'Mount Pleasant']
        # mask = self.raw_df['NhdName'].isin(nhd_names)
        # regional_df = raw_df[mask]

        full_nhds = ['Gravois Park', 'Benton Park West']
        partial_nhds = ['Dutchtown', 'Mount Pleasant']

        full_mask = raw_df['NhdName'].isin(full_nhds)
        bpw_gp_df = raw_df[full_mask]

        partial_mask = raw_df['NhdName'].isin(partial_nhds)
        partial_df = raw_df[partial_mask]

        matching_parcels = []
        for handle in list(partial_df['Handle']):
            if handle in list(parcels_df['HANDLE']):
                matching_parcels.append(handle)

        handle_mask = partial_df['NhdName'].isin(matching_parcels)
        handle_match_df = partial_df[handle_mask]

        partial_df['SITEADDR'] = partial_df.apply(combine_columns, axis=1)
        
        matching_parcels = []
        for site_addr in list(partial_df['SITEADDR']):
            if site_addr in list(parcels_df['SITEADDR']):
                matching_parcels.append(site_addr)

        addr_mask = partial_df['SITEADDR'].isin(matching_parcels)
        addr_match_df = partial_df[addr_mask]

        joint_df = pd.concat([handle_match_df, addr_match_df], ignore_index=True)
        dt_mp_df = joint_df.drop_duplicates(ignore_index=True)

        regional_df = pd.concat([bpw_gp_df, dt_mp_df], ignore_index=True)
        
        # gj_addr = list(regional_df['SITEADDR'])
        # site_mask = parcels['SITEADDR'].isin(gj_addr)
        # site_handles = parcels['HANDLE'][site_mask]

        # for handle in list(regional_df['Handle']):
        #     if handle in list(parcels_df['HANDLE']):
        #         matching_parcels.append(handle)
        #     elif 

        return regional_df
        
    def calc_vacancy_cats(self, df, aggregate_df):
        counts = df['VacancyCat'].value_counts().to_dict()
        cnt_cols = ['Indeterminant', 'Possible', 'Very Likely', 'Definite']
        for col in cnt_cols:
            if col not in counts.keys():
                counts[col] = 0
                
        data = [self.date] + [counts['Indeterminant'], counts['Possible'], counts['Very Likely'], counts['Definite']]
        
        aggregate_df.loc[len(aggregate_df.index)] = data

    def calc_burden_cats(self, df, aggregate_df):
        counts = df['BurdenCat'].value_counts().to_dict()
        cnt_cols = ['Zero', 'Minimal', 'Very Low', 'Low', 'Medium Low', 'Medium', 'Medium High', 'Somewhat High', 'High', 'Very High', 'Extremely High']
        for col in cnt_cols:
            if col not in counts.keys():
                counts[col] = 0
                
        zero_cat = counts['Zero']
        low_cat = sum([counts['Minimal'], counts['Very Low'], counts['Low']])
        med_cat = sum([counts['Medium Low'], counts['Medium'], counts['Medium High'], counts['Somewhat High']])
        high_cat = sum([counts['High'], counts['Very High'], counts['Extremely High']])
        
        aggregate_df.loc[len(aggregate_df.index)] = [self.date, zero_cat, low_cat, med_cat, high_cat]

    def calc_groupby_counts(self, df, cat_list, region):
        
        # valid = {'stl','bpw-gp', 'dutchtown', 'gravois-jefferson'}
        # if region not in valid:
        #     raise ValueError("results: status must be one of %r." % valid)
        
        for cat in cat_list:

            # VacancyCat:
            vac_df = df.groupby(cat)['VacancyCat'].value_counts().to_frame().unstack()
            vac_df.columns = vac_df.columns.droplevel()
            vac_df.fillna(value=0, inplace=True)
            vac_value_name = vac_df.columns.name
            vac_index_name = vac_df.index.name

            vac_dict = vac_df.to_dict() # sdflkjasd;lfja;slfjklsd;fkjls;dkfjlsdfja;lsfdjl;skdjflsdjflsdjfklsdkfjl;sdkfjaksjdfasjdf;ldsf
            key = list(vac_dict.keys())[0]
            b_types = list(vac_dict[key].keys())
            v_cols = ['Indeterminant', 'Possible', 'Very Likely', 'Definite']

            for col in v_cols:
                if col not in vac_dict.keys():
                    vac_dict[col] = {}
                    for b in b_types:
                        vac_dict[col][b] = 0

            vac_df = pd.DataFrame(vac_dict)
            
            vac_df = vac_df[['Indeterminant', 'Possible', 'Very Likely', 'Definite']]
            vac_df.rename(columns={'Indeterminant': 'V_Indeterminant',
                               'Possible': 'V_Possible',
                               'Very Likely': 'V_Very_Likely',
                               'Definite': 'V_Definite'},
                          inplace=True)
            vac_df.reset_index(inplace=True, names='Type')
            vac_df.insert(loc=0, column='Date', value=self.date)
            vac_df.to_csv(f'data/temp/{vac_index_name.lower()}_vacancy_data/{region}/{region}_{vac_index_name}_{vac_value_name}_{self.date}.csv')

            # BurdenCat
            bur_df =  df.groupby(cat)['BurdenCat'].value_counts().to_frame().unstack()
            bur_df.columns = bur_df.columns.droplevel()
            bur_df.fillna(value=0, inplace=True)
            bur_index_name = bur_df.index.name
            bur_value_name = bur_df.columns.name

            bur_dict = bur_df.to_dict()
            key = list(bur_dict.keys())[0]
            b_types = list(bur_dict[key].keys())
            b_cols = ['Zero', 'Minimal', 'Very Low', 'Low', 'Medium Low', 'Medium', 'Medium High', 'Somewhat High', 'High', 'Very High', 'Extremely High']

            for col in b_cols:
                if col not in bur_dict.keys():
                    bur_dict[col] = {}
                    for b in b_types:
                        bur_dict[col][b] = 0

            bur_df = pd.DataFrame(bur_dict).reset_index(names='Type')
            
            bur_df['B_Zero'] = bur_df['Zero']
            bur_df['B_Low'] = bur_df['Minimal'] + bur_df['Very Low'] + bur_df['Low']
            bur_df['B_Medium'] = bur_df['Medium Low'] + bur_df['Medium'] + bur_df['Medium High'] + bur_df['Somewhat High']
            bur_df['B_High'] = bur_df['High'] + bur_df['Very High'] + bur_df['Extremely High']
            bur_df.reset_index(inplace=True)
            bur_df.insert(loc=0, column='Date', value=self.date)
            bur_df = bur_df[['Date', 'Type', 'B_Zero', 'B_Low', 'B_Medium', 'B_High']]
            bur_df.to_csv(f'data/temp/{vac_index_name.lower()}_burden_data/{region}/{region}_{bur_index_name}_{bur_value_name}_{self.date}.csv')

In [8]:
for csv in csv_list:

    d = VacancyTransformer(csv)
    raw = d.load_raw_df()

    lra = raw.loc[raw['IsLRA'].isin([True])]
    non_lra = raw.loc[~raw['IsLRA'].isin([True])]

    # calc the grav-jeff totals
    reg_df = d.create_regional_df(raw_df=raw, parcel_df=parcels_df)
    d.calc_vacancy_cats(df=reg_df, aggregate_df=gj_vacancy_cat_df)
    d.calc_burden_cats(df=reg_df, aggregate_df=gj_burden_cat_df)
    d.calc_groupby_counts(df=reg_df, cat_list=['Type'], region='gravois-jefferson')

    # calc the lra totals
    lra_df = d.create_regional_df(raw_df=lra, parcel_df=parcels_df)
    d.calc_vacancy_cats(df=lra_df, aggregate_df=lra_vacancy_cat_df)
    d.calc_burden_cats(df=lra_df, aggregate_df=lra_burden_cat_df)
    d.calc_groupby_counts(df=lra_df, cat_list=['Type'], region='gj_lra')
    
    # calc the non-lra totals
    non_lra_df = d.create_regional_df(raw_df=non_lra, parcel_df=parcels_df)
    d.calc_vacancy_cats(df=non_lra_df, aggregate_df=non_lra_vacancy_cat_df)
    d.calc_burden_cats(df=non_lra_df, aggregate_df=non_lra_burden_cat_df)
    d.calc_groupby_counts(df=non_lra_df, cat_list=['Type'], region='gj_non-lra')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  partial_df['SITEADDR'] = partial_df.apply(combine_columns, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  partial_df['SITEADDR'] = partial_df.apply(combine_columns, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  partial_df['SITEADDR'] = partial_df.apply(combine_columns, axis=1)
A va

In [9]:
gj_burden_cat_df.to_csv('data/temp/gravois-jefferson_burden.csv')
gj_vacancy_cat_df.to_csv('data/temp/gravois-jefferson_vacancy.csv')

lra_burden_cat_df.to_csv('data/temp/gj_lra_burden.csv')
lra_vacancy_cat_df.to_csv('data/temp/gj_lra_vacancy.csv')

non_lra_burden_cat_df.to_csv('data/temp/gj_non-lra_burden.csv')
non_lra_vacancy_cat_df.to_csv('data/temp/gj_non-lra_vacancy.csv')

### now split the type counts by categories

In [10]:
# csv_folder = 'data/'
path = "data/temp/type_burden_data/gravois-jefferson/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['B_Zero'].sum(), res_df['B_Low'].sum(), res_df['B_Medium'].sum(), res_df['B_High'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['B_Zero'].sum(), multi_df['B_Low'].sum(), multi_df['B_Medium'].sum(), multi_df['B_High'].sum()]

    df.to_csv(fname)

In [11]:
# csv_folder = 'data/'
path = "data/temp/type_vacancy_data/gravois-jefferson/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['V_Indeterminant'].sum(), res_df['V_Possible'].sum(), res_df['V_Very_Likely'].sum(), res_df['V_Definite'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['V_Indeterminant'].sum(), multi_df['V_Possible'].sum(), multi_df['V_Very_Likely'].sum(), multi_df['V_Definite'].sum()]

    df.to_csv(fname)

In [12]:
path = "data/temp/type_burden_data/gj_lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['B_Zero'].sum(), res_df['B_Low'].sum(), res_df['B_Medium'].sum(), res_df['B_High'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['B_Zero'].sum(), multi_df['B_Low'].sum(), multi_df['B_Medium'].sum(), multi_df['B_High'].sum()]

    df.to_csv(fname)

path = "data/temp/type_vacancy_data/gj_lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['V_Indeterminant'].sum(), res_df['V_Possible'].sum(), res_df['V_Very_Likely'].sum(), res_df['V_Definite'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['V_Indeterminant'].sum(), multi_df['V_Possible'].sum(), multi_df['V_Very_Likely'].sum(), multi_df['V_Definite'].sum()]

    df.to_csv(fname)

In [13]:
path = "data/temp/type_burden_data/gj_non-lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['B_Zero'].sum(), res_df['B_Low'].sum(), res_df['B_Medium'].sum(), res_df['B_High'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['B_Zero'].sum(), multi_df['B_Low'].sum(), multi_df['B_Medium'].sum(), multi_df['B_High'].sum()]

    df.to_csv(fname)

path = "data/temp/type_vacancy_data/gj_non-lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['V_Indeterminant'].sum(), res_df['V_Possible'].sum(), res_df['V_Very_Likely'].sum(), res_df['V_Definite'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['V_Indeterminant'].sum(), multi_df['V_Possible'].sum(), multi_df['V_Very_Likely'].sum(), multi_df['V_Definite'].sum()]

    df.to_csv(fname)

In [14]:
path = "data/temp/type_burden_data/gravois-jefferson/*.csv"
burden_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    burden_list.append(df)
gj_type_burden_all = pd.concat(burden_list, ignore_index=True)
gj_type_burden_all.to_csv('data/gravois-jefferson_type_burden_all.csv')

path = "data/temp/type_vacancy_data/gravois-jefferson/*.csv"
vacancy_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    vacancy_list.append(df)
gj_type_vacancy_all = pd.concat(vacancy_list, ignore_index=True)
gj_type_vacancy_all.to_csv('data/gravois-jefferson_type_vacancy_all.csv')

In [15]:
path = "data/temp/type_burden_data/gj_lra/*.csv"
burden_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    burden_list.append(df)
lra_type_burden_all = pd.concat(burden_list, ignore_index=True)
lra_type_burden_all.to_csv('data/gj_lra_type_burden_all.csv')

path = "data/temp/type_vacancy_data/gj_lra/*.csv"
vacancy_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    vacancy_list.append(df)
lra_type_vacancy_all = pd.concat(vacancy_list, ignore_index=True)
lra_type_vacancy_all.to_csv('data/gj_lra_type_vacancy_all.csv')

In [16]:
path = "data/temp/type_burden_data/gj_non-lra/*.csv"
burden_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    burden_list.append(df)
non_lra_type_burden_all = pd.concat(burden_list, ignore_index=True)
non_lra_type_burden_all.to_csv('data/gj_non-lra_type_burden_all.csv')

path = "data/temp/type_vacancy_data/gj_non-lra/*.csv"
vacancy_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    vacancy_list.append(df)
non_lra_type_vacancy_all = pd.concat(vacancy_list, ignore_index=True)
non_lra_type_vacancy_all.to_csv('data/gj_non-lra_type_vacancy_all.csv')

### then do the same for STL

In [17]:
stl_vacancy_cat_df = pd.DataFrame(columns=['Date', 'V_Indeterminate', 'V_Possible', 'V_Very_likely', 'V_Definite'])
stl_burden_cat_df = pd.DataFrame(columns=['Date', 'B_Zero', 'B_Low', 'B_Medium', 'B_High'])

stl_lra_vacancy_cat_df = pd.DataFrame(columns=['Date', 'V_Indeterminate', 'V_Possible', 'V_Very_likely', 'V_Definite'])
stl_lra_burden_cat_df = pd.DataFrame(columns=['Date', 'B_Zero', 'B_Low', 'B_Medium', 'B_High'])

stl_non_lra_vacancy_cat_df = pd.DataFrame(columns=['Date', 'V_Indeterminate', 'V_Possible', 'V_Very_likely', 'V_Definite'])
stl_non_lra_burden_cat_df = pd.DataFrame(columns=['Date', 'B_Zero', 'B_Low', 'B_Medium', 'B_High'])

In [18]:
path = "stl_vacancy_data/*.csv"
csv_list = []
for fname in glob.glob(path):
    csv_name = re.findall(r'stl_vacancy_data_\d\d\d\d-\d\d-\d\d.csv', fname)[0]
    csv_list.append(csv_name)

for csv in csv_list:

    d = VacancyTransformer(csv)
    stl_df = d.load_raw_df()

    lra_df = stl_df.loc[stl_df['IsLRA'].isin([True])]
    non_lra_df = stl_df.loc[~stl_df['IsLRA'].isin([True])]


    d.calc_vacancy_cats(df=stl_df, aggregate_df=stl_vacancy_cat_df)
    d.calc_burden_cats(df=stl_df, aggregate_df=stl_burden_cat_df)
    d.calc_groupby_counts(df=stl_df, cat_list=['Type'], region='stl')

    # calc the lra totals
    d.calc_vacancy_cats(df=lra_df, aggregate_df=stl_lra_vacancy_cat_df)
    d.calc_burden_cats(df=lra_df, aggregate_df=stl_lra_burden_cat_df)
    d.calc_groupby_counts(df=lra_df, cat_list=['Type'], region='stl_lra')
    
    # calc the non-lra totals
    d.calc_vacancy_cats(df=non_lra_df, aggregate_df=stl_non_lra_vacancy_cat_df)
    d.calc_burden_cats(df=non_lra_df, aggregate_df=stl_non_lra_burden_cat_df)
    d.calc_groupby_counts(df=non_lra_df, cat_list=['Type'], region='stl_non-lra')

In [19]:
stl_burden_cat_df.to_csv('data/temp/stl_burden.csv')
stl_vacancy_cat_df.to_csv('data/temp/stl_vacancy.csv')

stl_lra_burden_cat_df.to_csv('data/temp/stl_lra_burden.csv')
stl_lra_vacancy_cat_df.to_csv('data/temp/stl_lra_vacancy.csv')

stl_non_lra_burden_cat_df.to_csv('data/temp/stl_non-lra_burden.csv')
stl_non_lra_vacancy_cat_df.to_csv('data/temp/stl_non-lra_vacancy.csv')

In [20]:
# csv_folder = 'data/'
path = "data/temp/type_burden_data/stl/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['B_Zero'].sum(), res_df['B_Low'].sum(), res_df['B_Medium'].sum(), res_df['B_High'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['B_Zero'].sum(), multi_df['B_Low'].sum(), multi_df['B_Medium'].sum(), multi_df['B_High'].sum()]

    df.to_csv(fname)

# csv_folder = 'data/'
path = "data/temp/type_vacancy_data/stl/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['V_Indeterminant'].sum(), res_df['V_Possible'].sum(), res_df['V_Very_Likely'].sum(), res_df['V_Definite'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['V_Indeterminant'].sum(), multi_df['V_Possible'].sum(), multi_df['V_Very_Likely'].sum(), multi_df['V_Definite'].sum()]

    df.to_csv(fname)

In [21]:
path = "data/temp/type_burden_data/stl_lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['B_Zero'].sum(), res_df['B_Low'].sum(), res_df['B_Medium'].sum(), res_df['B_High'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['B_Zero'].sum(), multi_df['B_Low'].sum(), multi_df['B_Medium'].sum(), multi_df['B_High'].sum()]

    df.to_csv(fname)

path = "data/temp/type_vacancy_data/stl_lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['V_Indeterminant'].sum(), res_df['V_Possible'].sum(), res_df['V_Very_Likely'].sum(), res_df['V_Definite'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['V_Indeterminant'].sum(), multi_df['V_Possible'].sum(), multi_df['V_Very_Likely'].sum(), multi_df['V_Definite'].sum()]

    df.to_csv(fname)

In [22]:
path = "data/temp/type_burden_data/stl_non-lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['B_Zero'].sum(), res_df['B_Low'].sum(), res_df['B_Medium'].sum(), res_df['B_High'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['B_Zero'].sum(), multi_df['B_Low'].sum(), multi_df['B_Medium'].sum(), multi_df['B_High'].sum()]

    df.to_csv(fname)

path = "data/temp/type_vacancy_data/stl_non-lra/*.csv"
csv_list = []

for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    
    date = df['Date'][0]
    
    res_cats = ['Duplex', 'Multi-Unit', 'Single-Family']
    res_mask = df['Type'].isin(res_cats)
    res_df = df[res_mask]
    
    df.loc[len(df.index)] = [date, 'Residential', res_df['V_Indeterminant'].sum(), res_df['V_Possible'].sum(), res_df['V_Very_Likely'].sum(), res_df['V_Definite'].sum()]

    multi_cats = ['Mixed-Use', 'Other']
    multi_mask = df['Type'].isin(multi_cats)
    multi_df = df[multi_mask]
    
    df.loc[len(df.index)] = [date, 'Mixed-Use/Other', multi_df['V_Indeterminant'].sum(), multi_df['V_Possible'].sum(), multi_df['V_Very_Likely'].sum(), multi_df['V_Definite'].sum()]

    df.to_csv(fname)

In [23]:
path = "data/temp/type_burden_data/stl/*.csv"
burden_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    burden_list.append(df)
gj_type_burden_all = pd.concat(burden_list, ignore_index=True)
gj_type_burden_all.to_csv('data/stl_type_burden_all.csv')

path = "data/temp/type_vacancy_data/stl/*.csv"
vacancy_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    vacancy_list.append(df)
gj_type_vacancy_all = pd.concat(vacancy_list, ignore_index=True)
gj_type_vacancy_all.to_csv('data/stl_type_vacancy_all.csv')

In [24]:
path = "data/temp/type_burden_data/stl_lra/*.csv"
burden_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    burden_list.append(df)
lra_type_burden_all = pd.concat(burden_list, ignore_index=True)
lra_type_burden_all.to_csv('data/stl_lra_type_burden_all.csv')

path = "data/temp/type_vacancy_data/stl_lra/*.csv"
vacancy_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    vacancy_list.append(df)
lra_type_vacancy_all = pd.concat(vacancy_list, ignore_index=True)
lra_type_vacancy_all.to_csv('data/stl_lra_type_vacancy_all.csv')

In [25]:
path = "data/temp/type_burden_data/stl_non-lra/*.csv"
burden_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    burden_list.append(df)
non_lra_type_burden_all = pd.concat(burden_list, ignore_index=True)
non_lra_type_burden_all.to_csv('data/stl_non-lra_type_burden_all.csv')

path = "data/temp/type_vacancy_data/stl_non-lra/*.csv"
vacancy_list = []
for fname in glob.glob(path):
    df = pd.read_csv(fname, index_col=0)
    vacancy_list.append(df)
non_lra_type_vacancy_all = pd.concat(vacancy_list, ignore_index=True)
non_lra_type_vacancy_all.to_csv('data/stl_non-lra_type_vacancy_all.csv')

In [26]:
parcels

Unnamed: 0,LowerAsrPa,ColParcelI,ColCityBlo,ColParcel,PrimAddrRe,AddrType,LowAddrNum,LowAddrSuf,HighAddrNu,HighAddrSu,NLC,PARITY,StPreDir,StName,StType,StSufDir,StdUnitNum,OWNERNAME,OWNERNAME2,OWNERADDR,OWNERCITY,OWNERSTATE,OwnerCount,OWNERZIP,OwnerRank,LegalDesc1,LegalDesc2,LegalDesc3,LegalDesc4,LegalDesc5,AsrClassCo,AsrLandUse,AsrLandu_1,RedevPhase,RedevYearE,RedevPha_1,RedevYea_1,VacantLot,SpecBusDis,SpecBusD_1,TIFDist,LendingAgc,Condominiu,NbrOfUnits,NbrOfApts,FRONTAGE,LANDAREA,RecDailyDa,RecDailyNu,RecBookNum,RecPageNum,AsdLand,AsdImprove,AsdTotal,BillLand,BillImprov,BillTotal,AprLand,CostAprImp,AsmtAppeal,AsmtAppe_1,AsmtAppe_2,PriorAsdDa,PriorAsdLa,PriorAsdIm,PriorAsdTo,PriorTaxAm,CDALandUse,CDALandU_1,LRMSUnitNu,Zoning,NbrOfBldgs,NbrOfBld_1,FirstYearB,LastYearBu,ResSalePri,ResSaleDat,VacBldgYea,GeoCityBlo,WARD10,PRECINCT10,INSPAREA10,Ward00,PRECINCT02,PRECINCT04,NBRHD,CDADIST,CDASUBDIST,POLICEDIST,CensTract1,CensBlock1,CensBlock0,Ward90,Precinct90,CensBlock9,HouseConsD,ASRNBRHD,EntZone,IMPACTAREA,CTDArea,LEAFAREA,ZIP,OnFloodBlo,SpecParcel,SubParcelT,NbrOfSubAc,NbrOfCondo,LRMSParcel,AcctPrimar,HANDLE,OWNEROCC,FirstDate,LastDate,OwnerUpdat,OwnerCode,SITEADDR,SQFT,ParcelId,WARD,TaxBalance,PropertyCl,IsAbatedPr,AbatementS,AbatementE,SpecBusD_2,Ward20,Precinct20,InspArea20,CensTract2,CensBlock2,MaintZoneW,TransDevDi,Shape_Leng,Shape_Area
0,15749330.0,15740003300,157400,3300,1,3,3541,,3541,,145.0,O,,OREGON,AV,,,YIELD UNIFIED VALUE ASSETS LAKE LLC,,SNUNIT 4,GEDERA,,ISRAEL,,,C.B. 1574 OREGON,40 FT X 132 FT 11 IN,ANDERSON ADDN,BND N-RAUSCH E-OREGON S-DICKHAUS W-ALLEY,,100,1140,0,0,0,0,0,0,0,0,0,0,0,4,0,40.0,0,2022/08/18,74,1856,694,1370.0,15650.0,17020.0,1370.0,15650.0,17020.0,7200.0,82346.0,0,0,,,0.0,0.0,0.0,0.0,1140,0,,B,1,0,1924,1924,53500.0,2012/12/05,0,1574.0,20,3,1,20,7,2,19,7,45,3,1241.0,4004,1241.4003,10,6,1241.605,17,141,0,1,0,0,63118,0,,,0,0,1,1,11574000330,,2001/10/07,2023/12/31,2022/08/26,0,3541 OREGON AV,5294,15749330000,7,0.00,15,0,0,0,0,7,2,2,1241,4004,11,0,345.501217,5293.865692
1,25699060.0,25690000600,256900,600,1,3,3930,,3930,,178.0,E,,NEBRASKA,AV,,,3930 NEBRASKA LLC,,1243 WATER TOWER PL STE 307,ARNOLD,MO,,63010,,C B 2569 NEBRASKA AV,25 FT X 124 FT 9 IN,ST L COMMONS ADDN,BLK 2 LOT 16,,100,1120,0,0,0,0,0,0,0,0,0,0,0,2,0,25.0,0,2019/04/26,47,,0,860.0,1280.0,2140.0,860.0,1280.0,2140.0,4500.0,6741.0,0,0,,,0.0,0.0,0.0,0.0,1120,0,,B,1,0,1895,1895,26800.0,2000/12/07,0,2569.0,20,1,2,20,2,1,16,7,45,3,1241.0,3022,1241.2003,10,7,1241.403,17,141,0,2,0,0,63118,0,,,0,0,1,1,12569000060,,2001/10/07,2023/12/31,2019/05/02,0,3930 NEBRASKA AV,3084,25699060000,3,0.00,15,0,0,0,0,3,13,13,1241,3020,11,0,298.942990,3083.935947
2,25699070.0,25690000700,256900,700,1,3,3928,,3928,,178.0,E,,NEBRASKA,AV,,,VANDELAY LLC,,24040 NW BRAGA RD,NORTH PLAINS,OR,,97133,,C B 2569 NEBRASKA AV,25 FT X 124 FT 9 IN,ST L COMMONS ADDN,BLK 2 LOT 17,,100,1110,0,0,0,0,0,0,0,0,0,0,0,1,0,25.0,0,2019/09/16,213,,0,860.0,2010.0,2870.0,860.0,2010.0,2870.0,4500.0,10602.0,0,0,,,0.0,0.0,0.0,0.0,1110,0,,B,1,0,1895,1895,32000.0,1999/05/13,0,2569.0,20,1,2,20,2,1,16,7,45,3,1241.0,3022,1241.2003,10,7,1241.403,17,141,0,2,0,0,63118,0,,,0,0,1,1,12569000070,,2001/10/07,2023/12/31,2019/09/20,0,3928 NEBRASKA AV,3084,25699070000,3,0.00,15,0,0,0,0,3,13,13,1241,3020,11,0,298.942342,3083.912005
3,25939100.0,25930001000,259300,1000,1,3,3436,,3436,,626.0,E,,MONTANA,ST,,,RON LLC,,5415 GERTRUDE AV,ST LOUIS,MO,USA,63116,,C. B. 2593 MONTANA ST,25 FT X 125 FT,KRETZER'S ADDN,BLOCK 2,LOTS W-15 & E-16,100,1120,0,0,0,0,0,0,0,0,0,0,0,2,0,25.0,0,2019/09/25,66,1842,36,1200.0,7670.0,8870.0,1200.0,7670.0,8870.0,6300.0,40384.0,0,0,,,0.0,0.0,0.0,0.0,1120,0,,B,1,0,1902,1902,85000.0,2003/05/07,0,2593.0,25,1,2,25,1,1,16,7,45,3,1157.0,3003,1157.3001,25,1,1157.502,86,140,0,2,0,0,63118,0,,,0,0,1,1,12593000100,,2001/10/07,2023/12/31,2021/09/07,0,3436 MONTANA ST,3125,25939100000,3,0.00,15,0,0,0,0,3,7,7,1157,3003,11,0,299.999819,3124.979122
4,25939110.0,25930001100,259300,1100,1,3,3434,,3434,,626.0,E,,MONTANA,ST,,,SFR3-010 LLC,,608 SUMMIT AVE,MILL VALLEY,CA,USA,94941,,C. B. 2593 MONTANA ST,25 FT X 125 FT,KRETZER'S ADDN,BLOCK 2,LOT W-14 & E-15,100,1110,0,0,0,0,0,0,0,0,0,0,0,1,0,25.0,0,2020/12/02,132,1847,383,1200.0,10010.0,11210.0,1200.0,10010.0,11210.0,6300.0,52661.0,0,0,,,0.0,0.0,0.0,0.0,1110,0,,B,1,0,1902,1902,45000.0,1986/05/22,0,2593.0,25,1,2,25,1,1,16,7,45,3,1157.0,3003,1157.3001,25,1,1157.502,86,140,0,2,0,0,63118,0,,,0,0,1,1,12593000110,,2001/10/07,2023/12/31,2020/12/16,0,3434 MONTANA ST,3125,25939110000,3,920.27,15,0,0,0,0,3,7,7,1157,3003,11,0,300.000467,3125.019005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5122,25669351.0,25660003540,256600,3540,1,3,2701,,2701,,288.0,O,,OSAGE,ST,,A,"LASH, DEBRA L",,3815 OHIO AV,ST LOUIS,MO,USA,63118.0,,C.B. 2566 OHIO,ALEXIAN POINT CONDO,UNIT A,BTO SEE 2566 00 03400 & 03500,,100,1114,0,0,0,0,0,0,0,0,0,0,1,1,0,0.0,0,2021/09/14,3,1851,661,0.0,16370.0,16370.0,0.0,16370.0,16370.0,0.0,86135.0,0,0,,,0.0,0.0,0.0,0.0,1114,0,A,B,1,0,1895,1895,0.0,,0,2566.0,20,4,2,9,11,8,16,7,45,3,1241.0,1028,1241.1015,10,7,1241.305,17,144,0,2,0,0,63118,0,,C,0,0,1,1,12566008351,,2005/07/03,2023/12/31,2021/09/17,0,2701 OSAGE ST,7714,25669351004,3,1330.92,15,0,0,0,0,3,13,13,1241,1026,11,0,402.020896,7714.477178
5123,25669351.0,25660003550,256600,3550,1,3,2701,,2701,,288.0,O,,OSAGE,ST,,B,WOOD FAMILY HOMES LLC,,2701 OSAGE ST #B,ST LOUIS,MO,,63118.0,P,C.B. 2566 OHIO,ALEXIAN POINT CONDO,UNIT B,BTO SEE 2566 00 03400 & 03500,,100,1114,0,0,0,0,0,0,0,0,0,670,1,1,0,0.0,0,2005/10/14,188,0000,0,0.0,16370.0,16370.0,0.0,16370.0,16370.0,0.0,86135.0,0,0,,,0.0,0.0,0.0,0.0,1114,0,B,B,1,0,1895,1895,0.0,,0,2566.0,20,4,2,9,11,8,16,7,45,3,1241.0,1028,1241.1015,10,7,1241.305,17,144,0,2,0,0,63118,0,,C,0,0,1,1,12566008351,C,2005/07/03,2023/12/31,2018/01/25,0,2701 OSAGE ST,7714,25669351005,3,1330.92,15,0,0,0,0,3,13,13,1241,1026,11,0,402.020896,7714.477178
5124,25669351.0,25660003560,256600,3560,1,3,2701,,2701,,288.0,O,,OSAGE,ST,,C,"THORNTON, ROBERTA MOORE",,2701 OSAGE ST,ST LOUIS,,MO,63118.0,,C.B. 2566 OHIO,ALEXIAN POINT CONDO,UNIT C,BTO SEE 2566 00 03400 & 03500,,100,1114,0,0,0,0,0,0,0,0,0,670,1,1,0,0.0,0,2005/08/04,94,,0,0.0,17650.0,17650.0,0.0,17650.0,17650.0,0.0,92876.0,0,0,,,0.0,0.0,0.0,0.0,1114,0,C,B,1,0,1895,1895,0.0,,0,2566.0,20,4,2,9,11,8,16,7,45,3,1241.0,1028,1241.1015,10,7,1241.305,17,144,0,2,0,0,63118,0,,C,0,0,1,1,12566008351,C,2005/07/03,2023/12/31,2016/03/18,0,2701 OSAGE ST,7714,25669351006,3,0.00,15,0,0,0,0,3,13,13,1241,1026,11,0,402.020896,7714.477178
5125,15699143.0,15690001430,156900,1430,2,3,3414,,3416,,91.0,E,,CALIFORNIA,AV,,,WJL PROPERTIES LLC,,1105 BUCK AVE,ST LOUIS,MO,USA,63117.0,,C. B. 1569 CALIFORNIA AV,3416 CALIFORNIA CONDOMINIUM,UNIT 3414,,,100,1114,0,0,0,0,0,0,0,0,0,0,1,1,0,0.0,0,2011/12/28,246,,0,0.0,2640.0,2640.0,0.0,2640.0,2640.0,0.0,13910.0,0,0,,,0.0,0.0,0.0,0.0,1114,0,,B,1,0,1889,1889,0.0,,1989,1569.0,20,5,1,20,5,4,19,7,45,3,1241.0,4008,1241.4000,10,6,1241.602,17,144,0,1,0,0,63118,0,,C,0,0,1,1,11569008143,,2008/03/09,2023/12/31,2022/10/03,0,3414 CALIFORNIA AV,2668,15699143001,7,0.00,15,0,0,0,0,7,2,2,1241,4008,11,0,271.605319,2667.607684
