In [2]:
%ls ../data

Business_Licenses.csv  README.md


In [3]:
from datetime import datetime 
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib2tikz

%matplotlib inline

In [4]:
data_dir = Path("../data/")
df = pd.read_csv(data_dir/"Business_Licenses.csv", low_memory=False)

In [24]:
dt_cols = [
    'LICENSE TERM START DATE', 
    'LICENSE TERM EXPIRATION DATE',
    'DATE ISSUED']
# dt_cols = ['LICENSE TERM EXPIRATION DATE',
#  'APPLICATION CREATED DATE',
#  'PAYMENT DATE',
#  'LICENSE TERM START DATE',
#  'DATE ISSUED',
#  'LICENSE STATUS CHANGE DATE']
df[dt_cols] = df[dt_cols].apply(pd.to_datetime).apply(lambda _:_.dt.year)

df


KeyboardInterrupt: 

In [None]:
# df.drop(
#     columns=['ID', 'LATITUDE', 'LONGITUDE', 'POLICE DISTRICT', 'WARD PRECINCT', 'APPLICATION REQUIREMENTS COMPLETE', 'PAYMENT DATE'], 
#     inplace=True)

In [7]:
renewals = df[
    (df["APPLICATION TYPE"] == "RENEW") & # subsequent renewal
    (df["LICENSE STATUS"]   == "AAI")     # license granted
]

latest_renewal_experation_year = (
    renewals.groupby(["LICENSE NUMBER"])["LICENSE TERM EXPIRATION DATE"]
    .apply(max)
    .to_frame())

latest_failures = latest_renewal_experation_year[latest_renewal_experation_year["LICENSE TERM EXPIRATION DATE"] <= 2018]

In [None]:
font = lambda s: {'fontname':'ITC Franklin Gothic Std', 'size': s}

latest_failures.plot(kind="hist", facecolor="#df6666", edgecolor="white", bins=20)
plt.title("Number of License Non-renewals Per Year".upper(), **font(16))
plt.xlabel("Year", **font(14))
plt.ylabel("Number of Non-renewals", **font(14))
plt.gca().get_legend().remove()

plt.savefig("non-renewals-over-time.svg", bbox_inches='tight', dpi=300)
# matplotlib2tikz.save("non-renewals-over-time.tex", figureheight="3in", figurewidth="3in")
plt.show()

In [9]:
business_failures = latest_failures.join(df, rsuffix="_orig", how="left")
(100 * business_failures["BUSINESS ACTIVITY"].value_counts(normalize=True)).to_frame().head(10)

Unnamed: 0,BUSINESS ACTIVITY
Operation of a Fuel Filling Station,23.497835
Retail Sales of Perishable Foods,12.311688
Buying and Reselling of Used Valuable Objects,10.47619
Consumption of Liquor on Premises,3.991342
Sales of Packaged Frozen Desserts From a Non-Motorized Cart (Mobile Desserts Vendor),2.744589
Tavern - Consumption of Liquor on Premise,1.878788
Hair Services,1.670996
Preparation of Food and Dining on Premise With Seating,1.627706
Other Home Occupations,1.619048
Miscellaneous Commercial Services,1.428571


In [None]:
wards = gpd.read_file("wards/wards.shp") 

failures_by_ward = business_failures["WARD"].value_counts()

# couldn't get indexes to work :(
wards["num_failures"] = wards["ward"].map(lambda w: failures_by_ward.get(int(w), 0))

In [None]:
wards.plot(column="num_failures", legend=True, cmap='inferno')
plt.gca().axis("off")
plt.title("Number of License Non-renewals Per Ward".upper(), **font(16))
plt.savefig("failure-choropleth.png", bbox_inches="tight", dpi=300)


In [19]:
business_failures.drop(columns=["LOCATION"]).to_csv('../data/imputed_failures.csv')

In [12]:
business_failures.columns

Index(['LICENSE TERM EXPIRATION DATE', 'ID', 'LICENSE ID', 'ACCOUNT NUMBER',
       'SITE NUMBER', 'LEGAL NAME', 'DOING BUSINESS AS NAME', 'ADDRESS',
       'CITY', 'STATE', 'ZIP CODE', 'WARD', 'PRECINCT', 'WARD PRECINCT',
       'POLICE DISTRICT', 'LICENSE CODE', 'LICENSE DESCRIPTION',
       'BUSINESS ACTIVITY ID', 'BUSINESS ACTIVITY', 'LICENSE NUMBER',
       'APPLICATION TYPE', 'APPLICATION CREATED DATE',
       'APPLICATION REQUIREMENTS COMPLETE', 'PAYMENT DATE',
       'CONDITIONAL APPROVAL', 'LICENSE TERM START DATE',
       'LICENSE TERM EXPIRATION DATE_orig', 'LICENSE APPROVED FOR ISSUANCE',
       'DATE ISSUED', 'LICENSE STATUS', 'LICENSE STATUS CHANGE DATE', 'SSA',
       'LATITUDE', 'LONGITUDE', 'LOCATION'],
      dtype='object')

In [21]:
business_failures.iloc[2]

LICENSE TERM EXPIRATION DATE                                                  2007
ID                                                                2496579-20170117
LICENSE ID                                                             2.49658e+06
ACCOUNT NUMBER                                                              401489
SITE NUMBER                                                                      1
LEGAL NAME                                                THIRD DEGREE CHICAGO LLC
DOING BUSINESS AS NAME                                                  STUDIO III
ADDRESS                                                     648 N CLARK ST LL, 1-2
CITY                                                                       CHICAGO
STATE                                                                           IL
ZIP CODE                                                                     60654
WARD                                                                            42
PREC

In [22]:
[col for col in business_failures.columns if "DATE" in col]

['LICENSE TERM EXPIRATION DATE',
 'APPLICATION CREATED DATE',
 'PAYMENT DATE',
 'LICENSE TERM START DATE',
 'LICENSE TERM EXPIRATION DATE_orig',
 'DATE ISSUED',
 'LICENSE STATUS CHANGE DATE']