# __311 Service Requests in Chicago__

## Import dependencies

In [1]:
import altair as alt
import pandas as pd
import geopandas as gpd

## Helper Functions

In [2]:
def show_col_row_count(df):

    row, col = df.shape
    msg = "The dataframe currently has {} rows and {} columns."
    print(msg.format(row, col))

def drop_non_numeric(df, columns, verbose=False):

    if verbose:
        rows, _ = df.shape
        col_msg = "Removing all non-numeric rows for the following columns: {}."
        print(col_msg.format(columns))
    for col in columns:
        df = df[pd.to_numeric(df[col], errors="coerce").notnull()]
        if verbose:
            new_rows, _ = df.shape
            print("Removed {} rows from {}.".format(rows - new_rows, col))
            rows = new_rows
    return df

def drop_na(dataframe, columns, verbose=False):

    if verbose:
        rows, _ = dataframe.shape
        col_msg = "Removing all rows with NAs for the following columns: {}."
        print(col_msg.format(columns))
    df = dataframe.dropna(subset = columns)
    if verbose:
        new_rows, _ = dataframe.shape
        print("Removed {} rows.".format(rows - new_rows))
    return df

def save_columns(dataframe, columns):

    dataframe = dataframe[columns]
    return dataframe

def df_to_csv(dataframe, filename, verbose=False):

    try:
        dataframe.to_csv(filename, index=False)
        if verbose:
            print("Successfully saved to {}.".format(filename))
    except:
        if verbose:
            print("Failed to save to {}.".format(filename))

def extract_census_tract(row):
    return row.split(",")[0]

def evaluateForBivariate(row, x_var):
    # Adapted from the slack channel (Credit to Andrew McNutt)
    colorMatrix = [
        ["#64acbe", "#627f8c", "#574249"],
        ["#b0d5df", "#ad9ea5", "#985356"],
        ["#e8e8e8", "#e4acac", "#c85a5a"],
    ]
    xBoundaries = x_var[1]#[1165.02, 1959.06]
    xIdx = 0
    if row[x_var[0]] < xBoundaries[0]:
        xIdx = 0
    elif row[x_var[0]] < xBoundaries[1]:
        xIdx = 1
    else:
        xIdx = 2
    yBoundaries = [8200.15, 12498.46]
    yIdx = 0
    if int(row["Household_Income"]) < yBoundaries[0]:
        yIdx = 2
    elif int(row["Household_Income"]) < yBoundaries[1]:
        yIdx = 1
    else:
        yIdx = 0

    return colorMatrix[yIdx][xIdx]

def create_count_df_by_time(df, time_col):

    df = df[[time_col, "CREATED_YEAR", "STATUS"]]\
        .groupby([time_col, "CREATED_YEAR"])\
        .STATUS.agg("count")\
        .reset_index(name="COUNT")
    df["COUNT"] = df["COUNT"].div(1000)
    return df

def create_count_df_by_year(dfs, time_col, verbose=False):
    
    merged_df = create_count_df_by_time(dfs[0], time_col)
    for df in dfs[1:]:
        df = create_count_df_by_time(df, time_col)
        merged_df = merged_df.append(df, ignore_index=True)
    if verbose:
        print(merged_df.sample(n=5))
    return merged_df

def income_by_tract(request_df, income_df):
    
    tracts_by_type = request_df.groupby(["namelsad10"]).size().reset_index()
    tracts_by_type = tracts_by_type.rename(columns={0: "Number_of_Requests"})
    tracts_by_type = tracts_by_type.merge(income_df, on='namelsad10', how='left')
    tracts_by_type = drop_non_numeric(tracts_by_type, ["Household_Income"], verbose=True)
    return tracts_by_type

def request_by_tract_per_type(request_df, sr_types):
    
    result_df = request_df.groupby(["namelsad10"]).size().reset_index()
    result_df = result_df.rename(columns={0: "All Requests"})
    for sr_type in sr_types:
        tracts_by_type = request_df[(request_df['SR_TYPE'] == sr_type)]
        tracts_by_type = tracts_by_type.groupby(["namelsad10"]).size().reset_index()
        tracts_by_type = tracts_by_type.rename(columns={0: "{}".format(sr_type)})
        result_df = result_df.merge(tracts_by_type, on='namelsad10', how='left')
    filename = "./data/tracts_count_by_type.csv"
    df_to_csv(result_df, filename, verbose=True)
        

## Load the dataset - `311_Service_Requests.csv`

In [3]:
data_dir = "./data/311_Service_Requests_Chicago.csv"
dtype_dict = {
    "STREET_NUMBER": "object",
    "LEGACY_SR_NUMBER": "object",
    "PARENT_SR_NUMBER": "object",
    "SANITATION_DIVISION_DAYS": "object",
    }
df = pd.read_csv(data_dir, dtype=dtype_dict)

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
df["CREATED_DATE"] = pd.to_datetime(df["CREATED_DATE"], format='%m/%d/%Y %I:%M:%S %p')
df["CREATED_YEAR"] = df["CREATED_DATE"].dt.year
df["CLOSED_DATE"].fillna("01/16/2020 10:20:00 PM", inplace = True) 
df["CLOSED_DATE"]= pd.to_datetime(df["CLOSED_DATE"], format='%m/%d/%Y %I:%M:%S %p')
df["CLOSED_YEAR"] = df["CLOSED_DATE"].dt.year
df["DAYS_TAKEN"] = df["CLOSED_DATE"] - df["CREATED_DATE"]
df["DAYS_TAKEN"] = df["DAYS_TAKEN"].dt.days

In [5]:
df.sample(n = 1) 

Unnamed: 0,SR_NUMBER,SR_TYPE,SR_SHORT_CODE,OWNER_DEPARTMENT,STATUS,CREATED_DATE,LAST_MODIFIED_DATE,CLOSED_DATE,STREET_ADDRESS,CITY,...,CREATED_DAY_OF_WEEK,CREATED_MONTH,X_COORDINATE,Y_COORDINATE,LATITUDE,LONGITUDE,LOCATION,CREATED_YEAR,CLOSED_YEAR,DAYS_TAKEN
803001,SR19-02588449,Street Light Out Complaint,SFD,CDOT - Department of Transportation,Completed,2019-09-27 07:19:46,09/30/2019 06:19:14 PM,2019-09-30 18:19:14,7642 S PEORIA ST,,...,6,9,1171557.0,1853972.0,41.754766,-87.646869,"(41.75476556052962, -87.64686906674802)",2019,2019,3


In [6]:
show_col_row_count(df)

The dataframe currently has 2362297 rows and 40 columns.


In [7]:
df = drop_non_numeric(df, ["ZIP_CODE"], verbose=True)

Removing all non-numeric rows for the following columns: ['ZIP_CODE'].
Removed 17961 rows from ZIP_CODE.


In [8]:
df = drop_na(df, ["ZIP_CODE", "CREATED_DATE", "WARD", "LOCATION", "LATITUDE", "LATITUDE"], verbose=True)

Removing all rows with NAs for the following columns: ['ZIP_CODE', 'CREATED_DATE', 'WARD', 'LOCATION', 'LATITUDE', 'LATITUDE'].
Removed 0 rows.


In [9]:
columns = [
    "SR_NUMBER", "SR_TYPE", "SR_SHORT_CODE",
    "OWNER_DEPARTMENT","STATUS", "CREATED_DATE",
    "CLOSED_DATE", "ZIP_CODE", "DUPLICATE",
    "LEGACY_RECORD", "WARD", "LATITUDE",
    "LONGITUDE", "LOCATION",
    "DAYS_TAKEN", "CREATED_YEAR", "CLOSED_YEAR",
    "CREATED_HOUR",	"CREATED_DAY_OF_WEEK", "CREATED_MONTH"
]
df = save_columns(df, columns)

In [10]:
df.sample(n=1)

Unnamed: 0,SR_NUMBER,SR_TYPE,SR_SHORT_CODE,OWNER_DEPARTMENT,STATUS,CREATED_DATE,CLOSED_DATE,ZIP_CODE,DUPLICATE,LEGACY_RECORD,WARD,LATITUDE,LONGITUDE,LOCATION,DAYS_TAKEN,CREATED_YEAR,CLOSED_YEAR,CREATED_HOUR,CREATED_DAY_OF_WEEK,CREATED_MONTH
630673,SR19-02269888,311 INFORMATION ONLY CALL,311IOC,311 City Services,Completed,2019-08-21 20:11:45,2019-08-21 20:11:45,60612,False,False,28.0,41.871831,-87.679846,"(41.871831277993564, -87.67984621876099)",0,2019,2019,20,4,8


In [11]:
show_col_row_count(df)

The dataframe currently has 2325604 rows and 20 columns.


In [12]:
# Save the dataset

# Dataset for 2018/07/01 - 2018/12/31
df_2018 = df[(df['CREATED_DATE'] > '2018-07-01 00:00:00') & (df['CREATED_DATE'] <= '2018-12-31 23:59:59')]
filename_2018 = "./data/cleaned_311_2018.csv"
df_to_csv(df_2018, filename_2018, verbose=True)
# Dataset for 2019/07/01 - 2019/12/31
df_2019 = df[(df['CREATED_DATE'] > '2019-07-01 00:00:00') & (df['CREATED_DATE'] <= '2019-12-31 23:59:59')]
filename_2019 = "./data/cleaned_311_2019.csv"
df_to_csv(df_2019, filename_2019, verbose=True)
# Entire cleaned dataset
filename = "./data/cleaned_311.csv"
df_to_csv(df, filename, verbose=True)

Successfully saved to ./data/cleaned_311_2018.csv.
Successfully saved to ./data/cleaned_311_2019.csv.
Successfully saved to ./data/cleaned_311.csv.


In [13]:
day_dict = {
    1: "Sunday",
    2: "Monday",
    3: "Tuesday",
    4: "Wednesday",
    5: "Thursday",
    6: "Friday",
    7: "Saturday",
}

requests_by_day = create_count_df_by_year([df_2018, df_2019], "CREATED_DAY_OF_WEEK")
requests_by_day["CREATED_DAY_OF_WEEK"] = requests_by_day["CREATED_DAY_OF_WEEK"].apply(lambda x: day_dict[x])

day_filename = "./data/311_requests_by_day.csv"
df_to_csv(requests_by_day, day_filename, verbose=True)

Successfully saved to ./data/311_requests_by_day.csv.


In [14]:
month_dict = {
    7: "July",
    8: "August",
    9: "September",
    10: "October",
    11: "November",
    12: "December"
}
requests_by_month = create_count_df_by_year([df_2018, df_2019], "CREATED_MONTH")
requests_by_month["MONTH"] = requests_by_month["CREATED_MONTH"].apply(lambda x: month_dict[x])

month_filename = "./data/311_requests_by_month.csv"
df_to_csv(requests_by_month, month_filename, verbose=True)

Successfully saved to ./data/311_requests_by_month.csv.


In [15]:
dept = [
        "Streets and Sanitation",
        "CDOT - Department of Transportation",
        "DWM - Department of Water Management",
        "DOB - Buildings",
        "Animal Care and Control",
        "BACP - Business Affairs and Consumer Protection",
        "311 City Services"
        ]
dept_dict = {
    "Streets and Sanitation": "Streets and Sanitation",
    "CDOT - Department of Transportation": "Transportation",
    "DWM - Department of Water Management": "Water Management",
    "DOB - Buildings": "Buildings",
    "Animal Care and Control": "Animal Care / Control",
    "BACP - Business Affairs and Consumer Protection": "Business Affairs",
    "311 City Services": "311 City Services"
    }

dept_count = df_2018.groupby(['OWNER_DEPARTMENT', "CREATED_YEAR"]).size().sort_values(ascending=False).reset_index(name='COUNT')
top_10_type_2018 = dept_count.nlargest(8, 'COUNT')
dept_count = df_2019.groupby(['OWNER_DEPARTMENT', "CREATED_YEAR"]).size().sort_values(ascending=False).reset_index(name='COUNT')
top_10_type_2019 = dept_count.nlargest(8, 'COUNT')

top_10_type = top_10_type_2018.append(top_10_type_2019, ignore_index=True)
top_10_type = top_10_type[top_10_type.OWNER_DEPARTMENT.isin(dept)].reset_index(drop=True)
top_10_type["OWNER_DEPARTMENT"] = top_10_type["OWNER_DEPARTMENT"].apply(lambda x: dept_dict[x])
top_10_type["COUNT"] = top_10_type["COUNT"] / 1000

top_10_filename = "./data/top_10_dept.csv"
df_to_csv(top_10_type, top_10_filename, verbose=True)

Successfully saved to ./data/top_10_dept.csv.


In [16]:
top_10_type.sample(n=1)

Unnamed: 0,OWNER_DEPARTMENT,CREATED_YEAR,COUNT
0,Streets and Sanitation,2018,240.046


In [17]:
top_10_sr_type_2018 = df_2018.groupby(['SR_TYPE', "CREATED_YEAR"]).size().sort_values(ascending=False).reset_index(name='COUNT')
top_10_sr_type_2019 = df_2019.groupby(['SR_TYPE', "CREATED_YEAR"]).size().sort_values(ascending=False).reset_index(name='COUNT')
sr_type = [
    "Aircraft Noise Complaint",
    "Pothole in Street Complaint",
    "Rodent Baiting/Rat Complaint",
    "311 INFORMATION ONLY CALL",
    "Street Light Out Complaint",
    "Graffiti Removal Request"
    ]
top_10_sr_type = top_10_sr_type_2018.append(top_10_sr_type_2019, ignore_index=True)
top_10_sr_type = top_10_sr_type[top_10_sr_type.SR_TYPE.isin(sr_type)].reset_index(drop=True)
#top_10_sr_type["SR_TYPE"] = top_10_sr_type["SR_TYPE"].apply(lambda x: dept_dict[x])
top_10_sr_type["COUNT"] = top_10_sr_type["COUNT"] / 1000

top_10_sr_filename = "./data/top_10_sr_type.csv"
df_to_csv(top_10_sr_type, top_10_sr_filename, verbose=True)

Successfully saved to ./data/top_10_sr_type.csv.


In [18]:
top_10_sr_type

Unnamed: 0,SR_TYPE,CREATED_YEAR,COUNT
0,Graffiti Removal Request,2018,49.983
1,Street Light Out Complaint,2018,42.677
2,Rodent Baiting/Rat Complaint,2018,24.243
3,Pothole in Street Complaint,2018,20.114
4,311 INFORMATION ONLY CALL,2018,17.812
5,Aircraft Noise Complaint,2018,1.825
6,311 INFORMATION ONLY CALL,2019,348.179
7,Aircraft Noise Complaint,2019,243.317
8,Street Light Out Complaint,2019,51.46
9,Graffiti Removal Request,2019,49.783


## Create a dataset related to the budget for Chicago

In [19]:
dept = [
    "Aviation",
    "CDOT - Department of Transportation",
    "Streets and Sanitation",
    "Fire",
    "DWM - Department of Water Management"
    ]
dept_dict = {
    "AVIATION": "Aviation",
    "CDOT": "CDOT - Department of Transportation",
    "DSS": "Streets and Sanitation",
    "CFD": "Fire",
    "OEMC": "DWM - Department of Water Management",
    }
label_dict = {
    "Aviation": "Aviation",
    "CDOT - Department of Transportation": "Transportation",
    "Streets and Sanitation": "Streets and Sanitation",
    "Fire": "Fire Department",
    "DWM - Department of Water Management": "Water Management"
    }

count_by_dept = df.groupby(['OWNER_DEPARTMENT', "CREATED_YEAR"]).size().sort_values(ascending=False).reset_index(name='COUNT')
count_by_dept = count_by_dept[count_by_dept.OWNER_DEPARTMENT.isin(dept)].reset_index(drop=True)
count_by_dept["COUNT"] = count_by_dept["COUNT"] / 1000

budget_dir = "./data/budget/2019_Budget_Ordinance.csv"
budget_df = pd.read_csv(budget_dir)

budget_df = budget_df[budget_df["DEPARTMENT DESCRIPTION"].isin(dept_dict)].reset_index(drop=True)
budget_df["DEPARTMENT DESCRIPTION"] = budget_df["DEPARTMENT DESCRIPTION"].apply(lambda x: dept_dict[x])
budget_df = budget_df.groupby(['DEPARTMENT DESCRIPTION'])['2019 ORDINANCE (AMOUNT $)'].agg('sum').reset_index()
budget_df["2019 ORDINANCE (AMOUNT $)"] = budget_df["2019 ORDINANCE (AMOUNT $)"]/1000000
budget_df = budget_df.rename(columns={"DEPARTMENT DESCRIPTION": "OWNER_DEPARTMENT"})
budget_df = budget_df.merge(count_by_dept[count_by_dept["CREATED_YEAR"]==2019], on='OWNER_DEPARTMENT', how='left')
budget_df["OWNER_DEPARTMENT"] = budget_df["OWNER_DEPARTMENT"].apply(lambda x: label_dict[x])
budget_filename = "./data/budget.csv"
df_to_csv(budget_df, budget_filename, verbose=True)

Successfully saved to ./data/budget.csv.


In [20]:
budget_df

Unnamed: 0,OWNER_DEPARTMENT,2019 ORDINANCE (AMOUNT $),CREATED_YEAR,COUNT
0,Aviation,911.289787,2019,345.002
1,Transportation,543.060487,2019,319.012
2,Water Management,233.860841,2019,55.205
3,Fire Department,652.282316,2019,0.355
4,Streets and Sanitation,268.02634,2019,413.55


In [21]:
dept = [
    "BACP - Business Affairs and Consumer Protection",
    "Health",
    "Department of Planning and Development",
    "City Clerk's Office",
    "DOB - Buildings",
    "Animal Care and Control",
    "Extreme Weather Notification",
    ]
dept_dict = {
    "BUILDINGS": "DOB - Buildings",
    "BACP": "BACP - Business Affairs and Consumer Protection",
    "HEALTH": "Health",
    "PLANNING AND DEVELOPMENT": "Department of Planning and Development",
    "CITY CLERK": "City Clerk's Office",
    "ANIMAL CARE / CONTROL": "Animal Care and Control",
    "OEMC": "Extreme Weather Notification",
    }

count_by_dept = df.groupby(['OWNER_DEPARTMENT', "CREATED_YEAR"]).size().sort_values(ascending=False).reset_index(name='COUNT')
count_by_dept = count_by_dept[count_by_dept.OWNER_DEPARTMENT.isin(dept)].reset_index(drop=True)
count_by_dept["COUNT"] = count_by_dept["COUNT"] / 1000

budget_dir = "./data/budget/2019_Budget_Ordinance.csv"
budget_df = pd.read_csv(budget_dir)

budget_df = budget_df[budget_df["DEPARTMENT DESCRIPTION"].isin(dept_dict)].reset_index(drop=True)
budget_df["DEPARTMENT DESCRIPTION"] = budget_df["DEPARTMENT DESCRIPTION"].apply(lambda x: dept_dict[x])
budget_df = budget_df.groupby(['DEPARTMENT DESCRIPTION'])['2019 ORDINANCE (AMOUNT $)'].agg('sum').reset_index()
budget_df["2019 ORDINANCE (AMOUNT $)"] = budget_df["2019 ORDINANCE (AMOUNT $)"]/1000000
budget_df = budget_df.rename(columns={"DEPARTMENT DESCRIPTION": "OWNER_DEPARTMENT"})
budget_df = budget_df.merge(count_by_dept[count_by_dept["CREATED_YEAR"]==2019], on='OWNER_DEPARTMENT', how='left')
budget_filename = "./data/budget_all.csv"
df_to_csv(budget_df, budget_filename, verbose=True)

Successfully saved to ./data/budget_all.csv.


In [22]:
budget_df

Unnamed: 0,OWNER_DEPARTMENT,2019 ORDINANCE (AMOUNT $),CREATED_YEAR,COUNT
0,Animal Care and Control,6.843307,2019,33.876
1,BACP - Business Affairs and Consumer Protection,22.199674,2019,11.741
2,City Clerk's Office,10.756887,2019,1.293
3,DOB - Buildings,38.119079,2019,34.554
4,Department of Planning and Development,35.082318,2019,0.765
5,Extreme Weather Notification,233.860841,2019,0.049
6,Health,177.264833,2019,4.195


## Create a dataset for Census Tract (Number of Requests and Income Level)



In [23]:
# Load Census Tract Polygon dataset
tract_dir = "./data/census_tract_boundary/tracts.geojson"
tracts = gpd.read_file(tract_dir)
tracts.crs = 'epsg:4326'
tracts.sample(n=1)

Unnamed: 0,statefp10,name10,commarea_n,namelsad10,commarea,geoid10,notes,tractce10,countyfp10,geometry
103,17,8419,28,Census Tract 8419,28,17031841900,,841900,31,"MULTIPOLYGON (((-87.65086 41.86096, -87.65086 ..."


In [24]:
cleaned_311 = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.LONGITUDE, df.LATITUDE))

In [25]:
cleaned_311.sample(n=1)

Unnamed: 0,SR_NUMBER,SR_TYPE,SR_SHORT_CODE,OWNER_DEPARTMENT,STATUS,CREATED_DATE,CLOSED_DATE,ZIP_CODE,DUPLICATE,LEGACY_RECORD,...,LATITUDE,LONGITUDE,LOCATION,DAYS_TAKEN,CREATED_YEAR,CLOSED_YEAR,CREATED_HOUR,CREATED_DAY_OF_WEEK,CREATED_MONTH,geometry
994851,SR19-02816552,Garbage Cart Maintenance,SIE,Streets and Sanitation,Completed,2019-10-26 02:00:53,2019-11-18 12:26:09,60617,False,False,...,41.75093,-87.576916,"(41.75092996168121, -87.5769163541104)",23,2019,2019,2,7,10,POINT (-87.57692 41.75093)


In [26]:
cleaned_311.crs = "epsg:4326"
cleaned_311 = cleaned_311[cleaned_311.geometry.type == 'Point']
tracts_311 = gpd.sjoin(cleaned_311, tracts, how="inner", op='intersects')

In [27]:
tracts_311.sample(n=1)

Unnamed: 0,SR_NUMBER,SR_TYPE,SR_SHORT_CODE,OWNER_DEPARTMENT,STATUS,CREATED_DATE,CLOSED_DATE,ZIP_CODE,DUPLICATE,LEGACY_RECORD,...,index_right,statefp10,name10,commarea_n,namelsad10,commarea,geoid10,notes,tractce10,countyfp10
1301475,SR20-03256684,311 INFORMATION ONLY CALL,311IOC,311 City Services,Completed,2020-01-04 15:54:22,2020-01-04 15:54:22,60612,False,False,...,5,17,8382,28,Census Tract 8382,28,17031838200,,838200,31


In [28]:
# tracts_311_call = tracts_311[(tracts_311['SR_TYPE'] == "311 INFORMATION ONLY CALL")]
tracts_count = tracts_311.groupby(["namelsad10"]).size().reset_index()
tracts_count = tracts_count.rename(columns={0: "Number_of_Requests"})

In [29]:
tracts_count.sample(n=1)

Unnamed: 0,namelsad10,Number_of_Requests
387,Census Tract 4604,1858


In [30]:
# load income dataset
income_dir = "./data/income_by_census_tract/income_2018_by_census_tract.csv"
income_df = pd.read_csv(income_dir)
income_df.sample(n=1)

Unnamed: 0,GEO_ID,NAME,S1901_C01_001E,S1901_C01_001M,S1901_C02_001E,S1901_C02_001M,S1901_C03_001E,S1901_C03_001M,S1901_C04_001E,S1901_C04_001M,...,S1901_C04_015E,S1901_C04_015M,S1901_C01_016E,S1901_C01_016M,S1901_C02_016E,S1901_C02_016M,S1901_C03_016E,S1901_C03_016M,S1901_C04_016E,S1901_C04_016M
1259,1400000US17031836800,"Census Tract 8368, Cook County, Illinois",826,113,445,113,101,54,381,101,...,(X),(X),(X),(X),(X),(X),(X),(X),27.3,(X)


In [31]:
# Extract Census Tract
income_df["NAME"] = income_df["NAME"].apply(lambda x : extract_census_tract(x))

# Save Estimate!!Households!!Mean income (dollars)
columns = ["NAME", "S1901_C01_013M"]
income_df = save_columns(income_df, columns)
income_df = income_df.rename(columns={"NAME": "namelsad10", "S1901_C01_013M": "Household_Income"})

In [32]:
# Merge it with tracts_count
tracts_count = tracts_count.merge(income_df, on='namelsad10', how='left')
tracts_count = drop_non_numeric(tracts_count, ["Household_Income"], verbose=True)

Removing all non-numeric rows for the following columns: ['Household_Income'].
Removed 3 rows from Household_Income.


In [33]:
tracts_count.sample(n=1)

Unnamed: 0,namelsad10,Number_of_Requests,Household_Income
528,Census Tract 6309,2208,5166


## Create a dataset for Census Tract (Population)


In [34]:
# load income dataset
pop_dir = "./data/population/population_by_tract.csv"
pop_df = pd.read_csv(pop_dir)

# Extract Census Tract
pop_df["NAME"] = pop_df["NAME"].apply(lambda x : extract_census_tract(x))

# Save Population
columns = ["NAME", "S0101_C01_001E"]
pop_df = save_columns(pop_df, columns)
pop_df = pop_df.rename(columns={"NAME": "namelsad10", "S0101_C01_001E": "Population"})

# Merge it with tracts_count
tracts_count = tracts_count.merge(pop_df, on='namelsad10', how='left')
tracts_count = drop_non_numeric(tracts_count, ["Population"], verbose=True)
tracts_count["Number_of_Requests"] = pd.to_numeric(tracts_count["Number_of_Requests"])
tracts_count["Population"] = pd.to_numeric(tracts_count["Population"])
tracts_count["Household_Income"] = pd.to_numeric(tracts_count["Household_Income"])
tracts_count["Request_Rate"] = tracts_count["Number_of_Requests"]/tracts_count["Population"]

Removing all non-numeric rows for the following columns: ['Population'].
Removed 0 rows from Population.


In [35]:
tracts_count.sample(n=1)

Unnamed: 0,namelsad10,Number_of_Requests,Household_Income,Population,Request_Rate
212,Census Tract 2601,1358,7488,1154,1.176776


In [36]:
tracts_count.quantile([.33, .66])

Unnamed: 0,Number_of_Requests,Household_Income,Population,Request_Rate
0.33,1165.02,8200.15,2370.05,0.404341
0.66,1959.06,12498.46,3943.16,0.600562


In [37]:
# 
tracts_count["color"] = tracts_count.apply(lambda x : evaluateForBivariate(x, ("Number_of_Requests", [1165.02, 1959.06])), axis = 1)
tracts_count["rate_color"] = tracts_count.apply(lambda x : evaluateForBivariate(x, ("Request_Rate", [0.404341, 0.600562])), axis = 1)

In [38]:
tracts_count.sample(n=1)

Unnamed: 0,namelsad10,Number_of_Requests,Household_Income,Population,Request_Rate,color,rate_color
324,Census Tract 3907,797,10939,5639,0.141337,#b0d5df,#b0d5df


In [39]:
# Save the merged dataset
tracts_311_count_filename = "./data/tracts_count.csv"
df_to_csv(tracts_count, tracts_311_count_filename, verbose=True)

Successfully saved to ./data/tracts_count.csv.


In [40]:
# Save the merged dataset (by type)
sr_type = [
    "Aircraft Noise Complaint",
    "Pothole in Street Complaint",
    "Rodent Baiting/Rat Complaint",
    "Weed Removal Request",
    "Street Light Out Complaint",
    "Graffiti Removal Request"
    ]
request_by_tract_per_type(tracts_311, sr_type)

Successfully saved to ./data/tracts_count_by_type.csv.
