# Final Project: Part 2 (Viz for Peer)

## Group Members

- MingFu Chou (Me)
- Ruonan Zhang
- Shukai Yao
- Ni Lin

## Data Introduction

- Dataset: City of Boston Assessing Department
- File Name: ast2018full.csv
- Data Source: Analyze Boston (City of Boston's open data hub)
- Link: https://data.boston.gov/dataset/property-assessment
- License: Open Data Commons Public Domain Dedication and License (PDDL)
- Data usage: The PDDL allows anyone to freely share, modify, and use this work for any purpose and without any restrictions.
- File Size: 54.1 MB

## Data Description

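The dataset gives property (parcel) ownership together with value information, which ensures fair assessment of Boston taxable and non-taxable property of all types and classifications. According to the data source, the identifiers PID, CM_ID, GIS_ID, ZIPCODE, and MAIL_ZIPCODE are all marked with an underscore ("_") as the last character to preserve their integrity. Note, however, that in the file as loaded below pandas infers PID as an integer and CM_ID, GIS_ID, and ZIPCODE as floats, so those columns do not carry the underscore suffix here.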

## Data Exploration

In [1]:
# import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import ipywidgets
import matplotlib.ticker as mtick

In [2]:
# Read the raw property-assessment table.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so columns that mix numbers and text get a single
# consistent dtype and no mixed-type DtypeWarning is raised.
df = pd.read_csv("data/ast2018full.csv", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Size of the raw table as (rows, columns).
df.shape

(172841, 75)

In [4]:
# All column labels available in the raw table.
df.columns

Index(['PID', 'CM_ID', 'GIS_ID', 'ST_NUM', 'ST_NAME', 'ST_NAME_SUF',
       'UNIT_NUM', 'ZIPCODE', 'PTYPE', 'LU', 'OWN_OCC', 'OWNER',
       'MAIL_ADDRESSEE', 'MAIL_ADDRESS', 'MAIL CS', 'MAIL_ZIPCODE', 'AV_LAND',
       'AV_BLDG', 'AV_TOTAL', 'GROSS_TAX', 'LAND_SF', 'YR_BUILT', 'YR_REMOD',
       'GROSS_AREA', 'LIVING_AREA', 'NUM_FLOORS', 'STRUCTURE_CLASS',
       'R_BLDG_STYL', 'R_ROOF_TYP', 'R_EXT_FIN', 'R_TOTAL_RMS', 'R_BDRMS',
       'R_FULL_BTH', 'R_HALF_BTH', 'R_BTH_STYLE', 'R_BTH_STYLE2',
       'R_BTH_STYLE3', 'R_KITCH', 'R_KITCH_STYLE', 'R_KITCH_STYLE2',
       'R_KITCH_STYLE3', 'R_HEAT_TYP', 'R_AC', 'R_FPLACE', 'R_EXT_CND',
       'R_OVRALL_CND', 'R_INT_CND', 'R_INT_FIN', 'R_VIEW', 'S_NUM_BLDG',
       'S_BLDG_STYL', 'S_UNIT_RES', 'S_UNIT_COM', 'S_UNIT_RC', 'S_EXT_FIN',
       'S_EXT_CND', 'U_BASE_FLOOR', 'U_NUM_PARK', 'U_CORNER', 'U_ORIENT',
       'U_TOT_RMS', 'U_BDRMS', 'U_FULL_BTH', 'U_HALF_BTH', 'U_BTH_STYLE',
       'U_BTH_STYLE2', 'U_BTH_STYLE3', 'U_KITCH_TYPE', 'U_KIT

In [5]:
# dtype pandas inferred for each column (object = strings / mixed content).
df.dtypes

PID                  int64
CM_ID              float64
GIS_ID             float64
ST_NUM              object
ST_NAME             object
ST_NAME_SUF         object
UNIT_NUM            object
ZIPCODE            float64
PTYPE                int64
LU                  object
OWN_OCC             object
OWNER               object
MAIL_ADDRESSEE      object
MAIL_ADDRESS        object
MAIL CS             object
MAIL_ZIPCODE        object
AV_LAND              int64
AV_BLDG              int64
AV_TOTAL             int64
GROSS_TAX            int64
LAND_SF            float64
YR_BUILT           float64
YR_REMOD           float64
GROSS_AREA         float64
LIVING_AREA        float64
NUM_FLOORS         float64
STRUCTURE_CLASS     object
R_BLDG_STYL         object
R_ROOF_TYP          object
R_EXT_FIN           object
                    ...   
R_OVRALL_CND        object
R_INT_CND           object
R_INT_FIN           object
R_VIEW              object
S_NUM_BLDG         float64
S_BLDG_STYL         object
S

In [6]:
# Preview the first five rows of the raw table.
df.head()

Unnamed: 0,PID,CM_ID,GIS_ID,ST_NUM,ST_NAME,ST_NAME_SUF,UNIT_NUM,ZIPCODE,PTYPE,LU,...,U_BTH_STYLE2,U_BTH_STYLE3,U_KITCH_TYPE,U_KITCH_STYLE,U_HEAT_TYP,U_AC,U_FPLACE,U_INT_FIN,U_INT_CND,U_VIEW
0,100001000,,100001000.0,104 A 104,PUTNAM,ST,,2128.0,105,R3,...,,,,,,,,,,
1,100002000,,100002000.0,197,LEXINGTON,ST,,2128.0,105,R3,...,,,,,,,,,,
2,100003000,,100003000.0,199,LEXINGTON,ST,,2128.0,105,R3,...,,,,,,,,,,
3,100004000,,100004000.0,201,LEXINGTON,ST,,2128.0,105,R3,...,,,,,,,,,,
4,100005000,,100005000.0,203,LEXINGTON,ST,,2128.0,104,R2,...,,,,,,,,,,


In [7]:
# One sub-frame per `LU` code R1-R4, built with a single filter expression
# instead of four copy-pasted lines.
r1, r2, r3, r4 = (df[df['LU'] == code] for code in ('R1', 'R2', 'R3', 'R4'))

In [8]:
# Stack the four per-LU frames (R1, R2, R3, R4, in that order) into one table;
# the original row index of `df` is kept.
resident = pd.concat([r1, r2, r3, r4])

In [9]:
# Drop rows whose YR_BUILT is exactly 0 (presumably a placeholder for an
# unknown year -- TODO confirm against the data dictionary).
# Rows where YR_BUILT is NaN are NOT removed here, because NaN != 0 is True.
resident = resident[resident["YR_BUILT"]!= 0]

In [10]:
# Drop rows whose YR_REMOD is exactly 0; rows where it is NaN are kept (NaN != 0).
# NOTE(review): if 0 means "never remodeled", this removes those properties from
# every later plot, including the ones grouped by YR_BUILT -- confirm this is intended.
resident = resident[resident["YR_REMOD"]!= 0]

In [11]:
@ipywidgets.interact(numeric = ["AV_LAND", "AV_BLDG","AV_TOTAL", "GROSS_TAX", "LAND_SF",  "GROSS_AREA", "LIVING_AREA"],
                     categorical = ["R_BLDG_STYL", "R_ROOF_TYP", "R_HEAT_TYP", "R_AC", "R_OVRALL_CND", "R_VIEW", "LU"],
                     year = ["YR_BUILT", "YR_REMOD"])
def get_line(numeric, categorical, year):
    """Plot the mean of a numeric column per year, one line per category.

    Reads the module-level ``resident`` DataFrame. Each argument is chosen
    from a dropdown created by the ``ipywidgets.interact`` decorator.

    Parameters
    ----------
    numeric : str
        Column whose per-year mean is drawn on the y-axis.
    categorical : str
        Column whose distinct values each get their own line.
    year : str
        Column used for grouping and for the x-axis.
    """
    fig, ax = plt.subplots(figsize=(20, 10))

    # dropna(): a NaN category never matches `==`, so it would only contribute
    # an empty line and a meaningless "nan" legend entry.
    for value in resident[categorical].dropna().unique():
        yearly_mean = resident[resident[categorical] == value].groupby(year)[numeric].mean()
        # Attach the label to the line itself so the legend can never get out
        # of step with what was actually plotted.
        ax.plot(yearly_mean.index, yearly_mean.values, label=str(value))

    ax.set_xlabel(year)
    ax.set_ylabel(numeric)
    ax.set_title("Mean {} by {}, split by {}".format(numeric, year, categorical))
    # Skip the legend when nothing was drawn (e.g. the category column is all NaN).
    if ax.lines:
        ax.legend(loc='upper left')
    plt.show()

interactive(children=(Dropdown(description='numeric', options=('AV_LAND', 'AV_BLDG', 'AV_TOTAL', 'GROSS_TAX', …

In [12]:
@ipywidgets.interact(numeric = ["AV_LAND", "AV_BLDG","AV_TOTAL", "GROSS_TAX", "LAND_SF",  "GROSS_AREA", "LIVING_AREA"],
                     categorical = ["R_BLDG_STYL", "R_ROOF_TYP", "R_HEAT_TYP", "R_AC", "R_OVRALL_CND", "R_VIEW", "LU"],
                     year = ["YR_BUILT", "YR_REMOD"],
                     present = ["%", "value"])
def get_bar(numeric, categorical, year, present):
    """Draw a stacked bar chart of the mean of a numeric column per year and category.

    Reads the module-level ``resident`` DataFrame. Each argument is chosen
    from a dropdown created by the ``ipywidgets.interact`` decorator.

    Parameters
    ----------
    numeric : str
        Column whose mean is computed for every (year, category) pair.
    categorical : str
        Column whose values form the stacked segments of each bar.
    year : str
        Column whose values form the bars along the x-axis.
    present : str
        ``"%"`` rescales every bar so its segments sum to 100; any other
        value (the dropdown offers ``"value"``) stacks the raw means.
    """
    as_percent = present == "%"
    bar = resident.groupby([year, categorical])[numeric].mean()

    if as_percent:
        # transform("sum") returns each year's total aligned to the original
        # (year, category) index. The previous groupby(...).apply(lambda ...)
        # form can prepend the group key to the index depending on the pandas
        # version, which leaves duplicated year labels after unstack().
        year_total = bar.groupby(level=0).transform("sum")
        bar = 100 * bar / year_total

    ax = bar.unstack().plot(kind='bar', stacked=True, figsize=(20, 10))
    if as_percent:
        ax.yaxis.set_major_formatter(mtick.PercentFormatter())
        ax.set_ylabel("Share of mean {} (%)".format(numeric))
    else:
        ax.set_ylabel("Mean {}".format(numeric))
    plt.show()

interactive(children=(Dropdown(description='numeric', options=('AV_LAND', 'AV_BLDG', 'AV_TOTAL', 'GROSS_TAX', …