# University of Michigan
## School of Information
### Masters of Applied Data Science
#### Milestone 1
#### Alan, Koon Leong, George

**Data Visualization**

In [1]:
import pandas as pd
import numpy as np

import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
# Silence every warning category for the rest of the session so that library
# warnings do not clutter the rendered cell outputs.
warnings.simplefilter('ignore')

# All data files used below are addressed relative to the kernel's working directory.
directory = os.getcwd()

#### Geospatial data
These are shapefiles for all the parcels in Kent County.  Kent County publishes the data separately for each city and township; those files were merged into a single set of files covering the entire county.

The source data geographical information uses "NAD83 State Plane Michigan South (International Feet)" projection.  This will be converted into "latitude and longitude coordinates on the WGS84 reference ellipsoid" projection.

Source: https://www.accesskent.com/GISLibrary/

In [2]:
import geopandas

# Merged parcel shapefile covering every municipality in the county.
parcel_shapefile = directory + r'/municipalities/All Municipalities Geometry and Municipality.shp'

# Read the parcels, reproject them to latitude/longitude coordinates (EPSG:4326)
# and keep only the __geo_interface__ mapping, which is what the map cell passes
# to plotly as `geojson`.
GRParcelsUnit = (
    geopandas.read_file(parcel_shapefile)
    .to_crs('EPSG:4326')
    .__geo_interface__
)


In [3]:
# Load the parcel-level table from the working directory.
Property_Level = pd.read_csv(directory + r'/property_level.csv')

# Discard the first two columns (selected by position, not by name).
Property_Level = Property_Level.drop(Property_Level.columns[[0, 1]], axis=1)

# Expose the parcel number under the name 'APN'.
Property_Level = Property_Level.rename(columns={'PNUM': 'APN'})

# Upper-case city names so that filters such as City == 'GRAND RAPIDS' match.
Property_Level.City = Property_Level.City.str.upper()

Property_Level['MSHDA Score'] = Property_Level['MSHDA Score'].astype('int')

# SEV per acre as a whole number.  A parcel with Acres == 0 produces +/-inf
# (or NaN for 0/0); fillna(0) alone leaves the infinities in place and the
# integer cast then raises, so non-finite ratios are mapped to NaN first and
# end up as 0 like every other undefined ratio.
Property_Level["Per Acre"] = (
    (Property_Level["SEV"] / Property_Level["Acres"])
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
    .astype('int')
)

In [4]:
# Block-group polygons: read, reproject to latitude/longitude (EPSG:4326) and
# keep only the __geo_interface__ mapping.
block_group_shapefile = directory + r'/block_group/Block_Group_Summary.shp'
GRParcels = (
    geopandas.read_file(block_group_shapefile)
    .to_crs('EPSG:4326')
    .__geo_interface__
)

# Tabular block-group data that accompanies the polygons.
Block_Property_Level = pd.read_csv(directory + r'/block_groups.csv')

#### Heatmap
The entire geospatial data set has about 240,000 entries.  To reduce memory usage, the data are filtered by the `city` parameter before plotting.

In [5]:
# Tip: pd.DataFrame(Property_Level.City.unique()) lists the available cities.

# Raise the display limits so long / wide frames are shown in full.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Count the parcels for every (City, MSHDA Score) pair, then lay the counts out
# with one row per score and one column per city; pairs that never occur are 0.
summary_df = (
    Property_Level[['City', 'MSHDA Score']]
    .groupby(['City', 'MSHDA Score'])
    .size()
    .unstack('City', fill_value=0)
    .astype(int)
    .reset_index(drop=False)
)


In [6]:
# --- Parameters for the map plot -------------------------------------------
city = 'GRAND RAPIDS'
min_score = 0

# Initial map view (centre and zoom) appropriate for the selected city.
map_center = dict(lat=42.9634, lon=-85.6681)
zoom = 15

# --- Row selection ------------------------------------------------------------
# Keep parcels in the chosen city whose score is at least `min_score`.
city_filter = Property_Level['City'].eq(city)
score_filter = Property_Level['MSHDA Score'].ge(min_score)
final_filter = city_filter & score_filter

# Only the columns the map needs (id, colour value, hover fields); missing
# values are replaced by 0.
map_columns = ['ID', 'MSHDA Score', 'Address', 'City', 'Acres', 'SEV', 'Per Acre']
plot_df = Property_Level.loc[final_filter, map_columns].fillna(0)

In [None]:
# Parcel-level choropleth: each parcel polygon is coloured by its MSHDA Score.
#
# `featureidkey` has to be a path *inside each GeoJSON feature* (for example
# 'properties.<field>'); plotly matches the value found there against the
# `locations` column of the data frame.  The earlier value 'plot_df.ID' named
# the DataFrame rather than a feature field, so no parcel could be matched and
# nothing was coloured.
# NOTE(review): assumes the parcel shapefile carries an 'ID' attribute holding
# the same identifiers as plot_df['ID'] -- confirm the attribute name.
fig = px.choropleth_mapbox(
    plot_df,
    geojson=GRParcelsUnit,
    locations='ID',
    featureidkey='properties.ID',
    color='MSHDA Score',
    color_continuous_midpoint=20,
    hover_data=['Address', 'City', 'Acres', 'SEV', 'Per Acre'],
    center=map_center,
    zoom=zoom,
    mapbox_style="carto-positron",
)
# Remove the figure margins so the map fills the output area.
fig.update_layout(margin={'r': 0, 't': 0, 'l': 0, 'b': 0})
fig.show()

In [None]:
# fig=px.choropleth_mapbox(Block_Property_Level,geojson=GRParcels,color='MSHDA Score',\
#                         locations='block_group',featureidkey='properties.GEOID',\
#                         center=map_center,
#                         mapbox_style="carto-positron", zoom=zoom
#                         )
# fig.update_layout(margin={'r':0,'t':0,'l':0,'b':0})
# fig.show()

In [None]:
# Two-column frame for the density heatmap, with city names upper-cased.
# Note: this rebinds `plot_df`, replacing the frame prepared for the map.
plot_df = Property_Level[['City', 'MSHDA Score']].assign(
    City=lambda frame: frame['City'].str.upper()
)


In [None]:
# Density heatmap of parcel counts: MSHDA Score on the x axis, City on the
# y axis, coloured on a red-yellow-green scale.
heatmap = px.density_heatmap(
    plot_df,
    x="MSHDA Score",
    y="City",
    color_continuous_scale="RdYlGn",
    width=800,
    height=800,
    facet_row_spacing=0.3,
    facet_col_spacing=0.3,
)
heatmap.show()

In [None]:
# --- Thresholds ----------------------------------------------------------------
sev_upper_limit = 5000000
per_acre_upper_limit = 500000
acre_upper_limit = 50
# NOTE: this rebinds `min_score`, which the map-parameter cell also sets (to 0).
min_score = 10
ideal_lower_limit = 1
ideal_upper_limit = 10

# --- Numeric columns used by the pair plot and the scatter plot ----------------
splom_df = Property_Level[["MSHDA Score", "Acres", "SEV", "Per Acre"]]

# Apply the filters one after another; each mask is computed on the rows that
# survived the previous step.
sev_limit = splom_df["SEV"].lt(sev_upper_limit)
splom_df = splom_df[sev_limit]

per_acre_limit = splom_df["Per Acre"].lt(per_acre_upper_limit)
splom_df = splom_df[per_acre_limit]

acre_limit = splom_df["Acres"].lt(acre_upper_limit)
splom_df = splom_df[acre_limit]

MSHDA_limit = splom_df["MSHDA Score"].ge(min_score)
splom_df = splom_df[MSHDA_limit]

# --- Size class from acreage -----------------------------------------------------
# Below the ideal range -> "Small", above it -> "Large", inside it (bounds
# included) -> "Ideal".
ideal = splom_df["Acres"].between(ideal_lower_limit, ideal_upper_limit)
splom_df['Size'] = ""
for size_mask, size_label in (
    (splom_df["Acres"].lt(ideal_lower_limit), "Small"),
    (splom_df["Acres"].gt(ideal_upper_limit), "Large"),
    (ideal, "Ideal"),
):
    splom_df.loc[size_mask, 'Size'] = size_label


In [None]:
# seaborn and matplotlib are already imported in the first cell.
# Use the "ticks" theme for the pair plot.
sns.set_theme(style="ticks")

# Pairwise relationships between score, acreage and per-acre value, coloured by
# size class, with KDE curves on the diagonal and only the lower triangle drawn.
# pairplot creates its own figure (sized through `height`), so no plt.figure()
# call is made here: the former plt.figure(figsize=(8000, 8000)) only produced a
# separate, empty 8000 x 8000 inch figure that the notebook then tried to render.
sns.pairplot(
    splom_df.loc[:, ['MSHDA Score', 'Acres', 'Per Acre', 'Size']],
    hue='Size',
    hue_order=['Small', 'Ideal', 'Large'],
    palette={'Small': 'gainsboro', 'Ideal': 'r', 'Large': 'powderblue'},
    diag_kind='kde',
    height=4,
    corner=True,
)
sns.despine()


In [None]:
# seaborn and matplotlib are already imported in the first cell.
sns.set_theme(style="whitegrid")

# Per-acre value against MSHDA Score, one point per parcel, coloured by the
# parcel's size class.
f, ax = plt.subplots(figsize=(14, 14))
sns.despine(f, left=True, bottom=True)
sns.scatterplot(
    data=splom_df.loc[:, ['MSHDA Score', 'Acres', 'Per Acre', 'Size']],
    x="MSHDA Score",
    y="Per Acre",
    hue="Size",
    hue_order=['Small', 'Ideal', 'Large'],
    palette={'Small': 'gainsboro', 'Ideal': 'r', 'Large': 'powderblue'},
    linewidth=0,
    ax=ax,
)