# Data Analysis Code for DC Gentrification Trends
----
### To be considered gentrified in DC, a ward’s median household income and median home value need to fall within the bottom 40th percentile of all wards within DC at the beginning of the decade.  

### (For our analysis, we used data from 2000 and 2016)

Analysis
1. Wards in DC most affected by gentrification
2. Kinds of schools (public vs charter) that make up these communities (wards)
3. Gentrification and where students choose to enroll in school

### important: must import geopandas (support for geographic data to pandas objects) to build heatmaps  

to install geopandas... install the following in this order (good luck!)

1. numpy
2. GDAL
3. pyshp
4. shapely
5. fiona
6. geopy
7. pyproj
8. geopandas


### Analysis #1: Which Wards in DC have been affected most by gentrification
Demonstration:
Housing and Income by Ward

### 1a: Housing Bar Graph

In [None]:
# numpy provides np.arange below; no earlier cell in this notebook imports it.
import numpy as np

# Percent change in average housing price per ward (Ward 1 through Ward 8), 2000-2016.
pctchange = [418.64, 160, 157.69, 314.04, 338.81, 375.23, 221.31, 200.08]
# One bar position (0..7) for each entry in pctchange.
x_axis = np.arange(len(pctchange))

In [None]:
# matplotlib provides plt; no earlier cell in this notebook imports it.
import matplotlib.pyplot as plt

# Bar chart: one bar per ward, bar height is the percent change in housing price.
plt.bar(x_axis, pctchange, color='r', alpha=0.5, align="center")
plt.ylim(-10, max(pctchange)+30)
plt.title("Percent Change in Average Housing Price by Ward 2000-2016")
plt.xlabel("Ward in DC")
plt.ylabel("Percent Change in Housing Price")

# Label the bars with their ward names before saving, so the labels are part
# of the exported image. The chart is saved once, under the housing file name.
tick_locations = list(x_axis)
plt.xticks(tick_locations, ["Ward 1", "Ward 2", "Ward 3", "Ward 4", "Ward 5", "Ward 6", "Ward 7", "Ward 8"])
plt.savefig("figures/housingbar.png")
plt.show()

### 1b: Income Bar Chart

In [None]:
# Percent change in income for each ward (Ward 1 through Ward 8), 2000-2016.
pctchange = [
    60.94, 6.95, 4.68, 19.66,
    24.35, 71.38, -3.90, 1.83,
]
# Bar positions 0..7, one per entry in pctchange.
x_axis = np.arange(0, len(pctchange))

In [None]:
# Income bar chart: wards along the x axis, percent change in income on the
# y axis. align="center" centres every bar on its tick mark.
ward_labels = ["Ward 1", "Ward 2", "Ward 3", "Ward 4", "Ward 5", "Ward 6", "Ward 7", "Ward 8"]
tick_locations = list(x_axis)

plt.bar(x_axis, pctchange, align="center", alpha=0.5, color='r')
plt.ylim(-10, max(pctchange)+30)
plt.xticks(tick_locations, ward_labels)

# Axis labels and title
plt.xlabel("Ward in DC")
plt.ylabel("Percent Change in Income")
plt.title("Percent Change in Income by Ward 2000-2016")

# Export the finished chart, then display it
plt.savefig("figures/incomebar.png")
plt.show()

### 1c:  Heatmaps (Map percent change in income and housing price for DC Wards)

In [None]:
#import dependencies 
import geopandas as gpd
import fiona
import descartes

In [None]:
# pandas provides pd.read_csv; no earlier cell in this notebook imports it.
import pandas as pd

# Read the combined census percent-change CSV into a dataframe and preview it.
# (An earlier version of this cell read "income_data.csv" instead.)
original_path = "census_pctchg_combined.csv"
house_income = pd.read_csv(original_path, encoding="utf-8")
house_income.head()

In [None]:
# Base map of DC: load the ward boundaries from the shapefile.
fp = "Ward_from_2012/Ward_from_2012.shp"
map_df = gpd.read_file(fp)

# Peek at the result; read_file gives back a GeoDataFrame, not a plain DataFrame.
map_df.head()

# Keep just the ward name and its geometry.
needed_columns = ['NAME', 'geometry']
map_df = map_df[needed_columns]
map_df

In [None]:
# Draw the ward geometries loaded from the shapefile.
map_df.plot()

In [None]:
# Attach the census figures to each ward's geometry. Both frames are indexed
# by ward ("NAME" in the shapefile, "Ward" in the CSV) and then joined on
# that index.
wards_by_name = map_df.set_index("NAME")
census_by_ward = house_income.set_index("Ward")
merged = wards_by_name.join(census_by_ward)
merged

In [None]:
# Column names of the two measures that get mapped.
# var_1 is plotted here; var_2 is used by the housing-price map in a later cell.
var_1 = '% Change Avg. Family Income (Adjusted)'
var_2 = '% Change Housing Prices'

# Colour range for % Change Avg. Family Income (Adjusted).
# Values outside this range are drawn in the end colours of the colormap.
vmin, vmax = -5, 61

#create figure and axes
fig, ax = plt.subplots(1, figsize=(10,6))

# Draw the choropleth. vmin/vmax are passed explicitly so the map is coloured
# on the same scale as the colorbar created below; without them the map
# auto-scales to the data and the colorbar would not describe it.
merged.plot(column=var_1, cmap='Reds', linewidth=0.8, ax=ax, edgecolor='0.8',
            vmin=vmin, vmax=vmax)

#remove axis
ax.axis('off')

#add a title and format it
ax.set_title('Average Family Income (% change 2000-2016)', fontdict={'fontsize': '25', 'fontweight': '3'})

#add data source information
ax.annotate('Source: Open Data DC', xy=(0.1,.08),
           xycoords='figure fraction',horizontalalignment='left',
           verticalalignment='top',fontsize=12,color='Gray')

# Legend: a standalone colorbar using the same colormap and range as the map.
sm = plt.cm.ScalarMappable(cmap='Reds',norm=plt.Normalize(vmin=vmin,vmax=vmax))
# Give the mappable an (empty) data array; older matplotlib releases will not
# draw a colorbar for a mappable that has none.
sm.set_array([])

# TODO: label each ward on the map (work in progress), e.g. by annotating
# each row's geometry.centroid with the ward name from merged.index.

# sm is not attached to any Axes, so tell colorbar which Axes to take space from.
cbar = fig.colorbar(sm, ax=ax)

#save figure
fig.savefig("figures/income_export.png", dpi=300)


### Analysis #2: What kinds of schools (public vs charter) make up these communities (wards)? 
Demonstration:
Charter vs. public schools: comparison of a sample of gentrified and non-gentrified wards.
(Most gentrified wards: 1, 6, and 5 — consistent with the housing & income data.)
    

In [None]:
# Colour range for % Change Housing Prices.
# Values outside this range are drawn in the end colours of the colormap.
vmin, vmax = 0, 420

#create figure and axes
fig, ax = plt.subplots(1, figsize=(10,6))

# Draw the choropleth of the housing-price column (var_2 is set in the income
# map cell). vmin/vmax are passed explicitly so the map is coloured on the
# same scale as the colorbar created below; without them the map auto-scales
# to the data and the colorbar would not describe it.
merged.plot(column=var_2, cmap='Blues', linewidth=0.8, ax=ax, edgecolor='0.8',
            vmin=vmin, vmax=vmax)

#remove axes
ax.axis('off')

#create title and format
ax.set_title('% Change Housing Prices', fontdict={'fontsize': '25', 'fontweight': '3'})

#add source information
ax.annotate('Source: Open Data DC', xy=(0.1,.08),
           xycoords='figure fraction',horizontalalignment='left',
           verticalalignment='top',fontsize=12,color='Gray')

# Legend: a standalone colorbar using the same colormap and range as the map.
sm = plt.cm.ScalarMappable(cmap='Blues',norm=plt.Normalize(vmin=vmin,vmax=vmax))
# Give the mappable an (empty) data array; older matplotlib releases will not
# draw a colorbar for a mappable that has none.
sm.set_array([])

# TODO: label each ward on the map (work in progress), e.g. by annotating
# each row's geometry.centroid with the ward name from merged.index.

# sm is not attached to any Axes, so tell colorbar which Axes to take space from.
cbar = fig.colorbar(sm, ax=ax)

#save figure
fig.savefig("figures/housing_export.png", dpi=300)

### Charter vs. Public School Pie Charts

In [None]:
# Path of the student-by-ward CSV that the next cell reads with pandas.
csv_path = "clean/studentbyward.csv"

In [None]:
# Load the CSV at csv_path into a dataframe (the file is read as ISO-8859-1).
studentbyward_df = pd.read_csv(
    csv_path,
    low_memory=False,
    encoding='iso-8859-1',
)

# Show every column name
studentbyward_df.columns.tolist()

In [None]:
# Add an 'Enrollment' column: PK-12 enrollment for 2014-15 plus 2016-17.
enrollment_2014 = studentbyward_df['PK-12 Enrollment 2014-15']
enrollment_2016 = studentbyward_df['PK-12 Enrollment 2016-17']
studentbyward_df['Enrollment'] = enrollment_2014 + enrollment_2016

# Slice out the columns at positions 0-3 and 13.
# NOTE(review): position 13 is presumably the new 'Enrollment' column — confirm
# against the CSV's column count.
kept_positions = [0, 1, 2, 3, 13]
sub_studentbyward_df = studentbyward_df.iloc[:, kept_positions]

# Preview the first ten rows
sub_studentbyward_df.head(10)

In [None]:
# Trim the housing data to rows 8-23 and five columns, which are then renamed
# to Timeframe, Ward, Start Date, End Date and Housing Prices.
# NOTE(review): housing_data is not created in any visible cell — confirm
# where it is loaded before this cell runs.
row_positions = list(range(8, 24))
column_positions = [0, 1, 5, 6, 12]
housing_clean = housing_data.iloc[row_positions, column_positions]

# Give the kept columns readable names.
readable_names = {
    "timeframe": "Timeframe",
    "ward2012": "Ward",
    "start_date": "Start Date",
    "end_date": "End Date",
    "mprice_sf": "Housing Prices",
}
housing_clean = housing_clean.rename(columns=readable_names)

# Display the trimmed dataframe
housing_clean

### 2a:  Ward 1 Analysis (gentrified ward)

In [None]:
# Keep only the rows whose 2016-17 school ward is Ward 1.
array = ['Ward 1']
in_ward1 = sub_studentbyward_df['School Ward 2016-17'].isin(array)
ward1_df = sub_studentbyward_df.loc[in_ward1]
ward1_df.head()

In [None]:
# Total the Ward 1 rows per sector; the result feeds the pie chart.
grouped_ward1_df = ward1_df.groupby(by=['Sector'])
pie_ward1_df = grouped_ward1_df.sum()
pie_ward1_df

In [None]:
# Ward 1 pie chart: share of enrollment per sector, two decimals on the labels.
# This palette is reused by the Ward 6 pie chart.
colors = ["lightcoral", "lightskyblue"]
ward1plot = pie_ward1_df.plot.pie(
    y='Enrollment',
    colors=colors,
    autopct="%1.2f%%",
    figsize=(10, 10),
)

# Title the chart, export it as a PNG, then display it
plt.title("Ward 1 Charter vs. Public schools")
plt.savefig("figures/ward1pie.png")
plt.show()

### 2b: Ward 6 Analysis (gentrified ward)

In [None]:
# Keep only the rows whose 2016-17 school ward is Ward 6.
array = ['Ward 6']
in_ward6 = sub_studentbyward_df['School Ward 2016-17'].isin(array)
ward6_df = sub_studentbyward_df.loc[in_ward6]
ward6_df.head()

In [None]:
# Total the Ward 6 rows per sector; the result feeds the pie chart.
grouped_ward6_df = ward6_df.groupby(by=['Sector'])
pie_ward6_df = grouped_ward6_df.sum()
pie_ward6_df

In [None]:
# Ward 6 pie chart: share of enrollment per sector, two decimals on the labels.
# `colors` is the two-colour palette defined in the Ward 1 pie chart cell.
ward6plot = pie_ward6_df.plot.pie(
    y='Enrollment',
    colors=colors,
    autopct="%1.2f%%",
    figsize=(10, 10),
)

# Title the chart, export it as a PNG, then display it
plt.title("Ward 6 Charter vs. Public schools")
plt.savefig("figures/ward6pie.png")
plt.show()

### 2c: Ward 3 Analysis (non-gentrified ward)

In [None]:
# Keep only the rows whose 2016-17 school ward is Ward 3.
array = ['Ward 3']
in_ward3 = sub_studentbyward_df['School Ward 2016-17'].isin(array)
ward3_df = sub_studentbyward_df.loc[in_ward3]
ward3_df.head()

In [None]:
# Total the Ward 3 rows per sector; the result feeds the pie chart.
grouped_ward3_df = ward3_df.groupby(by=['Sector'])
pie_ward3_df = grouped_ward3_df.sum()
pie_ward3_df

In [None]:
# Ward 3 pie chart: share of enrollment per sector, two decimals on the labels.
# Only one colour is supplied here.
# NOTE(review): presumably Ward 3 has a single sector in the data — confirm.
color = ["lightskyblue"]
ward3plot = pie_ward3_df.plot.pie(
    y='Enrollment',
    colors=color,
    autopct="%1.2f%%",
    figsize=(10, 10),
)

# Title the chart, export it as a PNG, then display it
plt.title("Ward 3 Charter vs. Public schools")
plt.savefig("figures/ward3pie.png")
plt.show()

### Analysis #3: Does gentrification make an impact on where students choose to enroll in school?

In [None]:
# Path of the student-by-sector CSV that the next cell reads with pandas.
# (An earlier version pointed at the Excel workbook below.)
#csv_path = "clean/landscape-diversity-data.xlsx"
csv_path = "clean/studentbysector.csv"

In [None]:
# Load the CSV at csv_path into a dataframe (the file is read as ISO-8859-1).
clean_sector_ward_df = pd.read_csv(
    csv_path,
    low_memory=False,
    encoding='iso-8859-1',
)

# Show every column name
clean_sector_ward_df.columns.tolist()

In [None]:
# Convert the percent strings (trailing '%') into float fractions:
# strip the '%' sign, cast to float, then divide by 100.
pct_col = '% living in School Ward 2016-17'
pct_text = clean_sector_ward_df[pct_col].str.rstrip('%')
clean_sector_ward_df[pct_col] = pct_text.astype('float') / 100.0

# Check the resulting column types
clean_sector_ward_df.dtypes

In [None]:
# Average share of students living in their school's ward, per school ward.
# Despite its name, ward1 holds one value for every ward.
#
# Only the two columns involved are selected, so a text column elsewhere in
# the frame cannot make mean() fail (pandas >= 2.0 raises on non-numeric
# columns instead of silently dropping them).
# As before, rows missing either value are ignored and each distinct
# percentage within a ward counts once towards that ward's mean.
ward_col = 'School Ward 2016-17'
pct_col = '% living in School Ward 2016-17'
ward1 = (clean_sector_ward_df[[ward_col, pct_col]]
            .dropna()
            .drop_duplicates()
            .groupby(ward_col)[pct_col].mean())
ward1

In [None]:
# Optional notebook setup, left disabled:
# %matplotlib notebook
# from matplotlib import pyplot as plt

# Bar chart of the per-ward averages held in ward1, with horizontal tick labels.
ax = ward1.plot.bar(
    x='School Ward 2016-17',
    y='% living in School Ward 2016-17',
    rot=0,
)

# Axis labels and title
plt.xlabel("School Ward")
plt.ylabel("Percent living in ward of school")
plt.title("Percent enrolled living in same ward")

# Export the chart as a PNG, then display it
plt.savefig("figures/LivingInWard.png")
plt.show()