In [None]:
# `%matplotlib widget` (JupyterLab) or `%matplotlib notebook` (classic Notebook) makes figures interactive; we use `widget` here.
%matplotlib widget
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the raw sales records once; every later cell derives its data from `df`.
sales_csv_path = '../Real_Estate_Sales_2001-2017.csv'
df = pd.read_csv(sales_csv_path)

In [None]:
# What is the average value by PropertyType?
# numeric_only=True restricts the mean to numeric columns. Without it, pandas >= 2.0
# raises a TypeError as soon as the frame contains a non-numeric (e.g. text) column,
# whereas older versions silently dropped such columns.
averages = (
    df.groupby(['PropertyType'])
    .mean(numeric_only=True)
    .reset_index()  # turn PropertyType back into a regular column for plotting
)
averages

In [None]:
# plt.subplots(nrows, ncols) returns the figure plus a 2-D array of Axes:
# here a grid of 6 plots (2 rows x 3 columns). Call plt.subplots() with no
# arguments when only a single plot is needed.
fig, ax = plt.subplots(nrows=2, ncols=3)

# Draw the average SaleAmount per PropertyType in the top-left panel only.
top_left_panel = ax[0, 0]
top_left_panel.bar(averages['PropertyType'], averages['SaleAmount'])

# Alternative: let pandas draw onto a chosen panel by passing the Axes object,
# e.g. averages['SaleAmount'].plot(ax=ax[0,0])

# Area Charts (Stacked, unstacked)
- Total (cumulative) Sales by property type by year

In [None]:
# Display floats with two fixed decimals.
pd.set_option('display.float_format', lambda value: '%.2f' % value)

# Sum SaleAmount per (ListYear, PropertyType), then pivot the PropertyType level
# into columns: one row per year, one column per property type.
df_group_by_year_type = (
    df.groupby(['ListYear', 'PropertyType'])['SaleAmount']
    .sum()
    .unstack()
)
df_group_by_year_type

In [None]:
# Year/type combinations with no sales are NaN after unstacking; treat them as 0
# so the running total carries forward.
sales_year_type_filled = df_group_by_year_type.fillna(0)

# Running (cumulative) total down the years, separately for each property type.
cum_sales_year_type = sales_year_type_filled.cumsum()
cum_sales_year_type

In [None]:
fig, ax = plt.subplots()

# Area charts are stacked by default; spelled out here to contrast with the
# unstacked variant.
cum_sales_year_type.plot.area(
    ax=ax,
    title="Total Sales by year by type",
    stacked=True,
)

# tight_layout() re-fits the figure contents so legends/labels are not cut off.
fig.tight_layout()

- In the graph above we can't directly compare individual series (e.g. red vs. green), because the areas are stacked on top of each other instead of being overlaid from a common baseline.
- To fix this, plot the areas unstacked (`stacked=False`); for readability we can also add a grid.

In [None]:
fig, ax = plt.subplots()

# stacked=False overlays the areas from a common baseline (note the colors become
# lighter/semi-transparent); grid=True adds grid lines.
cum_sales_year_type.plot.area(
    ax=ax,
    title="Total Sales by year by type",
    stacked=False,
    grid=True,
)

# tight_layout() re-fits the figure contents so legends/labels are not cut off.
fig.tight_layout()

- Next, add the cumulative total sales per year (across all property types) to the same graph, so that the overall total can be compared with each individual type.

In [None]:
# Display floats with two fixed decimals.
pd.set_option('display.float_format', lambda value: '%.2f' % value)

# Total SaleAmount per ListYear (all property types together), accumulated
# year over year.
df_group_by_year = (
    df.groupby(['ListYear'])['SaleAmount']
    .sum()
    .cumsum()
)
df_group_by_year

In [None]:
fig, ax = plt.subplots()

# style='plain' disables scientific notation on the axis tick labels (y axis here).
ax.ticklabel_format(style='plain')

# Layer 1: cumulative total over all types, drawn first as a dashed blue line.
df_group_by_year.plot(
    ax=ax,
    style='b--',
    legend=True,
    label="Total Yearly",
)

# Layer 2: unstacked cumulative areas per property type on the same Axes
# (colors become lighter/semi-transparent when unstacked).
cum_sales_year_type.plot.area(
    ax=ax,
    title="Total Sales by year by type",
    stacked=False,
    grid=True,
)

# tight_layout() re-fits the figure contents so legends/labels are not cut off.
fig.tight_layout()

# Bar Charts (stacked & unstacked)

In [None]:
# Easiest way: let pandas create the figure/axes itself (no explicit matplotlib
# object-oriented setup).
# numeric_only=True restricts the mean to numeric columns. Without it, pandas >= 2.0
# raises a TypeError as soon as the frame contains a non-numeric (e.g. text) column.
averages = df.groupby(['PropertyType']).mean(numeric_only=True).reset_index()

# One group of bars per PropertyType, with AssessedValue and SaleAmount side by side
# (bar charts are not stacked by default).
ax = averages.plot.bar(
    title='Average sale by Property Type',
    x='PropertyType',
    y=['AssessedValue', 'SaleAmount'],
)

In [None]:
# Same chart, drawn on an explicitly created Axes and stacked this time.
fig, ax = plt.subplots()

# numeric_only=True restricts the mean to numeric columns. Without it, pandas >= 2.0
# raises a TypeError as soon as the frame contains a non-numeric (e.g. text) column.
averages = df.groupby(['PropertyType']).mean(numeric_only=True).reset_index()

# stacked=True puts AssessedValue and SaleAmount on top of each other in one bar
# per PropertyType.
ax = averages.plot.bar(
    ax=ax,
    title='Average sale by Property Type',
    x='PropertyType',
    y=['AssessedValue', 'SaleAmount'],
    stacked=True,
)

# tight_layout() re-fits the figure contents so labels are not cut off.
fig.tight_layout()

### Learning: Area charts are stacked by default, whereas bar charts are NOT

# Let's try another example

In [None]:
# Number of rows per (ListYear, PropertyType), pivoted to one column per
# property type; combinations that never occur become 0 instead of NaN.
df_by_year_type = (
    df.groupby(['ListYear', 'PropertyType'])
    .size()
    .unstack()
    .fillna(0)
)
df_by_year_type

In [None]:
fig, ax = plt.subplots()

# One bar per year, split into stacked segments per property type.
ax = df_by_year_type.plot.bar(
    ax=ax,
    title='Count Share by Year by Type',
    stacked=True,
)

# tight_layout() re-fits the figure contents so labels are not cut off.
fig.tight_layout()

In [None]:
fig, ax = plt.subplots()

# Bar charts are not stacked by default: each year gets a group of side-by-side
# bars, one per property type. The default is spelled out for clarity.
ax = df_by_year_type.plot.bar(
    ax=ax,
    title='Count Share by Year by Type',
    stacked=False,
)

# tight_layout() re-fits the figure contents so labels are not cut off.
fig.tight_layout()

# Stacked Bar Chart (pandas plot with seaborn styling and color palette)

In [None]:
from matplotlib.colors import ListedColormap
import seaborn as sns

# Switch matplotlib to seaborn's default look for the figures drawn from here on.
sns.set()

# Wrap seaborn's "muted" palette in a colormap so pandas can use it for the columns.
muted_colormap = ListedColormap(sns.color_palette("muted"))

# Row counts per (ListYear, PropertyType), one column per property type.
counts_by_year_type = df.groupby(['ListYear', 'PropertyType']).size().unstack()

# pandas still does the drawing: one stacked bar per year on a large figure.
ax = counts_by_year_type.plot.bar(
    stacked=True,
    colormap=muted_colormap,
    figsize=(20, 15),
)