### setup

In [1]:
%load_ext lab_black

In [2]:
import requests
import urllib.request
import numpy as np
import datetime
import random

In [3]:
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials

In [4]:
import altair as alt
import altair_stiles as altstiles

# Make the altair_stiles theme available under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme that is actually enabled is "grid", not the "stiles"
# theme registered on the line above — confirm this is intended.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [5]:
import os

from datawrapper import Datawrapper

# Read the Datawrapper API token from the environment instead of storing it in
# the notebook. The token that used to be hardcoded in this cell should be
# treated as leaked and revoked in the Datawrapper account settings.
dw_access_token = os.environ.get("DATAWRAPPER_ACCESS_TOKEN")
if not dw_access_token:
    raise RuntimeError(
        "Set the DATAWRAPPER_ACCESS_TOKEN environment variable "
        "before running this notebook."
    )

# Client used further down to upload the chart data.
dw = Datawrapper(access_token=dw_access_token)

In [6]:
# Build service-account credentials from the local JSON key file for the
# Google Sheets feeds scope, then authorize a gspread client with them.
scope = ["https://spreadsheets.google.com/feeds"]
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    filename="jupyter-integration-credentials.json",
    scopes=scope,
)
gc = gspread.authorize(credentials)

In [7]:
# Key of the spreadsheet that the gspread client opens.
spreadsheet_key = "1_RXzXkHPEyDAiDKmz98wTSC9UNo-8OyjGTT540X_vJk"
# NOTE(review): `book` is not referenced again in the visible cells; a later
# cell opens the same key a second time as `workbook`.
book = gc.open_by_key(spreadsheet_key)

In [8]:
# Let DataFrame displays show up to 1000 columns and 1000 rows.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [9]:
# Open the spreadsheet by its key and select the worksheet to read from.
workbook = gc.open_by_key(spreadsheet_key)
sheet = workbook.worksheet("Sheet1")
# Fetch every cell. The first row supplies the column labels and the remaining
# rows the data; the resulting frame is then transposed.
values = sheet.get_all_values()
nbs = pd.DataFrame(data=values[1:], columns=values[0]).T

### nbs data

In [10]:
# Use the first row of the transposed frame as the column labels, then skip
# that row and move the index into a regular column named "month".
nbs.columns = nbs.iloc[0]
df = nbs.iloc[1:].reset_index().rename({"index": "month"}, axis="columns")

cleaning nbs data

In [11]:
# Parse the "month" labels (abbreviated month name + year) into timestamps.
df = df.assign(date=pd.to_datetime(df["month"], format="%b %Y"))

In [12]:
# Keep only the date and the three series, ordered by ascending date.
df = df.loc[:, ["date", "sold", "started", "completed"]].sort_values(by="date")

The source values are in units of 10,000 square meters; divide by 100 to get millions of square meters.

In [13]:
# Cast each of the three series to float and divide it by 100.
df = df.assign(
    **{
        series: df[series].astype(float) / 100
        for series in ("sold", "started", "completed")
    }
)

In [14]:
# Calendar month number (1-12) of each row's date.
df = df.assign(month=pd.to_datetime(df["date"]).dt.month)

In [15]:
# Tag the three series as cumulative by appending "_cum" to their names.
df = df.rename(
    columns={name: f"{name}_cum" for name in ("sold", "started", "completed")}
)

Get the monthly floor area, rather than the cumulative year-to-date total, by differencing consecutive months.

In [16]:
# Subtract the previous row's cumulative value from the current one. The
# first row has no predecessor, so its result is NaN.
df["sold"] = df["sold_cum"].diff()
df["started"] = df["started_cum"].diff()
df["completed"] = df["completed_cum"].diff()

Keep the January rows but set their monthly values to 0, then compute 12-period rolling means.

In [17]:
# Overwrite the three monthly series with 0 on every row whose month is 1.
# NOTE(review): presumably because the cumulative series restarts each year,
# which makes the January difference meaningless — confirm that 0 (rather than
# the January cumulative value) is the intended monthly figure.
df.loc[df["month"].eq(1), ["sold", "started", "completed"]] = 0

In [18]:
# Long-format slice for the "sold" series: date, value and a constant type
# label, ordered by ascending date.
sold = (
    df.loc[:, ["date", "sold"]]
    .sort_values(by="date")
    .rename(columns={"sold": "value"})
    .assign(type="sold")
)

In [19]:
# Mean over a window of 12 consecutive rows; NaN where the window is incomplete.
sold = sold.assign(rollmean=sold["value"].rolling(window=12).mean())

In [20]:
# Long-format slice for the "started" series: date, value and a constant type
# label, ordered by ascending date.
started = (
    df.loc[:, ["date", "started"]]
    .sort_values(by="date")
    .rename(columns={"started": "value"})
    .assign(type="started")
)

In [21]:
# Mean over a window of 12 consecutive rows; NaN where the window is incomplete.
started = started.assign(rollmean=started["value"].rolling(window=12).mean())

In [22]:
# Long-format slice for the "completed" series: date, value and a constant
# type label, ordered by ascending date.
completed = (
    df.loc[:, ["date", "completed"]]
    .sort_values(by="date")
    .rename(columns={"completed": "value"})
    .assign(type="completed")
)

In [23]:
# Mean over a window of 12 consecutive rows; NaN where the window is incomplete.
completed = completed.assign(
    rollmean=completed["value"].rolling(window=12).mean()
)

In [24]:
# Stack the three per-series frames on top of each other (original row labels
# are kept, so they repeat across the three parts).
df_long = pd.concat(objs=[sold, started, completed], axis=0)

In [75]:
# Overlaid (unstacked) area chart of the rolling means from 2016 onward for
# every type except "sold".
(
    alt.Chart(
        df_long.loc[(df_long["date"] >= "2016-01-01") & (df_long["type"] != "sold")]
    )
    .mark_area(opacity=0.4)
    .encode(
        x="date",
        y=alt.Y("rollmean", stack=None, title=""),
        color="type",
    )
    .properties(width=500, height=150)
)

In [31]:
# Line chart of the rolling mean over time, one coloured line per type.
(
    alt.Chart(df_long)
    .mark_line()
    .encode(
        x=alt.X("date"),
        y=alt.Y("rollmean"),
        color="type",
    )
)

In [28]:
# Constant column; the bar chart built from df_long encodes it on the y axis.
df_long = df_long.assign(y=2)

In [84]:
# Bar chart with the constant "y" column on the y axis, bars coloured by the
# rolling mean, one single-column facet per type; "sold" rows are excluded.
base_chart = (
    alt.Chart(df_long.loc[df_long["type"] != "sold"])
    .mark_bar()
    .encode(
        x=alt.X("date", stack="normalize", title=""),
        y=alt.Y("y", title=""),
        color=alt.Color(
            "rollmean",
            scale=alt.Scale(scheme="goldorange", reverse=False),
        ),
        facet=alt.Facet("type", columns=1),
    )
    .properties(width=400, height=100)
)

# alt.layer(base_chart, data=df_long).facet(
#    row=alt.Row("type", sort=["started", "completed", "sold"])
# )

In [85]:
# Display the faceted bar chart with its legend oriented to the top.
base_chart.configure_legend(orient="top")

# make a dataframe to put in datawrapper for area chart

In [87]:
# Wide-format columns for the "started" series: drop the type label and prefix
# the value columns so they stay distinct after merging.
started_dw = started.drop(columns="type").rename(
    columns={"value": "started_monthly", "rollmean": "started_rollmean"}
)

In [89]:
# Wide-format columns for the "completed" series: drop the type label and
# prefix the value columns so they stay distinct after merging.
completed_dw = completed.drop(columns="type").rename(
    columns={"value": "completed_monthly", "rollmean": "completed_rollmean"}
)

In [91]:
# Inner-join the two wide frames on their shared "date" column.
dw_area = pd.merge(started_dw, completed_dw, on="date", how="inner")

In [92]:
# Keep only rows where both rolling means are present.
dw_area = dw_area.dropna(subset=["started_rollmean", "completed_rollmean"])

In [94]:
# Send dw_area to Datawrapper as the data for chart "ljQcp" (network call).
dw.add_data(chart_id="ljQcp", data=dw_area)

<Response [204]>