In [None]:
import pandas as pd
import altair as alt
from selenium import webdriver
%matplotlib inline

In [None]:
# Load the tidy ESB dataset; the path is relative to the notebook's working directory.
df_esb = pd.read_csv(filepath_or_buffer="data/tidy_esb.csv")

In [None]:
# Print column names, dtypes and non-null counts to sanity-check the load.
df_esb.info()

In [None]:
# Parse "date" into pandas datetimes so the `.dt` accessor can be used on it
# and Altair can infer a temporal axis from the column dtype.
df_esb["date"] = df_esb["date"].pipe(pd.to_datetime)

## 1) Total number of connections for each County

In [None]:
# Layer 1: one horizontal bar per county council; bar length is the summed
# `esb` value, and each council gets its own colour.
bars = (
    alt.Chart(df_esb, title="ESB connections for each County")
    .mark_bar()
    .encode(
        alt.X('sum(esb):Q', stack='zero'),
        alt.Y('County Councils:N'),
        alt.Color('County Councils'),
    )
)

# Layer 2: the same aggregate rendered as a white numeric label. The label is
# anchored at the bar's end and nudged left (dx=-20) so it sits inside the bar.
text = (
    alt.Chart(df_esb)
    .mark_text(dx=-20, dy=3, color='white')
    .encode(
        alt.X('sum(esb):Q', stack='zero'),
        alt.Y('County Councils:N'),
        alt.Detail('County Councils:N'),
        alt.Text('sum(esb):Q', format='.1f'),
    )
)

# Overlay the labels on the bars and display the result.
barplot = alt.layer(bars, text)
barplot

## 2) Average trend of connections over the years

In [None]:
# Single green line: the average of `esb` across all rows sharing a date,
# plotted against that date.
(
    alt.Chart(df_esb, title="ESB connections Trend over the years")
    .mark_line()
    .encode(
        alt.X('date', title="Year"),
        alt.Y('esb', aggregate="average", title="ESB Count"),
        alt.Color(value='green'),
    )
)


## 3) ESB connection distribution for every Council

In [None]:
# Small multiples: one area chart per county council (4 panels per row),
# each showing the summed `esb` value per year.
(
    alt.Chart(df_esb, title='ESB Connections trend for each County')
    .mark_area()
    .encode(
        alt.X('Year:O'),
        alt.Y(
            'sum(esb):Q',
            title='ESB Connections',
            # '~s' = SI-prefix tick labels with trailing zeros trimmed (e.g. 1.5k)
            axis=alt.Axis(format='~s'),
        ),
        alt.Facet('County Councils:O', columns=4),
        alt.Color('County Councils'),
    )
)

## 4) Visualization of the Min, 25%, Median (50%), 75% and Max values of ESB count for every year

In [None]:
# Per-year box plot of `esb`: box = 25% / median / 75%, whiskers = min / max.
# extent='min-max' is required for the whiskers to reach the true minimum and
# maximum. Without it the whiskers stop at 1.5 x IQR and anything beyond them
# is drawn as separate outlier points, so the whisker ends would not be the
# yearly min/max.
alt.Chart(df_esb).mark_boxplot(size=30, extent='min-max').encode(
    x=alt.X('Year:O',
            title='Years'),
    y=alt.Y('esb:Q',
            title='ESB Count')
).properties(width=350, title='Overview of ESB connections every year')

In [None]:
# Flag which half of the calendar year each row belongs to:
# 1 = January-June, 2 = July-December. Rows whose month is missing match
# neither mask and are left as NaN in 'Half'.
df_esb.loc[df_esb['date'].dt.month.between(1, 6), 'Half'] = 1
df_esb.loc[df_esb['date'].dt.month.between(7, 12), 'Half'] = 2

## 5) Connections in the 1st and 2nd half of each year (use the slider below the chart to select a year)

In [1]:
# Back-to-back bar chart comparing the two halves of one year per county
# council. A slider bound to the 'Year' field picks the year on display.
#
# NOTE(review): `selection_single`, `init=` and `add_selection` are the
# Altair 4 spellings. Altair 5 deprecates them in favour of `selection_point`,
# `value=` and `add_params`. Migrate when the environment is upgraded.
# Slider bounds are hard-coded; assumes the data covers 2006-2013 (TODO confirm).
slider = alt.binding_range(min=2006, max=2013, step=1)
select_year = alt.selection_single(name='Select', fields=['Year'],
                                   bind=slider, init={'Year': 2006})

# Shared base for all three panels: keep only rows of the selected year and
# derive a readable label ('1st Half' / '2nd Half') from the numeric 'Half' flag.
base = alt.Chart(df_esb).add_selection(
    select_year
).transform_filter(
    select_year
).transform_calculate(
    types=alt.expr.if_(alt.datum.Half == 1, '1st Half', '2nd Half')
).properties(
    width=250,
)

# Fixed colour mapping so each half keeps its colour whichever panel draws it.
color_scale = alt.Scale(domain=['1st Half', '2nd Half'],
                        range=['green', 'orange'])

# Left panel: FIRST half of the year. The x axis is reversed so the bars grow
# leftwards, away from the label column in the middle.
# (Fix: this panel previously filtered on '2nd Half' while being titled
# 'First Half of Year', so the two panels showed each other's data.)
left = base.transform_filter(
    alt.datum.types == '1st Half'
).encode(
    y=alt.Y('County Councils:O', axis=None),
    x=alt.X('sum(esb):Q',
            title='ESB Count',
            sort=alt.SortOrder('descending')),
    color=alt.Color('types:N', scale=color_scale, legend=None),
    tooltip='sum(esb):Q'
).mark_bar().properties(title='First Half of Year')

# Middle panel: county council names acting as the y-axis labels shared by
# the two bar panels (their own y axes are hidden with axis=None).
middle = base.encode(
    y=alt.Y('County Councils:O', axis=None),
    text=alt.Text('County Councils:O'),
).mark_text(color='steelblue', size=15).properties(width=105)

# Right panel: SECOND half of the year, bars growing rightwards.
right = base.transform_filter(
    alt.datum.types == '2nd Half'
).encode(
    y=alt.Y('County Councils:O', axis=None),
    x=alt.X('sum(esb):Q', title='ESB Count'),
    color=alt.Color('types:N', scale=color_scale, legend=None),
    tooltip='sum(esb):Q'
).mark_bar().properties(title='Second Half of Year')

# Place the three panels side by side and display them.
alt.concat(left, middle, right, spacing=5)

NameError: name 'alt' is not defined