In [2]:
import pandas as pd
import altair as alt

In [3]:
# Load the sunshine table from the notebook's working directory and show it.
# NOTE(review): the preview below has an 'Unnamed: 0' column that mirrors the
# row index -- presumably an index written out by an earlier to_csv(); confirm
# before relying on or dropping that column.
data = pd.read_csv(filepath_or_buffer='sunshine.csv')
data

Unnamed: 0,city,Longitude,lat,month,sunshine,API,Energy(kWh),latcenter
0,Chicago,-87.623177,41.881832,Jan,135,35,714,39.5
1,Chicago,-87.623177,41.881832,Feb,136,36,631,39.5
2,Chicago,-87.623177,41.881832,Mar,187,39,548,39.5
3,Chicago,-87.623177,41.881832,Apr,215,45,443,39.5
4,Chicago,-87.623177,41.881832,May,281,51,458,39.5
...,...,...,...,...,...,...,...,...
67,Seattle,-122.335167,47.608013,Aug,281,32,950,39.5
68,Seattle,-122.335167,47.608013,Sep,221,29,773,39.5
69,Seattle,-122.335167,47.608013,Oct,142,23,807,39.5
70,Seattle,-122.335167,47.608013,Nov,72,21,913,39.5


In [17]:
# Calendar order for the ordinal month channel (otherwise months would be
# sorted alphabetically).
month_order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

# Scatter layer: one circle per row of `data`. Position is sunshine hours vs.
# energy use, circle size follows API, colour identifies the city and opacity
# steps through the months.
chart3 = alt.Chart(
    data,
    width=600,
    height=400,
    title='Sunshine Hours, Energy Consumption, and API by City and Month',
).mark_circle().encode(
    x=alt.X('sunshine:Q', axis=alt.Axis(title='Sunshine(in hours)')),
    y=alt.Y('Energy(kWh):Q'),
    size=alt.Size('API:Q'),
    color=alt.Color('city:N'),
    opacity=alt.Opacity('month:O', sort=month_order),
    tooltip=['city', 'month', 'sunshine', 'Energy(kWh)', 'API'],
)

# Trend layer: a dashed, basis-interpolated line per city over the same
# sunshine/energy axes.
chart4 = alt.Chart(data).mark_line(
    interpolate='basis',
    strokeDash=[8, 4],
).encode(
    x='sunshine:Q',
    y='Energy(kWh):Q',
    color='city:N',
)

# Draw the trend lines on top of the scatter.
chart2 = alt.layer(chart3, chart4)

In [5]:
# Latitude panel: mean sunshine per city plotted against the city's latitude,
# built as five layers that share the same x (lat) and y (sunshine) scales.
# Every aggregated layer groups by `lat`; this assumes each city has a single
# latitude value, as in the rows previewed when the CSV was loaded.

# Line joining the per-latitude mean sunshine values. This layer also carries
# the panel size and title.
chart = alt.Chart(data).mark_line().encode(
    x=alt.X('lat:Q', axis=alt.Axis(title='Latitude')),
    y=alt.Y('sunshine:Q', aggregate='mean', axis=alt.Axis(title='Mean'))
).properties(
    width=600,
    height=400,
    title='Mean and Standard Deviation of Sunshine by City'
)

# One filled point per city at its mean sunshine, coloured by city.
# The y-axis title set here is not specific to this layer: layered charts
# share one y axis, and the explicit titles of the layers are combined into a
# single axis title together with 'Mean' from the line layer above.
points = alt.Chart(data).mark_point(filled=True, size=100).encode(
    x=alt.X('lat:Q'),
    y=alt.Y('sunshine:Q', aggregate='mean', axis=alt.Axis(title='Standard Deviation of Sunshine(hours)')),
    color=alt.Color('city:N')
)

# Error bars spanning mean +/- one standard deviation of sunshine per group.
# mark_errorbar(extent='stdev') aggregates the raw field itself, so y names the
# raw `sunshine` column with no aggregate of its own: Vega-Lite discards a
# custom aggregate on an error bar's continuous axis and logs a warning.
# Bar thickness is driven by the city's longitude.
band = alt.Chart(data).mark_errorbar(extent='stdev').encode(
    x=alt.X('lat:Q'),
    y=alt.Y('sunshine:Q', axis=alt.Axis(title='')),
    strokeWidth='Longitude:Q',
    color=alt.value('#FEAD00')
)

# City name labels, nudged up and to the right of each mean point.
text = alt.Chart(data).mark_text(xOffset=20, yOffset=-10, dx=10, dy=-2, angle=0).encode(
    x=alt.X('lat:Q'),
    y=alt.Y('sunshine:Q', aggregate='mean'),
    color=alt.Color('city:N'),
    text='city:N'
)

# Vertical red reference line at the mean of the `latcenter` column.
rule = alt.Chart(data).mark_rule(color='red').encode(
    x=alt.X('mean(latcenter):Q', axis=alt.Axis(title='U.S Latitude Center'))
)

# Layer order is draw order: line, points, error bars, labels, reference rule.
chart1 = chart + points + band + text + rule

In [18]:
# Put the latitude panel (chart1) to the left of the sunshine/energy panel
# (chart2) and give the pair one shared headline, centred and pushed 30px
# away from the plots.
combined_plots = alt.hconcat(chart1, chart2).properties(
    title=dict(
        text="Sunshine Hours VS Household Energy Consumption & API w.r.t Location",
        anchor="middle",
        offset=30,
    )
)
combined_plots

In [220]:
# Average API per city over all of that city's rows.
data['API'].groupby(data['city']).mean()

city
Chicago          42.333333
Houston          46.666667
Miami            44.250000
New York         41.583333
San Francisco    38.333333
Seattle          27.500000
Name: API, dtype: float64