In [1]:
# import relevant packages
import numpy as np
import pandas as pd
import altair as alt

In [2]:
# get our data ready
data = pd.read_csv("data/alldomains_states_data.csv")
data['visit_date'] = pd.to_datetime(data['visit_date'])

# delete the Jervis Bay territory and not set rows
data = data[data.region != '(not set)']
data = data[data.region != 'Jervis Bay Territory']

data = data.reset_index()

data.head()

Unnamed: 0,index,total_visits,region,visit_date
0,0,1045871,New South Wales,2019-10-30
1,1,547331,Queensland,2019-10-30
2,3,217460,South Australia,2019-10-30
3,4,15264,Northern Territory,2019-10-30
4,5,766685,Victoria,2019-10-30


In [3]:
# scale by population size

populations = {"New South Wales" : 7317500,
               "Queensland" : 4599400,
               "South Australia" : 1659800,
               "Northern Territory" : 231200,
               "Victoria" : 5640900,
               "Tasmania" : 511000,
               "Western Australia" : 2366900,
               "Australian Capital Territory" : 366900}


for row in range(len(data)):
    pop = populations[data["region"][row]]
    data["total_visits"][row] = data["total_visits"][row]/pop*10000

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


In [4]:
data

Unnamed: 0,index,total_visits,region,visit_date
0,0,1429,New South Wales,2019-10-30
1,1,1190,Queensland,2019-10-30
2,3,1310,South Australia,2019-10-30
3,4,660,Northern Territory,2019-10-30
4,5,1359,Victoria,2019-10-30
...,...,...,...,...
2921,3497,0,Tasmania,2020-10-29
2922,3498,0,Queensland,2020-10-29
2923,3499,0,Western Australia,2020-10-29
2924,3500,0,South Australia,2020-10-29


In [5]:
chart = alt.Chart(data).mark_area().encode(
    alt.X('visit_date:T',title='Date'),
    alt.Y('sum(total_visits):Q', stack='center', axis = None, title='Total Visits'),
    alt.Color('region:N',
        scale=alt.Scale(scheme='category20')
    )
).properties(
    width=700,
    height=600,
    title = 'Users by state over a year scaled by state population'
).interactive()

chart.configure_header(
    titleColor='grey',
    titleFontSize=22,
)