# Data Visualization Gallery
> A collection of charts, graphs and visuals on social issues, climate and the economy. Check back periodically for new charts!

- badges: true
- toc: true
- categories: [altair, data vis]
- show_tags: true

# Social issues

In [1]:
#hide
import pandas as pd
import numpy as np
import altair as alt

In [None]:
#hide_input

# Load the Mapping Police Violence records; `df` is reused by the cells below.
url = 'https://raw.githubusercontent.com/stevhliu/ingolmo/master/images/data/mpv.csv'
df = pd.read_csv(url)

# Deaths per million: each hard-coded ratio is scaled by one million.
# NOTE(review): the ratios look like death count / population -- confirm the sources.
PER_MILLION = 1000000
wdr = (3378/251103235)*PER_MILLION
bdr = (1944/43984096)*PER_MILLION

# Two-row frame (one bar per group), rates rounded to whole numbers.
d = pd.DataFrame({'race': ['Black', 'White'], 'death/mil': [bdr, wdr]})
d = d.round({'death/mil': 0})

# Bar chart: one fixed colour per group, no axes, no legend.
domain = ['Black', 'White']
range_ = ['#0ABF53', '#00112C']
race_scale = alt.Scale(domain=domain, range=range_)

bars = alt.Chart(d).mark_bar(size=150).encode(
    x=alt.X('race', axis=None),
    y=alt.Y('death/mil', axis=None),
    color=alt.Color('race', scale=race_scale, legend=None),
)

# Value labels drawn just above each bar.
text = bars.mark_text(align='center', dy=-10, size=15).encode(text='death/mil')

headline = {
    'text': ['Black Americans are killed more than 3x the rate of White Americans'],
    'subtitle': ['deaths are shown per million people'],
    'subtitleColor': '#7f7f7f',
}
hidden_axis = dict(title=None, labels=False, ticks=False)

(
    alt.layer(bars, text)
    .properties(width=700, title=headline)
    .configure_axis(grid=False)
    .configure_axisY(**hidden_axis)
    .configure_axisX(**hidden_axis)
    .configure_title(fontSize=20, color='#7f7f7f', anchor='middle', dy=-10)
    .configure_view(strokeWidth=0)
)

In [None]:
#hide_input

# Count deaths per (race, age); drop unknown races and ages that are not plain
# integers so the Age column can be cast to int.
unknown_races = ['Unknown race', 'Unknown Race']
non_numeric_ages = ['40s', 'Unknown']

x = (
    df.groupby(['Race', 'Age'])
      .size()
      .reset_index()
      .rename(columns={0: 'deaths'})
)
x = x[~x.Race.isin(unknown_races) & ~x.Age.isin(non_numeric_ages)]
x = x.assign(Age=x.Age.astype(int))

step = 59
overlap = 1

# One area per facet row; the y range extends above the row so that
# neighbouring rows can overlap.
r = alt.Chart(x, height=step).mark_area(
    fill='#0ABF53',
    stroke='#0ABF53',
    strokeWidth=1,
    fillOpacity=0.2,
    interpolate='monotone',
).encode(
    x=alt.X('Age', bin='binned', axis=alt.Axis(title=None)),
    y=alt.Y('deaths', scale=alt.Scale(range=[step, -step * overlap]), axis=None),
).properties(width=600, height=60)

# Rows are ordered by total deaths (ascending), labels sit to the left.
race_header = alt.Header(labelAlign='left', labelAngle=0, labelColor='#7f7f7f', labelPadding=25)
race_order = alt.EncodingSortField('deaths', op='sum', order='ascending')

(
    alt.layer(r, data=x)
    .facet(row=alt.Row('Race', title=None, sort=race_order, header=race_header))
    .properties(bounds='flush', title='Young Black Americans are dying')
    .configure_facet(spacing=0)
    .configure_view(stroke=None)
    .configure_title(fontSize=20, anchor='middle', color='#7f7f7f')
)

In [None]:
#hide_input

# The 25 cities with the largest number of records
city = df.groupby('City').size().nlargest(25).reset_index()
top25 = city.City.to_list()

# Add year/month columns to df (the next cell groups on df['year'])
parsed_dates = pd.DatetimeIndex(df['Date'])
df['year'] = parsed_dates.year
df['month'] = parsed_dates.month

# Deaths per (year, city) for the top-25 cities; year is stored as a string
cleaned = df.groupby(['year', 'City']).size().reset_index()
cleaned = (
    cleaned[cleaned['City'].isin(top25)]
    .rename(columns={0: 'deaths'})
    .assign(year=lambda frame: frame['year'].astype(str))
)

# Line and translucent area share the same encodings; data is supplied by the layer
a = alt.Chart().mark_line(
    strokeWidth=2, color='#0ABF53'
).encode(
    x=alt.X('year', title=None, axis=alt.Axis(values=[2013, 2019], labelAngle=0)),
    y=alt.Y('deaths', title=None),
).properties(width=100, height=100)

b = alt.Chart().mark_area(
    fillOpacity=0.15, fill='#0ABF53'
).encode(
    x=alt.X('year'),
    y=alt.Y('deaths'),
)

# Facets are ordered by total deaths, highest first
city_header = alt.Header(
    labelFontSize=13, labelColor='#7f7f7f', labelFontWeight='bolder',
    labelAlign='center', labelAnchor='middle', labelOrient='top', labelPadding=15,
)
city_order = alt.EncodingSortField('deaths', op='sum', order='descending')

(
    alt.layer(a, b, data=cleaned)
    .facet(
        alt.Column('City', title=None, sort=city_order, header=city_header),
        spacing=alt.RowColnumber(row=25, column=50),
        title='Top 25 US cities with the most police shootings',
        columns=5,
        align='each',
    )
    .resolve_scale(y='independent', x='independent')
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
    .configure_title(fontSize=20, anchor='middle', color='#7f7f7f', dy=-25)
)

In [None]:
#hide_input

domain = ['Black', 'White']
range_ = ['#0ABF53', '#00112C']

# Deaths per (year, race, geography), restricted to the two races being compared.
# Relies on df['year'], which the previous cell adds.
geo2 = (
    df.groupby(['year', 'Race', 'Geography'])
      .size()
      .reset_index()
      .rename(columns={0: 'deaths'})
)
geo2 = geo2[geo2.Race.isin(domain)].assign(year=lambda frame: frame['year'].astype(str))

a = alt.Chart(geo2).mark_line().encode(
    x=alt.X('year', axis=alt.Axis(values=[2013, 2019], labelAngle=0)),
    y=alt.Y('deaths'),
    color=alt.Color('Race', scale=alt.Scale(domain=domain, range=range_)),
).properties(width=170)

geography_header = alt.Header(
    labelFontSize=13, labelColor='#7f7f7f', labelFontWeight='bolder',
    labelAlign='center', labelAnchor='middle', labelOrient='top', labelPadding=15,
)

# One panel per geography type, each with its own y scale
(
    alt.layer(a, data=geo2)
    .facet(
        alt.Column('Geography', title=None, header=geography_header),
        spacing=alt.RowColnumber(column=50),
        title='Black deaths are increasing in rural areas',
    )
    .resolve_scale(y='independent')
    .configure_axis(grid=False)
    .configure_axisX(title=None)
    .configure_axisY(title=None)
    .configure_view(strokeWidth=0)
    .configure_title(fontSize=20, anchor='middle', color='#7f7f7f', dy=-25)
    .configure_legend(title=None, labelFontSize=15, labelColor='#7f7f7f', orient='right')
)

In [None]:
#hide

#get data from Washington Post
url = 'https://raw.githubusercontent.com/washingtonpost/data-police-shootings/master/fatal-police-shootings-data.csv'
wapo = pd.read_csv(url)

#tidy up data: keep rows whose manner_of_death is exactly 'shot' and count them per day
wapo['date'] = pd.to_datetime(wapo.date, format='%Y-%m-%d')
wapo = wapo[wapo['manner_of_death'] == 'shot']
wapo = wapo.groupby(['date', 'manner_of_death']).size().reset_index()
wapo = wapo.rename(columns={0:'deaths'})
wapo['year'] = wapo['date'].dt.year
wapo['month'] = wapo['date'].dt.month

#find cumulative deaths per year
# Aggregate only the numeric `deaths` column. Summing the whole frame also
# pulls in the string `manner_of_death` column, which pandas >= 2.0 no longer
# drops silently, so the cumulative sum would fail on it.
x = (
    wapo.groupby(['year', 'date'])['deaths']
        .sum()
        .groupby(level=0)   # restart the running total at each year
        .cumsum()
        .reset_index()      # columns: year, date, deaths
)

In [None]:
#hide_input

#create layered line chart
base = alt.Chart(x).encode(
    x=alt.X('monthdate(date):T', axis=alt.Axis(format='%b'))
)

# Label each year's line at its latest date with the cumulative total
latest_first = [alt.SortField('date', order='descending')]
text = (
    base.mark_text(dx=15, size=10)
    .transform_window(sort=latest_first, rank='rank(deaths)', groupby=['year'])
    .transform_filter(alt.datum.rank == 1)
    .encode(
        x=alt.X('monthdate(date):T'),
        y=alt.Y('deaths:Q'),
        text=alt.Text('deaths:Q'),
    )
)


def year_line(year, color, stroke_width):
    """Return the cumulative-deaths line for a single year."""
    only_this_year = alt.FieldEqualPredicate(field='year', equal=year)
    return (
        base.mark_line(color=color, strokeWidth=stroke_width)
        .transform_filter(only_this_year)
        .encode(y='deaths')
    )


# 2015-2020 as thin grey context lines, 2021 highlighted on top
context_lines = [year_line(year, '#7f7f7f', 0.25) for year in range(2015, 2021)]
highlight_line = year_line(2021, '#0ABF53', 1.25)

headline = {
    'text': ['Nearly the same number of people are killed by the police every year'],
    'subtitle': ['2021 shown by highlighted line'],
    'subtitleColor': '#7f7f7f',
}

(
    alt.layer(*context_lines, highlight_line, text)
    .properties(height=400, width=600, title=headline)
    .configure_view(strokeWidth=0)
    .configure_axis(grid=False)
    .configure_axisX(title=None)
    .configure_axisY(title=None)
    .configure_title(fontSize=20, anchor='middle', color='#7f7f7f', dy=-10)
)

# Climate

In [None]:
#hide

df = pd.read_csv('https://raw.githubusercontent.com/stevhliu/ingolmo/master/images/data/ad_viz_plotval_data.csv')
# lift Altair's default row limit for this (larger) dataset
alt.data_transformers.disable_max_rows()

# clean up data: friendlier column name, no missing rows, and skip these areas
excluded_areas = [
    'Clearlake, CA',
    'Crescent City, CA',
    'Eureka-Arcata-Fortuna, CA',
    'Red Bluff, CA',
    'Redding, CA',
    'Ukiah, CA',
]
df = df.rename(columns={'DAILY_AQI_VALUE': 'Daily AQI Value'}).dropna()
df = df[~df['CBSA_NAME'].isin(excluded_areas)]

> Important: The climate data is only current as of Sep 27, 2020. 

In [None]:
#hide_input

# Heatmap: one cell per (day, area), coloured by the daily AQI value
aqi_colors = alt.Scale(scheme='yelloworangered')

(
    alt.Chart(df)
    .mark_rect(timeUnitBand=1)
    .encode(
        x=alt.X('yearmonthdate(Date):O', axis=alt.Axis(format='%B')),
        y=alt.Y('CBSA_NAME:O'),
        color=alt.Color('Daily AQI Value:Q', scale=aqi_colors),
        tooltip=['Daily AQI Value'],
    )
    .properties(width=800, title={'text': ['California Air Quality, 2020']})
    .configure_axisX(
        orient='top',
        title=None,
        ticks=False,
        labelAngle=0,
        labelOverlap=True,
        labelSeparation=30,
        labelPadding=20,
    )
    .configure_axisY(title=None)
    .configure_title(fontSize=20, color='#7f7f7f', anchor='middle', dy=-10)
    .configure_legend(
        orient='top',
        direction='horizontal',
        gradientLength=300,
        titleAnchor='middle',
        titleColor='#7f7f7f',
        padding=12,
    )
    .interactive()
)

In [None]:
#hide

fire = pd.read_csv('https://raw.githubusercontent.com/stevhliu/ingolmo/master/images/data/mapdataall.csv')
# .copy() so the `year` column below is added to an independent frame, not a slice
fire = fire[['incident_name', 'incident_county', 'incident_acres_burned', 'incident_dateonly_created', 'incident_dateonly_extinguished']].copy()

# calculate total number of acres burned
fire['year'] = pd.DatetimeIndex(fire['incident_dateonly_created']).year

# Running total of acres burned within each year, by creation date.
# Only the numeric acres column is aggregated: summing the whole frame would
# include the name/county/date string columns, which pandas >= 2.0 no longer
# drops silently, and the cumulative sum would fail on them.
df2 = (
    fire.groupby(['year', 'incident_dateonly_created'])['incident_acres_burned']
        .sum()
        .groupby(level=0)   # restart the running total at each year
        .cumsum()
        .reset_index()      # columns: year, incident_dateonly_created, incident_acres_burned
)

# leave out the 1969 and 2009 rows, then parse the date strings
df2 = df2[(df2['year'] != 1969) & (df2['year'] != 2009)].copy()
df2['incident_dateonly_created'] = pd.to_datetime(df2.incident_dateonly_created, format='%Y-%m-%d')

In [None]:
#hide_input

# Shared x encoding and tooltip for every layer
base = alt.Chart(df2).encode(
    alt.X('monthdate(incident_dateonly_created):T',
           axis=alt.Axis(format='%b')),
    tooltip=['incident_acres_burned']
)

# Year label placed at the latest date of each year's line
text = base.mark_text(
    dx=20, size=12
).transform_window(
    sort=[alt.SortField('incident_dateonly_created', order='descending')],
    rank='rank(incident_acres_burned)',
    groupby=['year']
).transform_filter(
    alt.datum.rank == 1
).encode(
    x=alt.X('monthdate(incident_dateonly_created):T'),
    y=alt.Y('incident_acres_burned:Q'),
    text=alt.Text('year'))


def acres_line(year, color, stroke_width):
    """Return the cumulative acres-burned line for a single year."""
    return base.mark_line(
        color=color, strokeWidth=stroke_width
    ).transform_filter(
        alt.FieldEqualPredicate(field='year', equal=year)
    ).encode(y='incident_acres_burned')


# 2013-2019 as thin grey context lines, 2020 highlighted on top
context_lines = [acres_line(year, '#7f7f7f', 0.45) for year in range(2013, 2020)]
highlight_line = acres_line(2020, '#0ABF53', 1.25)

alt.layer(
    *context_lines,
    highlight_line,
    text
).properties(
    width=800,
    # title typo fixed: 'wilfdires' -> 'wildfires'
    title={'text':['Total acres burned in California wildfires'],
           'subtitle':['Hover over the year to view total acres burned'], 'subtitleColor':'#7f7f7f'}
).configure_view(
    strokeWidth=0
).configure_axis(
    grid=False
).configure_axisX(
    title=None
).configure_axisY(
    title=None
).configure_title(
    fontSize=20,
    anchor='middle',
    color='#7f7f7f',
    dy=-10
)

In [None]:
#hide

# Leave out the 1969 and 2009 rows. .copy() so the column assignments below
# write to an independent frame rather than a filtered slice.
fire = fire[(fire['year'] != 1969) & (fire['year'] != 2009)].copy()

# calculate average days to extinguish fires
fire['incident_dateonly_created'] = pd.to_datetime(fire['incident_dateonly_created'], format='%Y-%m-%d')
fire['incident_dateonly_extinguished'] = pd.to_datetime(fire['incident_dateonly_extinguished'], format='%Y-%m-%d')

# whole days between creation and extinguishment
fire['time_to_extinguish'] = (fire['incident_dateonly_extinguished'] - fire['incident_dateonly_created']).dt.days

# Only fires with a positive duration contribute to the yearly average.
df3 = fire[fire['time_to_extinguish'] > 0]
# numeric_only=True: the frame still holds string and datetime columns, and
# pandas >= 2.0 raises instead of silently dropping them when averaging.
df3 = df3.groupby('year').mean(numeric_only=True).round(0).reset_index()
df3['time_to_extinguish'] = df3['time_to_extinguish'].astype(int)

In [None]:
#hide_input

# Horizontal bars: average days to extinguish, one bar per year
bar = alt.Chart(df3).mark_bar(
    color='#0ABF53',
    cornerRadiusEnd=30,
).encode(
    x=alt.X('time_to_extinguish:Q'),
    y=alt.Y('year:O'),
)

# Value label just past the end of each bar
text = bar.mark_text(
    size=12,
    color='#7f7f7f',
    align='left',
    baseline='middle',
    dx=3,
).encode(text='time_to_extinguish:Q')

headline = {'text': ['Megafire years stretch the average number of days to extinguish wildfires']}

(
    alt.layer(bar, text)
    .properties(width=800, height=250, title=headline)
    .configure_title(fontSize=20, color='#7f7f7f', anchor='middle', dy=-10)
    .configure_axis(grid=False)
    .configure_axisY(title=None)
    .configure_axisX(title=None)
    .configure_view(strokeWidth=0)
)

# Economy

In [None]:
#hide_input

df = pd.read_csv('https://raw.githubusercontent.com/OpportunityInsights/EconomicTracker/main/data/Affinity%20-%20National%20-%20Daily.csv')

# tidy up data
# Keep 2020 rows from 13 January onward. The window is applied to assembled
# timestamps: filtering on `month >= 1` and `day >= 13` separately also threw
# away days 1-12 of every later month.
df = df[df['year'] == 2020]
date = pd.to_datetime(df[['year', 'month', 'day']])
from_jan_13 = date >= pd.Timestamp(2020, 1, 13)
# .copy() so the column assignment below writes to an independent frame
df = df[from_jan_13].copy()
date = date[from_jan_13].rename('date')
df['year'] = df['year'].astype(str)

# Skip the first 12 dates (kept from the original). The concat below aligns on
# the index, so those rows end up with a missing date.
# NOTE(review): confirm this trim is still wanted.
date = date[12:]

df = df.drop(['month', 'day', 'freq', 'spend_retail_no_grocery', 'spend_retail_w_grocery', 'provisional'], axis=1)
df = df.rename(columns= {'spend_all':'all spending', 'spend_apg':'general merchandise, apparel and accessories',
                         'spend_aer':'arts, entertainment and recreation', 'spend_grf':'grocery and food store',
                         'spend_hcs':'healthcare and social assistance', 'spend_acf':'accomodation and food service',
                         'spend_tws':'transportation and warehousing', 'spend_all_inclow':'low median income',
                         'spend_all_incmiddle':'middle median income', 'spend_all_inchigh':'high median income'})

# Label slice: takes every column positioned between these two names, so it
# depends on the column order of the source CSV.
clean = df.loc[:,'accomodation and food service':'middle median income'].astype(np.float64)
# `date` is already named, so it lands in a 'date' column next to the spending series
clean = pd.concat([date, clean], axis=1)

In [None]:
#hide_input

domain = ['low median income', 'middle median income', 'high median income']
range_ = ['#0ABF53', '#00112C', '#4EE5E5']

# Fold the three income columns into key/value pairs; both layers build on this
folded = alt.Chart(clean).transform_fold(domain)

date_axis = alt.Axis(format='%b %Y', tickCount=10, labelOffset=0, tickOffset=0, labelPadding=60, ticks=False)
percent_axis = alt.Axis(format='%', tickCount=10)

chart = folded.mark_line(strokeWidth=1.5).encode(
    x=alt.X('date:T', title=None, axis=date_axis),
    y=alt.Y('value:Q', title=None, axis=percent_axis),
    color=alt.Color('key:N', scale=alt.Scale(domain=domain, range=range_), legend=None),
).properties(
    width=800,
    title={'text': ['Credit and debit card spending per median income']},
)

# Series names placed at each line's value on the latest date (replaces the legend)
labels = folded.mark_text(dx=50, dy=-7, color='#7f7f7f').encode(
    x=alt.X('date:T', aggregate='max'),
    y=alt.Y('value:Q', aggregate={'argmax': 'date'}),
    text=alt.Text('key:N'),
)

(
    alt.layer(chart, labels)
    .configure_view(strokeWidth=0)
    .configure_axis(grid=False)
    .configure_axisX(orient='top', offset=-60)
    .configure_title(fontSize=20, anchor='middle', color='#7f7f7f', dy=-10)
)

In [None]:
#hide_input

# Spending categories to fold into key/value pairs, one small multiple each
categories = [
    'general merchandise, apparel and accessories',
    'arts, entertainment and recreation',
    'grocery and food store',
    'healthcare and social assistance',
    'accomodation and food service',
    'transportation and warehousing',
]

month_axis = alt.Axis(format='%b', tickCount=10, labelOffset=0, tickOffset=0, ticks=False)
percent_axis = alt.Axis(format='%', tickCount=10)

chart = (
    alt.Chart(clean)
    .transform_fold(categories)
    .mark_line(color='#0ABF53', strokeWidth=1.5)
    .encode(
        x=alt.X('date:T', title=None, axis=month_axis),
        y=alt.Y('value:Q', title=None, axis=percent_axis),
    )
    .properties(width=200, height=200)
)

category_header = alt.Header(
    labelFontSize=13, labelColor='#7f7f7f', labelFontWeight='bolder',
    labelAlign='center', labelAnchor='middle', labelOrient='top', labelPadding=10,
)

# Three panels per row, each with independent x and y scales
(
    chart.facet(
        alt.Column('key:N', title=None, header=category_header),
        spacing=alt.RowColnumber(row=40, column=50),
        title='Except for groceries, spending across all major categories fell in 2020',
        columns=3,
        align='each',
    )
    .resolve_scale(y='independent', x='independent')
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
    .configure_title(fontSize=20, anchor='middle', color='#7f7f7f', dy=-25)
)

{{'Police and census data is sourced from [Mapping Police Violence](https://mappingpoliceviolence.org/), [The Washington Post](https://github.com/washingtonpost/data-police-shootings) and the [United States Census Bureau](https://www.census.gov/quickfacts/fact/table/US/SEX255219).' | fndetail: 1}}

{{'Climate data is sourced from the [EPA](https://www.epa.gov/outdoor-air-quality-data) and [Cal Fire](https://www.fire.ca.gov/).' | fndetail: 2}}

{{'Economic data is sourced from [Opportunity Insights](https://opportunityinsights.org/).' | fndetail: 3}}