# Car Usage
This notebook will generate some visualizations about car usage across the world.

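Data sources:

- Passenger transport by country, including the US (OECD; published in passenger-kilometres and converted to passenger-miles below): [https://data.oecd.org/transport/passenger-transport.htm](https://data.oecd.org/transport/passenger-transport.htm)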

~~Passenger-miles in the US: [https://www.bts.gov/content/us-passenger-miles](https://www.bts.gov/content/us-passenger-miles)~~ (not used: the OECD dataset above already includes the US data)

In [101]:
# imports
import os

import altair as alt
import pandas as pd
# Directory the exported chart JSON is written to. The default is the author's
# local Jekyll checkout; set the CAR_USAGE_JSON_DIR environment variable to
# redirect the export when running the notebook on another machine.
# Kept as a plain str because later cells build the file path with `+`.
myJekyllDir = os.environ.get(
    'CAR_USAGE_JSON_DIR',
    '/Users/rafigildiaz/Desktop/IS 445/rgildiaz.github.io/assets/json',
)

In [102]:
# US data is actually unneeded, it's in the other dataset
# # load the US data
# df_us = pd.read_excel('data/statistic_id185769_passenger-miles---highway-traffic-in-the-united-states-1990-2019.xlsx', sheet_name='Data', skiprows=4)

# # the first column is empty
# df_us = df_us.drop(columns=df_us.columns[0])

# # rename the columns
# df_us.columns = ['year', 'passenger_miles']

# # data is in millions of miles
# # df_us['passenger_miles'] = df_us['passenger_miles'] * 1000000

# df_us.head()

In [103]:
# Load the OECD passenger-transport extract. Despite the original "not-US"
# label, the country filter below shows this file is also the source of the
# USA rows.
MILES_PER_KM = 0.621371
MAJOR_COUNTRIES = ['AUS', 'CAN', 'GBR', 'JPN', 'USA']

df_world = pd.read_csv('data/DP_LIVE_25042023212441825.csv')

# drop unneeded columns
df_world = df_world.drop(columns=['INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes'])

# rename the three remaining columns by position (raises if the count differs)
df_world.columns = ['country', 'year', 'passenger_miles']

# Convert kilometres to miles and scale down by 1,000,000.
# NOTE(review): OECD publishes this indicator in *millions* of passenger-km
# (TODO confirm against the MEASURE column of the raw file), so after this
# extra 1e-6 scaling the column holds TRILLIONS of passenger-miles, not
# millions -- e.g. Australia comes out at roughly 0.15.
df_world['passenger_miles'] = df_world['passenger_miles'] * MILES_PER_KM / 1000000

# only keep some major countries; .copy() makes the result an independent
# frame so the column assignment below does not raise SettingWithCopyWarning
df_world = df_world[df_world['country'].isin(MAJOR_COUNTRIES)].copy()

# format year as datetime (first day of each year)
df_world['year'] = pd.to_datetime(df_world['year'], format='%Y')

df_world.head()

Unnamed: 0,country,year,passenger_miles
0,AUS,1998-01-01,0.153156
1,AUS,1999-01-01,0.156535
2,AUS,2000-01-01,0.159572
3,AUS,2001-01-01,0.158093
4,AUS,2002-01-01,0.161879


In [104]:
# unneeded
# chart1 = alt.Chart(df_us).mark_line().encode(
#     x='year',
#     y='passenger_miles'
# ).properties(
#     title='Passenger miles in the US'
# )

# chart1

In [109]:
# Line chart of yearly passenger miles per country, with each country's code
# drawn as a text label at the right-hand end of its line.
# Layering recipe for the end-of-line labels adapted from:
# https://stackoverflow.com/questions/61194028/adding-labels-at-end-of-line-chart-in-altair

# The load cell divides the converted values by 1,000,000. Assuming the OECD
# source is in millions of passenger-km (TODO confirm), the plotted numbers
# are trillions of passenger-miles, so the axis is labelled accordingly.
Y_TITLE = "Passenger miles (trillions)"


def _country_color():
    """Build the colour encoding shared by the line layer and the label layer."""
    return alt.Color(
        'country',
        title="Country",
        scale=alt.Scale(scheme='category10'),
        # the end-of-line text labels replace the legend
        legend=None
    )


# one line per country
countries = alt.Chart(df_world).mark_line().encode(
    x=alt.X('year', title="Year"),
    y=alt.Y(
        'passenger_miles',
        title=Y_TITLE,
        # scale=alt.Scale(type='log')
    ),
    color=_country_color()
).properties(
    title='Yearly Passenger Miles'
)

# one text mark per country, anchored at the last point of its line
labels = alt.Chart(df_world).mark_text(
    align='left',
    baseline='middle',
    dx=3  # small gap between the end of the line and the label
).encode(
    # latest year available for the country ...
    x=alt.X('year', title="Year", aggregate='max'),
    # ... and the passenger_miles value *in that latest year*. Using
    # argmax over 'passenger_miles' here would instead pick the country's
    # all-time peak and float the label away from the end of the line
    # whenever the most recent year is not the maximum.
    y=alt.Y(
        'passenger_miles',
        title=Y_TITLE,
        aggregate={'argmax': 'year'}
    ),
    text=alt.Text('country'),
    color=_country_color()
)

chart = alt.layer(countries, labels).resolve_scale(color='independent').properties(
    width=600,
    height=300
)

# export the Vega-Lite spec for embedding in the Jekyll site
chart.save(myJekyllDir + '/car_usage.json')

chart