In [1]:
"""
Part 1: World Vaccination
"""
# Import Required Libraries
import pandas as pd
import altair as alt


In [11]:
"""
Task 1: Read the world_vaccination.csv file
"""
# Read the vaccination records and, in the same expression, drop the rows for
# the "(WHO) Global" region so only the individual WHO regions remain.
world_vaccination = (
    pd.read_csv('data/world_vaccination.csv')
    .loc[lambda frame: frame['who_region'] != '(WHO) Global']
)

# Show the filtered frame
world_vaccination


Unnamed: 0,who_region,yr,pct_vaccinated,vaccine
0,Africa,2017,71.0,polio
1,Americas,2017,90.0,polio
2,South-East Asia,2017,88.0,polio
3,Europe,2017,93.0,polio
4,Eastern Mediterranean,2017,81.0,polio
...,...,...,...,...
463,Americas,1989,,hepatitis_b
464,South-East Asia,1989,,hepatitis_b
465,Europe,1989,,hepatitis_b
466,Eastern Mediterranean,1989,,hepatitis_b


In [16]:
"""
Task 2: Create a scatter plot
"""
# Axis encodings are named up front so the chart expression stays short.
# The year scale is not forced to include zero.
year_axis = alt.X('yr:Q', title='Year', scale=alt.Scale(zero=False))
pct_axis = alt.Y('pct_vaccinated:Q', title='Percentage Vaccinated')

scatter_plot = (
    alt.Chart(world_vaccination)
    .mark_point(size=60, opacity=0.6)
    .encode(
        x=year_axis,
        y=pct_axis,
        tooltip=['who_region', 'yr', 'pct_vaccinated', 'vaccine'],
    )
    .properties(title='Vaccination Percentage Over Time')
)

scatter_plot



In [22]:
"""
Task 3: Add a color and shape encoding objects
"""
# Same scatter plot as before, with the vaccine type mapped to both the colour
# and the shape of each point. Both channels share one legend title.
vaccine_legend_title = 'Vaccine Type'

scatter_plot_encoded = (
    alt.Chart(world_vaccination)
    .mark_point(size=60, opacity=0.6)
    .encode(
        x=alt.X('yr:Q', title='Year', scale=alt.Scale(zero=False)),
        y=alt.Y('pct_vaccinated:Q', title='Percentage Vaccinated'),
        color=alt.Color('vaccine:N', title=vaccine_legend_title),
        shape=alt.Shape('vaccine:N', title=vaccine_legend_title),
        tooltip=['who_region', 'yr', 'pct_vaccinated', 'vaccine'],
    )
    .properties(title='Vaccination Percentage by Vaccine Type')
)

scatter_plot_encoded


In [24]:
"""
Task 4: Create a data frame object
"""
# Keep only the rows whose vaccine column is exactly 'polio'
is_polio = world_vaccination['vaccine'] == 'polio'
polio = world_vaccination.loc[is_polio]

# Show the polio-only frame
polio


Unnamed: 0,who_region,yr,pct_vaccinated,vaccine
0,Africa,2017,71.0,polio
1,Americas,2017,90.0,polio
2,South-East Asia,2017,88.0,polio
3,Europe,2017,93.0,polio
4,Eastern Mediterranean,2017,81.0,polio
...,...,...,...,...
260,Americas,1980,66.0,polio
261,South-East Asia,1980,3.0,polio
262,Europe,1980,75.0,polio
263,Eastern Mediterranean,1980,21.0,polio


In [29]:
"""
Task 5: Create a scatter plot
"""
# Polio-only scatter plot; each WHO region gets its own colour and its own
# point shape, and both channels share one legend title.
region_legend_title = 'Region'

polio_regions = (
    alt.Chart(polio)
    .mark_point(size=80, opacity=0.6)
    .encode(
        x=alt.X('yr:Q', title='Year', scale=alt.Scale(zero=False)),
        y=alt.Y('pct_vaccinated:Q', title='Percentage Vaccinated'),
        color=alt.Color('who_region:N', title=region_legend_title),
        shape=alt.Shape('who_region:N', title=region_legend_title),
        tooltip=['who_region', 'yr', 'pct_vaccinated'],
    )
    .properties(title='Polio Vaccination Percentage by Region', width=500)
)

polio_regions


In [30]:
"""
Task 6: Change the point to a line
"""
# Line version of the polio chart: one line per WHO region, told apart by
# colour only (no shape channel is used here).
polio_line_encodings = dict(
    x=alt.X('yr:Q', title='Year'),
    y=alt.Y('pct_vaccinated:Q', title='Percentage Vaccinated'),
    color=alt.Color('who_region:N', title='Region'),
    tooltip=['who_region', 'yr', 'pct_vaccinated'],
)

polio_regions_line = (
    alt.Chart(polio)
    .mark_line()
    .encode(**polio_line_encodings)
    .properties(title='Polio Vaccination Trends Over Time', width=500)
)

polio_regions_line


In [31]:
"""
Task 7: Create a plot
"""
# Build the per-region line chart first, then split it into one panel per
# vaccine, laid out as columns (side by side).
world_lines_for_columns = alt.Chart(world_vaccination).mark_line().encode(
    x=alt.X('yr:Q', title='Year'),
    y=alt.Y('pct_vaccinated:Q', title='Percentage Vaccinated'),
    color=alt.Color('who_region:N', title='Region'),
    tooltip=['who_region', 'yr', 'pct_vaccinated'],
)

side_by_side_world = (
    world_lines_for_columns
    .facet(column='vaccine:N')
    .properties(title='Vaccination Trends for Polio and Hepatitis B')
)

side_by_side_world


In [32]:
"""
Task 8: Use Facet to arrange the same two plots vertically
"""
# Same per-region line chart, but faceted by row so the two vaccine panels
# are stacked vertically.
world_lines_for_rows = alt.Chart(world_vaccination).mark_line().encode(
    x=alt.X('yr:Q', title='Year'),
    y=alt.Y('pct_vaccinated:Q', title='Percentage Vaccinated'),
    color=alt.Color('who_region:N', title='Region'),
    tooltip=['who_region', 'yr', 'pct_vaccinated'],
)

vertical_world = (
    world_lines_for_rows
    .facet(row='vaccine:N')
    .properties(title='Vaccination Trends for Polio and Hepatitis B')
)

vertical_world


In [10]:
"""
Part 2: Fast-Food Chains in the United States
"""

'\nPart 2: Fast-Food Chains in the United States\n'

In [33]:
"""
Task 1: Read the fast_food.csv file
"""
# Read the fast-food restaurant listing from the data directory
fast_food = pd.read_csv(filepath_or_buffer='data/fast_food.csv')

# Show the loaded frame
fast_food


Unnamed: 0,name,st
0,McDonald's,NY
1,Wendy's,OH
2,Frisch's Big Boy,KY
3,McDonald's,NY
4,OMG! Rotisserie,OH
...,...,...
9995,Wendy's,GA
9996,Lee's Oriental Martial Arts,IL
9997,Five Guys,CO
9998,El Pollo Loco,CA


In [34]:
"""
Task 2: Find the top 9 restaurants
"""
# Restrict to the West Coast states (California, Washington, Oregon)
in_west_coast = fast_food['st'].isin(['CA', 'WA', 'OR'])
west_coast = fast_food.loc[in_west_coast]

# Tally locations per restaurant name, keep the nine most frequent, and turn
# the result into a two-column frame: 'name' and 'count'.
top_restaurants = (
    west_coast['name']
    .value_counts()
    .head(9)
    .rename_axis('name')
    .reset_index(name='count')
)

# Show the top-nine table
top_restaurants


Unnamed: 0,name,count
0,McDonald's,183
1,Taco Bell,102
2,Burger King,101
3,Jack in the Box,89
4,Carl's Jr.,46
5,Wendy's,42
6,KFC,40
7,Subway,40
8,Del Taco,29


In [36]:
"""
Task 3: Plot the counts for the top 9 fast food restaurants
"""
# Vertical bar chart: restaurant name on x, number of locations on y.
count_bar_chart = (
    alt.Chart(top_restaurants)
    .mark_bar()
    .encode(
        x=alt.X('name:N', title='Restaurant'),
        y=alt.Y('count:Q', title='Number of Locations'),
        tooltip=['name', 'count'],
    )
    .properties(title='Top 9 Fast Food Restaurants on the West Coast')
)

count_bar_chart


In [37]:
"""
Task 4: Use the configure_axis function to rotate the labels
"""
# Tilt the x-axis labels by -45 degrees on a configured copy of the chart.
x_label_angle = -45
count_bar_chart_A = count_bar_chart.configure_axisX(labelAngle=x_label_angle)

count_bar_chart_A


In [39]:
"""
Task 5: Use the configure_axis function to rotate the plot
"""
# Horizontal variant of the bar chart: the channels are swapped, so the
# restaurant name sits on y and the location count on x.
count_bar_chart_B = (
    alt.Chart(top_restaurants)
    .mark_bar()
    .encode(
        y=alt.Y('name:N', title='Restaurant'),
        x=alt.X('count:Q', title='Number of Locations'),
        tooltip=['name', 'count'],
    )
    .properties(title='Top 9 Fast Food Restaurants on the West Coast')
)

count_bar_chart_B


In [40]:
"""
Task 6: Find which state that has the highest number of restaurants
"""
# Keep only West Coast rows that belong to one of the top-nine restaurants,
# then count how many of those rows fall in each state.
is_top_restaurant = west_coast['name'].isin(top_restaurants['name'])
state_counts = (
    west_coast.loc[is_top_restaurant, 'st']
    .value_counts()
    .rename_axis('st')
    .reset_index(name='count')
)

# Show the per-state totals
state_counts


Unnamed: 0,st,count
0,CA,461
1,WA,137
2,OR,74


In [41]:
"""
Task 7: Create a bar plot
"""
# One bar per state, with height equal to the number of top-nine locations.
state_counts_plot = (
    alt.Chart(state_counts)
    .mark_bar()
    .encode(
        x=alt.X('st:N', title='State'),
        y=alt.Y('count:Q', title='Number of Restaurants'),
        tooltip=['st', 'count'],
    )
    .properties(title='Number of Top 9 Fast-Food Restaurants Per State')
)

state_counts_plot


In [63]:
"""
Task 8: Create the top_n_state data frame
"""

# Step 1: keep only West Coast rows for the top-nine restaurants
top_nine_rows = west_coast.loc[west_coast['name'].isin(top_restaurants['name'])]

# Step 2: count rows for every (state, restaurant) pair and expose the tally
# as a regular 'count' column
pair_counts = top_nine_rows.groupby(['st', 'name']).size().reset_index(name='count')

# Step 3: largest counts first, with a fresh 0-based index
top_n_state = (
    pair_counts
    .sort_values(by='count', ascending=False)
    .reset_index(drop=True)
)

# Show the per-state, per-restaurant table
top_n_state



Unnamed: 0,st,name,count
0,CA,McDonald's,121
1,CA,Taco Bell,70
2,CA,Jack in the Box,65
3,CA,Burger King,58
4,CA,Carl's Jr.,42
5,WA,McDonald's,39
6,WA,Burger King,29
7,CA,Subway,28
8,CA,Del Taco,27
9,CA,Wendy's,26


In [50]:
"""
Task 9: Plot the top 9 fast food restaurants as a stacked bar chart
"""
# Normalised stacked bar chart: each state's bar is scaled to the same total
# height, so the segments show each restaurant's share of that state's
# top-nine locations rather than raw counts.
stacked_chart = alt.Chart(top_n_state).mark_bar().encode(
    x='st:N',
    y=alt.Y(
        'count:Q',
        stack='normalize',
        title='Percentage',
        # stack='normalize' produces fractions between 0 and 1; format the
        # tick labels as percentages so they agree with the axis title.
        axis=alt.Axis(format='%'),
    ),
    color='name:N',
    # The tooltip still reports the raw location count for the segment.
    tooltip=['st', 'name', 'count']
).properties(
    title='Top 9 Fast Food Restaurants by State'
)

stacked_chart


In [65]:
"""
Task 10: Plot the counts (y-axis) for the top 9 fast food restaurants (x-axis)
"""
# Bar chart of location counts per restaurant, coloured by restaurant, then
# split into one column panel per state.
bars_per_restaurant = alt.Chart(top_n_state).mark_bar().encode(
    x='name:N',
    y='count:Q',
    color='name:N',
    tooltip=['name', 'count'],
)

facet_chart = (
    bars_per_restaurant
    .facet(column='st:N')
    .properties(title='Top 9 Fast Food Restaurants by State')
)

facet_chart
