In [1]:
import pandas as pd
import altair as alt
import vegafusion as vf

# Switch on VegaFusion's widget integration for the Altair charts built below.
vf.enable_widget()
# Explicitly select Altair's 'default' renderer. This is the last renderer call in the
# cell, so the statement order matters.
# NOTE(review): presumably enable_widget() registers its own renderer, which this line
# would then replace — confirm that 'default' is the intended final renderer.
alt.renderers.enable('default')

RendererRegistry.enable('default')

In [2]:
# Load the cleaned billionaires dataset straight from the project's GitHub repository.
billionaires_raw = pd.read_csv("https://raw.githubusercontent.com/lhuang20/DSCI320-Dashboard/main/billionaires_clean.csv")

# Discard the first column by position (presumably a saved row index — verify against
# the CSV), then drop every row that has a missing value.
# The chained .dropna() returns a new frame rather than mutating the iloc slice in
# place, which avoids a pattern that can trigger SettingWithCopyWarning and keeps the
# cell idempotent on re-run.
billionaires = billionaires_raw.iloc[:, 1:].dropna()

# Preview the first five cleaned rows.
billionaires.head(5)

Unnamed: 0,finalWorth,age,industries,grouped_industries,personName,gender,birthYear,country,continent,selfMade
0,211000000,74.0,Fashion & Retail,Entertainment & Leisure,Bernard Arnault & family,M,1949.0,France,Europe,False
1,180000000,51.0,Automotive,Industrial & Manufacturing,Elon Musk,M,1971.0,United States,North America,True
2,114000000,59.0,Technology,Technology & Telecommunications,Jeff Bezos,M,1964.0,United States,North America,True
3,107000000,78.0,Technology,Technology & Telecommunications,Larry Ellison,M,1944.0,United States,North America,True
4,106000000,92.0,Finance & Investments,Finance & Real Estate,Warren Buffett,M,1930.0,United States,North America,True


### Task 1: Is there a discernible trend between net worth and age? How is this different between genders?


In [3]:
# Drag-to-select rectangle; points outside it are greyed out by the colour condition below.
brush = alt.selection_interval()
# Point selection on the 'gender' field, driven by clicks on the colour legend.
point = alt.selection_point(fields=['gender'], bind='legend')

agewealthscatter = (
    alt.Chart(
        billionaires,
        title=alt.Title(
            'Total Net Worth ($USD) vs Age for each gender',
            subtitle="Task 1: Is there a discernible trend between net worth and age? How is this different between genders?",
        ),
    )
    .mark_circle(size=30)
    .encode(
        x=alt.X('age:Q', scale=alt.Scale(domain=[18, 101])).title('Age'),
        # Net worth is drawn on a log scale.
        y=alt.Y('finalWorth:Q', scale=alt.Scale(type="log")).title('Total Net Worth (Thousands of $USD)'),
        # Inside the brush: colour by gender (set1 scheme). Outside: light grey.
        color=alt.condition(brush, alt.Color('gender:N').scale(scheme="set1"), alt.value('lightgrey')),
        # Points matching the legend selection stay visible; the rest become fully transparent.
        opacity=alt.condition(point, alt.value(0.6), alt.value(0)),
        tooltip=[
            alt.Tooltip('personName:N', title="Person Name"),
            alt.Tooltip('age:Q', title="Age"),
            # Same unit wording as the y-axis title, since both show the raw finalWorth value.
            alt.Tooltip('finalWorth:Q', title="Net Worth (Thousands of $USD)"),
        ],
    )
    .add_params(brush, point)
    .properties(width=350, height=200)
)
agewealthscatter

### Task 2: How do industries compare globally within continents, and is there a discernible trend in the relationship between industry and location?


In [4]:
# Heatmap of record counts for every (continent, industry group) pair.
# The chart both registers the interval selection `brush` and filters its rows by it.
agewealthheat = (
    alt.Chart(
        billionaires,
        title=alt.Title(
            'Heatmap of Industries vs Continent',
            subtitle="Task 2: How do industries compare globally within continents, and is there a discernible trend in the relationship between industry and location?",
        ),
    )
    .mark_rect()
    .encode(
        x=alt.X('continent:N', title='Continent'),
        y=alt.Y('grouped_industries:N', title='Industry Group'),
        # Cell colour encodes the number of records, using the "blues" scheme.
        color=alt.Color('count():Q', scale=alt.Scale(scheme="blues")),
        tooltip=[alt.Tooltip('count():Q')],
    )
    .add_params(brush)
    .transform_filter(brush)
    .properties(width=300, height=200)
)
agewealthheat

### Task 3: What is the distribution of billionaires in terms of age and gender for each industry?


In [5]:
# Industry groups offered by the radio control; the extra None option is labelled 'All'.
industry_options = ['Entertainment & Leisure', 'Finance & Real Estate', 'Healthcare', 'Industrial & Manufacturing', 'Technology & Telecommunications']
# Each label carries a trailing space, as in the option text shown next to the radio buttons.
industry_labels = [name + ' ' for name in industry_options]

industry_radio = alt.binding_radio(options=industry_options + [None], labels=industry_labels + ['All'], name='Industry: ')
industry_selection = alt.selection_point(fields=['grouped_industries'], bind=industry_radio)


def _age_histogram(gender_code, bar_color, panel_title, industry_param):
    """Build the binned age histogram for a single gender.

    Parameters
    ----------
    gender_code : str
        Value of the ``gender`` column to keep (the notebook uses 'M' and 'F').
    bar_color : str
        Fill colour for the bars.
    panel_title : str
        Title shown above this panel.
    industry_param : alt.Parameter
        Selection that is both registered on the chart and used to filter its rows.

    Returns
    -------
    alt.Chart
        Bar chart of record counts per age bin, built from the notebook-level
        ``billionaires`` frame.
    """
    return alt.Chart(billionaires).mark_bar(color=bar_color).transform_filter(
        alt.datum.gender == gender_code
    ).encode(
        x=alt.X("age:Q", scale=alt.Scale(clamp=True)).bin(maxbins=20).title("Age"),
        y=alt.Y("count():Q", axis=alt.Axis(domain=False, grid=False))
    ).properties(
        title=panel_title
    ).add_params(
        industry_param
    ).transform_filter(
        industry_param
    )


# One panel per gender, generated by the same helper so the two cannot drift apart.
male = _age_histogram('M', '#1f77b4', "Male", industry_selection)
female = _age_histogram('F', 'red', "Female", industry_selection)

# Side-by-side panels with shared x and y scales.
distribution1 = alt.hconcat(male, female).resolve_scale(y="shared", x="shared").properties(
    title=alt.Title(
        "Age Distribution of Billionaires for each Industry Group",
        subtitle="Task 3: What is the distribution of billionaires in terms of age and gender for each industry?",
    )
)
distribution1

### Task 4: Which birth years have resulted in billionaires with the highest total net worth? Is this similar between genders?


In [6]:
# Click selection on industry group: the clicked group stays opaque, the others fade.
single = alt.selection_point(fields=['grouped_industries'])

# Gender radio control; the extra None option is labelled 'All'.
gender_options = ['M', 'F']
gender_labels = [code + ' ' for code in gender_options]

gender_radio = alt.binding_radio(options=gender_options + [None], labels=gender_labels + ['All'], name='Gender: ')
gender_selection = alt.selection_point(fields=['gender'], bind=gender_radio)

# Unit label for summed finalWorth. The Task 1 scatter plots the same column as
# "Thousands of $USD", so the aggregate here is labelled with that unit as well
# (the previous "Millions" title and '$' tooltip prefix did not match it).
worth_unit_title = "Total Net Worth (Thousands of $USD)"

scatter1 = (
    alt.Chart(
        billionaires,
        title=alt.Title(
            "Billionaire Net Worth across Birth Year and Industry Groups",
            subtitle="Task 4: Which birth years have resulted in billionaires with the highest total net worth? Is this similar between genders?",
        ),
    )
    .mark_circle(
        size=100,
        stroke='black',
        strokeWidth=1,
        strokeOpacity=0.4,
    )
    .encode(
        # 'd' format prints birth years as plain integers.
        x=alt.X('birthYear:Q', axis=alt.Axis(format='d'), scale=alt.Scale(domain=[1921, 2004])).title("Birth Year"),
        y=alt.Y('grouped_industries:N').title("Industry Group"),
        # Bubble area encodes the summed net worth for each (birth year, industry) pair.
        size=alt.Size('sum(finalWorth):Q', scale=alt.Scale(range=[1, 1000])).title(worth_unit_title).legend(format="s"),
        color=alt.Color('grouped_industries:N').legend(None),
        opacity=alt.condition(single, alt.value(0.8), alt.value(0.1)),
        tooltip=[
            alt.Tooltip("birthYear:Q", title="Birth Year"),
            # No '$' prefix: the number is a count of thousands of dollars, not dollars.
            alt.Tooltip("sum(finalWorth):Q", format='~s', title=worth_unit_title),
        ],
    )
    .properties(width=800, height=300)
    .add_params(gender_selection, single)
    # Keep only rows that match the gender radio selection.
    .transform_filter(gender_selection)
    .interactive()
)
scatter1

In [7]:
# Top row: the age/net-worth scatter next to the industry/continent heatmap.
plotdash = alt.hconcat(agewealthscatter, agewealthheat)

# Stack the rows vertically: top row, then the age histograms, then the birth-year chart.
dash1 = plotdash & distribution1
dashboard = (
    (dash1 & scatter1)
    .configure_axisY(domain=False, ticks=False, offset=10)
    .configure_axisX(grid=False, ticks=False)
    .configure_view(stroke=None)
    # Size scales are resolved independently across the combined views.
    .resolve_scale(size="independent")
    .properties(title="Dashboard of Billionaire Wealth and Demographic Factors")
)

# Only the last expression of a cell is displayed, so the assembled dashboard is the
# single output here (the former mid-cell `plotdash` expression showed nothing).
dashboard