In [93]:
import pandas as pd
import numpy as np
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px

In [3]:
# Set up Plotly for in-notebook rendering.
# NOTE(review): connected=True presumably pulls plotly.js from the CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode so its DataFrame plotting helpers work without
# a Plotly account.
cf.go_offline()

In [7]:
# Load the billionaires dataset; the path is relative to the notebook's
# working directory.
DATA_PATH = 'forbes_billionaires_geo.csv'
df = pd.read_csv(DATA_PATH)

In [95]:
# Column overview: dtype and non-null count per column, which shows at a glance
# which columns contain missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2755 entries, 0 to 2754
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         2755 non-null   object 
 1   NetWorth     2755 non-null   float64
 2   Country      2755 non-null   object 
 3   Source       2755 non-null   object 
 4   Rank         2755 non-null   int64  
 5   Age          2630 non-null   float64
 6   Residence    2715 non-null   object 
 7   Citizenship  2739 non-null   object 
 8   Status       2090 non-null   object 
 9   Children     1552 non-null   float64
 10  Education    1409 non-null   object 
 11  Self_made    2737 non-null   object 
 12  geometry     2755 non-null   object 
dtypes: float64(3), int64(1), object(9)
memory usage: 279.9+ KB


In [41]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,NetWorth,Rank,Age,Children
count,2755.0,2755.0,2630.0,1552.0
mean,4.74922,1345.663521,63.2673,2.978093
std,9.615358,772.669811,13.47916,1.618569
min,1.0,1.0,18.0,1.0
25%,1.5,680.0,54.0,2.0
50%,2.3,1362.0,63.0,3.0
75%,4.2,2035.0,73.0,4.0
max,177.0,2674.0,99.0,23.0


In [92]:
# Distribution of net worth, drawn with cufflinks.
# `bins` uses cufflinks' tuple form (start, end, size): 5-wide bins from 0 to 50.
hist_options = dict(
    kind="histogram",
    bins=(0, 50, 5),
    title="Histogram of Net Worth",
    xTitle="Net Worth (Billions $USD)",
    yTitle="Frequency",
    theme="pearl",
    color="blue",
    bargap=0.1,
    orientation="v",
    text="NetWorth",
)
networth_hist = df[['NetWorth']].figure(**hist_options)

# Denser tick marks on both axes make the bin edges and counts easier to read.
networth_hist.update_xaxes(nticks=20)
networth_hist.update_yaxes(nticks=20)
display(networth_hist)

In [72]:
# Ten rows with the highest net worth, largest first.
top_10_worth = df.sort_values('NetWorth', ascending=False).head(10)

# Bar chart of those ten, one bar per person.
bar_options = dict(
    kind="bar",
    x="Name",
    y="NetWorth",
    title="Net Worth of Top 10 Wealthiest Billionaires",
    xTitle="Name",
    yTitle="Net Worth (Billions $USD)",
    color="blue",
)
top_10_worth_fig = top_10_worth.figure(**bar_options)
top_10_worth_fig.update_yaxes(nticks=10)
display(top_10_worth_fig)

In [97]:
# Frequency of each Self_made value. value_counts() leaves out NaN by default,
# so rows with a missing flag are not represented in these counts.
df['Self_made'].value_counts()

True     1960
False     777
Name: Self_made, dtype: int64

In [173]:
# Per-column missing-value summary: absolute count and share of all rows,
# listed with the most incomplete columns first.
null_mask = df.isna()
null_cnts = null_mask.sum()
null_pcts = null_cnts.div(len(df)).round(3)

null_summary = pd.DataFrame({'n_null': null_cnts, 'pct_null': null_pcts})
null_summary.sort_values('n_null', ascending=False)

Unnamed: 0,n_null,pct_null
Education,1346,0.489
Children,1203,0.437
Status,665,0.241
Age,125,0.045
Residence,40,0.015
Self_made,18,0.007
Citizenship,16,0.006
Name,0,0.0
NetWorth,0,0.0
Country,0,0.0


In [177]:
# Net-worth distribution split by self-made status, one facet row per value.
# Rows with a missing Self_made flag are dropped first so that only rows with a
# known status are plotted.
df_selfmade = df.dropna(subset=['Self_made'])

fig = px.histogram(df_selfmade,
                   x="NetWorth",
                   facet_row="Self_made",
                   range_x=(0, 50),
                   # The facets are stacked as rows, so the gap between them is
                   # controlled by facet_row_spacing (facet_col_spacing has no
                   # effect when there are no column facets).
                   facet_row_spacing=0.05,
                   nbins=40,
                   title="Histogram of Net Worth by Self Made Status")

fig.update_layout(bargap=0.1)
# Single source of truth for the y-range, applied to every facet. The earlier
# range_y=(0, 1600) argument was always overridden by this call, so it was removed.
fig.update_yaxes(range=[0, 1800])
fig.update_xaxes(nticks=20)
# Optional: uncomment to shorten facet labels from "Self_made=True" to "True".
# fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
display(fig)

In [43]:
# Exploratory lookup of the cufflinks `iplot` signature and its keyword arguments.
# NOTE(review): scratch/reference cell — consider removing it before sharing the notebook.
help(df.iplot)

Help on method _iplot in module cufflinks.plotlytools:

_iplot(kind='scatter', data=None, layout=None, filename='', sharing=None, title='', xTitle='', yTitle='', zTitle='', theme=None, colors=None, colorscale=None, fill=False, width=None, dash='solid', mode='', interpolation='linear', symbol='circle', size=12, barmode='', sortbars=False, bargap=None, bargroupgap=None, bins=None, histnorm='', histfunc='count', orientation='v', boxpoints=False, annotations=None, keys=False, bestfit=False, bestfit_colors=None, mean=False, mean_colors=None, categories='', x='', y='', z='', text='', gridcolor=None, zerolinecolor=None, margin=None, labels=None, values=None, secondary_y='', secondary_y_title='', subplots=False, shape=None, error_x=None, error_y=None, error_type='data', locations=None, lon=None, lat=None, asFrame=False, asDates=False, asFigure=False, asImage=False, dimensions=None, asPlot=False, asUrl=False, online=None, **kwargs) method of pandas.core.frame.DataFrame instance
           Retur

In [46]:
# List the theme names cufflinks knows about (values usable for the `theme=`
# argument of its plotting helpers, e.g. "pearl").
cf.getThemes()

['ggplot', 'pearl', 'solar', 'space', 'white', 'polar', 'henanigans']