In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from matplotlib.pyplot import figure
import math

# Set the resolution used for every figure rendered in this notebook.
plt.rcParams['figure.dpi'] = 200

# Load the dataset. The path is an absolute Kaggle input path; adjust it when running elsewhere.
data=pd.read_csv("/kaggle/input/forbes-billionaires-of-2021/Billionaire.csv")
# data  # uncomment to preview the loaded frame

# Column subsets used when printing all columns is unnecessary.
# NOTE(review): assumes the CSV provides every column named below — confirm against the file.
column_filtering_one = ['Name', 'NetWorth', 'Source', 'Rank', 'Industry']
column_filtering_two = ['Name', 'NetWorth', 'Source', 'Industry']
column_filtering_three = ['NetWorth', 'Source', 'Industry']
column_filtering_country = ['Name', 'NetWorth', 'Source', 'Country']


In [None]:
def get_person(data, name = ''):
    """Return the rows of ``data`` whose ``Name`` contains ``name``.

    The match is a literal (non-regex), case-sensitive substring test.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Name`` column of strings.
    name : str, optional
        Substring to search for. The default ``''`` matches every row that
        has a non-missing name.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels.
    """
    # na=False: a missing Name counts as "no match". Without it the mask can
    # contain NaN and the boolean indexing below raises ValueError.
    matches = data['Name'].str.contains(name, regex=False, na=False)
    return data[matches]

def get_from_country(data, name = ''):
    """Select the rows of ``data`` whose ``Country`` is exactly ``name``.

    The comparison is an equality test, so it is case-sensitive and does not
    match substrings. Returns the matching rows as a DataFrame.
    """
    country_mask = data['Country'] == name
    return data[country_mask]

def count_values(data, value = ''):
    """Tally how often each distinct entry occurs in column ``value`` of ``data``.

    Returns whatever ``value_counts()`` produces for that column: the distinct
    entries as the index and their occurrence counts as the values.
    """
    column = data[value]
    return column.value_counts()

def net_worth_more_than(data, networth = 1):
    """Keep the rows of ``data`` whose ``NewNetWorth`` is at least ``networth``.

    Note that, despite the function name, the bound is inclusive (``>=``):
    rows exactly equal to ``networth`` are returned too.
    """
    meets_threshold = data['NewNetWorth'] >= networth
    return data[meets_threshold]

def get_people_multiple_sources(data):
    """Return the rows of ``data`` whose ``Source`` lists more than one source.

    A row qualifies when its ``Source`` string contains a literal comma.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Source`` column of strings.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels.
    """
    # na=False: a missing Source counts as "not multiple". Without it the mask
    # can contain NaN and the boolean indexing below raises ValueError.
    has_comma = data["Source"].str.contains(",", regex=False, na=False)
    return data[has_comma]

def get_from_industry(data, name = ''):
    """Select the rows of ``data`` whose ``Industry`` is exactly ``name``.

    The comparison is an equality test (case-sensitive, no substring match).
    Returns the matching rows as a DataFrame.
    """
    industry_mask = data['Industry'] == name
    return data[industry_mask]

def get_industries_country(data, country="United States"):
    """Print the summed ``NewNetWorth`` of each industry within one country.

    Rows are first restricted to those whose ``Country`` equals ``country``;
    then one line is printed per distinct ``Industry`` value, in order of
    first appearance. Nothing is returned.
    """
    in_country = data[data['Country'] == country]
    industry_column = in_country["Industry"]
    for industry_name in industry_column.unique():
        worths = in_country.loc[industry_column == industry_name, "NewNetWorth"]
        print(industry_name, ":", sum(worths), "billion")

def get_industry_networth(data):
    """Print the total ``NewNetWorth`` for every distinct industry in ``data``.

    For each ``Industry`` value (in order of first appearance) the matching
    rows are fetched with ``get_from_industry``, their ``NewNetWorth`` values
    are summed, and the total is printed with thousands separators, followed
    by a ``---`` separator line. Nothing is returned.
    """
    for industry_name in data['Industry'].unique():
        members = get_from_industry(data, industry_name)
        total = float(sum(members["NewNetWorth"]))
        print(industry_name, "networth: {:,}".format(total), " billions")
        print("---")

def billionaires_in_industry(data, name = ''):
    """Count the rows of ``data`` whose ``Industry`` is exactly ``name``."""
    in_industry = data[data['Industry'] == name]
    return in_industry.shape[0]

In [None]:
# Occurrences of each distinct Source value (entries listing several sources count as their own value)
sources = count_values(data, "Source")
# Convert the string NetWorth values to numbers in a new column, without affecting the original column.
# regex=True is explicit on purpose: pandas >= 2.0 treats the pattern as a literal
# string by default, which would strip nothing and make pd.to_numeric fail.
# The trailing strip removes whitespace left behind once the letters and "$" are gone.
data['NewNetWorth'] = pd.to_numeric(
    data['NetWorth'].str.replace(r"[a-zA-Z\$]", '', regex=True).str.strip()
)

# All people with multiple sources
people_multiple_sources = get_people_multiple_sources(data)

# All people from <country>
from_us = get_from_country(data, 'United States')
from_china = get_from_country(data, 'China')

# All people from <country> with multiple sources
from_us_multiple_sources = get_people_multiple_sources(from_us)
from_china_multiple_sources = get_people_multiple_sources(from_china)

In [None]:
# People with multiple sources whose NewNetWorth is >= 2 (the bound is inclusive),
# limited to the Name / NetWorth / Source / Industry columns.
# NOTE(review): NewNetWorth is presumably expressed in billions of dollars — confirm against the CSV.
print(net_worth_more_than(people_multiple_sources, 2)[column_filtering_two])

In [None]:
# People from the United States whose NewNetWorth is >= 20 (inclusive bound),
# limited to the Name / NetWorth / Source / Country columns.
net_worth_more_than(from_us, 20)[column_filtering_country]

In [None]:
# People from China whose NewNetWorth is >= 20 (inclusive bound),
# limited to the Name / NetWorth / Source / Country columns.
net_worth_more_than(from_china, 20)[column_filtering_country]

In [None]:
# Multiplier applied to the summed NewNetWorth before printing.
# NOTE(review): presumably converts billions to plain dollars — confirm NewNetWorth's unit.
billion_dollars_number = 1000000000
# Sum NewNetWorth per country, scale it, and print it with thousands separators.
print("US Billionaire Total Networth: ","{:,}".format(sum(from_us["NewNetWorth"])*billion_dollars_number))
print("China Billionaire Total Networth: ", "{:,}".format(sum(from_china["NewNetWorth"])*billion_dollars_number))

In [None]:
# Rows whose Name contains the literal substring 'Jeff' (not treated as a regex)
get_person(data, 'Jeff')

In [None]:
# Number of rows for each distinct Industry value
industries = count_values(data, 'Industry')
# Bare expression on the last line so the notebook displays the counts
industries

In [None]:
# CODE FROM https://community.dataquest.io/t/how-to-make-comical-visualizations-explained-using-netflix-movie-and-tv-show-dataset/553826
#
# Draws a 3x3 grid of horizontal bar charts, one panel per country, showing
# how many rows fall into each of that country's most common industries.

# Column whose (possibly ", "-separated) entries are tallied for each country.
col = "Industry"
# One bar colour and one country per panel; both lists are indexed by panel position.
colours = ["violet", "cornflowerblue", "darkseagreen", "mediumvioletred", "blue", "mediumseagreen", "darkmagenta", "darkslateblue", "seagreen"]
countries_list = ["United States", "India", "United Kingdom", "Japan", "France", "Canada", "Spain", "Germany", "Iceland"]

# Work on a copy so the helper column added below never touches `data`.
df = data.copy()

with plt.xkcd():
    figure(num=None, figsize=(20, 12))
    # 1-based subplot position; x-1 also selects the bar colour for the panel.
    x=1
    for country in countries_list:
        # Flag rows whose Country contains the country name (case-insensitive; missing -> "").
        # The lambda argument is named `c`, not `x`, so it does not shadow the panel counter.
        df["from_country"] = df['Country'].fillna("").apply(lambda c : 1 if country.lower() in c.lower() else 0)
        small = df[df["from_country"] == 1]
        genre = ", ".join(small[col].fillna("")).split(", ")
        # change most_common here if you want more or less industries
        tags = Counter(genre).most_common(10)
        # Drop the empty-string bucket produced by missing values / an empty selection.
        tags = [(tag, count) for tag, count in tags if "" != tag]
        # Reverse so the most common industry ends up at the top of the barh chart.
        labels = [tag+"  " for tag, _ in tags][::-1]
        values = [count for _, count in tags][::-1]
        plt.subplot(3, 3, x)
        # A country with no matching rows has nothing to draw: leave the panel
        # empty (title only) instead of crashing on max([]).
        if values:
            peak = max(values)
            # Pick a tick spacing that suits the size of the largest bar.
            if peak>200:
                values_int = range(0, math.ceil(peak), 100)
            elif peak>100:
                values_int = range(0, math.ceil(peak)+50, 50)
            else:
                values_int = range(0, math.ceil(peak)+25, 25)
            plt.barh(labels,values, color = colours[x-1])
            plt.xticks(values_int)
        plt.title(country)
        x+=1
    plt.suptitle('Top Industries: Number Of Billionaires In Each Industry')
    plt.tight_layout()
    plt.show()

In [None]:
# Per-industry net worth report for Spain.
# Pass `data` or `from_us` to get_industry_networth instead to report on those rows.
from_spain = get_from_country(data, "Spain")
get_industry_networth(from_spain)

In [None]:
## Number of Spanish billionaires in the industry, followed by their full rows
print(billionaires_in_industry(from_spain, "Fashion & Retail"))
print(get_from_industry(from_spain, 'Fashion & Retail'))

In [None]:
# Print the summed NewNetWorth of each industry for rows whose Country is "Iceland"
get_industries_country(data, "Iceland")