In [17]:
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
import seaborn as sns

In [4]:
# infile = Path(__file__).parent / "bill_stats.csv"
df = pd.read_csv("bill_stats.csv")
df.replace(
    {
        np.nan: 0
    }, inplace=True)

df = df.astype(
    {
        'category': 'category',
        'status': 'category',
        'gender': 'category',
        'birthDate': 'datetime64[ns]',
        'date': 'datetime64[ns]',
        'age': 'int',
        'birthYear': 'int',
        'birthMonth': 'int',
        'birthDay': 'int',
        'population_country': 'int'
    }
)
df['gdp_country'] = df['gdp_country'] \
    .str.replace('$', '').str.replace(',', '').str.strip()
df['gdp_country'] = pd.to_numeric(df['gdp_country'])
df['gdp_country'] = df['gdp_country'].fillna(0)
df['gdp_country'] = df['gdp_country'].astype('int')
df['nBillionairesCtr'] = df.groupby(
    'country')['country'].transform('count')
df['totalWealthCtr'] = df.groupby('country')['finalWorth'].transform('sum')
df['meanWealthCtr'] = df.groupby('country')['finalWorth'].transform('mean')
df['nBillionairesIndy'] = df.groupby(
    'industries')['industries'].transform('count')
df['totalWealthCtrIndy'] = df.groupby(
    'industries')['finalWorth'].transform('sum')
df['meanWealthCtrIndy'] = df.groupby(
    'industries')['finalWorth'].transform('mean')
df['birthDecade'] = pd.cut(df['birthYear'], bins=range(
    1920, 2005, 10), include_lowest=True, right=False)
df['birthDecade'] = df['birthDecade'].astype('str')
df['birthDecade'] = df['birthDecade'].str.replace(
    '[', '').str.replace(',', ' -').str.replace(')', '')
df['birthDecade'] = df['birthDecade'].replace('2000+', np.nan)
df = df.drop(columns=["category", "organization",
             "status", "lastName", "firstName", "title"], axis=1)


In [5]:
# USA Dataset
data_usa = df.copy()
df_usa = data_usa[(data_usa["country"] == "United States") | (
        data_usa["countryOfCitizenship"] == "United States")]

df_usa[["city", "state", "residenceStateRegion"]] = df[[
        "city", "state", "residenceStateRegion"]].replace({0: "Unknown"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_usa[["city", "state", "residenceStateRegion"]] = df[[


In [7]:
# mapfile_base = Path(__file__).parent / \
#         "./tl_2023_us_state/tl_2023_us_state.shp"
# datafile_us_pop = Path(__file__).parent / \
#         "./datasets/NST-EST2023-ALLDATA.csv"
# datafile_us_gdp = Path(__file__).parent / "./datasets/us_state_gdp.csv"

datafile_us = data_usa.copy()
df_mapfile_base = gpd.read_file("./tl_2023_us_state/tl_2023_us_state.shp")
df_us_pop = pd.read_csv("./datasets/NST-EST2023-ALLDATA.csv")
df_us_gdp = pd.read_csv("./datasets/us_state_gdp.csv", sep=";")
df_us = datafile_us

# Data processing
df_us["nBillionaires"] = df_us.groupby('state', observed=False)[
    'state'].transform('count')
df_us['totalWealth'] = df_us.groupby('state', observed=False)[
    'finalWorth'].transform('sum')
df_us['meanWealth'] = df_us.groupby('state', observed=False)[
    'finalWorth'].transform('mean')
df_us["nBillionaires"] = df_us["nBillionaires"].fillna(0).astype('int')
df_to_join_us_state = df_us[[
    'state', 'nBillionaires', 'totalWealth', 'meanWealth']].copy()

df_to_join_us_state.dropna(inplace=True)
states_grouped = df_to_join_us_state.drop_duplicates().reset_index()
states_grouped.rename(columns={'index': 'origin_idx'}, inplace=True)

pop_us_states_to_merge = df_us_pop[['NAME', 'POPESTIMATE2023']].copy()
states_grouped_first = states_grouped.merge(
    pop_us_states_to_merge, left_on='state', right_on='NAME', how='left')
states_grouped_first.drop(['origin_idx'], axis=1, inplace=True)
df_us_gdp['State'] = df_us_gdp['State'].str.lstrip().str.rstrip()
states_grouped_first['state'] = states_grouped_first['state'].str.lstrip(
).str.rstrip()

states_grouped_first = states_grouped_first.merge(
    df_us_gdp, left_on='state', right_on='State', how='left')
states_grouped_first.drop(['State'], axis=1, inplace=True)
df_mapfile_base['NAME'] = df_mapfile_base['NAME'].str.lstrip().str.rstrip()
states_grouped_first['state'] = states_grouped_first['state'].str.lstrip(
).str.rstrip()

states_grouped_first.rename(columns={'totalWealth': 'billTotalWealth',
                                     'meanWealth': 'billMeanWealth'},
                            inplace=True)
shp_us_states_economics = df_mapfile_base.merge(
    states_grouped_first, left_on='NAME', right_on='state', how='left')

# shp_us_states_econ_no_hawaii = shp_us_states_economics.drop(shp_us_states_economics.loc[shp_us_states_economics['NAME_y'] == 'Hawaii'].index).copy()
# drop state of hawaii to fit the US mainland map in full extent
# shp_us_states_economics.drop('NAME_y', axis=1, inplace=True)
# change to shp_us_states_econ_no_hawaii instead if that dataframe is to be used

In [9]:
shp_us_states_economics.columns

Index(['REGION', 'DIVISION', 'STATEFP', 'STATENS', 'GEOID', 'GEOIDFQ',
       'STUSPS', 'NAME_x', 'LSAD', 'MTFCC', 'FUNCSTAT', 'ALAND', 'AWATER',
       'INTPTLAT', 'INTPTLON', 'geometry', 'state', 'nBillionaires',
       'billTotalWealth', 'billMeanWealth', 'NAME_y', 'POPESTIMATE2023',
       '2022r', '2023r', 'gdp_2023_q4'],
      dtype='object')

In [18]:
pd.set_option("display.max_columns", None)
sns.set_style('darkgrid') 

In [19]:
shp_us_states_economics.head()

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,GEOIDFQ,STUSPS,NAME_x,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,state,nBillionaires,billTotalWealth,billMeanWealth,NAME_y,POPESTIMATE2023,2022r,2023r,gdp_2023_q4
0,3,5,54,1779805,54,0400000US54,WV,West Virginia,0,G4000,A,62266499712,489003081,38.6472854,-80.6183274,"POLYGON ((-77.75438 39.33346, -77.75422 39.333...",,,,,,,,,
1,3,5,12,294478,12,0400000US12,FL,Florida,0,G4000,A,138963763779,45970528648,28.3989775,-82.5143005,"MULTIPOLYGON (((-83.10874 24.62949, -83.10711 ...",Florida,94.0,382200.0,4065.957447,Florida,22610726.0,1465281.0,1600811.0,1642249.0
2,2,3,17,1779784,17,0400000US17,IL,Illinois,0,G4000,A,143778366814,6216688589,40.1028754,-89.1526108,"POLYGON ((-87.89243 38.28285, -87.89334 38.282...",Illinois,24.0,102500.0,4270.833333,Illinois,12549689.0,1040353.0,1098346.0,1114360.0
3,2,4,27,662849,27,0400000US27,MN,Minnesota,0,G4000,A,206244555303,18937471947,46.3159573,-94.1996043,"POLYGON ((-95.31991 48.99892, -95.31778 48.998...",Minnesota,5.0,7900.0,1580.0,Minnesota,5737915.0,454993.0,483162.0,492823.0
4,3,5,24,1714934,24,0400000US24,MD,Maryland,0,G4000,A,25151736098,6979330958,38.9466584,-76.6744939,"POLYGON ((-75.75600 39.24607, -75.75579 39.243...",Maryland,10.0,34700.0,3470.0,Maryland,6180253.0,484908.0,515607.0,525753.0


In [10]:
shp_us_states_economics_head = shp_us_states_economics.head()

In [12]:
shp_json = shp_us_states_economics_head.to_json()

In [13]:
shp_json

'{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"REGION": "3", "DIVISION": "5", "STATEFP": "54", "STATENS": "01779805", "GEOID": "54", "GEOIDFQ": "0400000US54", "STUSPS": "WV", "NAME_x": "West Virginia", "LSAD": "00", "MTFCC": "G4000", "FUNCSTAT": "A", "ALAND": 62266499712, "AWATER": 489003081, "INTPTLAT": "+38.6472854", "INTPTLON": "-080.6183274", "state": null, "nBillionaires": null, "billTotalWealth": null, "billMeanWealth": null, "NAME_y": null, "POPESTIMATE2023": null, "2022r": null, "2023r": null, "gdp_2023_q4": null}, "geometry": {"type": "Polygon", "coordinates": [[[-77.754376, 39.333461], [-77.754219, 39.333421], [-77.754233, 39.333361], [-77.753947, 39.333344], [-77.753616, 39.333349], [-77.753078, 39.333356], [-77.752533, 39.333365], [-77.751968, 39.33331], [-77.75153, 39.333233], [-77.751476, 39.333219], [-77.751078, 39.333117], [-77.750961, 39.333071], [-77.750676, 39.332959], [-77.750378, 39.332842], [-77.749824, 39.332617], [-77.

In [14]:
print(shp_json)

{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"REGION": "3", "DIVISION": "5", "STATEFP": "54", "STATENS": "01779805", "GEOID": "54", "GEOIDFQ": "0400000US54", "STUSPS": "WV", "NAME_x": "West Virginia", "LSAD": "00", "MTFCC": "G4000", "FUNCSTAT": "A", "ALAND": 62266499712, "AWATER": 489003081, "INTPTLAT": "+38.6472854", "INTPTLON": "-080.6183274", "state": null, "nBillionaires": null, "billTotalWealth": null, "billMeanWealth": null, "NAME_y": null, "POPESTIMATE2023": null, "2022r": null, "2023r": null, "gdp_2023_q4": null}, "geometry": {"type": "Polygon", "coordinates": [[[-77.754376, 39.333461], [-77.754219, 39.333421], [-77.754233, 39.333361], [-77.753947, 39.333344], [-77.753616, 39.333349], [-77.753078, 39.333356], [-77.752533, 39.333365], [-77.751968, 39.33331], [-77.75153, 39.333233], [-77.751476, 39.333219], [-77.751078, 39.333117], [-77.750961, 39.333071], [-77.750676, 39.332959], [-77.750378, 39.332842], [-77.749824, 39.332617], [-77.7