In [None]:
# Importing the required libraries and dependencies
import os
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import hvplot.pandas
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from pathlib import Path
from dotenv import load_dotenv
import sqlalchemy

In [None]:
# Load variables from a local .env file into the process environment; the
# next cell reads the Mapbox token from there via os.getenv.
load_dotenv()

In [None]:
# Look up the Mapbox token in the MAPBOX_API_ACCESS_TOKEN environment variable.
mapbox_api_access_token = os.environ.get("MAPBOX_API_ACCESS_TOKEN")

# Register the token with Plotly Express for the scatter_mapbox figures.
px.set_mapbox_access_token(mapbox_api_access_token)

# Show the token's type instead of its value: `str` means the variable was
# found, `NoneType` means it is not set.
type(mapbox_api_access_token)

In [None]:
# Dataset 1: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Age" column.
data_one_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/ONE_UI_data-15z92.csv"))
    .dropna()
    .set_index("Age")
)
display(data_one_df.head(10))

In [None]:
# Fig. 1: bar chart of the "Millenials/Generation X" column, one bar per
# value of the "Age" index.
data_one_df.hvplot.bar(
    x="Age",
    y="Millenials/Generation X",
    title="Fig. 1: Millenial Families Compared to Older Generations",
    xlabel="Age Group",
    ylabel="% Difference in Average Wealth by Age",
    hover_color="teal",
    rot=90,
    width=600,
    height=300,
)

In [None]:
# Dataset 2: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Year" column.
data_two_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/TWO_UI_YIOWz.csv"))
    .dropna()
    .set_index("Year")
)
display(data_two_df.head(10))

In [None]:
# Plot of dataset 2
# Fig. 2: line chart against the "Year" index.
# `y` is intentionally not passed: an empty string does not name any column,
# whereas omitting `y` lets hvPlot draw one line per numeric column.
data_two_df.hvplot.line(
    xlabel="Year",
    ylabel="Debt in Billions",
    x="Year",
    title="Fig. 2: Education Loan Debt Increasing",
    rot=90,
    height=300,
    width=600,
)

In [None]:
# Dataset 3: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Asset Type" column.
data_three_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/THREE_UI_cIWas.csv"))
    .dropna()
    .set_index("Asset Type")
)
display(data_three_df.head(10))


In [None]:
# Plot of dataset 3
# Fig. 3: grouped bar chart.
# `x` and `y` are intentionally not passed: an empty string does not name any
# column. With both omitted hvPlot uses the frame's index ("Asset Type") for
# the categories and draws a bar for every numeric column.
data_three_df.hvplot.bar(
    xlabel="Asset Type",
    ylabel="Percent",
    title="Fig. 3: Millenial Wealth Compared to All Adults.",
    height=400,
    width=800,
    rot=90,
).opts(yrotation=90)  # rotate the y-axis tick labels as well


In [None]:
# Dataset 4: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Asset Type" column.
data_four_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/FOUR_UI_data_W2cix.csv"))
    .dropna()
    .set_index("Asset Type")
)
display(data_four_df.head())

In [None]:
# Plot of dataset 4
# Fig. 4: grouped bar chart.
# `x` and `y` are intentionally not passed: an empty string does not name any
# column. With both omitted hvPlot uses the frame's index ("Asset Type") for
# the categories and draws a bar for every numeric column.
data_four_df.hvplot.bar(
    xlabel="Asset Type",
    ylabel="Percent",
    title="Fig. 4: Proportions of Debt",
    height=300,
    width=600,
    rot=90,
).opts(yrotation=90)  # rotate the y-axis tick labels as well


In [None]:
# Dataset 5
data_five_df = pd.read_csv(
    Path("./Resources_Project_1/Data_Files/FIVE_UI_data-Z8QeE.csv"),
)
# Rows are left unfiltered: the Fig. 5 cell selects the first three rows by
# position (`iloc[0:3]`), so dropping rows here could change what it plots.
# (A bare `isnull().dropna()` expression would be a no-op: it builds a boolean
# mask, which never contains NaN, and its result is not assigned.)
display(data_five_df.head())
display(data_five_df.tail())

In [None]:
# Fig. 5: horizontal bar chart of the first three rows, categorised by the
# "Purchase" column, with the legend placed on the right.
data_five_df.iloc[:3].hvplot(
    x="Purchase",
    kind="barh",
    legend="right",
    title="Fig. 5: Cost of Subprime Credit Score",
)

In [None]:
# Dataset 6
data_six_df = pd.read_csv(
    Path("./Resources_Project_1/Data_Files/SIX_UI_data_rInqo.csv"),
)
# Rows are left unfiltered: the Fig. 6 cell selects the first three rows by
# position (`iloc[0:3]`), so dropping rows here could change what it plots.
# (A bare `isnull().dropna()` expression would be a no-op: it builds a boolean
# mask, which never contains NaN, and its result is not assigned.)
display(data_six_df.head())
display(data_six_df.tail())

In [None]:
# Fig. 6: horizontal bar chart of the first three rows, categorised by the
# "Age Group" column.
data_six_df.iloc[:3].hvplot(
    x="Age Group",
    kind="barh",
    title="Fig. 6: Subprime Credit Score Increases with Age",
    xlabel="Age Group",
    ylabel="Credit Score",
    legend=True,
    width=500,
    height=300,
)

In [None]:
# Dataset 7: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Age Group" column.
data_seven_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/SEVEN_UI_data_riRjx.csv"))
    .dropna()
    .set_index("Age Group")
)

# Preview both ends of the cleaned frame.
display(data_seven_df.head(), data_seven_df.tail())

In [None]:
# Plot dataset 7
# Fig. 7: grouped bar chart against the "Age Group" index.
# `y` is intentionally not passed: an empty string does not name any column,
# whereas omitting `y` lets hvPlot draw a bar for every numeric column.
# NOTE(review): an earlier note listed "Communities of color" and
# "Majority-white communities" as the relevant factors - presumably column
# names; confirm against the CSV before restricting the plot with `y=[...]`.
data_seven_df.hvplot.bar(
    xlabel="Age Group",
    ylabel="Share of Subprime Credit",
    x="Age Group",
    title="Fig. 7: Share of Subprime Credit Increases with Age",
    hover_color="orange",
    rot=90,
    height=400,
    width=600,
)

In [None]:
# Dataset 8: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Age Group" column.
data_eight_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/EIGHT_UI_data_QcJ1F.csv"))
    .dropna()
    .set_index("Age Group")
)

# Preview both ends of the cleaned frame.
display(data_eight_df.head(), data_eight_df.tail())

In [None]:
# Plot of dataset 8
# Fig. 8: grouped bar chart against the "Age Group" index.
# `y` is intentionally not passed: an empty string does not name any column,
# whereas omitting `y` lets hvPlot draw a bar for every numeric column.
data_eight_df.hvplot.bar(
    xlabel="Age Group",
    ylabel="Median Credit Score by Race",
    x="Age Group",
    title="Fig. 8: Persistent Racial Differences in Credit Score Among Millenials",
    hover_color="orange",
    rot=90,
    height=300,
    width=600,
)

In [None]:
# Dataset 9: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Year" column.
data_nine_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/NINE_UI_data-wGEIn.csv"))
    .dropna()
    .set_index("Year")
)

# Preview both ends of the cleaned frame.
display(data_nine_df.head(), data_nine_df.tail())

In [None]:
# Plot of dataset 9
# Fig. 9: line chart against the "Year" index.
# `y` is intentionally not passed: an empty string does not name any column,
# whereas omitting `y` lets hvPlot draw one line per numeric column.
# NOTE(review): the y-axis label is identical to Fig. 2's ("Debt in
# Billions"); confirm it describes this dataset's values.
data_nine_df.hvplot.line(
    xlabel="Year",
    ylabel="Debt in Billions",
    x="Year",
    title="Fig. 9: The Racial Wealth Gap Grows Sharply with Age",
    hover_color="orange",
    rot=90,
    height=400,
    width=700,
)

In [None]:
# Dataset 10: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Age Group" column.
data_ten_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/TEN_UI_data-okYos.csv"))
    .dropna()
    .set_index("Age Group")
)

# Preview both ends of the cleaned frame.
display(data_ten_df.head(), data_ten_df.tail())

In [None]:
# Plot of dataset 10
# Fig. 10: grouped bar chart.
# `x` and `y` are intentionally not passed: an empty string does not name any
# column. With both omitted hvPlot uses the frame's index ("Age Group") for
# the categories and draws a bar for every numeric column.
data_ten_df.hvplot.bar(
    xlabel="Age Group",
    ylabel="Communities",
    title="Fig. 10: Delinquency Increases with Age",
    hover_color="orange",
    rot=90,
    height=400,
    width=500,
)

In [None]:
# Dataset 11: keep the raw frame as read from disk, and derive a copy that
# is ordered by "ID" and keyed by that same column.
data_eleven_df = pd.read_csv(
    Path("./Resources_Project_1/Data_Files/ELEVEN_UI_data_MxXI1.csv")
)
data_eleven_df_sorted = (
    data_eleven_df
    .sort_values("ID")
    .set_index("ID")
)

display(data_eleven_df_sorted.head(10))


In [None]:
# State coordinates: read with the state code as the index, then move it
# straight back into the columns so that it becomes the first column.
usa_states_coordinates_df = pd.read_csv(
    Path("./Resources_Project_1/usa_states_lat_long.csv"),
    index_col="usa_state_code",
).reset_index()

# Rename all four columns by position.
usa_states_coordinates_df.columns = [
    "usa_state_code",
    "usa_state_latitude",
    "usa_state_longitude",
    "usa_state",
]

# Key the rows by the "usa_state" column so they can be aligned with the
# per-state datasets.
usa_states_coordinates_df = usa_states_coordinates_df.set_index("usa_state")

# Review the DataFrame
display(usa_states_coordinates_df.head(56))

In [None]:
# Place data_eleven_df_sorted and usa_states_coordinates_df side by side.
# With axis="columns", `pd.concat` aligns the rows of both frames on their
# index labels and keeps every column from each frame.
#
# Then, in order:
#   1. reset_index() turns the row labels into a regular column,
#   2. dropna() removes every record that is missing any value,
#   3. rename() relabels "index" as "usa_state" and "Value" as
#      "% in collection" for use in the visualization.
all_states_data_df = (
    pd.concat(
        [data_eleven_df_sorted, usa_states_coordinates_df],
        axis="columns",
        sort=False,
    )
    .reset_index()
    .dropna()
    .rename(columns={"index": "usa_state", "Value": "% in collection"})
)

# Review the resulting DataFrame
display(all_states_data_df.head(56))

In [None]:
# Fig. 11: one marker per row at the state's latitude/longitude, with both
# marker size and marker colour driven by the "% in collection" column.
px.scatter_mapbox(
    all_states_data_df,
    lon="usa_state_longitude",
    lat="usa_state_latitude",
    hover_name="usa_state",
    color="% in collection",
    size="% in collection",
    size_max=20,
    zoom=3,
    title="Fig. 11: Percentage of Adults Under 30 with Debt in Collections",
)

In [None]:
# Dataset 12: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "Age Group" column.
data_twelve_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/TWELVE_data-wVncJ_back.csv"))
    .dropna()
    .set_index("Age Group")
)

# Preview both ends of the cleaned frame.
display(data_twelve_df.head(), data_twelve_df.tail())

In [None]:
# Plot of dataset 12
# Fig. 12: grouped bar chart against the "Age Group" index.
# `y` is intentionally not passed: an empty string does not name any column,
# whereas omitting `y` lets hvPlot draw a bar for every numeric column.
data_twelve_df.hvplot.bar(
    xlabel="Age Group",
    ylabel="Percent  (%) ",
    x="Age Group",
    title="Fig. 12: Percent Difference in Average Wealth by Age Groups, 1983-2016",
    hover_color="orange",
    rot=90,
    height=400,
    width=500,
    legend='bottom_left'
)

In [None]:
# Dataset 13: read the CSV, discard rows containing missing values, and key
# the remaining rows by the "State" column.
data_thirteen_df = (
    pd.read_csv(Path("./Resources_Project_1/Data_Files/THIRTEEN_data_CcmyN.csv"))
    .dropna()
    .set_index("State")
)

# Preview the first five rows and up to the last 56 rows.
display(data_thirteen_df.head(), data_thirteen_df.tail(56))

In [None]:
# Place data_thirteen_df and usa_states_coordinates_df side by side.
# With axis="columns", `pd.concat` aligns the rows of both frames on their
# index labels and keeps every column from each frame.
#
# Then, in order:
#   1. reset_index() turns the row labels into a regular column,
#   2. dropna() removes every record that is missing any value,
#   3. rename() relabels "index" as "usa_state", "2019" as
#      "2019 Credit Score" and "2020" as "2020 Credit Score" for use in the
#      visualization.
all_states_data_updated_df = (
    pd.concat(
        [data_thirteen_df, usa_states_coordinates_df],
        axis="columns",
        sort=False,
    )
    .reset_index()
    .dropna()
    .rename(
        columns={
            "index": "usa_state",
            "2019": "2019 Credit Score",
            "2020": "2020 Credit Score",
        }
    )
)

# Review the resulting DataFrame
display(all_states_data_updated_df.head(56))


In [None]:
# Fig. 13: one marker per row at the state's latitude/longitude. Marker size
# follows "2019 Credit Score" and marker colour follows "2020 Credit Score".
px.scatter_mapbox(
    all_states_data_updated_df,
    lon="usa_state_longitude",
    lat="usa_state_latitude",
    hover_name="usa_state",
    color="2020 Credit Score",
    size="2019 Credit Score",
    size_max=20,
    zoom=3,
    title="Fig. 13: Credit Scores by State",
)

In [None]:
# Embed https://smartstartcredit.org in the cell output as a full-width,
# 200px-tall inline frame.
from IPython.display import IFrame

IFrame(
    src="https://smartstartcredit.org",
    height="200px",
    width="100%",
)