In [3]:
import pandas as pd
import altair as alt

In [5]:
# Load incarceration data

incarceration_data = pd.read_csv('incarceration_by_race_1978_2011.csv')

incarceration_melted = incarceration_data.melt(
    id_vars=['YEAR'],
    value_vars=['BLACK', 'WHITE'],
    var_name='Race',
    value_name='Count'
)

# Create a more readable label
incarceration_melted['Race_Gender'] = incarceration_melted['Race'].map({
    'BLACK': 'Black Prisoners',
    'WHITE': 'White Prisoners'
})

color_scale = alt.Scale(
    domain=['Black Prisoners', 'White Prisoners'],
    range=['red', 'blue']
)

# Create line chart
chart = alt.Chart(incarceration_melted).mark_line(point=True).encode(
    x=alt.X('YEAR:O', title='Year', axis=alt.Axis(labelAngle=-45)),
    y=alt.Y('Count:Q', title='Number of Incarcerated Individuals'),
    color=alt.Color('Race_Gender:N', scale=color_scale, legend=alt.Legend(title="Race and Gender")),
    tooltip=['YEAR', 'Race_Gender', 'Count']
).properties(
    width=800,
    height=500,
    title='Incarceration Counts Over Time by Race (1978–2011)'
)

incarceration_chart = chart.configure(
    background="white"
).configure_axis(
    grid=True,
    labelColor="black",
    titleColor="black"
).configure_legend(
    labelColor="black",
    titleColor="black"
).configure_title(
    color="black"
)

incarceration_chart.save("incarceration_by_race.json")

incarceration_chart

KeyError: "The following id_vars or value_vars are not present in the DataFrame: ['BLACK', 'WHITE']"

In [6]:
import pandas as pd
import altair as alt

black_poverty = pd.read_csv('poverty_black.csv')
white_poverty = pd.read_csv('poverty_white.csv')


black_poverty = black_poverty.rename(columns={
    'Year': 'Year',
    'Total_Below_Poverty_Percent': 'poverty_black'
})

white_poverty = white_poverty.rename(columns={
    'Year': 'Year',
    'Total_Below_Poverty_Percent': 'poverty_white'
})


poverty_data = pd.merge(black_poverty[['Year', 'poverty_black']],
                        white_poverty[['Year', 'poverty_white']],
                        on='Year')


poverty_melted = poverty_data.melt(
    id_vars=['Year'],
    value_vars=['poverty_black', 'poverty_white'],
    var_name='Race',
    value_name='Poverty_Rate'
)

poverty_melted['Race_Label'] = poverty_melted['Race'].map({
    'poverty_black': 'Black Poverty Rate',
    'poverty_white': 'White Poverty Rate'
})


color_scale = alt.Scale(
    domain=['Black Poverty Rate', 'White Poverty Rate'],
    range=['red', 'blue']
)

poverty_chart = alt.Chart(poverty_melted).mark_line(point=True).encode(
    x=alt.X('Year:O', title='Year', axis=alt.Axis(labelAngle=-45)),
    y=alt.Y('Poverty_Rate:Q', title='Poverty Rate (%)'),
    color=alt.Color('Race_Label:N', scale=color_scale, legend=alt.Legend(title="Race")),
    tooltip=['Year', 'Race_Label', 'Poverty_Rate']
).properties(
    width=800,
    height=500,
    title='Poverty Rate Over Time by Race (Black vs. White)'
)

final_poverty_chart = poverty_chart.configure(
    background="white"
).configure_axis(
    grid=True,
    labelColor="black",
    titleColor="black"
).configure_legend(
    labelColor="black",
    titleColor="black"
).configure_title(
    color="black"
)

final_poverty_chart.save("poverty_by_race.json")

final_poverty_chart


In [7]:
# import pandas as pd

# # Explicitly tell pandas to treat 'NaN' as a missing value
data = pd.read_csv('Incarceration_Outcomes_decimalized.csv').head(500)

#print(data[['jail_black_pooled_p25', 'jail_white_pooled_p25']].head(500).to_csv(index=False))

# Calculate the mean for the first 10 rows only 
avg_black_10 = data['jail_black_pooled_p25'].head(70).mean()
avg_white_10 = data['jail_white_pooled_p25'].head(70).mean()

print(f"Average percent for Black individuals (first 10 rows): {avg_black_10:.2f}%")
print(f"Average percent for White individuals (first 10 rows): {avg_white_10:.2f}%")

Average percent for Black individuals (first 10 rows): 0.04%
Average percent for White individuals (first 10 rows): 0.02%


In [None]:
import pandas as pd
import altair as alt

data = pd.read_csv('Incarceration_Outcomes_decimalized.csv')

# Convert columns to numeric
data['jail_black_pooled_p25'] = pd.to_numeric(data['jail_black_pooled_p25']).mul(100)
data['jail_white_pooled_p25'] = pd.to_numeric(data['jail_white_pooled_p25']).mul(100)


# Calculate average jail outcome per race per state
state_avg = data.groupby('state')[['jail_black_pooled_p25', 'jail_white_pooled_p25']].mean().reset_index()

# Melt the DataFrame for plotting
melted = state_avg.melt(
    id_vars='state',
    value_vars=['jail_black_pooled_p25', 'jail_white_pooled_p25'],
    var_name='Race',
    value_name='Percent'
)

# Clean race labels
melted['Race_Label'] = melted['Race'].map({
    'jail_black_pooled_p25': 'Black',
    'jail_white_pooled_p25': 'White'

})
    
melted['state'] = melted['state'].astype(str).str.strip().str.title()

# sort alphabetically
chart = alt.Chart(melted).mark_bar().encode(
    x=alt.X('state:N', title='State'),
    y=alt.Y('Percent:Q', title='Average Jail Outcome Percent', stack='zero'),
    color=alt.Color('Race_Label:N',
        scale=alt.Scale(domain=['Black', 'White'], range=['red', 'blue']),
        legend=alt.Legend(title='Race')
    ),
    order=alt.Order('Race_Label:N', sort='ascending'),
    tooltip=[
        alt.Tooltip('state:N', title='State'),
        alt.Tooltip('Race_Label:N', title='Race'),
        alt.Tooltip('Percent:Q', title='Percent', format=".2f")
    ]
).properties(
    width=1000,
    height=400,
    title='Average Jail Outcomes by Race per State (Alphabetical)'
).configure_axisX(
    labelAngle=-45
)

chart.show()

# Optional: Print overall averages across the first 70 rows
avg_black = data['jail_black_pooled_p25'].mean() 
avg_white = data['jail_white_pooled_p25'].mean() 

print(f"Average percent incarcerated for Black individuals: {avg_black:.2f}%")
print(f"Average percent incarcerated for White individuals: {avg_white:.2f}%")


Average percent incarcerated for Black individuals: 5.89%
Average percent incarcerated for White individuals: 1.66%


In [10]:
import pandas as pd
import altair as alt
from vega_datasets import data

# Load and preprocess your dataset
df = pd.read_csv('Incarceration_Outcomes_decimalized.csv')

# Convert jail outcome columns to numeric and scale to %
df['jail_black_pooled_p25'] = pd.to_numeric(df['jail_black_pooled_p25'], errors='coerce') * 100
df['jail_white_pooled_p25'] = pd.to_numeric(df['jail_white_pooled_p25'], errors='coerce') * 100

# state names to uppercase
df['state'] = df['state'].str.strip().str.upper()

# state averages
state_avg = df.groupby('state')[['jail_black_pooled_p25', 'jail_white_pooled_p25']].mean().reset_index()

# Manual full state name → FIPS code mapping
state_full_to_fips = {
    'ALABAMA': 1, 'ALASKA': 2, 'ARIZONA': 4, 'ARKANSAS': 5, 'CALIFORNIA': 6,
    'COLORADO': 8, 'CONNECTICUT': 9, 'DELAWARE': 10, 'FLORIDA': 12, 'GEORGIA': 13,
    'HAWAII': 15, 'IDAHO': 16, 'ILLINOIS': 17, 'INDIANA': 18, 'IOWA': 19,
    'KANSAS': 20, 'KENTUCKY': 21, 'LOUISIANA': 22, 'MAINE': 23, 'MARYLAND': 24,
    'MASSACHUSETTS': 25, 'MICHIGAN': 26, 'MINNESOTA': 27, 'MISSISSIPPI': 28, 'MISSOURI': 29,
    'MONTANA': 30, 'NEBRASKA': 31, 'NEVADA': 32, 'NEW HAMPSHIRE': 33, 'NEW JERSEY': 34,
    'NEW MEXICO': 35, 'NEW YORK': 36, 'NORTH CAROLINA': 37, 'NORTH DAKOTA': 38, 'OHIO': 39,
    'OKLAHOMA': 40, 'OREGON': 41, 'PENNSYLVANIA': 42, 'RHODE ISLAND': 44, 'SOUTH CAROLINA': 45,
    'SOUTH DAKOTA': 46, 'TENNESSEE': 47, 'TEXAS': 48, 'UTAH': 49, 'VERMONT': 50,
    'VIRGINIA': 51, 'WASHINGTON': 53, 'WEST VIRGINIA': 54, 'WISCONSIN': 55, 'WYOMING': 56
}

# Add FIPS codes to data
state_avg['id'] = state_avg['state'].map(state_full_to_fips)

# Remove any rows where FIPS or jail data is missing
state_avg = state_avg.replace([float('inf'), -float('inf')], pd.NA).dropna(subset=[
    'jail_black_pooled_p25', 'jail_white_pooled_p25', 'id'
])

# U.S. map geometry
us_states = alt.topo_feature(data.us_10m.url, 'states')

#  map for Black incarceration rates
black_map = alt.Chart(us_states).mark_geoshape().encode(
    color=alt.Color('jail_black_pooled_p25:Q', scale=alt.Scale(scheme='reds'), title='Black Jail %'),
    tooltip=[
        alt.Tooltip('state:N', title='State'),
        alt.Tooltip('jail_black_pooled_p25:Q', title='Black Jail %', format='.2f')
    ]
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(state_avg, 'id', ['state', 'jail_black_pooled_p25'])
).project('albersUsa').properties(
    width=600,
    height=600,
    title="Black Jail Outcomes by State",
)

# map for White incarceration rates
white_map = alt.Chart(us_states).mark_geoshape().encode(
    color=alt.Color('jail_white_pooled_p25:Q', scale=alt.Scale(scheme='blues'), title='White Jail %'),
    tooltip=[
        alt.Tooltip('state:N', title='State'),
        alt.Tooltip('jail_white_pooled_p25:Q', title='White Jail %', format='.2f')
    ]
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(state_avg, 'id', ['state', 'jail_white_pooled_p25'])
).project('albersUsa').properties(
    width=600,
    height=600,
   title="White Jail Outcomes by State" 
    
)

(black_map | white_map).show()
