# 03 — Gun Homicide vs Gini Coefficient

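Scatter plot with a trend line exploring the correlation between income inequality (Gini coefficient) and gun homicide rates. The trend line comes from a linear regression of the log10-transformed gun homicide rate on the Gini coefficient; a Gini choropleth is included at the end for reference.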

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats
from pathlib import Path

# Directory holding the processed, merged per-country dataset.
DATA_DIR = Path('../data/processed')
df = pd.read_csv(DATA_DIR.joinpath('merged_country_data.csv'))

# Keep only the rows where both variables of interest are present. The copy
# lets later cells add columns to plot_df without touching df.
has_both = df['gini'].notna() & df['gun_homicide_rate'].notna()
plot_df = df.loc[has_both].copy()
print(f"Countries with both Gini and gun homicide data: {plot_df.shape[0]}")

Countries with both Gini and gun homicide data: 147


## Scatter Plot — Gun Homicide Rate vs Gini Coefficient

In [2]:
# Fit a straight line to log10(gun homicide rate) as a function of Gini.
# Rates below 0.01 (including zero) are raised to 0.01 first so that the
# logarithm is defined for every row.
floored_rate = plot_df['gun_homicide_rate'].clip(lower=0.01)
plot_df['log_homicide'] = np.log10(floored_rate)

fit = stats.linregress(plot_df['gini'], plot_df['log_homicide'])
slope, intercept = fit.slope, fit.intercept
r_value, p_value, std_err = fit.rvalue, fit.pvalue, fit.stderr
r_squared = r_value ** 2

# Report the goodness of fit, its significance and the fitted slope.
summary_lines = [
    "Linear regression (Gini vs log10 gun homicide rate):",
    f"  R² = {r_squared:.4f}",
    f"  p-value = {p_value:.2e}",
    f"  slope = {slope:.4f}",
]
print("\n".join(summary_lines))

Linear regression (Gini vs log10 gun homicide rate):
  R² = 0.3588
  p-value = 1.10e-15
  slope = 0.0537


In [3]:
# Plot exactly the values the regression was fitted on. The fit uses the
# `log_homicide` column (log10 of the rate after it was floored), whereas the
# raw rate may contain non-positive values that a log axis cannot draw.
# Back-transforming the fitted column keeps every country that contributed to
# the trend line and to R² visible in the figure. `assign` returns a new frame,
# so plot_df itself is not modified by this cell.
scatter_df = plot_df.assign(rate_for_plot=10 ** plot_df['log_homicide'])

rate_label = 'Gun Homicide Rate per 100K (log scale)'

fig = px.scatter(
    scatter_df,
    x='gini',
    y='rate_for_plot',
    color='region',
    hover_name='country_name',
    # The hover keeps reporting the original (unfloored) rate; the helper
    # column used for the y position is hidden from the tooltip.
    hover_data={
        'gini': ':.1f',
        'gun_homicide_rate': ':.2f',
        'region': True,
        'rate_for_plot': False,
    },
    log_y=True,
    title=f'Gun Homicide Rate vs Gini Coefficient (R²={r_squared:.3f}, p={p_value:.2e})',
    labels={
        'gini': 'Gini Coefficient',
        'gun_homicide_rate': rate_label,
        'rate_for_plot': rate_label,
        'region': 'Region',
    },
)

# Add trend line: evaluate the fitted line over the observed Gini range and
# undo the log10 transform so it can be drawn on the rate axis.
x_range = np.linspace(plot_df['gini'].min(), plot_df['gini'].max(), 100)
y_trend = 10 ** (slope * x_range + intercept)
fig.add_trace(go.Scatter(
    x=x_range, y=y_trend,
    mode='lines',
    name=f'Trend (R²={r_squared:.3f})',
    line=dict(color='red', dash='dash', width=2),
))

fig.update_layout(template='plotly_white', height=600)
fig.show()

## Gini Coefficient Choropleth (Reference)

In [4]:
# Reference map: every country that has a Gini value, shaded by that value.
gini_map = df.loc[df['gini'].notna()]

# Map styling shared by the layout call below.
geo_style = {
    'showframe': False,
    'showcoastlines': True,
    'coastlinecolor': '#999',
}

fig = px.choropleth(
    gini_map,
    locations='country_code',
    color='gini',
    color_continuous_scale='RdYlGn_r',
    hover_name='country_name',
    # Show the Gini value with one decimal; hide the raw country code.
    hover_data={'gini': ':.1f', 'country_code': False},
    labels={'gini': 'Gini Coefficient'},
    title='Gini Coefficient by Country',
)
fig.update_layout(geo=geo_style, template='plotly_white', height=500)
fig.show()