# Data Analysis

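This notebook performs exploratory data analysis for the Ghosts of War Crimes project. It loads the processed dataset, prints summary statistics, and charts the data from several angles, including victims by incident type, incident counts by location, and victims by incident type and responsible party.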

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load processed data
# The path is relative, so it resolves against the kernel's working directory.
processed_data_path = '../data/processed_data/processed_data.csv'

# Parse the CSV into the DataFrame that the following cells read as `data`.
data = pd.read_csv(filepath_or_buffer=processed_data_path)

# Display basic statistics
# Left as the cell's final expression so the summary table is rendered as output.
data.describe()

In [None]:
# Victims by Incident Type
# Sum the 'Number of Victims' column within each 'Incident Type' group.
# The stored Series keeps groupby's ordering; sorting happens only for the plot.
victims_by_type = (
    data
    .groupby('Incident Type')['Number of Victims']
    .sum()
)

# Draw on an explicitly created Axes instead of pyplot's implicit current one.
fig, ax = plt.subplots(figsize=(12, 8))
victims_by_type.sort_values().plot.bar(ax=ax, color='skyblue')  # ascending bars

ax.set_title('Number of Victims by Incident Type')
ax.set_xlabel('Incident Type')
ax.set_ylabel('Number of Victims')

# Tilt the category labels by 45 degrees.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

plt.show()

## Visualization: Incidents by Location

In [None]:
# Incidents by Location
# value_counts() tallies rows per distinct 'Location' and, by default, orders
# the result from most to least frequent, so the first ten entries are the
# ten most frequent locations.
incidents_by_location = data['Location'].value_counts()

# Draw on an explicitly created Axes instead of pyplot's implicit current one.
fig, ax = plt.subplots(figsize=(12, 8))
incidents_by_location[:10].plot.bar(ax=ax, color='green')

ax.set(
    title='Top 10 Locations by Incident Count',
    xlabel='Location',
    ylabel='Incident Count',
)

# Tilt the location labels by 45 degrees.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

plt.show()

## Heatmap of Victims by Incident Type and Responsible Party

In [None]:
# Heatmap for Victims by Incident Type and Responsible Party
# Cross-tabulate summed victim counts: one row per incident type, one column
# per responsible party. Combinations with no rows in the data are filled
# with 0 rather than NaN so every heatmap cell gets a numeric annotation.
pivot_table = data.pivot_table(
    values='Number of Victims',
    index='Incident Type',
    columns='Responsible Party',
    aggfunc='sum',
    fill_value=0,
)

# Create the Axes up front and hand it to seaborn explicitly.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    pivot_table,
    ax=ax,
    cmap='Blues',
    annot=True,  # write the value inside each cell
    fmt='g',     # general number format for the annotations
)

ax.set_title('Victims by Incident Type and Responsible Party')
ax.set_xlabel('Responsible Party')
ax.set_ylabel('Incident Type')

plt.show()