In [58]:

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import altair as alt
import folium
# Load the incidents CSV (path is relative to the working directory); the cells
# below read this frame through the module-level name `df`.
df = pd.read_csv("data/security_incidents.csv")


In [59]:
# Organization columns whose values are totalled for the bar chart.
sum_columns = ["UN", "INGO", "ICRC", "NRCS and IFRC", "NNGO", "Other"]

# Column-wise sums: a Series with one total per organization column,
# indexed by the column name.
total_affected_by_org = df[sum_columns].sum()

fig = go.Figure()

# One bar per organization column (x = column name, y = its total).
fig.add_trace(go.Bar(
    x=total_affected_by_org.index,
    y=total_affected_by_org,
    name="Affected",
    visible=True,
))

# Title and axis labels so the saved HTML can be read on its own.
fig.update_layout(
    title="Total Affected by Organization",
    xaxis_title="Organization",
    yaxis_title="Total affected",
)

# Persist the interactive chart next to the notebook.
fig.write_html("bar.html")

from IPython.display import IFrame

# Last expression of the cell: embeds the saved HTML file as the cell output.
IFrame("bar.html", width=800, height=600)


In [60]:
import pandas as pd
import scipy.stats as stats

# Organization columns to compare; `df` is the incidents frame loaded earlier.
sum_columns = ["UN", "INGO", "ICRC", "NRCS and IFRC", "NNGO", "Other"]

# Every unordered pair of columns, each column paired only with the columns
# that come after it in sum_columns (so no pair is repeated or self-compared).
column_pairs = [
    (left, right)
    for position, left in enumerate(sum_columns)
    for right in sum_columns[position + 1:]
]

# One result record (dict) per pair, collected in pair order.
output_matrix = []

for left, right in column_pairs:
    # Missing values are removed from each column independently before testing.
    left_values = df[left].dropna()
    right_values = df[right].dropna()

    # Two-sample t-test with scipy's default settings.
    # NOTE(review): the default assumes equal variances in both samples, and no
    # multiple-comparison correction is applied across the pairs - confirm that
    # this is the intended analysis.
    t_stat, p_val = stats.ttest_ind(left_values, right_values)

    output_matrix.append({
        'Group 1': left,
        'Group 2': right,
        't-statistic': t_stat,
        'p-value': p_val
    })

# Tabulate the records: one row per pair, one column per dict key.
output_df = pd.DataFrame(output_matrix)

# Plain-text dump of the table.
print(output_df)


          Group 1        Group 2  t-statistic        p-value
0              UN           INGO    -9.838241   1.012923e-22
1              UN           ICRC    10.998725   5.930294e-28
2              UN  NRCS and IFRC     8.474121   2.757711e-17
3              UN           NNGO    -1.778800   7.530756e-02
4              UN          Other    12.000280   6.448903e-33
5            INGO           ICRC    29.302664  4.722377e-180
6            INGO  NRCS and IFRC    24.818151  2.052936e-131
7            INGO           NNGO    10.803988   4.898793e-27
8            INGO          Other    31.055374  7.172148e-201
9            ICRC  NRCS and IFRC    -5.159420   2.532117e-07
10           ICRC           NNGO   -22.084266  3.464435e-105
11           ICRC          Other     3.719342   2.010041e-04
12  NRCS and IFRC           NNGO   -16.363863   2.677308e-59
13  NRCS and IFRC          Other     7.812154   6.278522e-15
14           NNGO          Other    24.496670  3.423553e-128


In [61]:
import plotly.graph_objects as go
import pandas as pd
from IPython.display import IFrame

# `df` is the incidents frame loaded in an earlier cell.

# Columns to accumulate. A "Cumulative <name>" column is appended for each one,
# in this order.
source_columns = [
    "UN", "INGO", "ICRC", "NRCS and IFRC", "NNGO", "Other",
    "Total affected", "Total killed", "Total kidnapped", "Total wounded",
]

# Work on a copy so the running totals are not written back into `df`.
df_cumulative = df[source_columns].copy()
for source_column in source_columns:
    df_cumulative[f"Cumulative {source_column}"] = df_cumulative[source_column].cumsum()

# Figure that receives one line per cumulative column.
fig_affected = go.Figure()


def add_trace_and_slope_affected(column, color, label):
    """Add one line trace for a cumulative column to `fig_affected`.

    Parameters:
        column: name of the `df_cumulative` column plotted against the index.
        color: line colour of the trace.
        label: legend name; also repeated for every point as hover text.

    The hover box additionally shows an overall slope: the last value minus
    the first value of the column, divided by the number of rows minus one.
    """
    running_total = df_cumulative[column]
    n_rows = len(df_cumulative)
    slope = (running_total.iloc[-1] - running_total.iloc[0]) / (n_rows - 1)

    fig_affected.add_trace(go.Scatter(
        x=df_cumulative.index,
        y=running_total,
        mode='lines',
        name=label,
        line=dict(color=color),
        hovertemplate='<b>%{text}</b><br>Value: %{y}<br>Overall Slope: %{customdata:.2f}',
        # Same label / same slope for every point, so any hovered point shows them.
        text=[label] * n_rows,
        customdata=[slope] * n_rows
    ))


# (column, colour) for every trace, in drawing order; each column name also
# serves as its legend label.
trace_specs = [
    ('Cumulative UN', 'blue'),
    ('Cumulative INGO', 'orange'),
    ('Cumulative ICRC', 'red'),
    ('Cumulative NRCS and IFRC', 'green'),
    ('Cumulative NNGO', 'pink'),
    ('Cumulative Other', 'purple'),
    ('Cumulative Total affected', 'black'),
    ('Cumulative Total killed', 'grey'),
    ('Cumulative Total wounded', 'lightgrey'),
    ('Cumulative Total kidnapped', 'darkgrey'),
]
for cumulative_column, line_color in trace_specs:
    add_trace_and_slope_affected(cumulative_column, line_color, cumulative_column)

# x positions of the dashed vertical guide lines.
# NOTE(review): what these three positions correspond to is not recorded here.
x_lines = [1200, 2000, 3800]

# The guide lines run from 0 up to the largest cumulative "Total affected" value.
guide_top = df_cumulative['Cumulative Total affected'].max()

for x in x_lines:
    fig_affected.add_shape(
        go.layout.Shape(
            type="line",
            x0=x, x1=x,  # identical x at both ends makes the line vertical
            y0=0, y1=guide_top,
            line=dict(color="gray", dash="dash", width=.5)
        )
    )

fig_affected.update_layout(
    title='Total Affected by Organization Over Time',
    xaxis_title='Cumulative Incidents',
    yaxis_title='Cumulative Sum',
    hovermode='closest',
)

# Save the figure as a standalone interactive HTML file.
fig_affected.write_html("cumulative_total_affected.html")

# Last expression of the cell: embeds the saved file as the cell output.
IFrame("cumulative_total_affected.html", width=800, height=600)


In [62]:
import plotly.graph_objects as go
import pandas as pd
from IPython.display import IFrame

# `df` is the incidents frame loaded in an earlier cell.

# Gender and outcome columns to accumulate.
source_columns = [
    "Gender Male", "Gender Female", "Gender Unknown",
    "Total affected", "Total killed", "Total kidnapped", "Total wounded",
]

# Work on a copy so the running totals are not written back into `df`.
df_cumulative = df[source_columns].copy()

# Running-total columns. The gender columns drop the "Gender " prefix in their
# cumulative name; the outcome columns keep their full name.
cumulative_names = {
    'Gender Female': 'Cumulative Female',
    'Gender Male': 'Cumulative Male',
    'Gender Unknown': 'Cumulative Unknown',
    'Total affected': 'Cumulative Total affected',
    'Total killed': 'Cumulative Total killed',
    'Total kidnapped': 'Cumulative Total kidnapped',
    'Total wounded': 'Cumulative Total wounded',
}
for source_column, cumulative_column in cumulative_names.items():
    df_cumulative[cumulative_column] = df_cumulative[source_column].cumsum()

# Figure that receives one line per cumulative column.
fig_affected = go.Figure()


def add_trace_and_slope_affected(column, color, label):
    """Add one line trace for a cumulative column to `fig_affected`.

    Parameters:
        column: name of the `df_cumulative` column plotted against the index.
        color: line colour of the trace.
        label: legend name; also repeated for every point as hover text.

    The hover box additionally shows an overall slope: the last value minus
    the first value of the column, divided by the number of rows minus one.
    """
    running_total = df_cumulative[column]
    n_rows = len(df_cumulative)
    slope = (running_total.iloc[-1] - running_total.iloc[0]) / (n_rows - 1)

    fig_affected.add_trace(go.Scatter(
        x=df_cumulative.index,
        y=running_total,
        mode='lines',
        name=label,
        line=dict(color=color),
        hovertemplate='<b>%{text}</b><br>' +
                      'Value: %{y}<br>' +
                      'Overall Slope: %{customdata:.2f}',
        # Same label / same slope for every point, so any hovered point shows them.
        text=[label] * n_rows,
        customdata=[slope] * n_rows
    ))


# (column, colour) for every trace, in drawing order; each column name also
# serves as its legend label.
trace_specs = [
    ('Cumulative Female', 'blue'),
    ('Cumulative Male', 'orange'),
    ('Cumulative Unknown', 'red'),
    ('Cumulative Total affected', 'black'),
    ('Cumulative Total killed', 'grey'),
    ('Cumulative Total wounded', 'lightgrey'),
    ('Cumulative Total kidnapped', 'darkgrey'),
]
for cumulative_column, line_color in trace_specs:
    add_trace_and_slope_affected(cumulative_column, line_color, cumulative_column)

# x positions of the dashed vertical guide lines.
# NOTE(review): what these three positions correspond to is not recorded here.
x_lines = [1200, 2000, 3800]

# The guide lines run from 0 up to the largest cumulative "Total affected" value.
guide_top = df_cumulative['Cumulative Total affected'].max()

for x in x_lines:
    fig_affected.add_shape(
        go.layout.Shape(
            type="line",
            x0=x, x1=x,  # identical x at both ends makes the line vertical
            y0=0, y1=guide_top,
            line=dict(color="gray", dash="dash", width=.5)
        )
    )

# This cell plots the gender columns, so the title says "Gender" (the previous
# title said "Organization", carried over from the organization chart).
fig_affected.update_layout(
    title='Total Affected by Gender Over Time',
    xaxis_title='Cumulative Incidents',
    yaxis_title='Cumulative Sum',
    hovermode='closest',
)

# Save the figure as a standalone interactive HTML file.
fig_affected.write_html("cumulative_total_affected_gender.html")

# Last expression of the cell: embeds the saved file as the cell output.
IFrame("cumulative_total_affected_gender.html", width=800, height=600)


In [69]:
# Organization columns plus "Gender Unknown"; rows with a missing value in any
# of these columns are dropped, so the index may have gaps.
df_unknown = df[["UN", "INGO", "ICRC", "NRCS and IFRC", "NNGO", "Other", "Gender Unknown"]].dropna()

# Append a "Cumulative <name>" running total for each of the original columns.
# The column list is captured up front because the loop adds columns.
for column in list(df_unknown.columns):
    df_unknown[f'Cumulative {column}'] = df_unknown[column].cumsum()

# Figure that receives one line per cumulative column.
fig = go.Figure()


def add_trace_and_slope_affected(column, color, label):
    """Add the running-total line of one column to `fig`.

    Parameters:
        column: base column name; the trace plots 'Cumulative <column>'.
        color: line colour of the trace.
        label: legend name; also repeated for every point as hover text.

    Despite the name, this version computes no slope.
    """
    running_total = df_unknown[f'Cumulative {column}']
    fig.add_trace(go.Scatter(
        x=df_unknown.index,
        y=running_total,
        mode='lines',
        name=label,
        line=dict(color=color),
        text=[label] * len(df_unknown),
    ))


# (base column, colour, legend label) for every trace, in drawing order.
trace_specs = [
    ('UN', 'blue', 'Cumulative UN'),
    ('INGO', 'orange', 'Cumulative INGO'),
    ('ICRC', 'red', 'Cumulative ICRC'),
    ('NRCS and IFRC', 'green', 'Cumulative NRCS and IFRC'),
    ('NNGO', 'pink', 'Cumulative NNGO'),
    ('Other', 'purple', 'Cumulative Other'),
    ('Gender Unknown', 'black', 'Cumulative Gender Unknown'),
]
for base_column, line_color, legend_label in trace_specs:
    add_trace_and_slope_affected(base_column, line_color, legend_label)

# Titles and axis labels.
fig.update_layout(
    title='Cumulative Gender Unknown vs Organizations',
    xaxis_title='Index (Time or Observations)',
    yaxis_title='Cumulative Sum',
    hovermode='closest',
)

# Save the figure as a standalone interactive HTML file.
fig.write_html("unknown_gender.html")

# Last expression of the cell: embeds the saved file as the cell output.
from IPython.display import IFrame
IFrame("unknown_gender.html", width=800, height=600)

In [68]:
# `df` is the incidents frame loaded in an earlier cell.

# Per-organization totals: a Series with one sum per organization column.
org_columns = ["UN", "INGO", "ICRC", "NRCS and IFRC", "NNGO", "Other"]
sum_orgs = df[org_columns].sum()

# Total of the "Gender Unknown" column.
sum_gender_unknown = df["Gender Unknown"].sum()

# "Gender Unknown" total as a percentage of the grand total over all
# organization columns. This is one overall number, not a per-organization
# breakdown.
grand_total = sum_orgs.sum()
percentage_unknown = (sum_gender_unknown / grand_total) * 100

# Last expression of the cell: displayed as the cell output.
percentage_unknown


45.4800879550452