In [1]:
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import polars as pl

In [2]:
# Load the joined dataset (the cells below filter it as arms deals) and
# print its (rows, columns) shape as a quick sanity check.
df = pl.read_csv('../rtf/joined_data.csv')
print((df.height, df.width))

(59223, 18)


In [5]:


# Split the deals by the United Kingdom's role: one frame for the rows where
# it is the buyer, one for the rows where it is the seller.
uk_buyer_df = df.filter(pl.col("Buyer") == "United Kingdom")
uk_seller_df = df.filter(pl.col("Seller") == "United Kingdom")

# A frame's height is its row count, i.e. the number of matching deals.
total_uk_buyer_deals = uk_buyer_df.height
total_uk_seller_deals = uk_seller_df.height

print("Total UK arms deals as buyer:", total_uk_buyer_deals)
print("Total UK arms deals as seller:", total_uk_seller_deals)

Total UK arms deals as buyer: 815
Total UK arms deals as seller: 4140


In [10]:
# Load the conflicts table from CSV. The three columns named below are forced
# to be parsed as strings rather than left to polars' type inference.
df_conflict = pl.read_csv(
    '../conflicts/ucdp-prio-acd-221.csv',
    dtypes=dict(side_a_id=pl.Utf8, region=pl.Utf8, gwno_a=pl.Utf8),
)
print((df_conflict.height, df_conflict.width))

(2568, 28)


In [19]:
# Count how often each name occurs in the "side_a" and "side_b" columns.
# value_counts() returns one row per distinct value plus a "counts" column;
# the value column is renamed to "name" so both tallies share one schema.
side_a_counts = df_conflict['side_a'].value_counts().rename({'side_a': 'name'})
side_b_counts = df_conflict['side_b'].value_counts().rename({'side_b': 'name'})

# Stack the two tallies and add up the counts of any name that appears on
# both sides, giving one total per participant.
top_participants = (
    pl.concat([side_a_counts, side_b_counts])
    .groupby('name')
    .sum()
)

# Select the top 10 participants. groupby does not guarantee row order, so
# ties on "counts" are broken alphabetically by "name" to keep the result
# reproducible from run to run.
top_10_participants = top_participants.sort(
    ["counts", "name"], descending=[True, False]
).head(10)

# Final expression so the cell actually displays the table it computes.
top_10_participants


shape: (10, 2)
┌───────────────────────────────┬────────┐
│ name                          ┆ counts │
│ ---                           ┆ ---    │
│ str                           ┆ u32    │
╞═══════════════════════════════╪════════╡
│ Government of Myanmar (Burma) ┆ 287    │
│ Government of India           ┆ 212    │
│ Government of Ethiopia        ┆ 134    │
│ Government of Philippines     ┆ 115    │
│ ...                           ┆ ...    │
│ Government of Iraq            ┆ 76     │
│ Government of Pakistan        ┆ 70     │
│ Government of Iran            ┆ 61     │
│ Government of Colombia        ┆ 57     │
└───────────────────────────────┴────────┘


In [28]:
# Horizontal bar chart of the ten most frequent participants.
# `go` (plotly.graph_objects) comes from the notebook's import cell at the top.

# create the bar chart: counts along x, participant names along y
fig = go.Figure()
fig.add_trace(go.Bar(
    x=top_10_participants['counts'],
    y=top_10_participants['name'],
    orientation='h'
))

# customize the chart layout
fig.update_layout(
    title="Top 10 Participants in the Dataset",
    xaxis_title="Count",
    yaxis_title="Participant",
    # order the y categories by bar value; with a horizontal chart this puts
    # the largest bar at the top
    yaxis_categoryorder="total ascending",
    # wider left margin, presumably to leave room for the long name labels
    margin=dict(l=150),
    height=500
)

# display the chart
fig.show()