In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import ffmpeg

from src.params import CONFLICT_DATA_PATH, ISO_CODE_PATH, list_regions

from src.data_pipeline import DataPipeline
from src.data_viz import PlotBuilder







In [2]:
data_pipeline = DataPipeline(CONFLICT_DATA_PATH, ISO_CODE_PATH)
plot_builder = PlotBuilder(data_pipeline)

In [3]:
plot_builder.plot_worldmap()

In [4]:
plot_builder.plot_deaths_by_continent()

In [5]:
for region in list_regions:
    plot_builder.plot_deaths_by_region(region)

In [6]:
plot_builder.plot_deaths_br_country()

In [7]:
worst_conflict_years = data_pipeline.get_worst_conflict_years()
print(worst_conflict_years.head(5))

   year   country  region          sub-region  deaths  years_with_conflict
0  1994    Rwanda  Africa  Sub-Saharan Africa  772353                   27
1  2022  Ethiopia  Africa  Sub-Saharan Africa  102860                   34
2  2022   Ukraine  Europe      Eastern Europe   82588                    9
3  2013     Syria    Asia        Western Asia   76356                   14
4  2000   Eritrea  Africa  Sub-Saharan Africa   50000                   13


In [2]:
# data loading

def clean_abrv(text):
    """"""
    if '_' in text:
        return text.split('_')[1]
    else:
        return text


# read data
df_conflict = pd.read_csv('data/countries-in-conflict-data.csv')
df_conflict.columns = ['country', 'alpha-3', 'year', 'deaths']
df_conflict['alpha-3'] = df_conflict['alpha-3'].apply(lambda row: clean_abrv(row))

df_iso = pd.read_csv('data/countries_iso_code.csv')
df_iso = df_iso[['alpha-3', 'region', 'sub-region']]
df_iso.columns = ['alpha-3', 'region', 'sub-region']

data_conflict = df_conflict.merge(df_iso, on = ['alpha-3'], how = 'left')
data_conflict = data_conflict.dropna()
data_conflict = data_conflict[['country', 'alpha-3', 'region', 'sub-region', 'year', 'deaths']]

data_conflict['year_conflict'] = data_conflict['deaths'].apply(lambda row: 1 if row > 0 else 0)
data_conflict.sort_values(['country', 'year'], inplace=True)

list_cumsum = []
for country in data_conflict['country'].unique():
    sub_df = data_conflict[data_conflict['country'] == country].copy()
    # print(sub_df)
    sub_df['death_cumsum'] = sub_df['deaths'].cumsum()
    list_cumsum.append(sub_df)

data_conflict = pd.concat(list_cumsum)


data_conflict


Unnamed: 0,country,alpha-3,region,sub-region,year,deaths,year_conflict,death_cumsum
34,Afghanistan,AFG,Asia,Southern Asia,1989,5411,1,5411
35,Afghanistan,AFG,Asia,Southern Asia,1990,1514,1,6925
36,Afghanistan,AFG,Asia,Southern Asia,1991,3553,1,10478
37,Afghanistan,AFG,Asia,Southern Asia,1992,4366,1,14844
38,Afghanistan,AFG,Asia,Southern Asia,1993,4097,1,18941
...,...,...,...,...,...,...,...,...
6965,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2018,0,0,304
6966,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2019,0,0,304
6967,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2020,0,0,304
6968,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2021,0,0,304


In [3]:
# sacar lista paises sin conflicto y quitarlos
# tabla de paises con mas muertes
# tabla de paises con mas años en conflicto
# agregado muertes por region y subregion
# idem años en conflicto



# graficas
# world maps
# la dinamica de muertes por paises a lo largo del tiempo OK
# idem acumulativa (bar chart race) OK

# barplots y pie charts
# top continentes con mas muertes
# top subregiones con mas muertes por cada continente (5 pie)
# top 10 paises con mas muertes


# top 10 paises con mas años de conflicto


# sacar los momentos mas altos de conflicto y representarlos de alguna forma
# barras o kpi



In [4]:
# world maps
# la dinamica de muertes por paises a lo largo del tiempo

fig = px.choropleth(data_conflict, locations='alpha-3', color='deaths', hover_name='country',
                    projection='natural earth', animation_frame='year',
                    color_continuous_scale=px.colors.sequential.Burgyl,
                    title='Deaths in conflicts by Country')
fig.show()

In [17]:
# la dinamica de muertes acumuladas por paises a lo largo del tiempo con bar chart race


# necesitamos una tabla pivoteada
data_conflict

conflict_spread = pd.pivot_table(data_conflict, values='death_cumsum', index=['year'],
            columns=['country'])
conflict_spread

country,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United States,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Western Sahara,Yemen,Zambia,Zimbabwe
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1989,5411.0,0.0,0.0,0.0,2873.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,97.0,0.0,58.0,0.0,136.0,0.0,86.0,37.0
1990,6925.0,0.0,0.0,0.0,5482.0,0.0,0.0,22.0,0.0,0.0,...,0.0,0.0,97.0,0.0,122.0,0.0,136.0,0.0,126.0,51.0
1991,10478.0,0.0,34.0,0.0,5998.0,0.0,0.0,34.0,0.0,0.0,...,0.0,0.0,97.0,0.0,152.0,0.0,136.0,0.0,126.0,51.0
1992,14844.0,0.0,291.0,0.0,8344.0,0.0,0.0,154.0,0.0,0.0,...,0.0,0.0,97.0,0.0,375.0,0.0,136.0,0.0,126.0,51.0
1993,18941.0,0.0,1117.0,0.0,20741.0,0.0,0.0,198.0,0.0,0.0,...,0.0,0.0,97.0,0.0,438.0,0.0,136.0,0.0,127.0,51.0
1994,27996.0,0.0,3033.0,0.0,24725.0,0.0,86.0,228.0,0.0,0.0,...,0.0,0.0,97.0,0.0,609.0,0.0,136.0,1489.0,127.0,51.0
1995,33606.0,0.0,6068.0,0.0,24959.0,0.0,86.0,228.0,0.0,0.0,...,0.0,0.0,97.0,0.0,635.0,0.0,136.0,1489.0,127.0,51.0
1996,37180.0,0.0,7152.0,0.0,24982.0,0.0,86.0,228.0,0.0,0.0,...,0.0,0.0,97.0,0.0,679.0,0.0,136.0,1489.0,127.0,51.0
1997,43899.0,0.0,9160.0,0.0,25070.0,0.0,86.0,228.0,0.0,0.0,...,0.0,0.0,97.0,0.0,758.0,0.0,136.0,1489.0,127.0,51.0
1998,56053.0,0.0,12441.0,0.0,26238.0,0.0,86.0,228.0,0.0,0.0,...,0.0,0.0,97.0,0.0,797.0,0.0,136.0,1489.0,127.0,51.0


In [95]:
# world maps
# la dinamica de muertes acumuladas por paises a lo largo del tiempo

import bar_chart_race as bcr
# df = bcr.load_dataset('covid19_tutorial')
bcr.bar_chart_race(
        df=conflict_spread,
        filename='media/chart_race.mp4',
        orientation='h', 
        sort='desc', 
        n_bars=20, 
        fixed_order=False, 
        fixed_max=True, 
        steps_per_period=50, 
        period_length=1000, 
        # end_period_pause=0,
        interpolate_period=False, 
        period_label={'x': .98, 'y': .3, 'ha': 'right', 'va': 'center'}, 
        # period_template='%B %d, %Y', 
        period_summary_func=lambda v, r: {'x': .98, 'y': .2, 
                                        's': f'Total deaths: {v.sum():,.0f}', 
                                        'ha': 'right', 'size': 11}, 
        # perpendicular_bar_func='median', 
        # colors='dark12', 
        title='War Conflict Deaths by Country', 
        bar_size=.95, 
        # bar_textposition='inside',
        # bar_texttemplate='{x:,.0f}', 
        # bar_label_font=7, 
        # tick_label_font=7, 
        # tick_template='{x:,.0f}',
        shared_fontdict=None, 
        scale='linear', 
        fig=None, 
        writer=None, 
        bar_kwargs={'alpha': .7},
        # fig_kwargs={'figsize': (6, 3.5), 'dpi': 144},
        filter_column_colors=False
) 


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Glyph 127 () missing from current font.


Glyph 128 (\x80) missing from current font.


Glyph 129 (\x81) missing from current font.


Glyph 130 (\x82) missing from current font.


Glyph 131 (\x83) missing from current font.


Glyph 132 (\x84) missing from current font.


Glyph 133 (\x85) missing from current font.


Glyph 134 (\x86) missing from current font.


Glyph 135 (\x87) missing from current font.


Glyph 136 (\x88) missing from current font.


Glyph 137 (\x89) missing from current font.


Glyph 138 (\x8a) missing from current font.


Glyph 139 (\x8b) missing from current font.


Glyph 140 (\x8c) missing from current font.


Glyph 141 (\x8d) missing from current font.


Glyph 142 (\x8e) missing from current font.


Glyph 143 (\x8f) missing from current font.


Glyph 144 (\x90) missing from current font.


Glyph 145 (\x91) missing from current font.


Glyph 14

In [62]:
# pie chart
# top 10 regiones con mas muertes
top10_deaths_by_continent = (data_conflict
    .groupby('region')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    .head(10)
)

# top10_deaths_by_continent

# fig = px.bar(top10_deaths_by_continent, 
#             x="region", 
#             y="sum_deaths", 
#             color='region',
#             labels={
#                     "region": "Continent",
#                     "sum_deaths": "Total deaths"
#                 },
#             title="Top 10 total deaths in conflicts by continent"
#         )
# fig.update_layout(showlegend=False)
# fig.show()

fig = px.pie(top10_deaths_by_continent, 
            values='sum_deaths',
            names='region',
            hole=.5, 
            title='Total deaths in conflicts by continent')
fig.show()

In [75]:
# list_regions = data_conflict['region'].unique()
list_regions = [
    'Europe', 
    'Americas',
    'Asia',
    'Africa', 
    'Oceania'
]

In [77]:
# pie chart
# top subregiones con mas muertes por continente
region = list_regions[0]

deaths_by_subregion = (data_conflict[data_conflict['region'] == region]
    .groupby('sub-region')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    # .head(20)
)

deaths_by_subregion

fig = px.pie(deaths_by_subregion, 
            values='sum_deaths',
            names='sub-region',
            hole=.5, 
            title=f'Total deaths in conflicts by sub-region ({region})')
fig.show()

In [78]:
# pie chart
# top subregiones con mas muertes por continente
region = list_regions[1]

deaths_by_subregion = (data_conflict[data_conflict['region'] == region]
    .groupby('sub-region')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    # .head(20)
)

deaths_by_subregion

fig = px.pie(deaths_by_subregion, 
            values='sum_deaths',
            names='sub-region',
            hole=.5, 
            title=f'Total deaths in conflicts by sub-region ({region})')
fig.show()

In [79]:
# pie chart
# top subregiones con mas muertes por continente
region = list_regions[2]

deaths_by_subregion = (data_conflict[data_conflict['region'] == region]
    .groupby('sub-region')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    # .head(20)
)

deaths_by_subregion

fig = px.pie(deaths_by_subregion, 
            values='sum_deaths',
            names='sub-region',
            hole=.5, 
            title=f'Total deaths in conflicts by sub-region ({region})')
fig.show()

In [80]:
# pie chart
# top subregiones con mas muertes por continente
region = list_regions[3]

deaths_by_subregion = (data_conflict[data_conflict['region'] == region]
    .groupby('sub-region')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    # .head(20)
)

deaths_by_subregion

fig = px.pie(deaths_by_subregion, 
            values='sum_deaths',
            names='sub-region',
            hole=.5, 
            title=f'Total deaths in conflicts by sub-region ({region})')
fig.show()

In [81]:
# pie chart
# top subregiones con mas muertes por continente
region = list_regions[4]

deaths_by_subregion = (data_conflict[data_conflict['region'] == region]
    .groupby('sub-region')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    # .head(20)
)

deaths_by_subregion

fig = px.pie(deaths_by_subregion, 
            values='sum_deaths',
            names='sub-region',
            hole=.5, 
            title=f'Total deaths in conflicts by sub-region ({region})')
fig.show()

In [39]:
# barplots
# top 10 paises con mas muertes
top10_deaths_by_country = (data_conflict
    .groupby('country')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    .head(10)
)

# top10_deaths_by_country

fig = px.bar(top10_deaths_by_country, 
            x="country", 
            y="sum_deaths", 
            color='country',
            labels={
                    "country": "Country",
                    "sum_deaths": "Total deaths"
                },
            title="Top 10 total deaths in conflicts by country"
        )
fig.update_layout(showlegend=False)
fig.show()

In [94]:
#
# sacar los momentos mas altos de conflicto y representarlos de alguna forma
# barras o kpi
list_countries = data_conflict['country'].unique()
worst_conflict_years = []
for country in list_countries:
    df_country = (data_conflict[data_conflict['country'] == country][['country', 'year', 'deaths']]
                    .copy()
                    .sort_values('deaths', ascending=False)
                    .head(1)
                )
    worst_conflict_years.append(df_country)

worst_conflict_years = pd.concat(worst_conflict_years)
worst_conflict_years = worst_conflict_years.sort_values('deaths', ascending=False)
worst_conflict_years = worst_conflict_years.reset_index(drop=True)
# worst_conflict_years


years_with_conflict = (data_conflict
    .groupby(['country', 'region', 'sub-region'])
    .agg(years_with_conflict=('year_conflict', 'sum'))
    .reset_index()
)

worst_conflict_years = worst_conflict_years.merge(years_with_conflict,
                                                on = 'country',
                                                how='left'
                                            )
columns = ['year', 'country', 'region', 'sub-region', 'deaths', 'years_with_conflict']
worst_conflict_years = worst_conflict_years[columns]
worst_conflict_years.head(10)

Unnamed: 0,year,country,region,sub-region,deaths,years_with_conflict
0,1994,Rwanda,Africa,Sub-Saharan Africa,772353,27
1,2022,Ethiopia,Africa,Sub-Saharan Africa,102860,34
2,2022,Ukraine,Europe,Eastern Europe,82588,9
3,2013,Syria,Asia,Western Asia,76356,14
4,2000,Eritrea,Africa,Sub-Saharan Africa,50000,13
5,2021,Afghanistan,Asia,Southern Asia,36370,34
6,1996,Democratic Republic of Congo,Africa,Sub-Saharan Africa,33062,34
7,1992,Bosnia and Herzegovina,Europe,Southern Europe,26840,5
8,1991,Iraq,Asia,Western Asia,24492,34
9,2021,Yemen,Asia,Western Asia,23336,17


In [90]:
years_with_conflict = (data_conflict
    .groupby(['country', 'region', 'sub-region'])
    .agg(years_with_conflict=('year_conflict', 'sum'))
    .reset_index()
)

Unnamed: 0,country,region,sub-region,years_with_conflict
0,Afghanistan,Asia,Southern Asia,34
1,Albania,Europe,Southern Europe,1
2,Algeria,Africa,Northern Africa,31
3,Andorra,Europe,Southern Europe,0
4,Angola,Africa,Sub-Saharan Africa,23
...,...,...,...,...
192,Vietnam,Asia,South-eastern Asia,0
193,Western Sahara,Africa,Northern Africa,1
194,Yemen,Asia,Western Asia,17
195,Zambia,Africa,Sub-Saharan Africa,5


array(['Asia', 'Europe', 'Africa', 'Americas', 'Oceania'], dtype=object)

In [59]:
# barplots
# top subregiones con mas muertes por continente
deaths_by_subregion = (data_conflict
    .groupby('sub-region')
    .agg(sum_deaths=('deaths', 'sum'))
    .reset_index()
    .sort_values('sum_deaths', ascending=False)
    .reset_index(drop=True)
    # .head(20)
)

deaths_by_subregion

# fig = px.bar(deaths_by_subregion, 
#             x="sub-region", 
#             y="sum_deaths", 
#             color='sub-region',
#             labels={
#                     "sub-region": "sub-region",
#                     "sum_deaths": "Total deaths"
#                 },
#             title="Total deaths in conflicts by sub-region"
#         )
# fig.update_layout(showlegend=False)
# fig.show()

# fig = px.pie(deaths_by_subregion, 
#             values='sum_deaths',
#             names='sub-region',
#             hole=.5, 
#             title='Total deaths in conflicts by sub-region')
# fig.show()

Unnamed: 0,sub-region,sum_deaths
0,Sub-Saharan Africa,1644038
1,Western Asia,663798
2,Southern Asia,497934
3,Latin America and the Caribbean,164963
4,Eastern Europe,116579
5,Northern Africa,91715
6,Southern Europe,71357
7,South-eastern Asia,62285
8,Central Asia,10374
9,Eastern Asia,3102


In [40]:
data_conflict

Unnamed: 0,country,alpha-3,region,sub-region,year,deaths,year_conflict,death_cumsum
34,Afghanistan,AFG,Asia,Southern Asia,1989,5411,1,5411
35,Afghanistan,AFG,Asia,Southern Asia,1990,1514,1,6925
36,Afghanistan,AFG,Asia,Southern Asia,1991,3553,1,10478
37,Afghanistan,AFG,Asia,Southern Asia,1992,4366,1,14844
38,Afghanistan,AFG,Asia,Southern Asia,1993,4097,1,18941
...,...,...,...,...,...,...,...,...
6965,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2018,0,0,304
6966,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2019,0,0,304
6967,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2020,0,0,304
6968,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2021,0,0,304


In [45]:
# barplots
# top 10 paises con mas años de conflicto
top10_years_conflict_by_country = (data_conflict
    .groupby('sub-region')
    .agg(sum_conflict_years=('year_conflict', 'mean'))
    .reset_index()
    .sort_values('sum_conflict_years', ascending=False)
    .reset_index(drop=True)
    .head(50)
)

top10_years_conflict_by_country

# fig = px.bar(top10_years_conflict_by_country, 
#             x="country", 
#             y="sum_conflict_years", 
#             color='country',
#             labels={
#                     "country": "Country",
#                     "sum_conflict_years": "Years with conflict"
#                 },
#             title="Top 10 countries with more years in conflict"
#         )
# fig.update_layout(showlegend=False)
# fig.show()

Unnamed: 0,sub-region,sum_conflict_years
0,Southern Asia,0.588235
1,Northern Africa,0.436975
2,Western Asia,0.393791
3,South-eastern Asia,0.385027
4,Sub-Saharan Africa,0.363971
5,Central Asia,0.176471
6,Latin America and the Caribbean,0.16221
7,Eastern Europe,0.117647
8,Northern America,0.107843
9,Melanesia,0.095588


In [3]:
data_conflict

Unnamed: 0,country,alpha-3,region,sub-region,year,deaths,year_conflict
34,Afghanistan,AFG,Asia,Southern Asia,1989,5411,1
35,Afghanistan,AFG,Asia,Southern Asia,1990,1514,1
36,Afghanistan,AFG,Asia,Southern Asia,1991,3553,1
37,Afghanistan,AFG,Asia,Southern Asia,1992,4366,1
38,Afghanistan,AFG,Asia,Southern Asia,1993,4097,1
...,...,...,...,...,...,...,...
6965,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2018,0,0
6966,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2019,0,0
6967,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2020,0,0
6968,Zimbabwe,ZWE,Africa,Sub-Saharan Africa,2021,0,0


In [7]:
deaths_per_country = (data_conflict
                        .groupby(['country', 'alpha-3', 'region'])
                        .agg(sum_death = ('deaths', 'sum'))
                        .reset_index()
                        .sort_values('sum_death', ascending=False)
                    )
deaths_per_country

Unnamed: 0,country,alpha-3,region,sum_death
145,Rwanda,RWA,Africa,794913
171,Syria,SYR,Asia,402416
0,Afghanistan,AFG,Asia,315930
58,Ethiopia,ETH,Africa,180528
55,Eritrea,ERI,Africa,139749
...,...,...,...,...
133,Palau,PLW,Oceania,0
131,Oman,OMN,Asia,0
130,Norway,NOR,Europe,0
128,North Korea,PRK,Asia,0


In [14]:
countries_no_conflict = deaths_per_country[deaths_per_country['sum_death'] == 0]['alpha-3']
countries_no_conflict

11     BHS
163    KOR
48     DMA
158    SVK
168    SUR
      ... 
133    PLW
131    OMN
130    NOR
128    PRK
114    MCO
Name: alpha-3, Length: 67, dtype: object

In [16]:
data_conflict = data_conflict[~data_conflict['alpha-3'].isin(countries_no_conflict)]
data_conflict

Unnamed: 0,country,alpha-3,year,deaths,region,sub-region
34,Afghanistan,AFG,1989,5411,Asia,Southern Asia
35,Afghanistan,AFG,1990,1514,Asia,Southern Asia
36,Afghanistan,AFG,1991,3553,Asia,Southern Asia
37,Afghanistan,AFG,1992,4366,Asia,Southern Asia
38,Afghanistan,AFG,1993,4097,Asia,Southern Asia
...,...,...,...,...,...,...
6965,Zimbabwe,ZWE,2018,0,Africa,Sub-Saharan Africa
6966,Zimbabwe,ZWE,2019,0,Africa,Sub-Saharan Africa
6967,Zimbabwe,ZWE,2020,0,Africa,Sub-Saharan Africa
6968,Zimbabwe,ZWE,2021,0,Africa,Sub-Saharan Africa


In [17]:

fig = px.line(data_conflict, x="year", y="deaths", color='country')
fig.show()

In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6970 entries, 0 to 6969
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   country  6970 non-null   object
 1   abrv     6970 non-null   object
 2   year     6970 non-null   int64 
 3   deaths   6970 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 217.9+ KB


In [15]:
df['country'].value_counts()

country
Abkhazia           34
Paraguay           34
Nigeria            34
North Korea        34
North Macedonia    34
                   ..
Greenland          34
Grenada            34
Guatemala          34
Guinea             34
Zimbabwe           34
Name: count, Length: 205, dtype: int64

In [25]:
deaths_by_country = (df
                        .groupby('country')
                        .agg(sum_deaths = ('deaths', 'sum'))
                        .reset_index()
                        .sort_values('sum_deaths', ascending=False))

deaths_by_country.head(20)

Unnamed: 0,country,sum_deaths
149,Rwanda,794913
176,Syria,402416
1,Afghanistan,315930
61,Ethiopia,180528
58,Eritrea,139749
47,Democratic Republic of Congo,126691
84,Iraq,126621
115,Mexico,94150
189,Ukraine,90924
171,Sri Lanka,65337


In [26]:
df_sample = df[df['country'] == 'Rwanda']
df_sample

Unnamed: 0,country,abrv,year,deaths
5066,Rwanda,RWA,1989,0
5067,Rwanda,RWA,1990,2647
5068,Rwanda,RWA,1991,1085
5069,Rwanda,RWA,1992,647
5070,Rwanda,RWA,1993,977
5071,Rwanda,RWA,1994,772353
5072,Rwanda,RWA,1995,4202
5073,Rwanda,RWA,1996,1041
5074,Rwanda,RWA,1997,7288
5075,Rwanda,RWA,1998,1913
