In [28]:
import pandas
from datetime import datetime
import numpy

def _process_count(x):
    for i in range(len(x)+1, 0,-1):
        try:
            return int(x[0:i])
        except ValueError:
            pass
    return numpy.nan

def load_month(m, year=2017):
    """Download one month's incident table from Wikipedia.

    Reads the first HTML table on the page
    ``List_of_terrorist_incidents_in_<m>_<year>``, treats the table's first
    row as the column header, and replaces the textual ``Dead`` / ``Injured``
    columns with numeric ``dead`` / ``injured`` columns produced by
    ``_process_count``.

    Args:
        m: Month name as it appears in the page title (e.g. ``'January'``).
        year: Year used in the page title.  Defaults to 2017.

    Returns:
        A new DataFrame with one row per table row after the header, without
        the original ``Dead`` and ``Injured`` columns.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_terrorist_incidents_in_{}_{}'.format(m, year)
    raw = pandas.read_html(url)[0]

    # Row 0 holds the column names; everything below it is data.  Working on
    # an explicit copy keeps the column assignments below from touching `raw`.
    incidents = raw.iloc[1:].copy()
    incidents.columns = raw.iloc[0].values

    incidents['dead'] = incidents['Dead'].apply(_process_count)
    incidents['injured'] = incidents['Injured'].apply(_process_count)

    # drop() already returns a new frame, so no further copy is needed.
    return incidents.drop(['Dead', 'Injured'], axis='columns')

# The month name is part of the (English) Wikipedia page title, so the names
# are spelled out here rather than derived from strftime('%B'), whose output
# follows the process locale.
MONTHS = (
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
)

# One frame per month, stacked into a single table.  Each monthly frame keeps
# its own row labels, so index values repeat across months.
monthly_frames = [load_month(month) for month in MONTHS]
data = pandas.concat(monthly_frames)
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Date,Type,Location,Details,Perpetrator,Part of,dead,injured
1,1,Shooting,"Istanbul, Turkey",2017 Istanbul nightclub shooting: One gunman k...,Islamic State,Turkey–ISIL conflict,39.0,70.0
2,1,Bombing,"Herat, Afghanistan",At least six people were hurt in an explosion ...,Taliban,War in Afghanistan,0.0,6.0
3,1,Shooting,"Bujumbura, Burundi",The Burundian environmental minister Emmanuel ...,Lone wolf,Burundian unrest,1.0,0.0
4,1,Bombing,"Quetta, Pakistan",Four Frontier Corps security personnel and two...,Lashkar-e-Jhangvi (suspected),Sectarianism in Pakistan,0.0,6.0
5,1,Assassination,"Ma'an, Jordan",The Islamic State claimed responsibility for a...,Islamic State,Spillover of the Syrian Civil War,1.0,0.0
6,2,Suicide car bombings,"Baghdad, Iraq",January 2017 Baghdad bombings: A series of car...,Islamic State,Iraqi Civil War,56.0,122.0
7,2,Suicide car bombing,"Mogadishu, Somalia",January 2017 Mogadishu bombings: A suicide car...,Al-Shabaab,Somali Civil War,7.0,17.0
8,2,Suicide bombings,"Samarra, Iraq",Gunmen wearing suicide vests attacked two poli...,Islamic State,Iraqi Civil War,7.0,
9,3,Assassination,"Cairo, Egypt",A Christian businessman was murdered by a Sala...,Lone wolf,,1.0,0.0
10,3,Shooting,"Abyan, Yemen",Three Yemeni soldiers were killed and 10 other...,Al-Qaeda in the Arabian Peninsula,Al-Qaeda insurgency in Yemen,3.0,10.0


In [67]:
def _confirmed(x):
    if pandas.isnull(x):
        return numpy.nan
    if x.endswith('(suspected)'):
        return False
    else:
        return True

perp_mapping = {
    'Islamic State (claimed)': 'Islamic State',
    'Al Shabaab' : 'Al-Shabaab'
}
    
def _perp_cleaned(x):
    if pandas.isnull(x):
        return None
    else:
        if 'Islamic State' in x:
            return 'Islamic State'
        if ('Al-Qaeda' in x) or ('Al Qaeda' in x):
            return 'Al-Qaeda'
        result = x.replace('(suspected)', '').strip()
        return perp_mapping.get(result, result)
    
# Derive both helper columns from the raw 'Perpetrator' text:
#   confirmed           - False when the attribution is marked '(suspected)'
#   perpetrator_cleaned - canonical group name used for grouping later on
raw_perpetrators = data['Perpetrator']
data['confirmed'] = raw_perpetrators.apply(_confirmed)
data['perpetrator_cleaned'] = raw_perpetrators.apply(_perp_cleaned)
data

Unnamed: 0,Date,Type,Location,Details,Perpetrator,Part of,dead,injured,confirmed,perpetrator_cleaned
1,1,Shooting,"Istanbul, Turkey",2017 Istanbul nightclub shooting: One gunman k...,Islamic State,Turkey–ISIL conflict,39.0,70.0,True,Islamic State
2,1,Bombing,"Herat, Afghanistan",At least six people were hurt in an explosion ...,Taliban,War in Afghanistan,0.0,6.0,True,Taliban
3,1,Shooting,"Bujumbura, Burundi",The Burundian environmental minister Emmanuel ...,Lone wolf,Burundian unrest,1.0,0.0,True,Lone wolf
4,1,Bombing,"Quetta, Pakistan",Four Frontier Corps security personnel and two...,Lashkar-e-Jhangvi (suspected),Sectarianism in Pakistan,0.0,6.0,False,Lashkar-e-Jhangvi
5,1,Assassination,"Ma'an, Jordan",The Islamic State claimed responsibility for a...,Islamic State,Spillover of the Syrian Civil War,1.0,0.0,True,Islamic State
6,2,Suicide car bombings,"Baghdad, Iraq",January 2017 Baghdad bombings: A series of car...,Islamic State,Iraqi Civil War,56.0,122.0,True,Islamic State
7,2,Suicide car bombing,"Mogadishu, Somalia",January 2017 Mogadishu bombings: A suicide car...,Al-Shabaab,Somali Civil War,7.0,17.0,True,Al-Shabaab
8,2,Suicide bombings,"Samarra, Iraq",Gunmen wearing suicide vests attacked two poli...,Islamic State,Iraqi Civil War,7.0,,True,Islamic State
9,3,Assassination,"Cairo, Egypt",A Christian businessman was murdered by a Sala...,Lone wolf,,1.0,0.0,True,Lone wolf
10,3,Shooting,"Abyan, Yemen",Three Yemeni soldiers were killed and 10 other...,Al-Qaeda in the Arabian Peninsula,Al-Qaeda insurgency in Yemen,3.0,10.0,True,Al-Qaeda


In [99]:
# Total deaths per cleaned perpetrator name, largest first.
count = (
    data.groupby(['perpetrator_cleaned'])['dead']
    .sum()
    .reset_index()
    .sort_values('dead', ascending=False)
)
print(len(count))

# NOTE(review): the denominator is summed over every row of `data`, whereas
# the cumulative fraction computed further down divides by count['dead'].sum().
# The two totals differ if any rows are left out of the grouping (e.g. rows
# with a null perpetrator_cleaned) - confirm which one is intended.
count['fraction'] = count['dead'] / data['dead'].sum()

has_deaths = count['dead'] > 0
count[has_deaths]

230


Unnamed: 0,perpetrator_cleaned,dead,fraction
82,Islamic State,3743.0,0.376749
13,Al-Shabaab,1221.0,0.122899
205,Taliban,1176.0,0.118369
35,Boko Haram,815.0,0.082033
26,Anti-balaka,288.0,0.028988
12,Al-Qaeda,246.0,0.024761
94,Jamaat-ul-Ahrar,179.0,0.018017
204,Tahrir al-Sham,173.0,0.017413
221,Unknown,164.0,0.016507
69,Haqqani network,151.0,0.015199


In [106]:
import wikipedia

def get_summary(x):
    """Look up the Wikipedia summary for a perpetrator name.

    Args:
        x: Perpetrator name to look up.

    Returns:
        The ``summary`` of ``wikipedia.page(x)``, or ``None`` when ``x`` is
        ``'Unknown'`` or the lookup raises any exception.
    """
    summary = None
    if x != 'Unknown':
        try:
            summary = wikipedia.page(x).summary
        except Exception:
            # Best effort: any failed lookup simply leaves the summary empty.
            summary = None
    return summary

# One Wikipedia lookup per perpetrator row; failed lookups are stored as None.
perpetrator_names = count['perpetrator_cleaned']
count['perp_summary'] = perpetrator_names.apply(get_summary)
count



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


Unnamed: 0,perpetrator_cleaned,dead,fraction,perp_summary
82,Islamic State,3743.0,0.376749,"An Islamic state (Arabic: دولة إسلامية‎, dawla..."
13,Al-Shabaab,1221.0,0.122899,Harakat al-Shabaab al-Mujahideen (HSM; Arabic:...
205,Taliban,1176.0,0.118369,"The Taliban (Pashto: طالبان‎, ṭālibān ""student..."
35,Boko Haram,815.0,0.082033,The Islamic State in West Africa or the Islami...
26,Anti-balaka,288.0,0.028988,The Anti-balaka is an alliance of militia grou...
12,Al-Qaeda,246.0,0.024761,"Al-Qaeda (; Arabic: القاعدة‎ al-Qāʿidah, IPA: ..."
94,Jamaat-ul-Ahrar,179.0,0.018017,"Jamaat-ul-Ahrar (""Assembly of the Free,"" abbre..."
204,Tahrir al-Sham,173.0,0.017413,Hay'at Tahrir al-Sham (Arabic: هيئة تحرير الشا...
221,Unknown,164.0,0.016507,
69,Haqqani network,151.0,0.015199,The Haqqani network is an Afghan guerrilla ins...


In [107]:
# Ordered keyword rules for extract_cause: (needles, ignore_case, cause).
# A rule matches when any needle occurs in the summary; the FIRST matching
# rule wins, so the order of this table is significant.
_CAUSE_RULES = (
    (('Islamic',), False, 'Islam'),
    (('Taliban',), False, 'Islam'),
    (('jihadi',), True, 'Islam'),
    (('Communist',), False, 'Communism'),
    # NOTE(review): the label below is misspelled ("Africal"); it is kept
    # unchanged because it is a grouping key other cells may rely on.
    (('Central African Republic',), False, 'Central Africal Republic'),
    (('Congo',), False, 'Congo'),
    (('far-right', 'far right'), True, 'Far-right'),
    (('far-left', 'far left'), True, 'Far-left'),
    (('anarchist',), True, 'Anarchy'),
    (('myanmar',), True, 'Myanmar'),
    (('Nasserite',), False, 'Communism'),
    (('Marxist', 'Marxism'), False, 'Communism'),
    # CPI is abbreviation for Communist Party of India, wikipedia gives
    # Consumer Price Index
    (('Consumer Price Index',), False, 'Communism'),
)


def extract_cause(x):
    """Classify a Wikipedia summary into a coarse cause label.

    The summary is scanned against ``_CAUSE_RULES`` in order and the label of
    the first matching rule is returned.

    Args:
        x: Summary text, or a missing value (``None`` / NaN).

    Returns:
        The cause label as a string, or ``None`` when the summary is missing
        or no rule matches.
    """
    # Treat NaN like None: a missing summary has no cause.
    if pandas.isnull(x):
        return None

    lowered = x.lower()  # computed once for all case-insensitive rules
    for needles, ignore_case, cause in _CAUSE_RULES:
        haystack = lowered if ignore_case else x
        if any(needle in haystack for needle in needles):
            return cause
    return None

# Label every perpetrator with a cause, then list the ones left unlabelled.
count['cause'] = count['perp_summary'].apply(extract_cause)
cause_missing = pandas.isnull(count['cause'])
print(cause_missing.sum())
count[cause_missing]

121


Unnamed: 0,perpetrator_cleaned,dead,fraction,perp_summary,cause
221,Unknown,164.0,0.016507,,
136,Misrata Militants,141.0,0.014192,,
64,Fulani herdsmen,52.0,0.005234,Fulani herdsmen or Fulani pastoralists are nom...,
138,Murle Militias,45.0,0.004529,,
30,Balochistan Liberation Army,27.0,0.002718,The Balochistan Liberation Army (Urdu: بلوچستا...,
113,Lashkar-e-Jhangvi & Tehrik-i-Taliban Pakistan,25.0,0.002516,,
118,Lone wolf,25.0,0.002516,,
201,Sudan People's Liberation Army,24.0,0.002416,The Sudan People's Liberation Army (SPLA) is t...,
189,Salman Abedi,22.0,0.002214,The Manchester Arena bombing was a suicide bom...,
153,Ninja,18.0,0.001812,A ninja (忍者) or shinobi (忍び) was a covert agen...,


In [108]:
# Deaths summed per cause label, largest first.
by_cause = (
    count.groupby(['cause'])['dead']
    .sum()
    .reset_index()
    .sort_values('dead', ascending=False)
)
print(by_cause['dead'].sum())
by_cause

9109.0


Unnamed: 0,cause,dead
6,Islam,8170.0
1,Central Africal Republic,432.0
2,Communism,310.0
7,Myanmar,105.0
3,Congo,85.0
0,Anarchy,3.0
5,Far-right,3.0
4,Far-left,1.0


In [109]:
# Share of the per-perpetrator death total that received a cause label.
labelled_dead = by_cause['dead'].sum()
labelled_dead / count['dead'].sum()

0.9170441961139636

In [113]:
# Share of the per-perpetrator death total labelled 'Communism'.  The figure
# is read from by_cause rather than typed in by hand, so it stays correct
# when the underlying data changes.
communism_dead = by_cause.loc[by_cause['cause'] == 'Communism', 'dead'].sum()
communism_dead / count['dead'].sum()

0.031209100976542836

In [116]:
# Running totals in the current row order of `count` (sorted by deaths,
# descending, when it was built), expressed as absolute numbers and as a
# fraction of all deaths in `count`.
grouped_dead_total = count['dead'].sum()
count['dead_total'] = count['dead'].cumsum()
count['cum_frac'] = count['dead_total'] / grouped_dead_total
count

Unnamed: 0,perpetrator_cleaned,dead,fraction,perp_summary,cause,dead_total,cum_frac
82,Islamic State,3743.0,0.376749,"An Islamic state (Arabic: دولة إسلامية‎, dawla...",Islam,3743.0,0.376825
13,Al-Shabaab,1221.0,0.122899,Harakat al-Shabaab al-Mujahideen (HSM; Arabic:...,Islam,4964.0,0.499748
205,Taliban,1176.0,0.118369,"The Taliban (Pashto: طالبان‎, ṭālibān ""student...",Islam,6140.0,0.618142
35,Boko Haram,815.0,0.082033,The Islamic State in West Africa or the Islami...,Islam,6955.0,0.700191
26,Anti-balaka,288.0,0.028988,The Anti-balaka is an alliance of militia grou...,Central Africal Republic,7243.0,0.729186
12,Al-Qaeda,246.0,0.024761,"Al-Qaeda (; Arabic: القاعدة‎ al-Qāʿidah, IPA: ...",Islam,7489.0,0.753951
94,Jamaat-ul-Ahrar,179.0,0.018017,"Jamaat-ul-Ahrar (""Assembly of the Free,"" abbre...",Islam,7668.0,0.771972
204,Tahrir al-Sham,173.0,0.017413,Hay'at Tahrir al-Sham (Arabic: هيئة تحرير الشا...,Islam,7841.0,0.789389
221,Unknown,164.0,0.016507,,,8005.0,0.805900
69,Haqqani network,151.0,0.015199,The Haqqani network is an Afghan guerrilla ins...,Islam,8156.0,0.821101
