In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under /kaggle/input.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the CPJ journalist-deaths CSV and discard the columns named
# 'Unnamed: 17' through 'Unnamed: 25', then preview the first rows.
unnamed_columns = [
    'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19',
    'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22',
    'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25',
]
data = (
    pd.read_csv('/kaggle/input/journalist-deaths-since-1992/cpj-database.csv')
    .drop(columns=unnamed_columns)
)
data.head()

In [None]:
# Display every record whose Date field is exactly "11/23/09".
data.query(expr='Date == "11/23/09"')

In [None]:
# Display every record whose Date field is exactly "1/7/15".
data.query(expr='Date == "1/7/15"')

In [None]:
# Display every record whose Date field is exactly "10/12/06".
data.query(expr='Date == "10/12/06"')

In [None]:
# How many records share each Date value, most frequent first.
data['Date'].value_counts()

In [None]:
# Bar chart of the five most frequent values in the 'Country Killed' column.
top5 = data['Country Killed'].value_counts().head(5)
sns.set(style='whitegrid', context='talk', palette='mako')

fig = plt.figure(figsize=(14,6))
fig.set_facecolor("#303934")  # dark figure background, so all text below is drawn in white
# The title is positioned by hand with fig.text rather than plt.title.
fig.text(x=0.14, y=0.95, s="Top 5 countries with the highest journalist deaths", fontsize=25, fontweight="bold", color='#FFFFFF')

sns.barplot(x=top5.index, y=top5)

plt.xlabel('Countries where the journalist died', fontsize='large', fontweight='bold', horizontalalignment='center', color='#FFFFFF')
plt.ylabel("Number of journalists killed", fontsize='large', fontweight='bold', horizontalalignment='center', color='#FFFFFF')

# White ticks and tick labels; the x labels (country names) are rendered larger than the y labels.
plt.tick_params(axis="y", colors='white', labelsize=15)
plt.tick_params(axis="x", colors='white', labelsize=20)

plt.show()

In [None]:
# Bar chart of the three most frequent Date values, with hand-placed captions
# naming an event and a country next to each bar.
top3 = data['Date'].value_counts().head(3)
sns.set(style='whitegrid', context='talk', palette='mako')

fig = plt.figure(figsize=(14,6))
fig.set_facecolor("#303934")
fig.text(x=0.14, y=0.95, s="Top 3 dates with the highest journalist deaths", fontsize="25", fontweight="bold", color='#FFFFFF')

# (x, y, caption, extra text options). The positions are hard-coded, so they
# assume a particular bar order -- NOTE(review): confirm against the rendered chart.
event_notes = [
    (0.7, 0.2, "Charlie Hebdo", {}),
    (0.7, 0.25, "France", {"fontweight": "bold"}),
    (0.16, 0.7, "Maguindanao Massacre", {}),
    (0.16, 0.75, "Philippines", {"fontweight": "bold"}),
    (0.45, 0.3, "Al-Shaabiya TV", {}),
    (0.45, 0.35, "Iraq", {"fontweight": "bold"}),
]
for note_x, note_y, note_text, note_options in event_notes:
    fig.text(x=note_x, y=note_y, s=note_text, fontsize="16", color='#FFFFFF', **note_options)

sns.barplot(x=top3.index, y=top3)

# Both axis labels share the same bold white styling.
axis_label_style = {
    "fontsize": 'large',
    "fontweight": 'bold',
    "horizontalalignment": 'center',
    "color": '#FFFFFF',
}
plt.xlabel('Date of deaths', **axis_label_style)
plt.ylabel("Number of journalists killed", **axis_label_style)

for tick_axis, tick_size in (("y", 15), ("x", 20)):
    plt.tick_params(axis=tick_axis, colors='white', labelsize=tick_size)

plt.show()

In [None]:
# Keep the five columns used by the pie charts and discard rows whose 'Sex' or
# 'Local/Foreign' cell holds one of the values listed below.
# NOTE(review): these look like a missing-value marker and repeated header
# rows embedded in the CSV -- confirm against the raw file.
unwanted_values = ['na','Sex','Local/Foreign']

cleandata = data.loc[:, ['Date','Sex','Medium','Coverage','Local/Foreign']]
rows_to_discard = (
    cleandata['Sex'].isin(unwanted_values)
    | cleandata['Local/Foreign'].isin(unwanted_values)
)
cleandata = cleandata[~rows_to_discard]

In [None]:
# Frequency tables for the two pie charts; Localpie is plotted in the next cell.
sexpie = cleandata['Sex'].value_counts()
Localpie = cleandata['Local/Foreign'].value_counts()

# Two-slice styling: the second slice is pulled out of the pie by 0.2.
colors = ['#3B6FA3', '#67C2BA']
explode = [0, 0.2]

# NOTE(review): the labels are hard-coded, so this assumes value_counts()
# yields exactly two categories with the male count first -- verify.
plt.pie(
    sexpie,
    explode=explode,
    labels=['Male','Female'],
    colors=colors,
    autopct='%.0f%%',
)
plt.title("Journalist deaths divided by gender")
plt.show()

In [None]:
# Pie chart of the 'Local/Foreign' counts (Localpie is built in the previous cell).
colors = ['#3B6FA3', '#67C2BA']
explode = [0, 0.2]

# NOTE(review): the labels are hard-coded, so this assumes Localpie holds
# exactly two categories with the local count first -- verify.
plt.pie(
    Localpie,
    explode=explode,
    labels=['Local','Foreign'],
    colors=colors,
    autopct='%.0f%%',
)
plt.title("Journalist deaths divided by locals and foreigners")
plt.show()

In [None]:
# Date cleaning
#
# Builds `datadate`: the columns needed for the per-year charts plus an integer
# 'Year' column derived from the free-text 'Date' column.

pd.options.display.max_colwidth = 100


def _expand_two_digit_year(year):
    """Return `year` (a string) with a century prefix added.

    Strings starting with '9' get '19' prepended, strings starting with '0' or
    '1' get '20' prepended, and anything else is returned unchanged.
    """
    if year.startswith('9'):
        return '19' + year
    if year.startswith(('0', '1')):
        return '20' + year
    return year


# Take an explicit copy so the column assignments below write to an independent
# frame instead of a slice of `data` (avoids SettingWithCopyWarning and keeps
# working under pandas copy-on-write).
datadate = data.loc[
    ~data['Date'].isin(['Unknown']),
    ['Date','Type of Death','Job','Medium','Country Killed'],
].copy()

# Split 'Date' on '/'; the assignment requires the split to yield four columns.
datadate[['Month','Day','Year','algo']] = datadate['Date'].str.split('/', expand=True)

# Row 1874 is patched by hand.
datadate.at[1874, 'Year'] = '2007'
datadate.at[1874, 'Month'] = '2'

# Remove unusable rows in the same order as before: rows marked
# 'Date unknown', hand-picked row 1875, rows whose Date cell is the literal
# 'Date', and hand-picked row 1854.
datadate = datadate[~datadate['Month'].isin(['Date unknown'])]
datadate = datadate.drop(columns=['Day','algo'])
datadate = datadate.drop(index=[1875])
datadate = datadate[~datadate['Date'].isin(['Date'])]
datadate = datadate.drop(index=[1854]).copy()

# For dates without '/' separators the whole text lands in 'Month' and 'Year'
# is missing; the greedy '.*' makes the group capture the last two consecutive
# digits of that text. The helper column 'year' is kept on the frame.
datadate['year'] = datadate['Month'].str.extract(r'.*(\d{2})', expand=False)

# Fill missing years from the extracted digits, add the century prefix and
# convert to integers, all column-wise instead of via chained item assignment.
datadate['Year'] = (
    datadate['Year']
    .fillna(datadate['year'])
    .astype(str)
    .map(_expand_two_digit_year)
    .astype('int32')
)

In [None]:
# Bar chart of the five most frequent 'Coverage' values, captioned by hand.
topC = data['Coverage'].value_counts().head(5)

fig2 = plt.figure(figsize=(14,6))
fig2.set_facecolor("#303934")

# (x, y, caption, font size); the first entry is the chart title, the rest are
# hard-positioned captions.
# NOTE(review): caption positions assume a particular bar order -- confirm
# against the rendered chart.
coverage_captions = [
    (0.14, 0.95, "Most common coverages", "25"),
    (0.17, 0.77, "War", "30"),
    (0.31, 0.5, "Politics", "22"),
    (0.46, 0.3, "War &", "20"),
    (0.46, 0.25, "Politics", "20"),
    (0.64, 0.22, "Crime", "19"),
    (0.77, 0.20, "Corruption", "17"),
]
for caption_x, caption_y, caption_text, caption_size in coverage_captions:
    fig2.text(x=caption_x, y=caption_y, s=caption_text, fontsize=caption_size, fontweight="bold", color='#FFFFFF')

sns.barplot(x=topC.index, y=topC)

# The x label and x ticks use the same colour as the figure background
# ('#303934'); the hand-placed captions name the bars instead.
bold_centered = {"fontsize": 'large', "fontweight": 'bold', "horizontalalignment": 'center'}
plt.ylabel("Number of journalists killed", color='#FFFFFF', **bold_centered)
plt.xlabel('Coverage', color='#303934', **bold_centered)

for tick_axis, tick_color in (("y", 'white'), ("x", '#303934')):
    plt.tick_params(axis=tick_axis, colors=tick_color, labelsize=15)



In [None]:
# Line chart of how many records fall in each cleaned Year.
countyear = datadate['Year'].value_counts()

fig3 = plt.figure(figsize=(14,6))
fig3.set_facecolor("#303934")
fig3.text(x=0.14, y=0.95, s="Journalist Deaths per Year", fontsize="25", fontweight="bold", color='#FFFFFF')

# `colors` is the two-colour list defined in the pie-chart cells.
sns.lineplot(data=countyear, x=countyear.index, y=countyear, markers=True, color=colors[0])

# Tick every second year from 1992 up to (not including) 2018.
plt.xticks(np.arange(1992, 2018, 2))

white_bold_label = {
    "fontsize": 'large',
    "fontweight": 'bold',
    "horizontalalignment": 'center',
    "color": '#FFFFFF',
}
plt.ylabel("Number of journalists killed", **white_bold_label)
plt.xlabel('Year', **white_bold_label)

plt.tick_params(axis="y", colors='white', labelsize=15)
plt.tick_params(axis="x", colors='white', labelsize=15, length=0.5)

In [None]:
# Per-country slices of the cleaned frame, selected on 'Country Killed'.
country_killed = datadate['Country Killed']
BrYear = datadate[country_killed == 'Brazil']
IraqYear = datadate[country_killed == 'Iraq']
MexYear = datadate[country_killed == 'Mexico']

In [None]:

# Yearly record counts per country. Mexcount is computed but not drawn below.
Brcount = BrYear['Year'].value_counts()
Iraqcount = IraqYear['Year'].value_counts()
Mexcount = MexYear['Year'].value_counts()

fig4 = plt.figure(figsize=(14,6))
fig4.set_facecolor("#303934")
fig4.text(x=0.14, y=0.95, s="Journalist Deaths per Year: Brazil and Iraq", fontsize="25", fontweight="bold", color='#FFFFFF')

# Global series first (second entry of the `colors` list from the pie-chart
# cells), then Iraq and Brazil with the active seaborn palette.
sns.lineplot(data=countyear, x=countyear.index, y=countyear, label='Global', color=colors[1])
sns.lineplot(x=Iraqcount.index, y=Iraqcount, label='Iraq')
sns.lineplot(x=Brcount.index, y=Brcount, markers=True, label='Brazil')

# Tick every second year from 1992 up to (not including) 2018.
plt.xticks(np.arange(1992, 2018, 2))

white_bold_label = {
    "fontsize": 'large',
    "fontweight": 'bold',
    "horizontalalignment": 'center',
    "color": '#FFFFFF',
}
plt.ylabel("Number of journalists killed", **white_bold_label)
plt.xlabel('Year', **white_bold_label)

plt.tick_params(axis="y", colors='white', labelsize=15)
plt.tick_params(axis="x", colors='white', labelsize=15, length=0.5)