In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Load the dataset CSV into the `data` DataFrame used by every later cell.
data = pd.read_csv('/kaggle/input/global-terrorism-database/gtd.csv')

In [4]:
# Preview the first rows of the frame (head() shows five rows by default).
data.head()

In [5]:
# Summary statistics for `data`.
data.describe()

In [6]:
import plotly.express as px
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [7]:
# Global look for every matplotlib/seaborn figure in this notebook.
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 14,
    'figure.figsize': (10, 6),
    'figure.facecolor': '#00000000',  # 8-digit hex: alpha 00 -> fully transparent
})

In [8]:
# Histogram of the Year column with a box plot in the margin.
# nbins=47 is presumably one bin per year covered by the data -- TODO confirm.
fig = px.histogram(data, x='Year', nbins=47, marginal='box', title='Distribution of Year')
fig.update_layout(bargap=0.1)  # leave a small gap between neighbouring bars
fig.show()

In [11]:
# Histogram of records per Country.
# The title previously read 'Distribution of Success' although the plotted column is
# Country; it now names the column that is actually shown.
# NOTE(review): if Success was the intended column, change `x` instead of the title.
fig = px.histogram(data,
                   x='Country',
                   marginal='box',
                   color_discrete_sequence=['red'],
                   title='Distribution of Country')
fig.update_layout(bargap=0.1)  # leave a small gap between neighbouring bars
fig.show()

In [12]:
# Histogram of Total_Casualities, coloured by Target, with a box plot in the margin.
fig = px.histogram(
    data,
    x='Total_Casualities',
    color='Target',
    color_discrete_sequence=['green', 'grey'],
    marginal='box',
    title='Target de',
)
fig.update_layout(bargap=0.1)  # leave a small gap between neighbouring bars
fig.show()

In [13]:
# Records per Country, with each bar split by AttackType.
px.histogram(
    data_frame=data,
    x='Country',
    color='AttackType',
    title='Where',
)

In [15]:
# Killed against Year, one colour per Country; Weapon_type is added to the hover box.
fig = px.scatter(
    data_frame=data,
    x='Year',
    y='Killed',
    color='Country',
    hover_data=['Weapon_type'],
    opacity=0.8,
    title='year vs. lost',
)
fig.update_traces(marker={'size': 5})
fig.show()

In [16]:
# List the column labels available in `data`.
data.columns

In [20]:
# Group against Year, coloured by Sub_Target_Type; Target is added to the hover box.
fig = px.scatter(
    data_frame=data,
    x='Year',
    y='Group',
    color='Sub_Target_Type',
    hover_data=['Target'],
    opacity=0.8,
    title='Which group what',
)
fig.update_traces(marker={'size': 5})
fig.show()

In [21]:
import plotly.graph_objects as go

In [25]:
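# NOTE: disabled draft of a per-country violin plot. Before re-enabling it, create a
# new figure first (`fig = go.Figure()`) and loop over `data['Country'].unique()`:
# as written the loop runs once per row and adds its traces to the previous `fig`.
# for Country in data['Country']:
#     fig.add_trace(go.Violin(x=data['Country'][data['Country'] == Country],
#                             y=data['Total_Casualities'][data['Country'] == Country],
#                             name=Country,
#                             box_visible=True,
#                             meanline_visible=True))

# fig.show()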

In [26]:
# Correlation between Success and Killed (Series.corr defaults to Pearson).
data['Success'].corr(data['Killed'])

In [27]:
# Correlation between Total_Casualities and Wounded.
data['Total_Casualities'].corr(data['Wounded'])

In [28]:
# Correlation between Total_Casualities and Killed.
data['Total_Casualities'].corr(data['Killed'])

In [29]:
# Correlation between Killed and Wounded.
data['Killed'].corr(data['Wounded'])

In [31]:
# Pairwise correlation matrix of the numeric columns.
# The frame also holds text columns (Country, Group, ...); on pandas >= 2.0 a bare
# data.corr() raises on those, so restrict to numeric dtypes first.
data.select_dtypes(include='number').corr()

In [32]:
# Annotated heatmap of the correlation matrix.
# Only numeric columns are correlated: on pandas >= 2.0 corr() raises if the frame
# still contains text columns such as Country or Group.
sns.heatmap(data.select_dtypes(include='number').corr(), cmap='Reds', annot=True)
plt.title('Correlation Matrix');

In [40]:
# Keep only the rows whose Country is Bangladesh.
bd = data.loc[data['Country'] == 'Bangladesh']

In [43]:
# Bangladesh only: Killed per Year.
sns.scatterplot(x='Year', y='Killed', data=bd, s=15, alpha=0.7)
plt.title('Bangladesh data');

In [44]:
# Bangladesh only: Total_Casualities per Year.
sns.scatterplot(x='Year', y='Total_Casualities', data=bd, s=15, alpha=0.7)
plt.title('Bangladesh data');

In [45]:
# Bangladesh only: Wounded per Year.
sns.scatterplot(x='Year', y='Wounded', data=bd, s=15, alpha=0.7)
plt.title('Bangladesh data');

In [54]:
def estimate_Killed(Killed, w, b):
    """Evaluate the straight line ``w * Killed + b``.

    Killed may be a scalar or anything that supports ``*`` and ``+`` with
    ``w`` and ``b`` (for example a pandas Series); the result has the same form.
    """
    scaled = w * Killed
    return scaled + b

In [55]:
# Hand-picked slope (w) and intercept (b) for the manual line estimate.
w, b = 50, 100

In [56]:
dead = bd.Killed
estimate_Killed = estimate_Killed(dead, w, b)

In [58]:
plt.plot(dead, estimate_Killed, 'r-o');
plt.xlabel('Killed');
plt.ylabel('Estimated dead');

In [60]:
target = bd.Wounded

plt.plot(dead, estimate_Killed, 'r', alpha=0.9);
plt.scatter(dead, target, s=8,alpha=0.8);
plt.xlabel('dead');
plt.ylabel('Injured')
plt.legend(['Estimate', 'Actual']);

In [61]:
def try_parameters(w, b):
    """Plot the line ``w * Wounded + b`` over the Bangladesh Wounded-vs-Killed scatter.

    Parameters
    ----------
    w : number
        Slope of the candidate line.
    b : number
        Intercept of the candidate line.

    Reads the module-level ``bd`` frame, draws on the current matplotlib axes
    and returns None.
    """
    wounded = bd.Wounded
    killed = bd.Killed

    # Evaluate the line here so the plot always reflects the w and b passed in.
    estimated_killed = w * wounded + b

    plt.plot(wounded, estimated_killed, 'r', alpha=0.9)
    plt.scatter(wounded, killed, s=8, alpha=0.8)
    # Axis labels name the columns that are actually plotted.
    plt.xlabel('Wounded')
    plt.ylabel('Killed')
    plt.legend(['Estimate', 'Actual'])

In [63]:
def rmse(targets, predictions):
    """Root-mean-square error between ``targets`` and ``predictions``.

    Both arguments must support element-wise subtraction with each other
    (scalars, NumPy arrays, pandas Series).
    """
    squared_errors = np.square(targets - predictions)
    mean_squared_error = np.mean(squared_errors)
    return np.sqrt(mean_squared_error)

In [64]:
# Reset the hand-picked slope (w) and intercept (b) to the same values as before.
w, b = 50, 100

In [68]:
from sklearn.linear_model import LinearRegression

In [69]:
# Linear regression estimator with default settings; fitted in a later cell.
model = LinearRegression()

In [70]:
# Print the documentation of the estimator's fit method (interactive reference only).
help(model.fit)

In [71]:
# Single-feature input frame (the double brackets keep it two-dimensional) and the
# target column, both taken from the Bangladesh subset.
inputs = bd[['Total_Casualities']]
targets = bd.Wounded
print('inputs.shape :', inputs.shape)
# Label typo fixed: this previously printed 'targes.shape'.
print('targets.shape :', targets.shape)

In [72]:
# Fit the regression of `targets` on `inputs`.
model.fit(inputs, targets)

In [73]:
# Predictions for three example Total_Casualities values.
# The model was fitted on a DataFrame, so the examples are passed as a DataFrame with
# the same column name; a bare array makes scikit-learn warn about missing feature names.
model.predict(pd.DataFrame({'Total_Casualities': [23, 37, 61]}))

In [74]:
# Model output for the same rows the model was fitted on.
predictions = model.predict(inputs)

In [75]:
# Display the predicted values.
predictions

In [76]:
# Root-mean-square error of the fitted model on the rows it was fitted on.
rmse(targets, predictions)

In [77]:
# w: fitted slope(s), one entry per input column
model.coef_

In [78]:
# b: fitted intercept
model.intercept_

In [80]:
# Keep the RMSE of the fitted model in `loss` and print it.
loss = rmse(targets, predictions)
print('Loss:', loss)

In [82]:
# 3-D view of the Bangladesh rows: Year, Killed and Total_Casualities.
fig = px.scatter_3d(
    data_frame=bd,
    x='Year',
    y='Killed',
    z='Total_Casualities',
)
fig.update_traces(marker={'size': 3, 'opacity': 0.5})
fig.show()

In [83]:
# Fitted parameters shown together as (slopes, intercept).
model.coef_, model.intercept_

In [84]:
# Correlation between Wounded and Killed within the Bangladesh subset.
bd['Wounded'].corr(bd['Killed'])

In [87]:
# 3-D view of the Bangladesh rows: Killed, Wounded and Total_Casualities.
fig = px.scatter_3d(
    data_frame=bd,
    x='Killed',
    y='Wounded',
    z='Total_Casualities',
    title="Killed vs. Wounded",
)
fig.update_traces(marker={'size': 4, 'opacity': 0.7})
fig.show()

In [89]:
# Bar chart over the full dataset with Year on the x axis and Country on the y axis.
# seaborn's barplot aggregates the value axis per category (mean by default), so,
# assuming Year is numeric and Country is text, each bar is the mean Year for a country.
# NOTE(review): that is probably not the intended chart (a per-country count or
# casualty total seems more likely) -- confirm before relying on it.
sns.barplot(data=data, x='Year', y='Country');