# Group Classification - Decision Trees

Importing the libraries 

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import preprocessing

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report , confusion_matrix
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier

Importing the Dataset

In [4]:
data = pd.read_csv("A:/MinorProjectData/GlobalTerrorCleanPartTwo.csv")

Considering the groups which have atleast 20 attacks through the year.

In [5]:
# Calculate the number of attacks by group
groups = data['gname'].value_counts()

# Include groups with at least 10 attacks
groups = groups[groups > 19]

# Exclude unknown groups
group_list = groups.index[groups.index != 'Unknown']

# Subset the data to major groups
major_groups = data[data['gname'].isin(group_list)]

# Display the number of attacks by group
major_groups['gname'].value_counts()

Taliban                                                  6558
Shining Path (SL)                                        4302
Islamic State of Iraq and the Levant (ISIL)              4261
Farabundo Marti National Liberation Front (FMLN)         2952
Al-Shabaab                                               2669
Irish Republican Army (IRA)                              2660
Revolutionary Armed Forces of Colombia (FARC)            2326
New People's Army (NPA)                                  2144
Boko Haram                                               2067
Kurdistan Workers' Party (PKK)                           2028
Basque Fatherland and Freedom (ETA)                      1993
Communist Party of India - Maoist (CPI-Maoist)           1766
Liberation Tigers of Tamil Eelam (LTTE)                  1554
Maoists                                                  1420
National Liberation Army of Colombia (ELN)               1381
Tehrik-i-Taliban Pakistan (TTP)                          1250
Palestin

# Using Decision Trees

In [6]:
#Decision Tree model object
dct_model = DecisionTreeClassifier()

In [7]:
#Predictor Variables
X = major_groups[['country','attacktype1' ,'targtype1' , 'weaptype1','latitude','longitude','year']]

#Target Variables
Y = major_groups['gname']

In [8]:
#Splitting The Data with a ratio of 80:30 into training and testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 42)

In [9]:
#Fitting the model using the training set
dct_model.fit(X_train,Y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [10]:
#Predicting using the testing data
dct_predictions = dct_model.predict(X_test)

In [11]:
#Model performance using Classification Report
print(classification_report(Y_test,dct_predictions))

                                                              precision    recall  f1-score   support

                                                      09-Feb       0.71      1.00      0.83         5
       16 January Organization for the Liberation of Tripoli       1.00      1.00      1.00         7
                          23rd of September Communist League       1.00      0.77      0.87        13
                                     Abdullah Azzam Brigades       0.33      0.40      0.36         5
                                Abu Nidal Organization (ANO)       0.24      0.40      0.30        10
                                      Abu Sayyaf Group (ASG)       0.89      0.89      0.89       132
                                              Action Directe       0.59      0.93      0.72        14
                    Adan-Abyan Province of the Islamic State       0.53      0.56      0.55        16
                    African National Congress (South Africa)       0.94      0.97

In [12]:
print(confusion_matrix(Y_test,dct_predictions))

[[ 5  0  0 ...  0  0  0]
 [ 0  7  0 ...  0  0  0]
 [ 0  0 10 ...  0  0  0]
 ...
 [ 0  0  0 ...  4  0  0]
 [ 0  0  0 ...  0  9  0]
 [ 0  0  0 ...  0  0  3]]


In [13]:
# Model Accuracy Score
print(accuracy_score(Y_test,dct_predictions))


0.800699009736207
