# Fragile State Analysis

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import plotly.offline as pyo
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots

import dash
import dash_core_components as dcc
import dash_html_components as html

pd.set_option('float_format', '{:,.2f}'.format)

In [2]:
from sklearn.metrics import mean_squared_error
from math import sqrt
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
import statsmodels.api as sm

from sklearn.cluster import KMeans 
from sklearn.preprocessing import StandardScaler

# Section I: FSI

## Part 1: Preprocessing

In [3]:
df_2019 = pd.read_excel('FSI_DATA/fsi-2019.xlsx')
df_2018 = pd.read_excel('FSI_DATA/fsi-2018.xlsx')
df_2017 = pd.read_excel('FSI_DATA/fsi-2017.xlsx')
df_2016 = pd.read_excel('FSI_DATA/fsi-2016.xlsx')
df_2015 = pd.read_excel('FSI_DATA/fsi-2015.xlsx')
df_2014 = pd.read_excel('FSI_DATA/fsi-2014.xlsx')
df_2013 = pd.read_excel('FSI_DATA/fsi-2013.xlsx')
df_2012 = pd.read_excel('FSI_DATA/fsi-2012.xlsx')
df_2011 = pd.read_excel('FSI_DATA/fsi-2011.xlsx')
df_2010 = pd.read_excel('FSI_DATA/fsi-2010.xlsx')
df_2009 = pd.read_excel('FSI_DATA/fsi-2009.xlsx')
df_2008 = pd.read_excel('FSI_DATA/fsi-2008.xlsx')
df_2007 = pd.read_excel('FSI_DATA/fsi-2007.xlsx')
df_2006 = pd.read_excel('FSI_DATA/fsi-2006.xlsx')

In [4]:
frames = [df_2006, df_2007, df_2008, df_2009, df_2010, df_2011, df_2012, df_2013,
          df_2014, df_2015, df_2016, df_2017, df_2018, df_2019]

df = pd.concat(frames, sort=False)

df['Year'] = df['Year'].dt.to_period('Y')

## Part 2: Country Analysis

### Part 2.1: Functions

In [5]:
def f_country_dashboard(country, final_year, initial_year):
    dff = df[df['Country']==country]
    dff = dff.drop(['Country', 'Rank', 'Change from Previous Year', 'Total'], axis=1)
    dff = dff.set_index('Year')
    dff = dff.loc[(dff.index >=initial_year) & (dff.index <= final_year)]
    dff = dff.T
    dff['VAR'] = ((dff[final_year] - dff[initial_year])/dff[initial_year])*100
    dff = dff.sort_values(by='VAR', ascending=False)
    
        
    return dff

In [6]:
def f_country_graph(country):

    country_selected = country

    df_trace1 = f_country_dashboard(country_selected, final_year, initial_year).T

    df_trace1 = df_trace1.drop(['VAR'])
    df_trace1.reset_index(level=0, inplace=True)
    df_trace1['Year'] = pd.to_datetime(df_trace1['Year'], format='%Y')

    x = df_trace1['Year']
    y1= df_trace1.iloc[:, 1]
    y2= df_trace1.iloc[:, 2]
    y3= df_trace1.iloc[:, 3]
    y4= df_trace1.iloc[:, 4]
    y5= df_trace1.iloc[:, 5]


    trace1 = go.Scatter(x=x,
                        y=y1,
                      mode='lines',
                      name=df_trace1.columns.values[1])

    trace2 = go.Scatter(x=x,
                        y=y2,
                      mode='lines',
                      name=df_trace1.columns.values[2])

    trace3 = go.Scatter(x=x,
                        y=y3,
                      mode='lines',
                      name=df_trace1.columns.values[3])

    trace4 = go.Scatter(x=x,
                        y=y4,
                      mode='lines',
                      name=df_trace1.columns.values[4])

    trace5 = go.Scatter(x=x,
                        y=y5,
                      mode='lines',
                      name=df_trace1.columns.values[5])

    data = [trace1, trace2, trace3, trace4, trace5]

    layout = go.Layout(title='Country: {}'.format(country))

    fig = go.Figure(data=data, layout=layout)
    pyo.plot(fig)

### Part 2.2: Analysis

In [7]:
country = 'Venezuela'
final_year = '2019'
initial_year = '2006'

In [8]:
f_country_dashboard(country, final_year, initial_year)

Year,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,VAR
E1: Economy,4.0,4.0,4.6,5.3,5.8,6.1,5.9,5.4,5.5,6.5,7.0,7.3,8.3,8.6,115.0
P1: State Legitimacy,7.5,7.5,7.1,7.2,7.2,7.5,7.9,7.6,7.7,8.6,8.7,8.5,9.0,9.3,24.0
C2: Factionalized Elites,7.3,7.5,7.5,7.7,7.5,7.3,7.3,7.3,7.7,8.1,8.2,8.2,8.53,8.8,20.55
P2: Public Services,7.0,6.3,6.3,5.9,6.1,5.8,6.3,6.5,7.5,7.2,7.2,7.7,7.8,8.3,18.57
S2: Refugees and IDPs,4.8,5.2,5.0,5.0,5.1,4.8,4.5,4.8,5.1,4.8,4.8,4.6,5.1,5.6,16.67
P3: Human Rights,7.8,7.9,7.4,7.3,7.2,7.4,7.7,7.7,7.8,8.3,8.6,8.9,9.0,9.0,15.38
C3: Group Grievance,6.8,6.8,6.8,7.0,6.8,7.0,6.7,6.4,6.8,7.3,7.4,7.2,7.3,7.6,11.76
C1: Security Apparatus,7.5,6.9,6.6,6.9,6.7,7.0,6.7,6.5,6.2,6.5,6.8,7.3,7.38,7.4,-1.33
X1: External Intervention,6.0,5.7,5.2,5.5,5.7,5.5,5.2,4.9,4.6,4.3,5.6,5.4,5.5,5.5,-8.33
E3: Human Flight and Brain Drain,7.0,6.9,6.9,6.9,6.7,6.4,6.1,5.8,5.5,5.2,5.2,5.5,6.0,6.1,-12.86


In [9]:
f_country_graph(country)

## Part 3: General Analysis

In [10]:
# Define the year for the analysis:

year = '2019'

### Part 3.1: Preprocessing

In [11]:
# Clean the Data
dff_year = df[df['Year']==year]
dff_year = dff_year.drop(['Year', 'Rank', 'Change from Previous Year'], axis=1)
dff_year = dff_year.set_index('Country')

# Normalize the Data
X = dff_year.values[:,1:]
X = np.nan_to_num(X)
Clus_dataSet = StandardScaler().fit_transform(X)

### Part 3.2: Clustering

In [12]:
clusterNum = 5
k_means = KMeans(init = "k-means++", n_clusters = clusterNum, n_init = 12)
k_means.fit(X)
labels = k_means.labels_

In [13]:
dff_year["Clus_km"] = labels

In [14]:
def f_cluster_resumen(cluster):
    
    dff_year_group = dff_year[dff_year['Clus_km']==cluster] 
    dff_year_group = dff_year_group.groupby('Clus_km').mean()
    
    return dff_year_group.mean()

In [15]:
clusters = [0,1,2,3,4]

dff_desc_cluster_data = []

for cluster in clusters:
    x = f_cluster_resumen(cluster)
    
    dff_desc_cluster_data.append(x)

pd.DataFrame(dff_desc_cluster_data)

Unnamed: 0,Total,C1: Security Apparatus,C2: Factionalized Elites,C3: Group Grievance,E1: Economy,E2: Economic Inequality,E3: Human Flight and Brain Drain,P1: State Legitimacy,P2: Public Services,P3: Human Rights,S1: Demographic Pressures,S2: Refugees and IDPs,X1: External Intervention
0,72.82,6.26,7.87,7.22,5.29,5.48,5.6,7.4,4.68,7.02,5.13,5.23,5.63
1,24.24,2.06,2.29,3.16,2.57,1.9,2.25,1.19,1.27,1.57,1.9,2.67,1.41
2,97.22,8.13,8.85,8.06,7.56,7.68,7.18,8.62,8.54,8.13,8.36,8.11,8.0
3,46.59,3.76,4.92,4.74,4.18,3.78,4.13,4.17,2.79,3.65,3.22,2.95,4.29
4,74.84,5.84,6.4,4.78,6.9,7.02,7.32,5.64,6.96,5.39,7.26,4.61,6.71


### Part 3.3: Functions

In [16]:
def f_scatter_variables(V1, V2):    
    
    dff_year_g = dff_year
    dff_year_g.reset_index(inplace=True)

    fig = px.scatter(dff_year_g, x=V1, y=V2,
                     color='Clus_km', hover_name='Country')

  
    fig.update_layout(title=go.layout.Title(text='FSI'), showlegend=False)

    pyo.plot(fig)   

### Part 3.4: Analysis

In [17]:
# List of variables:

dff_year.columns

Index(['Total', 'C1: Security Apparatus', 'C2: Factionalized Elites',
       'C3: Group Grievance', 'E1: Economy', 'E2: Economic Inequality',
       'E3: Human Flight and Brain Drain', 'P1: State Legitimacy',
       'P2: Public Services', 'P3: Human Rights', 'S1: Demographic Pressures',
       'S2: Refugees and IDPs', 'X1: External Intervention', 'Clus_km'],
      dtype='object')

In [18]:
f_scatter_variables('E1: Economy', 'Total')

In [31]:
f_scatter_variables('S2: Refugees and IDPs', 'Total')

### 3.5: Analysis Country selected versus Cluster

In [19]:
country = 'Venezuela'

cluster_select = dff_year[dff_year['Country']==country]
cluster_select = cluster_select['Clus_km'].values
print('Country Cluster: {} {}.'.format(country, cluster_select))

Country Cluster: Venezuela [2].


In [20]:
def f_country_compare__(country, cluster):
    # This function help to identify how good/bad is the country select with respect is cluster peers
    
    # Clean cluster data
    dff_cluster = dff_year[dff_year['Clus_km']==cluster]
    dff_cluster = dff_cluster.drop(['Clus_km'], axis=1)
    dff_cluster = dff_cluster.mean()
    dff_cluster = pd.DataFrame(dff_cluster)
    dff_cluster = dff_cluster.reset_index()
    dff_cluster = dff_cluster.rename(columns={'index':'Variables', 0:'Cluster_Values'})
    dff_cluster = dff_cluster.set_index('Variables')
    
    # Clean country data
    dff_country = df[df['Country']==country]
    dff_country = dff_country[dff_country['Year']==2019]
    dff_country = dff_country.drop(['Country', 'Rank', 'Change from Previous Year'], axis=1)
    dff_country = dff_country.rename(columns={'Year':'Variables'})
    dff_country = dff_country.set_index('Variables').T
    
    # Concat DF
    w = pd.concat([dff_cluster, dff_country], axis=1)
    w.rename(columns={0:'Group', w.columns[1]:country}, inplace=True)
    
    w['Dif'] = w['Cluster_Values'] - w[country]
    
    return w.sort_values(by='Dif')

In [30]:
f_country_compare__(country,2)

Unnamed: 0_level_0,Cluster_Values,Venezuela,Dif
Variables,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E1: Economy,7.61,8.6,-0.99
P3: Human Rights,8.16,9.0,-0.84
P1: State Legitimacy,8.68,9.3,-0.62
C2: Factionalized Elites,8.85,8.8,0.05
P2: Public Services,8.59,8.3,0.29
C3: Group Grievance,8.01,7.6,0.41
C1: Security Apparatus,8.19,7.4,0.79
E2: Economic Inequality,7.72,6.9,0.82
E3: Human Flight and Brain Drain,7.21,6.1,1.11
S1: Demographic Pressures,8.36,6.2,2.16


In [22]:
dff_year = df[df['Year']=='2019']
dff_year = dff_year.drop(['Year', 'Rank', 'Change from Previous Year'], axis=1)
dff_year = dff_year.set_index('Country')
    
X = dff_year.values[:,1:]
X = np.nan_to_num(X)
Clus_dataSet = StandardScaler().fit_transform(X)

clusterNum = 5
k_means = KMeans(init = "k-means++", n_clusters = clusterNum, n_init = 12)
k_means.fit(X)
labels = k_means.labels_

dff_year["Clus_km"] = labels

In [23]:
df_peace = pd.read_excel('Data/Global Peace Index.xlsx')

In [24]:
df_peace = df_peace.set_index('Country')
df_peace.head()

Unnamed: 0_level_0,Peace_index
Country,Unnamed: 1_level_1
Afghanistan,3574
Albania,1821
Algeria,2219
Angola,2012
Argentina,1989


In [25]:
df_fei_peace = pd.merge(dff_year, df_peace, left_index=True, right_index=True)

In [26]:
df_fei_peace.head()

Unnamed: 0_level_0,Total,C1: Security Apparatus,C2: Factionalized Elites,C3: Group Grievance,E1: Economy,E2: Economic Inequality,E3: Human Flight and Brain Drain,P1: State Legitimacy,P2: Public Services,P3: Human Rights,S1: Demographic Pressures,S2: Refugees and IDPs,X1: External Intervention,Clus_km,Peace_index
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Yemen,113.5,10.0,10.0,9.6,9.7,8.1,7.3,9.8,9.8,9.9,9.7,9.6,10.0,2,3412
Somalia,112.3,9.6,10.0,8.9,8.8,9.4,9.2,9.0,9.4,9.3,10.0,9.4,9.2,2,3300
South Sudan,112.2,9.7,9.7,9.4,9.8,8.9,6.5,10.0,9.8,9.3,9.7,10.0,9.4,2,3526
Congo Democratic Republic,110.2,8.8,9.8,10.0,8.3,8.6,7.0,9.4,9.2,9.6,9.8,10.0,9.7,2,3218
Central African Republic,108.9,8.6,9.4,8.3,8.7,9.9,7.1,9.1,10.0,9.5,9.1,10.0,9.2,2,3296


In [27]:
df_fei_peace.reset_index(inplace=True)

In [28]:
fig = px.scatter(df_fei_peace, x='Total', y='Peace_index',
                 color='Clus_km', hover_name='Country', marginal_x="histogram", marginal_y="histogram", trendline='ols')


fig.update_layout(title=go.layout.Title(text='FSI / Peace Index'), showlegend=False)

pyo.plot(fig)


Method .ptp is deprecated and will be removed in a future version. Use numpy.ptp instead.



'temp-plot.html'

In [29]:
w = df[df['Country']==country]
w = w.drop(['Country', 'Rank', 'Change from Previous Year', 'Total'], axis=1)
w = w.set_index('Year')
w.loc[(w.index >='2011') & (w.index <= 2015)]

Unnamed: 0_level_0,C1: Security Apparatus,C2: Factionalized Elites,C3: Group Grievance,E1: Economy,E2: Economic Inequality,E3: Human Flight and Brain Drain,P1: State Legitimacy,P2: Public Services,P3: Human Rights,S1: Demographic Pressures,S2: Refugees and IDPs,X1: External Intervention
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2011,7.0,7.3,7.0,6.1,7.3,6.4,7.5,5.8,7.4,6.0,4.8,5.5
2012,6.7,7.3,6.7,5.9,7.2,6.1,7.9,6.3,7.7,5.7,4.5,5.2
2013,6.5,7.3,6.4,5.4,6.9,5.8,7.6,6.5,7.7,5.4,4.8,4.9
2014,6.2,7.7,6.8,5.5,6.9,5.5,7.7,7.5,7.8,5.4,5.1,4.6
2015,6.5,8.1,7.3,6.5,6.7,5.2,8.6,7.2,8.3,5.1,4.8,4.3
