## Machine Learning

In [1]:
import yaml

import pandas as pd
import numpy as np
import seaborn as sns


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor


In [2]:
# Load the project configuration; later cells read data paths from `config`.
try:
    with open('../config.yaml') as file:
        config = yaml.safe_load(file)
except FileNotFoundError:
    # Report the missing file, then re-raise: continuing without `config`
    # would only defer the failure to a less informative NameError in the
    # cells that index into it. Other errors (e.g. malformed YAML) are no
    # longer mislabelled as "not found" and propagate unchanged.
    print("Yaml file not found.")
    raise

In [3]:
# Load data (the merged dataset is read in the next cell)

In [4]:
# Read the merged table (Judicial Autonomy index, macro- and micro-indicator
# subset, V-Dem) from the path stored in the config and discard the two
# 'Unnamed' columns in the same chain, then preview the first rows.

q1_index_macro_micro_vdem = (
    pd.read_csv(config['output_data']['q1_output_index_macro_micro_vdem'])
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
)
q1_index_macro_micro_vdem.head()

Unnamed: 0,country,year,country_year,index_measure,q1_macro_appointment_procedures_ind_measure,q1_micro_appointment_judges_ind_measure,q1_micro_appointment_court_presidents_ind_measure,q1_micro_appointment_veto_ind_measure,q1_macro_selection_criteria_ind_measure,q1_micro_selection_predetermined_law_ind_measure,...,q1_micro_judicial_self_governance_bodies_ind_measure,q1_micro_judicial_self_governance_competences_ind_measure,q1_micro_courts_administration_ind_measure,reg_id,reg_type,v2x_regime,v2x_polyarchy,reg_trans,row_regch_event,dem_ep
0,Albania,2000,Albania_2000,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.5,0.0,1.0,ALB_1914_2004,0.0,1.0,0.407,0.0,0.0,1
1,Albania,2001,Albania_2001,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.5,0.0,1.0,ALB_1914_2004,0.0,1.0,0.44,0.0,0.0,1
2,Albania,2002,Albania_2002,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.5,0.0,1.0,ALB_1914_2004,0.0,1.0,0.484,0.0,0.0,1
3,Albania,2003,Albania_2003,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.5,0.0,1.0,ALB_1914_2004,0.0,1.0,0.49,0.0,0.0,1
4,Albania,2004,Albania_2004,0.55,0.55,0.33,0.33,1.0,0.5,0.67,...,0.5,0.0,1.0,ALB_1914_2004,0.0,1.0,0.49,0.0,0.0,1


### 1. KNN Model

#### 1.1 Classification Model

In [5]:
# Columns used by the KNN classification model: the target column comes
# first, followed by the fifteen micro-indicator measures used as features.
selected_columns_knn_class_model = [
    'reg_type',
    'q1_micro_appointment_judges_ind_measure',
    'q1_micro_appointment_court_presidents_ind_measure',
    'q1_micro_appointment_veto_ind_measure',
    'q1_micro_selection_predetermined_law_ind_measure',
    'q1_micro_selection_intl_standards_ind_measure',
    'q1_micro_transparency_appeal_ind_measure',
    'q1_micro_judge_tenure_ind_measure',
    'q1_micro_judge_immunity_ind_measure',
    'q1_micro_judge_salary_ind_measure',
    'q1_micro_disciplinary_proceedings_law_ind_measure',
    'q1_micro_disciplinary_proceedings_actors_ind_measure',
    'q1_micro_conflict_recusal_evaluation_ind_measure',
    'q1_micro_judicial_self_governance_bodies_ind_measure',
    'q1_micro_judicial_self_governance_competences_ind_measure',
    'q1_micro_courts_administration_ind_measure',
]

In [6]:
# Inspect the dtype of every column selected for the classifier
q1_index_macro_micro_vdem.loc[:, selected_columns_knn_class_model].dtypes

reg_type                                                     float64
q1_micro_appointment_judges_ind_measure                      float64
q1_micro_appointment_court_presidents_ind_measure            float64
q1_micro_appointment_veto_ind_measure                        float64
q1_micro_selection_predetermined_law_ind_measure             float64
q1_micro_selection_intl_standards_ind_measure                float64
q1_micro_transparency_appeal_ind_measure                     float64
q1_micro_judge_tenure_ind_measure                            float64
q1_micro_judge_immunity_ind_measure                          float64
q1_micro_judge_salary_ind_measure                            float64
q1_micro_disciplinary_proceedings_law_ind_measure            float64
q1_micro_disciplinary_proceedings_actors_ind_measure         float64
q1_micro_conflict_recusal_evaluation_ind_measure             float64
q1_micro_judicial_self_governance_bodies_ind_measure         float64
q1_micro_judicial_self_governance_

In [7]:
# Count missing entries per column selected for the classifier
q1_index_macro_micro_vdem.loc[:, selected_columns_knn_class_model].isna().sum()

reg_type                                                     0
q1_micro_appointment_judges_ind_measure                      0
q1_micro_appointment_court_presidents_ind_measure            0
q1_micro_appointment_veto_ind_measure                        0
q1_micro_selection_predetermined_law_ind_measure             0
q1_micro_selection_intl_standards_ind_measure                0
q1_micro_transparency_appeal_ind_measure                     0
q1_micro_judge_tenure_ind_measure                            0
q1_micro_judge_immunity_ind_measure                          0
q1_micro_judge_salary_ind_measure                            0
q1_micro_disciplinary_proceedings_law_ind_measure            0
q1_micro_disciplinary_proceedings_actors_ind_measure         0
q1_micro_conflict_recusal_evaluation_ind_measure             0
q1_micro_judicial_self_governance_bodies_ind_measure         0
q1_micro_judicial_self_governance_competences_ind_measure    0
q1_micro_courts_administration_ind_measure             

In [8]:
# Build the classifier frame in one chain: keep only the selected columns
# and cast the regime-type target column to pandas' nullable boolean dtype.
knn_classifier_data = (
    q1_index_macro_micro_vdem[selected_columns_knn_class_model]
    .astype({'reg_type': 'boolean'})
)

In [9]:
# Separate the classification label (reg_type) from the feature matrix
features = knn_classifier_data.drop(columns=['reg_type'])
target = knn_classifier_data.loc[:, 'reg_type']

In [10]:
# Hold out 20 % of the rows for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): rows appear to be country-year observations whose indicator
# values can repeat across consecutive years, so a purely random split may put
# near-identical rows of one country in both sets — confirm whether a grouped
# split (e.g. by country) would be more appropriate.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.20,
    random_state=0,
)

In [11]:
# Initialize the KNN classifier; n_neighbors=3 is the number of nearest
# training samples consulted for each prediction.
knn_cl = KNeighborsClassifier(n_neighbors=3)

In [12]:
# Fit the classifier on the training split only; the test split stays unseen
# until scoring.
knn_cl.fit(X_train, y_train)

In [27]:
# Predict the regime type for the held-out test features, i.e. for the same
# rows the classifier is scored on in the following cell (the original
# predicted on the training features, which says nothing about generalisation).
pred = knn_cl.predict(X_test)

In [14]:
# Mean accuracy of the classifier on the held-out test split
knn_cl.score(X_test, y_test)
#print(f"The accuracy of the model is {round(knn_cl.score(X_test, y_test)*100,2)} %")

0.9347826086956522

**Outcome:** The accuracy of the classification model on the held-out test set is 93.48 %

#### 1.2 Regression Model

In [15]:
# Columns used by the KNN regression model: the continuous target comes
# first, followed by the fifteen micro-indicator measures used as features.
selected_columns_knn_regr_model = [
    'v2x_polyarchy',
    'q1_micro_appointment_judges_ind_measure',
    'q1_micro_appointment_court_presidents_ind_measure',
    'q1_micro_appointment_veto_ind_measure',
    'q1_micro_selection_predetermined_law_ind_measure',
    'q1_micro_selection_intl_standards_ind_measure',
    'q1_micro_transparency_appeal_ind_measure',
    'q1_micro_judge_tenure_ind_measure',
    'q1_micro_judge_immunity_ind_measure',
    'q1_micro_judge_salary_ind_measure',
    'q1_micro_disciplinary_proceedings_law_ind_measure',
    'q1_micro_disciplinary_proceedings_actors_ind_measure',
    'q1_micro_conflict_recusal_evaluation_ind_measure',
    'q1_micro_judicial_self_governance_bodies_ind_measure',
    'q1_micro_judicial_self_governance_competences_ind_measure',
    'q1_micro_courts_administration_ind_measure',
]

In [16]:
# Count missing entries per column selected for the regressor
q1_index_macro_micro_vdem.loc[:, selected_columns_knn_regr_model].isna().sum()

v2x_polyarchy                                                0
q1_micro_appointment_judges_ind_measure                      0
q1_micro_appointment_court_presidents_ind_measure            0
q1_micro_appointment_veto_ind_measure                        0
q1_micro_selection_predetermined_law_ind_measure             0
q1_micro_selection_intl_standards_ind_measure                0
q1_micro_transparency_appeal_ind_measure                     0
q1_micro_judge_tenure_ind_measure                            0
q1_micro_judge_immunity_ind_measure                          0
q1_micro_judge_salary_ind_measure                            0
q1_micro_disciplinary_proceedings_law_ind_measure            0
q1_micro_disciplinary_proceedings_actors_ind_measure         0
q1_micro_conflict_recusal_evaluation_ind_measure             0
q1_micro_judicial_self_governance_bodies_ind_measure         0
q1_micro_judicial_self_governance_competences_ind_measure    0
q1_micro_courts_administration_ind_measure             

In [17]:
# Take an independent copy of just the columns needed by the KNN regression model
knn_regression_data = q1_index_macro_micro_vdem.loc[:, selected_columns_knn_regr_model].copy()


In [18]:
# Pairplot to check distribution of values (currently disabled).
# NOTE(review): the slices below cover column positions 1-4, 6-10 and 12-14,
# so positions 5, 11 and 15 would not be plotted if these lines are re-enabled.

# sns.pairplot(knn_regression_data, y_vars=['v2x_polyarchy'], x_vars=knn_regression_data.columns[1:5], kind='scatter')
# sns.pairplot(knn_regression_data, y_vars=['v2x_polyarchy'], x_vars=knn_regression_data.columns[6:11], kind='scatter')
# sns.pairplot(knn_regression_data, y_vars=['v2x_polyarchy'], x_vars=knn_regression_data.columns[12:15], kind='scatter')

In [19]:
# Separate the regression label (v2x_polyarchy) from the feature matrix
features = knn_regression_data.drop(columns=['v2x_polyarchy'])
target = knn_regression_data.loc[:, 'v2x_polyarchy']

In [20]:
# Hold out 20 % of the rows for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): rows appear to be country-year observations whose indicator
# values can repeat across consecutive years, so a purely random split may put
# near-identical rows of one country in both sets — confirm whether a grouped
# split (e.g. by country) would be more appropriate.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.20,
    random_state=0,
)

In [21]:
# Initialize the KNN regressor; n_neighbors=10 is the number of nearest
# training samples consulted for each prediction.
knn_reg = KNeighborsRegressor(n_neighbors=10)

In [22]:
# Fit the regressor on the training split only; the test split stays unseen
# until scoring.
knn_reg.fit(X_train, y_train)

In [26]:
# Predict v2x_polyarchy for the held-out test features, i.e. for the same
# rows the regressor is scored on in the following cell (the original
# predicted on the training features, which says nothing about generalisation).
pred = knn_reg.predict(X_test)

In [24]:
# Coefficient of determination (R^2) on the held-out test split.
# For a regressor, .score() reports R^2 rather than a classification accuracy,
# so the result should not be described as a percentage of correct predictions.
knn_reg.score(X_test, y_test)

0.9097872811611244

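**Outcome:** The regression model's coefficient of determination ($R^2$) on the held-out test set is 0.91. Note that `KNeighborsRegressor.score` reports $R^2$, not an accuracy.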