## Cleaning Judicial Autonomy Dataset

### 1 Load Dataset

In [1]:
import yaml

import pandas as pd
import numpy as np

#import matplotlib.pyplot as plt
#import seaborn as sns

In [2]:
# Load the project configuration (input/output paths) from the YAML file.
# Fail immediately with a precise message if the file is missing: every later
# cell indexes `config`, so continuing without it would only surface as a
# confusing NameError. Other problems (e.g. malformed YAML) propagate with
# their own error instead of being reported as "file not found".
try:
    with open('../config.yaml') as file:
        config = yaml.safe_load(file)
except FileNotFoundError as err:
    raise FileNotFoundError("Yaml file not found: '../config.yaml'") from err

In [3]:
# Load dataset q1 from the CSV path configured under input_data -> q1_dataset
q1_df = pd.read_csv(config['input_data']['q1_dataset'])
# Preview the first rows of the raw (long-format) data
q1_df.head()

Unnamed: 0,username,country,country_code,question_no,uid,year,answer,value_na,value
0,ALB22uFF4m,Albania,ALB,1,q1c1_apjufc,2000,Mixed system [0.5],0.5,0.5
1,ALB22uFF4m,Albania,ALB,1,q1c1_apjufc,2001,Mixed system [0.5],0.5,0.5
2,ALB22uFF4m,Albania,ALB,1,q1c1_apjufc,2002,Mixed system [0.5],0.5,0.5
3,ALB22uFF4m,Albania,ALB,1,q1c1_apjufc,2003,Mixed system [0.5],0.5,0.5
4,ALB22uFF4m,Albania,ALB,1,q1c1_apjufc,2004,Mixed system [0.5],0.5,0.5


**Output -- DataFrame:** `q1_df`

### 2 Long Version of Data Cleaning -- Step by Step

In [30]:
# Reshape the long dataframe into a wide one: one row per
# username/country/country_code/year and one column per question item (uid).
# The former index levels are turned back into ordinary columns right away.
q1_pivoted = q1_df.pivot(
    index=['username', 'country', 'country_code', 'year'],
    columns=['uid'],
    values=['value'],
).reset_index()

# Flatten the two-level column labels into plain strings: keep the second
# level (the uid) where it is non-empty, otherwise fall back to the first level
# (the name of a former index column)
q1_pivoted.columns = [lower or upper for upper, lower in q1_pivoted.columns]
#q1_pivoted

In [31]:
# Filter out countries / users
remove_usernames = ('ADMIN123', 'ALB22uFF4m','BEL22cEw8t', 'BIH22q2nOU', 'DNK22KFh1N', 'MNE22N8NJv', 'NLD22Ba53p', 'SRB22L4wbh')
# Take an explicit copy of the filtered rows: the frame is modified afterwards
# (country names are replaced), and assigning into a filtered selection of
# `q1_pivoted` would otherwise raise pandas' SettingWithCopyWarning
q1_countries_cleaned = q1_pivoted[~q1_pivoted['username'].isin(remove_usernames)].copy()
#q1_countries_cleaned

In [32]:
# Replace country names
# Map long/official country labels to their short form; labels that have no
# entry in the mapping are left unchanged by `replace`
q1_countries_cleaned.loc[:,'country'] = q1_countries_cleaned['country'].replace({
    "Czech Republic": "Czechia",
    "Republic of Albania": "Albania",
    "Republic of Serbia": "Serbia",
    "Bosnia and Herzegovina (BiH)": "Bosnia and Herzegovina",
    "Montenegro (MON)": "Montenegro",
    "Kingdom of Belgium": "Belgium"})
# Display the dataframe after filtering users and replacing country names
q1_countries_cleaned

Unnamed: 0,username,country,country_code,year,q1c1_apjuac,q1c1_apjufc,q1c1_apjuhc,q1c1_appealac,q1c1_appealfc,q1c1_appealhc,...,q1c4_casealloc,q1c4_competence,q1c4_manbudget,q1c4_reasondecis,q1c4_regbudget,q1c4_sameright,q1c4_subj,q1c4_whochair,q1c4_whocharge,q1c4_whoselect
46,ALB33wGG5n,Albania,ALA,2000,0.5,0.5,0.0,0.0,0.0,0.0,...,1.0,0.00,1.0,0.0,1.0,0.0,3.0,,0.5,0.5
47,ALB33wGG5n,Albania,ALA,2001,0.5,0.5,0.0,0.0,0.0,0.0,...,1.0,0.00,1.0,0.0,1.0,0.0,3.0,,0.5,0.5
48,ALB33wGG5n,Albania,ALA,2002,0.5,0.5,0.0,0.0,0.0,0.0,...,1.0,0.00,1.0,0.0,1.0,0.0,3.0,,0.5,0.5
49,ALB33wGG5n,Albania,ALA,2003,0.5,0.5,0.0,0.0,0.0,0.0,...,1.0,0.00,1.0,0.0,1.0,0.0,3.0,,0.5,0.5
50,ALB33wGG5n,Albania,ALA,2004,0.5,0.5,0.0,0.0,0.0,0.0,...,1.0,0.00,1.0,0.0,1.0,0.0,3.0,,0.5,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,UKR22Gf5Kx,Ukraine,UKR,2018,0.5,0.5,0.5,1.0,1.0,1.0,...,0.5,0.67,0.0,0.0,0.0,1.0,,1.0,1.0,1.0
1100,UKR22Gf5Kx,Ukraine,UKR,2019,0.5,0.5,0.5,1.0,1.0,1.0,...,0.5,0.67,0.0,0.0,0.0,1.0,,1.0,1.0,1.0
1101,UKR22Gf5Kx,Ukraine,UKR,2020,0.5,0.5,0.5,1.0,1.0,1.0,...,0.5,0.67,0.0,0.0,0.0,1.0,,1.0,1.0,1.0
1102,UKR22Gf5Kx,Ukraine,UKR,2021,0.5,0.5,0.5,1.0,1.0,1.0,...,0.5,0.67,0.0,0.0,0.0,1.0,,1.0,1.0,1.0


In [7]:
# Check the dimensions (rows, columns) of the country-cleaned dataframe
q1_countries_cleaned.shape

(920, 62)

In [33]:
# Remove the subjective-answer items, i.e. every column whose name matches '_subj'
q1_subj_cleaned = q1_countries_cleaned.drop(
    columns=q1_countries_cleaned.filter(regex='_subj').columns)

In [34]:
# Check for missing values across columns (in percentage) 
# isna().mean() yields the share of missing cells per column; it is scaled to
# percent and sorted so the columns with the most missing values come first
q1_check_nan = q1_subj_cleaned.isna().mean().sort_values(ascending=False)*100
# Show the five columns with the highest share of missing values
q1_check_nan.head()

q1c2_retireage    85.108696
q1c2_jubonus      51.195652
q1c3_immunlift    31.304348
q1c3_evalints     25.326087
q1c4_whochair     19.891304
dtype: float64

In [35]:
# Filter out all columns with >20% of missing values
# The columns are derived from the percentages in `q1_check_nan` rather than
# from a hard-coded list, so the selection stays correct if the data changes
# (at the time of writing: 'q1c2_retireage', 'q1c2_jubonus', 'q1c3_immunlift',
# 'q1c3_evalints')
q1_columns_cleaned = q1_subj_cleaned.drop(columns=q1_check_nan[q1_check_nan > 20].index)

In [36]:
# Keep an independent copy of the step-by-step cleaning result under its final name
q1_cleaned_manually = q1_columns_cleaned.copy()

**Output -- DataFrame:** `q1_cleaned_manually`

### 3 Short Version of Data Cleaning -- User-defined Function

In [4]:
# Function to clean raw dataset

def cleaning_judicial_autonomy_data(q1_df, max_missing_percent=20):
    """
    Objective: Basic data cleaning of raw Judicial Autonomy Dataset.
    Input data: raw Judicial Autonomy Dataset.

    Parameters:
        q1_df: long-format DataFrame with (at least) the columns 'username',
            'country', 'country_code', 'year', 'uid' and 'value'.
            The input is not modified.
        max_missing_percent: columns whose percentage of missing values is
            strictly greater than this threshold are dropped (default: 20).

    Returns:
        Wide-format DataFrame with one row per username/country/country_code/year
        and one column per remaining question item (uid).
    """

    # Step 1: Adjust dataframe shape
    # Pivot long dataframe to wide dataframe (pivot returns a new object, so the
    # caller's dataframe stays untouched) and turn the index back into columns
    df_pivoted = q1_df.pivot(index=['username', 'country', 'country_code', 'year'],
                             columns=['uid'],
                             values=['value']).reset_index()

    # Flatten multi-level column names: keep the uid (second level) where it is
    # non-empty, otherwise the first level (name of a former index column)
    df_pivoted.columns = [col[1] if col[1] else col[0] for col in df_pivoted.columns]

    # Step 2: Clean country and user names
    # Remove usernames
    remove_usernames = ('ADMIN123',
                        'ALB22uFF4m',
                        'BEL22cEw8t',
                        'BIH22q2nOU',
                        'DNK22KFh1N',
                        'MNE22N8NJv',
                        'NLD22Ba53p',
                        'SRB22L4wbh')
    # Explicit copy: the filtered frame is modified below, and assigning into a
    # filtered selection would raise pandas' SettingWithCopyWarning
    df_countries_cleaned = df_pivoted[~df_pivoted['username'].isin(remove_usernames)].copy()

    # Replace country names (labels without a mapping entry stay unchanged)
    df_countries_cleaned['country'] = df_countries_cleaned['country'].replace({
        'Czech Republic': 'Czechia',
        'Republic of Albania': 'Albania',
        'Republic of Serbia': 'Serbia',
        'Bosnia and Herzegovina (BiH)': 'Bosnia and Herzegovina',
        'Montenegro (MON)': 'Montenegro',
        'Kingdom of Belgium': 'Belgium'})

    # Step 3: Remove columns
    # Drop columns that contain '_subj' (i.e. subjective answers), matching the
    # step-by-step cleaning; this step was announced but previously not applied
    subj_columns = df_countries_cleaned.filter(regex='_subj').columns
    df_subj_cleaned = df_countries_cleaned.drop(columns=subj_columns)

    # Drop columns with more than `max_missing_percent` percent missing values
    columns_nan_percentage = df_subj_cleaned.isna().mean() * 100
    columns_over_threshold = columns_nan_percentage[columns_nan_percentage > max_missing_percent].index
    df_cleaned = df_subj_cleaned.drop(columns=columns_over_threshold)

    return df_cleaned

**Milestone -- function:** `cleaning_judicial_autonomy_data()`

In [5]:
# Clean dataset (the original fuzzy values are kept)
q1_cleaned = cleaning_judicial_autonomy_data(q1_df) 

# Export dataset to the path configured under output_data -> q1_dataset,
# without writing the row index
q1_cleaned.to_csv(config['output_data']['q1_dataset'], index=False)

**Output -- DataFrame:** `q1_cleaned`

### 3.1 Alternative option: recode fuzzy values as binary values

In [6]:
# Function to recode all fuzzy values (i.e. 0.33, 0.5, 0.67) as binary values (conservative coding, i.e. value <= 0.5 as 0.0)

def recoding_fuzzy_to_binary(q1_df):
    """
    Objective: Recode all fuzzy values as binary values (conservative coding).
    Input data: raw Judicial Autonomy Dataset.
    Next step: clean Judicial Autonomy Dataset.

    Every float column is recoded: values <= 0.5 become 0.0, values > 0.5
    become 1.0. Missing values stay missing. Non-float columns are returned
    unchanged and the input dataframe is not modified.
    """

    df_fuzzy_values_recoded = q1_df.copy()
    float_columns = df_fuzzy_values_recoded.select_dtypes(include=float).columns

    for col in float_columns:
        values = df_fuzzy_values_recoded[col]
        # Any comparison with NaN is False, so a plain "<= 0.5 else 1.0" rule
        # would turn missing values into 1.0; restore NaN explicitly instead
        df_fuzzy_values_recoded[col] = values.gt(0.5).astype(float).where(values.notna())

    return df_fuzzy_values_recoded

**Output -- function:** `recoding_fuzzy_to_binary()`

In [7]:
# Recode fuzzy values of the raw (long-format) dataset to binary values
q1_fuzzy_binary_recoded = recoding_fuzzy_to_binary(q1_df)

# Clean dataset (same cleaning steps as for the fuzzy-valued version)
q1_cleaned_binary = cleaning_judicial_autonomy_data(q1_fuzzy_binary_recoded) 

# Export dataset to the path configured under output_data -> q1_dataset_binary,
# without writing the row index
q1_cleaned_binary.to_csv(config['output_data']['q1_dataset_binary'], index=False)

# Display dataset
#q1_cleaned_binary

**Output -- DataFrame:** `q1_cleaned_binary`

In [None]:
#### DELETE FROM HERE ###

### 4. Create micro and macro indicators

#### 4.1 Micro indicators

#### 4.1.1 Subset dataset to create micro indicators

In [11]:
# Function to merge question items to create micro indicator subsets

def create_micro_indicators_dict(q1_cleaned):
    """
    Objective: Split the cleaned dataset into one subset per micro indicator.
    Each subset holds the country-year columns followed by the question items
    that belong to that micro indicator.
    Input data: cleaned Judicial Autonomy data (fuzzy or binary).
    Output: dictionary {micro indicator name: subset of q1_cleaned}.
    """

    # Columns identifying an observation; they lead every subset
    id_columns = ['country', 'year']

    # Question items per micro indicator (order defines the column order)
    question_items = {
        # 1 -- Actors involved in appointment procedures of judges
        'q1_micro_appointment_judges': [
            'q1c1_apjufc', 'q1c1_apjuac', 'q1c1_apjuhc'],
        # 2 -- Actors involved in appointment procedures of court presidents
        'q1_micro_appointment_court_presidents': [
            'q1c1_appresidfc', 'q1c1_appresidac', 'q1c1_appresidhc'],
        # 3 -- Veto powers during appointment procedures
        'q1_micro_appointment_veto': [
            'q1c1_exvetofc', 'q1c1_exvetoac', 'q1c1_exvetohc'],
        # 4 -- Selection and appointment criteria for judges - predetermined by law
        'q1_micro_selection_predetermined_law': [
            'q1c1_critfclaw', 'q1c1_critaclaw', 'q1c1_crithclaw'],
        # 5 -- Selection and appointment criteria for judges - in accordance to international standards
        'q1_micro_selection_intl_standards': [
            'q1c1_critfcints', 'q1c1_critacints', 'q1c1_crithcints', 'q1c1_probju'],
        # 6 -- Transparency and mechanisms for appeal in appointment procedures of judges
        'q1_micro_transparency_appeal': [
            'q1c1_transplaw', 'q1c1_appealfc', 'q1c1_appealac', 'q1c1_appealhc'],
        # 7 -- Tenure and term in office of judges
        'q1_micro_judge_tenure': [
            'q1c2_termfcju', 'q1c2_termacju', 'q1c2_termpresid', 'q1c2_termhcju'],
        # 8 -- Immunity and non-transferability of judges
        'q1_micro_judge_immunity': [
            'q1c2_juabsimmun', 'q1c2_jufuncimmun', 'q1c2_juremove', 'q1c2_jutransf'],
        # 9 -- Salaries and bonuses of judges
        'q1_micro_judge_salary': [
            'q1c2_jusalary', 'q1c2_jupension'],
        # 10 -- Disciplinary proceedings against judges - predetermined by law
        'q1_micro_disciplinary_proceedings_law': [
            'q1c3_fairtrial', 'q1c3_disciplaw', 'q1c3_discipints', 'q1c3_sanctscale'],
        # 11 -- Disciplinary proceedings against judges - actors involved
        'q1_micro_disciplinary_proceedings_actors': [
            'q1c3_discipbody', 'q1c3_initdiscip', 'q1c3_decdiscip', 'q1c3_appealdiscip'],
        # 12 -- Conflict of interest, recusal from cases and evaluation of judges
        'q1_micro_conflict_recusal_evaluation': [
            'q1c3_judisclos', 'q1c3_jurestrict', 'q1c3_jurecuse'],
        # 13 -- Composition of the judicial self-governing bodies
        'q1_micro_judicial_self_governance_bodies': [
            'q1c4_whocharge', 'q1c4_whoselect', 'q1c4_whochair'],
        # 14 -- Competences and functioning of the judicial self-governing bodies
        'q1_micro_judicial_self_governance_competences': [
            'q1c4_competence', 'q1c4_sameright', 'q1c4_reasondecis'],
        # 15 -- Administration, functioning and budget of courts
        'q1_micro_courts_administration': [
            'q1c4_casealloc', 'q1c4_regbudget', 'q1c4_manbudget'],
    }

    # Select identifier columns plus the items of each micro indicator
    return {name: q1_cleaned[id_columns + items]
            for name, items in question_items.items()}
    

**Output -- function:** `create_micro_indicators_dict()`

In [12]:
# Build the micro indicator subsets from the cleaned fuzzy-valued dataset
created_micro_indicators = create_micro_indicators_dict(q1_cleaned)

In [13]:
# Build the micro indicator subsets from the cleaned binary-recoded dataset
created_micro_indicators_binary = create_micro_indicators_dict(q1_cleaned_binary)

**Output -- dictionary:** `created_micro_indicators` `created_micro_indicators_binary`

#### 4.1.2 Fill in missing values per micro indicator (row-wise mode)

In [14]:
# Nested function to fill in missing values in each micro indicator with row mode

def fill_na_per_micro_indicators(subsets_micro):
    """
    Objective: Fill missing values in every micro indicator subset with the
    mode of the respective row.
    Input data: Dictionary of micro indicator subsets.
    Output: Dictionary with the same keys holding the filled subsets.
    """

    def get_row_mode(row):
        """
        Objective: Fill the missing values of a single row.
        - Only the values from the third position onwards are inspected
          (assumes the first two positions are the country-year columns).
        - If all inspected values are missing, every missing value becomes 0.
        - Otherwise missing values are replaced by the row mode; if there are
          several modes, the first one returned by pandas is used.
        """
        item_values = row[2:]

        if not item_values.isna().all():
            modes = item_values.mode()
            fill_value = np.nan if modes.empty else modes.iloc[0]
            return row.fillna(fill_value)

        return row.fillna(0)

    # Apply the row-wise fill to each subset, keeping the dictionary keys
    return {key: subset.apply(get_row_mode, axis=1)
            for key, subset in subsets_micro.items()}

**Output -- function:** `fill_na_per_micro_indicators()`

In [15]:
# Fill missing values row-wise in every micro indicator subset (fuzzy-valued data)
filled_na_micro_indicators = fill_na_per_micro_indicators(created_micro_indicators)

In [16]:
# Fill missing values row-wise in every micro indicator subset (binary-recoded data)
filled_na_micro_indicators_binary = fill_na_per_micro_indicators(created_micro_indicators_binary)

**Output -- dictionary:** `filled_na_micro_indicators` `filled_na_micro_indicators_binary`

#### 4.1.3 Calculate micro indicator measure

In [17]:
# Function to calculate micro indicator measure by row-wise mean

def calculate_micro_indicators_mean(subsets_micro):
    """
    Objective: Add a micro indicator measure to every subset, computed as the
    row mean over all float columns and rounded to two decimals.
    Input data: Dictionary of micro-indicator subsets after filling in missing values.
    Output: Dictionary with the same keys; each subset is a copy extended by
    the column '<key>_ind_measure'.
    """

    measured_subsets = {}

    for name, frame in subsets_micro.items():
        # Only float columns enter the mean (non-float columns are ignored)
        row_means = frame.select_dtypes(include=float).mean(axis=1).round(2)
        # assign() returns a new frame, so the input subset stays untouched
        measured_subsets[name] = frame.assign(**{f"{name}_ind_measure": row_means})

    return measured_subsets

**Output -- function:** `calculate_micro_indicators_mean()`

In [18]:
# Add the micro indicator measure (row mean) to every filled subset (fuzzy-valued data)
calc_micro_indicators = calculate_micro_indicators_mean(filled_na_micro_indicators)

In [19]:
# Add the micro indicator measure (row mean) to every filled subset (binary-recoded data)
calc_micro_indicators_binary = calculate_micro_indicators_mean(filled_na_micro_indicators_binary)

**Output -- dictionary:** `calc_micro_indicators` `calc_micro_indicators_binary`

#### 4.2 Macro indicators

#### 4.2.1 Merge dataset to create macro indicators

In [20]:
# Function to merge micro indicator subsets to create macro indicators

def create_macro_indicators_dict(subsets_micro_meas):
    """
    Objective: Combine the micro indicator subsets into one subset per macro
    indicator. Each macro subset contains the country-year columns, the
    question items and the micro indicator measures of its micro indicators.
    Input data: Dictionary of micro indicator subsets after calculating micro indicator measures.
    Output: Dictionary {macro indicator name: merged DataFrame}.
    """

    # Micro indicators belonging to each macro indicator, in merge order
    macro_composition = {
        # 1 -- 'appointment_procedures'
        'q1_macro_appointment_procedures': [
            'q1_micro_appointment_judges',
            'q1_micro_appointment_court_presidents',
            'q1_micro_appointment_veto'],
        # 2 -- 'selection_criteria'
        'q1_macro_selection_criteria': [
            'q1_micro_selection_predetermined_law',
            'q1_micro_selection_intl_standards',
            'q1_micro_transparency_appeal'],
        # 3 -- 'professional_rights'
        'q1_macro_professional_rights': [
            'q1_micro_judge_tenure',
            'q1_micro_judge_immunity',
            'q1_micro_judge_salary'],
        # 4 -- 'professional_obligations'
        'q1_macro_professional_obligations': [
            'q1_micro_disciplinary_proceedings_law',
            'q1_micro_disciplinary_proceedings_actors',
            'q1_micro_conflict_recusal_evaluation'],
        # 5 -- 'judicial_administration'
        'q1_macro_judicial_administration': [
            'q1_micro_judicial_self_governance_bodies',
            'q1_micro_judicial_self_governance_competences',
            'q1_micro_courts_administration'],
    }

    subsets_macro_indicators = {}
    for macro_name, (first_micro, *remaining_micros) in macro_composition.items():
        # Start from the first micro subset and outer-merge the others onto it
        # one after another, matching rows on country and year
        merged = subsets_micro_meas[first_micro]
        for micro_name in remaining_micros:
            merged = pd.merge(merged,
                              subsets_micro_meas[micro_name],
                              how='outer', on=['country', 'year'])
        subsets_macro_indicators[macro_name] = merged

    return subsets_macro_indicators
    

**Output -- function:** `create_macro_indicators_dict()`

In [21]:
# Merge the micro indicator subsets into macro indicator subsets (fuzzy-valued data)
created_macro_indicators = create_macro_indicators_dict(calc_micro_indicators)

In [22]:
# Merge the micro indicator subsets into macro indicator subsets (binary-recoded data)
created_macro_indicators_binary = create_macro_indicators_dict(calc_micro_indicators_binary)

**Output -- dictionary:** `created_macro_indicators` `created_macro_indicators_binary`

#### 4.2.2 Aggregate indicator measures to macro indicator

In [23]:
# Function to calculate macro indicator measure based on mean of all micro indicator measures

def aggregate_to_macro_indicators(subsets_macro):
    """
    Objective: Add a macro indicator measure to every macro subset, computed
    as the row mean of all columns ending in '_ind_measure' (rounded to two
    decimals).
    Input data: Dictionary of macro indicator subsets.
    Output: Dictionary with the same keys; each subset is a copy extended by
    the column '<key>_ind_measure'.
    """

    aggregated = {}
    for macro_name, frame in subsets_macro.items():
        # Columns holding the indicator measures already present in the subset
        measure_columns = [name for name in frame.columns if name.endswith('_ind_measure')]
        macro_measure = frame[measure_columns].mean(axis=1).round(2)

        # Work on a copy so the input subset is left unchanged
        extended = frame.copy()
        extended[f"{macro_name}_ind_measure"] = macro_measure
        aggregated[macro_name] = extended

    return aggregated


**Output -- function:** `aggregate_to_macro_indicators()`

In [24]:
# Add the macro indicator measure to every macro subset (fuzzy-valued data)
agg_macro_indicators = aggregate_to_macro_indicators(created_macro_indicators)

In [25]:
# Add the macro indicator measure to every macro subset (binary-recoded data)
agg_macro_indicators_binary = aggregate_to_macro_indicators(created_macro_indicators_binary)

**Output -- dictionary:** `agg_macro_indicators` `agg_macro_indicators_binary`

### 5 Create Judicial Autonomy Index

#### 5.1 Merge dataset to create index

In [26]:
# Function to merge all macro indicator subsets into one index dataset

def create_index_dataset(subsets_macro_meas):
    """
    Objective: Merge all macro indicator subsets into a single DataFrame that
    contains the country-year information together with every column of the
    macro subsets (question items, micro and macro indicator measures).
    Input data: Dictionary of macro indicator subsets after calculating macro indicator measures.
    Output: Merged DataFrame.
    """

    # Macro indicators in the order in which they are merged
    first_macro, *remaining_macros = [
        'q1_macro_appointment_procedures',
        'q1_macro_selection_criteria',
        'q1_macro_professional_rights',
        'q1_macro_professional_obligations',
        'q1_macro_judicial_administration',
    ]

    # Outer-merge the subsets one after another on country and year
    index_merged = subsets_macro_meas[first_macro]
    for macro_name in remaining_macros:
        index_merged = pd.merge(index_merged,
                                subsets_macro_meas[macro_name],
                                how='outer', on=['country', 'year'])

    return index_merged

**Output -- function:** `create_index_dataset()`

In [27]:
# Merge all macro indicator subsets into one index dataset (fuzzy-valued data)
created_index = create_index_dataset(agg_macro_indicators)

In [28]:
# Merge all macro indicator subsets into one index dataset (binary-recoded data)
created_index_binary = create_index_dataset(agg_macro_indicators_binary)

**Output -- DataFrame:** `created_index` `created_index_binary`

#### 5.2 Aggregate macro indicator measures to overall index (by mean)

In [29]:
# Function to calculate index measure based on mean of all macro indicator measures

def aggregate_overall_index(dataframe_index):
    """
    Objective:
    1) Calculate the overall index measure as the row mean of all columns whose
       name starts with 'q1_macro' (rounded to two decimals).
    2) Build a 'country_year' identifier ("<country>_<year>").
    3) Place 'country_year' and 'index_measure' at column positions 2 and 3,
       i.e. directly behind the first two columns.
    Input data: DataFrame with all macro indicator subsets merged, including
    the macro indicator measures.
    Output: New DataFrame; the input is not modified.
    """

    # Overall index: mean of all macro indicator measures
    macro_columns = [name for name in dataframe_index.columns if name.startswith('q1_macro')]
    index_measure = dataframe_index[macro_columns].mean(axis=1).round(2)

    # Country-year identifier used as UID
    country_year = dataframe_index['country'] + "_" + dataframe_index['year'].astype('string')

    # Start from a copy without the two derived columns (in case they already
    # exist), then insert them at the front positions
    calc_overall_index = dataframe_index.drop(columns=['country_year', 'index_measure'],
                                              errors='ignore')
    calc_overall_index.insert(2, 'country_year', country_year)
    calc_overall_index.insert(3, 'index_measure', index_measure)

    return calc_overall_index
    

**Output -- function:** `aggregate_overall_index()`

In [30]:
# Calculate the overall index measure and reorder columns (fuzzy-valued data)
agg_overall_index = aggregate_overall_index(created_index)

In [31]:
# Calculate the overall index measure and reorder columns (binary-recoded data)
agg_overall_index_binary = aggregate_overall_index(created_index_binary)

**Output -- DataFrames:** `agg_overall_index` `agg_overall_index_binary` 

In [32]:
# Display the index dataset built from the fuzzy-valued data
agg_overall_index

Unnamed: 0,country,year,country_year,index_measure,q1c1_apjufc,q1c1_apjuac,q1c1_apjuhc,q1_micro_appointment_judges_ind_measure,q1c1_appresidfc,q1c1_appresidac,...,q1_micro_judicial_self_governance_bodies_ind_measure,q1c4_competence,q1c4_sameright,q1c4_reasondecis,q1_micro_judicial_self_governance_competences_ind_measure,q1c4_casealloc,q1c4_regbudget,q1c4_manbudget,q1_micro_courts_administration_ind_measure,q1_macro_judicial_administration_ind_measure
0,Albania,2000,Albania_2000,0.52,0.5,0.5,0.0,0.33,0.5,0.5,...,0.5,0.00,0.0,0.0,0.00,1.0,1.0,1.0,1.00,0.50
1,Albania,2001,Albania_2001,0.52,0.5,0.5,0.0,0.33,0.5,0.5,...,0.5,0.00,0.0,0.0,0.00,1.0,1.0,1.0,1.00,0.50
2,Albania,2002,Albania_2002,0.52,0.5,0.5,0.0,0.33,0.5,0.5,...,0.5,0.00,0.0,0.0,0.00,1.0,1.0,1.0,1.00,0.50
3,Albania,2003,Albania_2003,0.52,0.5,0.5,0.0,0.33,0.5,0.5,...,0.5,0.00,0.0,0.0,0.00,1.0,1.0,1.0,1.00,0.50
4,Albania,2004,Albania_2004,0.55,0.5,0.5,0.0,0.33,0.5,0.5,...,0.5,0.00,0.0,0.0,0.00,1.0,1.0,1.0,1.00,0.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915,United Kingdom,2018,United Kingdom_2018,0.62,0.5,0.5,0.5,0.50,0.5,0.5,...,1.0,0.67,1.0,1.0,0.89,0.0,1.0,0.0,0.33,0.74
916,United Kingdom,2019,United Kingdom_2019,0.62,0.5,0.5,0.5,0.50,0.5,0.5,...,1.0,0.67,1.0,1.0,0.89,0.0,1.0,0.0,0.33,0.74
917,United Kingdom,2020,United Kingdom_2020,0.62,0.5,0.5,0.5,0.50,0.5,0.5,...,1.0,0.67,1.0,1.0,0.89,0.0,1.0,0.0,0.33,0.74
918,United Kingdom,2021,United Kingdom_2021,0.62,0.5,0.5,0.5,0.50,0.5,0.5,...,1.0,0.67,1.0,1.0,0.89,0.0,1.0,0.0,0.33,0.74


### 6 Create and export subsets

In [99]:
# Column selection for the export that contains, per country-year:
#   - the overall index
#   - the five macro indicator measures
index_macro_columns = [
    # Identifier columns
    'country',
    'year',
    'country_year',

    # 0 -- overall index
    'index_measure',

    # 1 -- appointment_procedures
    'q1_macro_appointment_procedures_ind_measure',

    # 2 -- selection_criteria
    'q1_macro_selection_criteria_ind_measure',

    # 3 -- professional_rights
    'q1_macro_professional_rights_ind_measure',

    # 4 -- professional_obligations
    'q1_macro_professional_obligations_ind_measure',

    # 5 -- judicial_administration
    'q1_macro_judicial_administration_ind_measure',
]

In [109]:
# Dataset with macro-indicators and overall index
# (column subset of the fuzzy-valued index dataset, in the order defined above)
dataset_index_macro = agg_overall_index[index_macro_columns]

# Export dataset to the path configured under output_data -> q1_output_index_macro,
# without writing the row index
dataset_index_macro.to_csv(config['output_data']['q1_output_index_macro'], index=False) # index=False

# Display dataset
#dataset_index_macro.head(10)

**Output -- DataFrame:** `dataset_index_macro`

In [104]:
# Column selection for the export that contains, per country-year:
#   - the overall index
#   - the five macro indicator measures
#   - the micro indicator measures belonging to each macro indicator
index_macro_micro_columns = [
    # Identifier columns
    'country',
    'year',
    'country_year',

    # 0 -- overall index
    'index_measure',

    # 1 -- appointment_procedures
    'q1_macro_appointment_procedures_ind_measure',
    'q1_micro_appointment_judges_ind_measure',
    'q1_micro_appointment_court_presidents_ind_measure',
    'q1_micro_appointment_veto_ind_measure',

    # 2 -- selection_criteria
    'q1_macro_selection_criteria_ind_measure',
    'q1_micro_selection_predetermined_law_ind_measure',
    'q1_micro_selection_intl_standards_ind_measure',
    'q1_micro_transparency_appeal_ind_measure',

    # 3 -- professional_rights
    'q1_macro_professional_rights_ind_measure',
    'q1_micro_judge_tenure_ind_measure',
    'q1_micro_judge_immunity_ind_measure',
    'q1_micro_judge_salary_ind_measure',

    # 4 -- professional_obligations
    'q1_macro_professional_obligations_ind_measure',
    'q1_micro_disciplinary_proceedings_law_ind_measure',
    'q1_micro_disciplinary_proceedings_actors_ind_measure',
    'q1_micro_conflict_recusal_evaluation_ind_measure',

    # 5 -- judicial_administration
    'q1_macro_judicial_administration_ind_measure',
    'q1_micro_judicial_self_governance_bodies_ind_measure',
    'q1_micro_judicial_self_governance_competences_ind_measure',
    'q1_micro_courts_administration_ind_measure',
]
    

In [110]:
# Create dataset with micro-indicators, macro-indicators, overall index
# (column subset of the fuzzy-valued index dataset, in the order defined above)
dataset_index_macro_micro = agg_overall_index[index_macro_micro_columns]

# Export dataset to the path configured under output_data -> q1_output_index_macro_micro,
# without writing the row index
dataset_index_macro_micro.to_csv(config['output_data']['q1_output_index_macro_micro'], index=False) # index=False

# Display the first ten rows of the dataset
dataset_index_macro_micro.head(10)

Unnamed: 0,country,year,country_year,index_measure,q1_macro_appointment_procedures_ind_measure,q1_micro_appointment_judges_ind_measure,q1_micro_appointment_court_presidents_ind_measure,q1_micro_appointment_veto_ind_measure,q1_macro_selection_criteria_ind_measure,q1_micro_selection_predetermined_law_ind_measure,...,q1_micro_judge_immunity_ind_measure,q1_micro_judge_salary_ind_measure,q1_macro_professional_obligations_ind_measure,q1_micro_disciplinary_proceedings_law_ind_measure,q1_micro_disciplinary_proceedings_actors_ind_measure,q1_micro_conflict_recusal_evaluation_ind_measure,q1_macro_judicial_administration_ind_measure,q1_micro_judicial_self_governance_bodies_ind_measure,q1_micro_judicial_self_governance_competences_ind_measure,q1_micro_courts_administration_ind_measure
0,Albania,2000,Albania_2000,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.75,0.0,0.65,0.62,0.67,0.67,0.5,0.5,0.0,1.0
1,Albania,2001,Albania_2001,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.75,0.0,0.65,0.62,0.67,0.67,0.5,0.5,0.0,1.0
2,Albania,2002,Albania_2002,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.75,0.0,0.65,0.62,0.67,0.67,0.5,0.5,0.0,1.0
3,Albania,2003,Albania_2003,0.52,0.55,0.33,0.33,1.0,0.5,0.67,...,0.75,0.0,0.65,0.62,0.67,0.67,0.5,0.5,0.0,1.0
4,Albania,2004,Albania_2004,0.55,0.55,0.33,0.33,1.0,0.5,0.67,...,0.75,0.0,0.76,0.62,0.67,1.0,0.5,0.5,0.0,1.0
5,Albania,2005,Albania_2005,0.55,0.55,0.33,0.33,1.0,0.5,0.67,...,0.75,0.0,0.76,0.62,0.67,1.0,0.5,0.5,0.0,1.0
6,Albania,2006,Albania_2006,0.55,0.55,0.33,0.33,1.0,0.5,0.67,...,0.75,0.0,0.76,0.62,0.67,1.0,0.5,0.5,0.0,1.0
7,Albania,2007,Albania_2007,0.53,0.55,0.33,0.33,1.0,0.42,0.67,...,0.75,0.0,0.76,0.62,0.67,1.0,0.5,0.5,0.0,1.0
8,Albania,2008,Albania_2008,0.58,0.55,0.33,0.33,1.0,0.61,1.0,...,0.5,0.5,0.76,0.62,0.67,1.0,0.5,0.5,0.0,1.0
9,Albania,2009,Albania_2009,0.58,0.55,0.33,0.33,1.0,0.61,1.0,...,0.5,0.5,0.76,0.62,0.67,1.0,0.5,0.5,0.0,1.0


**Output -- DataFrame:** `dataset_index_macro_micro`

In [111]:
# Create dataset with micro-indicators, macro-indicators, overall index, username, country_code

# Subset 'username', 'country', 'country_code', 'year' from original dataset
df_username_country_code_columns = q1_cleaned[['username', 'country', 'country_code', 'year']]
# Re-select the indicator columns here so this cell does not depend on the
# previous export cell having been run
dataset_index_macro_micro = agg_overall_index[index_macro_micro_columns]

# Merge both datasets
# Outer merge on country and year: 'username' and 'country_code' come first,
# followed by the remaining indicator columns
dataset_index_macro_micro_add_info = pd.merge(df_username_country_code_columns, 
                                              dataset_index_macro_micro,
                                              how='outer', on=['country', 'year'])

# Export dataset to the path configured under
# output_data -> q1_output_index_macro_micro_add_info, without writing the row index
dataset_index_macro_micro_add_info.to_csv(config['output_data']['q1_output_index_macro_micro_add_info'], index=False) # index=False

# Display dataset
# dataset_index_macro_micro_add_info.head(10)

**Output -- DataFrame:** `dataset_index_macro_micro_add_info`