### Imports

In [48]:
import pandas as pd
import numpy as np

In [49]:
# Raise pandas' display limits to 4000 rows / 4000 columns so the frames
# inspected in this notebook are shown in full instead of being truncated.
pd.set_option('display.max_rows', 4000)
pd.set_option('display.max_columns', 4000)

In [50]:
# Communities Data

In [51]:
# Name of the sub-directory under ../data that holds the communities input
# read below and receives the prepped institutions CSV written at the end.
DATA_VERSION = 'v4'

In [52]:
# Path of the prepped communities CSV for the selected data version.
communities_csv = '../data/{}/communities_data_prepped.csv'.format(DATA_VERSION)

In [53]:
# Load the communities table; it is merged with the institutions table
# further down using its 'Name' column.
communities_df = pd.read_csv(communities_csv)

In [54]:
# Preview the first rows as a sanity check on the load.
communities_df.head()

In [55]:
# Institutions Data

In [56]:
# Path of the raw institutions export.
# NOTE(review): this path is pinned to 'v2' and does not use DATA_VERSION
# ('v4'), unlike the communities input and the final output — confirm that
# mixing versions is intentional.
institutions_csv = '../data/v2/Institutions-Grid-view.csv'

In [57]:
# Load the institutions table; the cells below read its 'CGI' and
# 'Communities' columns.
institutions_df = pd.read_csv(institutions_csv)

In [58]:
# Display the full institutions table (display limits were raised above).
institutions_df

In [59]:
# Frequency of each distinct raw 'CGI' cell value. This counts whole cells;
# the individual comma-separated entries are only split out further below.
institutions_df['CGI'].value_counts()

In [60]:
# Bar chart of how often each distinct raw 'CGI' cell value occurs.
institutions_df['CGI'].value_counts().plot.bar(figsize=(20, 20))

In [61]:
# Distinct values found in the 'Communities' column.
set(institutions_df['Communities'])

In [62]:
# Number of distinct values found in the 'Communities' column.
len(set(institutions_df['Communities']))

In [63]:
# TODO: Merge CGI data into Communities dataset? Or: Run a separate analysis for Institutions data

# Encode Collective Governance Institutions (CGIs)

In [64]:
# Inspect the raw 'CGI' column before missing values are replaced.
institutions_df['CGI']

In [65]:
# Replace NaN with the empty string, in place, across the whole frame (not
# only 'CGI'). Later cells call .split(',') and .str.contains on 'CGI'
# values, which need strings rather than NaN.
# NOTE(review): regex=True appears to have no effect when the pattern is
# NaN; fillna('') looks like the simpler equivalent — confirm before changing.
institutions_df.replace(np.nan, '', regex=True, inplace=True)

In [66]:
# Inspect 'CGI' again after the NaN replacement above.
institutions_df['CGI']

In [67]:
# One entry per institution row: the raw 'CGI' cell value.
cgi_list = institutions_df['CGI'].tolist()

In [68]:
# Show the raw per-row 'CGI' values.
cgi_list

In [69]:
# Accumulator for the distinct CGI labels; filled in the next cell.
cgi_set = set()

In [70]:
# Collect every distinct comma-separated entry found in the 'CGI' cells.
# A plain loop is used because set.update() is called purely for its side
# effect; a list comprehension would only build a throwaway list of None.
for cgis in cgi_list:
    cgi_set.update(cgis.split(','))
cgi_set

In [71]:
# Drop the empty label produced by blank 'CGI' cells. discard() is used
# instead of remove() so this cell does not raise KeyError when there is no
# empty entry (no blank cells in the data, or the cell is simply re-run).
cgi_set.discard('')

In [72]:
# Show the distinct CGI labels after the empty entry has been removed.
cgi_set

### Categorize Mechanisms by: Access, Decision Making, Enforcement

In [73]:
# Manual grouping of the generated indicator-column names
# ('cgis_include_' + normalized CGI label) into three categories.
# categorize_cgi() looks a name up in these lists to choose the category
# prefix that is put in front of the column name.
# Some names end in '_' (e.g. 'cgis_include_popularity_'); keep them exactly
# as written, because the lookup is an exact string match.

# Category 1: access.
access = [
    'cgis_include_meritocracy',
    'cgis_include_age_boundaries',
    'cgis_include_blood_relations',
    'cgis_include_life_appointment',
    'cgis_include_co-optation',
    'cgis_include_patronage_for_office',
    'cgis_include_heredity',
    'cgis_include_lottery_random_selection',
    'cgis_include_matriarchy',
    'cgis_include_screening_process',
    'cgis_include_property_requirement',
    'cgis_include_induction_rite_ceremony',
    'cgis_include_matrilineality',
    'cgis_include_enfranchisement',
    'cgis_include_popularity_',
    'cgis_include_divine_right',
    'cgis_include_payment_for_occupying_office',
    'cgis_include_gerontocracy',
    'cgis_include_open_political_unit',
    'cgis_include_election',
    'cgis_include_dreaming',
    'cgis_include_female_participation',
]

# Category 2: decision making.
decision_making = [
    'cgis_include_secret_ballot',
    'cgis_include_vote_by_proxy',
    'cgis_include_voting',
    'cgis_include_mandate',
    'cgis_include_lobbying',
    'cgis_include_representation',
    'cgis_include_consensus',
    'cgis_include_magistrate_official',
    'cgis_include_word_of_mouth',
    'cgis_include_consultation',
    'cgis_include_checks_and_balances',
    'cgis_include_council_central_',
    'cgis_include_unanimity',
    'cgis_include_kingchief-as-figurehead',
    'cgis_include_deliberation',
    'cgis_include_autocratic_leader_chief_bound',
    'cgis_include_plurality_voting',
    'cgis_include_leader_first_among_equals',
    'cgis_include_power_fluidity',
    'cgis_include_petition',
    'cgis_include_delegation',
    'cgis_include_vote_by_show_of_hands',
    'cgis_include_temporary_position_of_power_term_limits',
    'cgis_include_ratification_acclamation_only',
    'cgis_include_rule_of_law',
    'cgis_include_assembly_central',
    'cgis_include_supermajority',
    'cgis_include_handshake',
    'cgis_include_bureaucracy',
    'cgis_include_majority_voting',
    'cgis_include_alliance',
    'cgis_include_board_committee',
    'cgis_include_dual-sex_political_system',
    'cgis_include_remunerated_position',
    'cgis_include_public_meetings',
    'cgis_include_balance_of_power',
    'cgis_include_consent',
    'cgis_include_turnover_rotation',
    'cgis_include_quorum',
    'cgis_include_frequent_and_or_regular_meetings',
    'cgis_include_confederacy',
    'cgis_include_assembly_local',
    'cgis_include_council_local',
    'cgis_include_oath',
    'cgis_include_constitution',
    'cgis_include_president_consul_chief_executive',
    'cgis_include_veto',
    'cgis_include_autocratic_leader_unbound',
    'cgis_include_assembly_elite',
    'cgis_include_agenda_setting',
    'cgis_include_group_voting',
    'cgis_include_coalition',
    'cgis_include_gathering',
]

# Category 3: enforcement.
enforcement = [
    'cgis_include_negotiation',
    'cgis_include_suspension_expulsion_ostracism_proscription',
    'cgis_include_fines',
    'cgis_include_criticism_mocking_joking',
    'cgis_include_corporal_punishment_violence_against_perpetrator',
    'cgis_include_forced_labour',
    'cgis_include_paying_damages_compensation',
    'cgis_include_seizing_of_property',
    'cgis_include_monitoring',
    'cgis_include_reputational_risk',
    'cgis_include_appeal',
    'cgis_include_graduated_sanctions',
    'cgis_include_self-help',
    'cgis_include_civil_disobedience',
    'cgis_include_military_force',
    'cgis_include_arbitration',
    'cgis_include_police',
    'cgis_include_exit',
    'cgis_include_jury_judge',
    'cgis_include_capital_punishment',
    'cgis_include_mediation',
    'cgis_include_destruction_of_property',
    'cgis_include_litigation',
    'cgis_include_tribunal_court_',
    'cgis_include_judicial_review',
    'cgis_include_scrutiny_of_officials',
]


In [74]:
def categorize_cgi(cgi):
    """Return the category prefix for an indicator-column name.

    Args:
        cgi: Column name of the form 'cgis_include_<label>'.

    Returns:
        'access_', 'decision_making_' or 'enforcement_' when the name is
        listed in the corresponding module-level list (checked in that
        order), otherwise the empty string.
    """
    categories = (
        ('access_', access),
        ('decision_making_', decision_making),
        ('enforcement_', enforcement),
    )
    for prefix, members in categories:
        if cgi in members:
            return prefix

    # Names that are not listed in any category get no prefix.
    return ''

In [75]:
# Add one 0/1 indicator column per distinct CGI label.
#
# Each 'CGI' cell is split on ',' exactly the way cgi_set was built, and a
# label is looked up as a whole entry. A substring test such as
# str.contains() would also flag 'voting' for a row that only lists
# 'majority voting' or 'plurality voting', inflating the shorter label.
cgi_entries_per_row = institutions_df['CGI'].str.split(',').map(set)

for cgi in cgi_set:
    # Normalize text
    cgi_str = cgi.lower().replace('/', '_').replace(' ', '_').replace('(', '').replace(')', '')
    cgi_name = 'cgis_include_' + cgi_str

    # Prefix with the category ('access_', 'decision_making_', 'enforcement_')
    # when the name is listed in one; otherwise the name is used as is.
    col_name = categorize_cgi(cgi_name) + cgi_name

    # The lambda runs immediately inside map(), so it sees the current `cgi`.
    has_cgi = cgi_entries_per_row.map(lambda entries: cgi in entries).astype(bool)
    institutions_df[col_name] = np.where(has_cgi, 1, 0)

In [76]:
# Preview the institutions table with the new indicator columns appended.
institutions_df.head()

In [77]:
# Names of all generated indicator columns, with or without category prefix.
cgi_columns = [column for column in institutions_df.columns if 'cgis_include_' in column]

In [78]:
# List the indicator columns alphabetically; because of the category prefix
# this groups them by category, with uncategorized names sorting under 'cgis_'.
sorted(cgi_columns)

In [79]:
# Number of institution rows flagged for each indicator column, ascending.
institutions_df[cgi_columns].sum().sort_values()

## Merge datasets to include Size, Region, Geography, Time span columns

In [80]:
# Join each institution row to its community: communities 'Name' is matched
# against institutions 'Communities'. No `how` is given, so pandas' default
# inner join applies and rows without a match on either side are dropped.
merged_df = communities_df.merge(institutions_df, left_on='Name', right_on='Communities')

In [81]:
# Preview the merged table.
merged_df.head()

In [82]:
# List the merged column names (useful for spotting the _x/_y suffixed
# columns that are renamed and dropped in the next cells).
list(merged_df)

In [83]:
# Give the join key and the institution name descriptive labels, in place.
# 'Name_y' is presumably the institutions table's own 'Name' column, suffixed
# by the merge because both inputs have a 'Name' column — verify against the
# column listing above.
merged_df.rename(columns={'Communities': 'Community_Name', 'Name_y': 'Institution_Name'}, inplace=True)

In [84]:
# Remove columns that are not wanted in the prepped output, in place.
# drop() raises KeyError if any of these columns is missing.
merged_df.drop(
    columns=['Name_x', 'Metanotes_y', 'Metanotes_x', 'expert check', 'FC comments', 'Institutions'],
    inplace=True,
)

In [85]:
# Preview the table after the rename and column removal.
merged_df.head()

In [86]:
# Put the two identifying columns first and keep every other column in its
# existing relative order.
cols_to_move = ['Institution_Name', 'Community_Name']
remaining_cols = [col for col in merged_df.columns if col not in cols_to_move]
merged_df = merged_df[cols_to_move + remaining_cols]

In [87]:
# Final preview before the table is written to disk.
merged_df.head()

### Write to CSV

In [88]:
# Save the prepped institutions dataset under the selected data version,
# without the DataFrame index column.
prepped_csv = '../data/{}/institutions_data_prepped.csv'.format(DATA_VERSION)
merged_df.to_csv(prepped_csv, index=False)