### Imports

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.options.display.max_rows = 4000
pd.options.display.max_columns = 4000

# Communities Data Prep

## Clean and explore data

In [3]:
DATA_VERSION = 'v4'

In [4]:
gov_arch_csv = '../data/' + DATA_VERSION + '/Communities-Grid-view.csv'

In [5]:
gov_df = pd.read_csv(gov_arch_csv)

In [6]:
gov_df.head()

Unnamed: 0,Name,Institutions,Mechanism,Notes,Time span,Geography,Size,Source,Metanotes,Heterogeneity,FC comments,expert check,Time span: Start,Time span: End,SCCS
0,Aedui (Celtic Oppida),"Senatus,Elected magistrates,Assemblies","Election,Council (central) ,Assembly (central)...",There are many Celtic oppida throughout northe...,2nd-1st century BC,Europe North,"100,001 - 1 million members","Collis, “Celtic” Oppida, in Hansen, A Comparat...",,,Edited March 23,,-200.0,0.0,
1,Akha,"Council of Elders,Village Founder-Leader (dzom...","Leader (first among equals),Gerontocracy,Blood...",see Institutions tab,20th century - Present,Asia Southeast,"10,001 - 100,000","Main: \nKammerer, Cornelia Ann (1985/86), Gate...",,"Informal inclusion,Gender groups,Informal powe...",Edited May 23,,1900.0,2023.0,
2,Amalfi,"Prefetturii/ Conti/ Dogi,Sedile di congrega,Ta...","Election,Magistrate/official,Heredity,Constitu...","Camera, M, p. 29 (""La sua popolazione che nel ...",4th-12th century AD,Europe West,"10,001 - 100,000","Camera, M. Istoria della città e costiera di A...",,,Edited March 23 (could use more info on CGIs),,300.0,1200.0,
3,Arab nomads,"shaykh or sayyid,shura or mashwara","Consensus,Consultation,Autocratic Leader/Chief...",Nomadic Bedouin society governed by rulers sel...,"1st-7th century AD\n(start date is arbitrary, ...",Middle East,Unknown,"Stasavage, David (2020), The Decline and Rise ...",,,Edited Feb 23 (could use additional sources fo...,,0.0,700.0,
4,Assur,"King,Alum,Limmum,Scribe,City hall","President/Consul/Chief Executive,Assembly (cen...",Independent city-state. Constitutional monarch...,19th century BC\n(1920-1800 BC),"Europe Southeast, Anatolia","1,001 - 10,000","Larsen, M. T. The Old Assyrian City-State in H...",,,Edited Mar 23,Giacomo CHECKED,-1920.0,-1800.0,


In [7]:
gov_df.describe()

Unnamed: 0,Time span: Start,Time span: End
count,65.0,65.0
mean,522.276923,922.907692
std,1415.291738,1279.133065
min,-4100.0,-3300.0
25%,-480.0,-27.0
50%,1081.0,1521.0
75%,1700.0,1900.0
max,1900.0,2023.0


In [8]:
list(gov_df)

['Name',
 'Institutions',
 'Mechanism',
 'Notes',
 'Time span',
 'Geography',
 'Size',
 'Source',
 'Metanotes',
 'Heterogeneity ',
 'FC comments',
 'expert check',
 'Time span: Start',
 'Time span: End',
 'SCCS']

# Data prep

In [9]:
gov_df = gov_df

## Mechanisms

In [10]:
gov_df['Mechanism']

0     Election,Council (central) ,Assembly (central)...
1     Leader (first among equals),Gerontocracy,Blood...
2     Election,Magistrate/official,Heredity,Constitu...
3     Consensus,Consultation,Autocratic Leader/Chief...
4     President/Consul/Chief Executive,Assembly (cen...
5     Election,Plurality voting,Temporary position o...
6     Autocratic Leader/Chief (bound),Heredity,Merit...
7     Assembly (central),Checks and balances,Consent...
8     Blood relations,Tribunal/Court ,Suspension/Exp...
9     Leader (first among equals),Heredity,Matriline...
10    Magistrate/official,Consultation,Heredity,Publ...
11    Council (central) ,Gerontocracy,Matriarchy,Fem...
12    Co-optation,Temporary position of power/Term L...
13    Autocratic Leader/Chief (bound),Consultation,A...
14    Assembly (central),Confederacy,Turnover/rotati...
15    Autocratic Leader/Chief (bound),Council (centr...
16    Petition,Autocratic Leader (unbound),Divine ri...
17    Autocratic Leader/Chief (bound),Jury/Judge

In [11]:
mechanism_list = list(gov_df['Mechanism'])

In [12]:
mech_test = mechanism_list[0]

In [13]:
mech_test.split(',')

['Election',
 'Council (central) ',
 'Assembly (central)',
 'Magistrate/official',
 'Balance of power',
 'Ratification/Acclamation only']

In [14]:
test_set = set()

In [15]:
test_set.update(mech_test.split(','))
test_set

{'Assembly (central)',
 'Balance of power',
 'Council (central) ',
 'Election',
 'Magistrate/official',
 'Ratification/Acclamation only'}

In [16]:
mechanism_set = set()

In [17]:
# Create set of all mechanisms
[mechanism_set.update(mechanisms.split(',')) for mechanisms in mechanism_list]
mechanism_set

{'Age boundaries',
 'Agenda setting',
 'Alliance',
 'Appeal',
 'Arbitration',
 'Assembly (central)',
 'Assembly (elite)',
 'Assembly (local)',
 'Autocratic Leader (unbound)',
 'Autocratic Leader/Chief (bound)',
 'Balance of power',
 'Blood relations',
 'Board/Committee',
 'Bureaucracy',
 'Capital punishment',
 'Checks and balances',
 'Child participation',
 'Civil disobedience',
 'Co-optation',
 'Coalition',
 'Confederacy',
 'Consensus',
 'Consent',
 'Constitution',
 'Consultation',
 'Corporal punishment/Violence against perpetrator',
 'Council (central) ',
 'Council (local)',
 'Criticism/Mocking/Joking',
 'Delegation',
 'Deliberation',
 'Destruction of property',
 'Divine right',
 'Dreaming',
 'Dual-sex political system',
 'Election',
 'Enfranchisement',
 'Exit',
 'Female participation',
 'Fines',
 'Forced labour',
 'Frequent and/or regular meetings',
 'Gathering',
 'Gerontocracy',
 'Graduated sanctions',
 'Group voting',
 'Heredity',
 'Induction rite/ceremony',
 'Judicial review',
 '

### Encode SCCS as true/false

In [18]:
gov_df['is_SCCS'] = gov_df['SCCS'].apply(lambda x: 1 if x == 'yes' else 0)

In [19]:
gov_df[['SCCS', 'is_SCCS']]

Unnamed: 0,SCCS,is_SCCS
0,,0
1,,0
2,,0
3,,0
4,,0
5,,0
6,,0
7,,0
8,,0
9,,0


### Add mechanism columns
Add columns in the format of `mechanism_is_{mechanism}`

In [20]:
test_df = gov_df[gov_df['Mechanism'].str.contains('Patronage (for office)', regex=False)]

In [21]:
test_df

Unnamed: 0,Name,Institutions,Mechanism,Notes,Time span,Geography,Size,Source,Metanotes,Heterogeneity,FC comments,expert check,Time span: Start,Time span: End,SCCS,is_SCCS
16,Egypt,"Pharaoh,Vizier (t3.ty),'Overseer of the Treasu...","Petition,Autocratic Leader (unbound),Divine ri...","Entry focuses on the middle kingdom, when we h...",27th-11th century BC\n\nOld Kingdom: 2700-2200...,Africa North,1 million - 10 million members,"'- Alexandre Alexandrovich Loktionov, Inclusiv...",,,Edited May 23,ALEX CHECKED,-2700.0,-1000.0,,0
24,Hausa City-States,"Council of State,Sarki,Aristocratic Offices,Co...","Autocratic Leader/Chief (bound),Council (centr...","City states in Northern Nigeria, ruled by a ki...",15th to 19th century AD\n(1450-1804),"Africa West, Nigeria","10,001 - 100,000","Robert Griffeth, The Hausa City-States from 14...",,,Edited Apr 23 (perhaps find more info about th...,,1450.0,1804.0,,0
33,Japan (Early Modern),"Shogun,Daimyo,Village Council,Samurai,Emperor,...","Consensus,Election,Council (local),Lottery/Ran...",Corporate Landholding (warichi)\nThe distribut...,17-18th century AD,"Asia, Japan",20 million - 30 million,"Brown, Philip (2006), Arable Land as Commons: ...","There were around 27 provinces, controlled by ...","Social groups,Inclusion through different inst...",Edited May 23 (can use more information abut t...,,1600.0,1800.0,,0


In [22]:
gov_df['Mechanism']

0     Election,Council (central) ,Assembly (central)...
1     Leader (first among equals),Gerontocracy,Blood...
2     Election,Magistrate/official,Heredity,Constitu...
3     Consensus,Consultation,Autocratic Leader/Chief...
4     President/Consul/Chief Executive,Assembly (cen...
5     Election,Plurality voting,Temporary position o...
6     Autocratic Leader/Chief (bound),Heredity,Merit...
7     Assembly (central),Checks and balances,Consent...
8     Blood relations,Tribunal/Court ,Suspension/Exp...
9     Leader (first among equals),Heredity,Matriline...
10    Magistrate/official,Consultation,Heredity,Publ...
11    Council (central) ,Gerontocracy,Matriarchy,Fem...
12    Co-optation,Temporary position of power/Term L...
13    Autocratic Leader/Chief (bound),Consultation,A...
14    Assembly (central),Confederacy,Turnover/rotati...
15    Autocratic Leader/Chief (bound),Council (centr...
16    Petition,Autocratic Leader (unbound),Divine ri...
17    Autocratic Leader/Chief (bound),Jury/Judge

In [23]:
mechanism_set

{'Age boundaries',
 'Agenda setting',
 'Alliance',
 'Appeal',
 'Arbitration',
 'Assembly (central)',
 'Assembly (elite)',
 'Assembly (local)',
 'Autocratic Leader (unbound)',
 'Autocratic Leader/Chief (bound)',
 'Balance of power',
 'Blood relations',
 'Board/Committee',
 'Bureaucracy',
 'Capital punishment',
 'Checks and balances',
 'Child participation',
 'Civil disobedience',
 'Co-optation',
 'Coalition',
 'Confederacy',
 'Consensus',
 'Consent',
 'Constitution',
 'Consultation',
 'Corporal punishment/Violence against perpetrator',
 'Council (central) ',
 'Council (local)',
 'Criticism/Mocking/Joking',
 'Delegation',
 'Deliberation',
 'Destruction of property',
 'Divine right',
 'Dreaming',
 'Dual-sex political system',
 'Election',
 'Enfranchisement',
 'Exit',
 'Female participation',
 'Fines',
 'Forced labour',
 'Frequent and/or regular meetings',
 'Gathering',
 'Gerontocracy',
 'Graduated sanctions',
 'Group voting',
 'Heredity',
 'Induction rite/ceremony',
 'Judicial review',
 '

### Categorize Mechanisms by: Access, Decision Making, Enforcement

In [24]:
access = [
 'mechanisms_include_meritocracy',
 'mechanisms_include_age_boundaries',
 'mechanisms_include_blood_relations',
 'mechanisms_include_life_appointment',
 'mechanisms_include_co-optation',
 'mechanisms_include_patronage_for_office',
 'mechanisms_include_heredity',
 'mechanisms_include_lottery_random_selection',
 'mechanisms_include_matriarchy',
 'mechanisms_include_screening_process',
 'mechanisms_include_property_requirement',
 'mechanisms_include_induction_rite_ceremony',
 'mechanisms_include_matrilineality',
 'mechanisms_include_enfranchisement',
 'mechanisms_include_popularity_',
 'mechanisms_include_divine_right',
 'mechanisms_include_payment_for_occupying_office',
 'mechanisms_include_gerontocracy',
 'mechanisms_include_open_political_unit',
 'mechanisms_include_election',
 'mechanisms_include_dreaming',
 'mechanisms_include_female_participation'
]

decision_making = [
 'mechanisms_include_secret_ballot',
 'mechanisms_include_vote_by_proxy',
 'mechanisms_include_voting',
 'mechanisms_include_mandate',
 'mechanisms_include_lobbying',
 'mechanisms_include_representation',
 'mechanisms_include_consensus',
 'mechanisms_include_magistrate_official',
 'mechanisms_include_word_of_mouth',
 'mechanisms_include_consultation',
 'mechanisms_include_checks_and_balances',
 'mechanisms_include_council_central_',
 'mechanisms_include_unanimity',
 'mechanisms_include_kingchief-as-figurehead',
 'mechanisms_include_deliberation',
 'mechanisms_include_autocratic_leader_chief_bound',
 'mechanisms_include_plurality_voting',
 'mechanisms_include_leader_first_among_equals',
 'mechanisms_include_power_fluidity',
 'mechanisms_include_petition',
 'mechanisms_include_delegation',
 'mechanisms_include_vote_by_show_of_hands',
 'mechanisms_include_temporary_position_of_power_term_limits',
 'mechanisms_include_ratification_acclamation_only',
 'mechanisms_include_rule_of_law',
 'mechanisms_include_assembly_central',
 'mechanisms_include_supermajority',
 'mechanisms_include_handshake',
 'mechanisms_include_bureaucracy',
 'mechanisms_include_majority_voting',
 'mechanisms_include_alliance',
 'mechanisms_include_board_committee',
 'mechanisms_include_dual-sex_political_system',
 'mechanisms_include_remunerated_position',
 'mechanisms_include_public_meetings',
 'mechanisms_include_balance_of_power',
 'mechanisms_include_consent',
 'mechanisms_include_turnover_rotation',
 'mechanisms_include_quorum',
 'mechanisms_include_frequent_and_or_regular_meetings',
 'mechanisms_include_confederacy',
 'mechanisms_include_assembly_local',
 'mechanisms_include_council_local',
 'mechanisms_include_oath',
 'mechanisms_include_constitution',
 'mechanisms_include_president_consul_chief_executive',
 'mechanisms_include_veto',
 'mechanisms_include_autocratic_leader_unbound',
 'mechanisms_include_assembly_elite',
 'mechanisms_include_agenda_setting',
 'mechanisms_include_group_voting',
 'mechanisms_include_coalition',
 'mechanisms_include_gathering'
]

enforcement = [
 'mechanisms_include_negotiation',
 'mechanisms_include_suspension_expulsion_ostracism_proscription',
 'mechanisms_include_fines',
 'mechanisms_include_criticism_mocking_joking',
 'mechanisms_include_corporal_punishment_violence_against_perpetrator',
 'mechanisms_include_forced_labour',
 'mechanisms_include_paying_damages_compensation',
 'mechanisms_include_seizing_of_property',
 'mechanisms_include_monitoring',
 'mechanisms_include_reputational_risk',
 'mechanisms_include_appeal',
 'mechanisms_include_graduated_sanctions',
 'mechanisms_include_self-help',
 'mechanisms_include_civil_disobedience',
 'mechanisms_include_military_force',
 'mechanisms_include_arbitration',
 'mechanisms_include_police',
 'mechanisms_include_exit',
 'mechanisms_include_jury_judge',
 'mechanisms_include_capital_punishment',
 'mechanisms_include_mediation',
 'mechanisms_include_destruction_of_property',
 'mechanisms_include_litigation',
 'mechanisms_include_tribunal_court_',
 'mechanisms_include_judicial_review',
 'mechanisms_include_scrutiny_of_officials'
]

In [25]:
def categorize_mechanism(mechanism):
    if mechanism in access:
        return 'access_'
    elif mechanism in decision_making:
        return 'decision_making_'
    elif mechanism in enforcement:
        return 'enforcement_'

    return ''

In [26]:
for mechanism in mechanism_set:
    mechanism_str = mechanism.lower().replace(' ', '_').replace('/', '_').replace('(','').replace(')','') # Normalize text
    mechanism_name = 'mechanisms_include_' + mechanism_str
    col_name = categorize_mechanism(mechanism_name) + mechanism_name
    
    gov_df[col_name] = np.where(gov_df['Mechanism'].str.contains(mechanism, regex=False), 1, 0)

  gov_df[col_name] = np.where(gov_df['Mechanism'].str.contains(mechanism, regex=False), 1, 0)
  gov_df[col_name] = np.where(gov_df['Mechanism'].str.contains(mechanism, regex=False), 1, 0)
  gov_df[col_name] = np.where(gov_df['Mechanism'].str.contains(mechanism, regex=False), 1, 0)


In [27]:
list(gov_df.columns)

['Name',
 'Institutions',
 'Mechanism',
 'Notes',
 'Time span',
 'Geography',
 'Size',
 'Source',
 'Metanotes',
 'Heterogeneity ',
 'FC comments',
 'expert check',
 'Time span: Start',
 'Time span: End',
 'SCCS',
 'is_SCCS',
 'enforcement_mechanisms_include_jury_judge',
 'access_mechanisms_include_life_appointment',
 'enforcement_mechanisms_include_judicial_review',
 'decision_making_mechanisms_include_president_consul_chief_executive',
 'decision_making_mechanisms_include_secret_ballot',
 'access_mechanisms_include_patronage_for_office',
 'access_mechanisms_include_enfranchisement',
 'enforcement_mechanisms_include_tribunal_court_',
 'decision_making_mechanisms_include_magistrate_official',
 'enforcement_mechanisms_include_forced_labour',
 'access_mechanisms_include_age_boundaries',
 'decision_making_mechanisms_include_plurality_voting',
 'access_mechanisms_include_female_participation',
 'decision_making_mechanisms_include_word_of_mouth',
 'access_mechanisms_include_payment_for_occup

In [28]:
mechanisms = [mechanism for mechanism in list(gov_df.columns) if 'mechanisms_include' in mechanism]

In [29]:
access_mechanisms = [mechanism for mechanism in list(gov_df.columns) if 'access_' in mechanism]

In [30]:
decision_making_mechanisms = [mechanism for mechanism in list(gov_df.columns) if 'decision_making_' in mechanism]

In [31]:
enforcement_mechanisms = [mechanism for mechanism in list(gov_df.columns) if 'enforcement_' in mechanism]

In [32]:
sorted(mechanisms)

['access_mechanisms_include_age_boundaries',
 'access_mechanisms_include_blood_relations',
 'access_mechanisms_include_co-optation',
 'access_mechanisms_include_divine_right',
 'access_mechanisms_include_dreaming',
 'access_mechanisms_include_election',
 'access_mechanisms_include_enfranchisement',
 'access_mechanisms_include_female_participation',
 'access_mechanisms_include_gerontocracy',
 'access_mechanisms_include_heredity',
 'access_mechanisms_include_induction_rite_ceremony',
 'access_mechanisms_include_life_appointment',
 'access_mechanisms_include_lottery_random_selection',
 'access_mechanisms_include_matriarchy',
 'access_mechanisms_include_matrilineality',
 'access_mechanisms_include_meritocracy',
 'access_mechanisms_include_open_political_unit',
 'access_mechanisms_include_patronage_for_office',
 'access_mechanisms_include_payment_for_occupying_office',
 'access_mechanisms_include_popularity_',
 'access_mechanisms_include_property_requirement',
 'access_mechanisms_include_sc

In [33]:
len(mechanisms)

100

In [34]:
len(access_mechanisms) + len(decision_making_mechanisms) + len(enforcement_mechanisms)

99

In [35]:
access_mechanisms

['access_mechanisms_include_life_appointment',
 'access_mechanisms_include_patronage_for_office',
 'access_mechanisms_include_enfranchisement',
 'access_mechanisms_include_age_boundaries',
 'access_mechanisms_include_female_participation',
 'access_mechanisms_include_payment_for_occupying_office',
 'access_mechanisms_include_gerontocracy',
 'access_mechanisms_include_open_political_unit',
 'access_mechanisms_include_blood_relations',
 'access_mechanisms_include_induction_rite_ceremony',
 'access_mechanisms_include_lottery_random_selection',
 'access_mechanisms_include_election',
 'access_mechanisms_include_matrilineality',
 'access_mechanisms_include_meritocracy',
 'access_mechanisms_include_co-optation',
 'access_mechanisms_include_screening_process',
 'access_mechanisms_include_divine_right',
 'access_mechanisms_include_dreaming',
 'access_mechanisms_include_popularity_',
 'access_mechanisms_include_heredity',
 'access_mechanisms_include_property_requirement',
 'access_mechanisms_inc

In [36]:
len(mechanisms)

100

In [37]:
gov_df[mechanisms].sum().sort_values(ascending=True)

decision_making_mechanisms_include_coalition                                        1
enforcement_mechanisms_include_civil_disobedience                                   1
decision_making_mechanisms_include_power_fluidity                                   1
decision_making_mechanisms_include_group_voting                                     1
decision_making_mechanisms_include_lobbying                                         1
decision_making_mechanisms_include_mandate                                          1
decision_making_mechanisms_include_petition                                         1
decision_making_mechanisms_include_vote_by_proxy                                    2
decision_making_mechanisms_include_supermajority                                    2
decision_making_mechanisms_include_rule_of_law                                      2
access_mechanisms_include_screening_process                                         2
enforcement_mechanisms_include_monitoring             

In [38]:
gov_df[mechanisms].head()

Unnamed: 0,enforcement_mechanisms_include_jury_judge,access_mechanisms_include_life_appointment,enforcement_mechanisms_include_judicial_review,decision_making_mechanisms_include_president_consul_chief_executive,decision_making_mechanisms_include_secret_ballot,access_mechanisms_include_patronage_for_office,access_mechanisms_include_enfranchisement,enforcement_mechanisms_include_tribunal_court_,decision_making_mechanisms_include_magistrate_official,enforcement_mechanisms_include_forced_labour,access_mechanisms_include_age_boundaries,decision_making_mechanisms_include_plurality_voting,access_mechanisms_include_female_participation,decision_making_mechanisms_include_word_of_mouth,access_mechanisms_include_payment_for_occupying_office,access_mechanisms_include_gerontocracy,decision_making_mechanisms_include_leader_first_among_equals,enforcement_mechanisms_include_fines,decision_making_mechanisms_include_coalition,decision_making_mechanisms_include_assembly_elite,decision_making_mechanisms_include_rule_of_law,decision_making_mechanisms_include_supermajority,decision_making_mechanisms_include_frequent_and_or_regular_meetings,access_mechanisms_include_open_political_unit,enforcement_mechanisms_include_graduated_sanctions,decision_making_mechanisms_include_council_local,decision_making_mechanisms_include_deliberation,decision_making_mechanisms_include_confederacy,decision_making_mechanisms_include_balance_of_power,decision_making_mechanisms_include_remunerated_position,enforcement_mechanisms_include_seizing_of_property,enforcement_mechanisms_include_capital_punishment,access_mechanisms_include_blood_relations,enforcement_mechanisms_include_reputational_risk,enforcement_mechanisms_include_suspension_expulsion_ostracism_proscription,enforcement_mechanisms_include_police,access_mechanisms_include_induction_rite_ceremony,enforcement_mechanisms_include_corporal_punishment_violence_against_perpetrator,enforcement_mechanisms_include_exit,decision_making_mechanisms_include_agenda_setting,decision_making_mechanisms_include_kingchief-as-figurehead,enforcement_mechanisms_include_arbitration,decision_making_mechanisms_include_unanimity,decision_making_mechanisms_include_bureaucracy,decision_making_mechanisms_include_checks_and_balances,access_mechanisms_include_lottery_random_selection,enforcement_mechanisms_include_self-help,access_mechanisms_include_election,decision_making_mechanisms_include_majority_voting,decision_making_mechanisms_include_board_committee,decision_making_mechanisms_include_mandate,decision_making_mechanisms_include_oath,access_mechanisms_include_matrilineality,decision_making_mechanisms_include_lobbying,access_mechanisms_include_meritocracy,decision_making_mechanisms_include_gathering,enforcement_mechanisms_include_destruction_of_property,decision_making_mechanisms_include_petition,mechanisms_include_child_participation,decision_making_mechanisms_include_assembly_central,decision_making_mechanisms_include_ratification_acclamation_only,enforcement_mechanisms_include_monitoring,access_mechanisms_include_co-optation,access_mechanisms_include_screening_process,decision_making_mechanisms_include_autocratic_leader_unbound,access_mechanisms_include_divine_right,enforcement_mechanisms_include_criticism_mocking_joking,decision_making_mechanisms_include_constitution,decision_making_mechanisms_include_council_central_,decision_making_mechanisms_include_quorum,decision_making_mechanisms_include_group_voting,access_mechanisms_include_dreaming,enforcement_mechanisms_include_litigation,decision_making_mechanisms_include_alliance,enforcement_mechanisms_include_mediation,access_mechanisms_include_popularity_,enforcement_mechanisms_include_civil_disobedience,decision_making_mechanisms_include_vote_by_proxy,enforcement_mechanisms_include_negotiation,decision_making_mechanisms_include_assembly_local,decision_making_mechanisms_include_autocratic_leader_chief_bound,decision_making_mechanisms_include_consensus,decision_making_mechanisms_include_power_fluidity,decision_making_mechanisms_include_representation,enforcement_mechanisms_include_scrutiny_of_officials,decision_making_mechanisms_include_public_meetings,access_mechanisms_include_heredity,decision_making_mechanisms_include_consent,access_mechanisms_include_property_requirement,decision_making_mechanisms_include_turnover_rotation,decision_making_mechanisms_include_dual-sex_political_system,decision_making_mechanisms_include_delegation,decision_making_mechanisms_include_vote_by_show_of_hands,enforcement_mechanisms_include_appeal,decision_making_mechanisms_include_consultation,decision_making_mechanisms_include_voting,enforcement_mechanisms_include_paying_damages_compensation,decision_making_mechanisms_include_veto,access_mechanisms_include_matriarchy,decision_making_mechanisms_include_temporary_position_of_power_term_limits
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1


## Culture/Values/Norms

In [39]:
# gov_df['Culture/Values/Norms'] = gov_df['Culture/Values/Norms'].fillna('')

In [40]:
# gov_df['Culture/Values/Norms']

In [41]:
# values_list = list(gov_df['Culture/Values/Norms'])

In [42]:
# values_test = values_list[0]
# values_test

In [43]:
# [v.strip() for v in values_test.split(',')]

In [44]:
# values_set = set()

In [45]:
# Create set of all values
# [values_set.update([v.strip() for v in values.split(',')]) for values in values_list]
# values_set

In [46]:
# values_set.remove('')

In [47]:
# values_set

In [48]:
"""
for value in values_set:
    value_str = value.lower().replace('/', '_').replace(' ','_').replace('(','').replace(')','') # Normalize text
    col_name = 'values_include_' + value_str
    gov_df[col_name] = np.where(gov_df['Culture/Values/Norms'].str.contains(value, regex=False), 1, 0)
"""

"\nfor value in values_set:\n    value_str = value.lower().replace('/', '_').replace(' ','_').replace('(','').replace(')','') # Normalize text\n    col_name = 'values_include_' + value_str\n    gov_df[col_name] = np.where(gov_df['Culture/Values/Norms'].str.contains(value, regex=False), 1, 0)\n"

## Institutions

In [49]:
# gov_df['Institutions'] = gov_df['Institutions'].fillna('')

In [50]:
# gov_df['Institutions']

In [51]:
# institutions_list = list(gov_df['Institutions'])

In [52]:
# institutions_list

In [53]:
# institutions_test = institutions_list[0]
# institutions_test

In [54]:
# [i.strip() for i in institutions_test.split(',')]

In [55]:
# institutions_set = set()

In [56]:
# String test
# "\'\'\"".strip().replace('"', '').replace('\'','')

In [57]:
# test_string = '"Hunting ""chief"""'
# test_string

In [58]:
# test_string.strip().replace('\"', '').replace('\'','')

In [59]:
# original_institutions_set = set()

In [60]:
# [original_institutions_set.update([i for i in institutions.split(',')]) for institutions in institutions_list]
# original_institutions_set

In [61]:
# Create set of all values
# [institutions_set.update([i.lower().strip() for i in institutions.split(',')]) for institutions in institutions_list]
# institutions_set

In [62]:
# len(institutions_set)

In [63]:
# Cleanup
# cleaned_institutions_set = {i.replace('\"', '').replace('\'','') for i in institutions_set}

In [64]:
# cleaned_institutions_set

In [65]:
# len(cleaned_institutions_set)

In [66]:
# institutions_set.difference(cleaned_institutions_set)

In [67]:
# cleaned_institutions_set.difference(institutions_set)

In [68]:
# gov_df['Institutions']

In [69]:
"""
for institution in original_institutions_set:
    institution_str = institution.lower() \
                                .replace('/', '_') \
                                .replace(' ','_') \
                                .replace('(','') \
                                .replace(')','') \
                                .replace('\"', '') \
                                .replace('\'','') # Normalize text
    col_name = 'institutions_include_' + institution_str
    gov_df[col_name] = np.where(gov_df['Institutions'].str.contains(institution, regex=False), 1, 0)
    
"""
# Exclude institutions for now 

'\nfor institution in original_institutions_set:\n    institution_str = institution.lower()                                 .replace(\'/\', \'_\')                                 .replace(\' \',\'_\')                                 .replace(\'(\',\'\')                                 .replace(\')\',\'\')                                 .replace(\'"\', \'\')                                 .replace(\'\'\',\'\') # Normalize text\n    col_name = \'institutions_include_\' + institution_str\n    gov_df[col_name] = np.where(gov_df[\'Institutions\'].str.contains(institution, regex=False), 1, 0)\n    \n'

In [70]:
# list(cleaned_institutions_set)

In [71]:
# list(gov_df)

In [72]:
# institution_columns = [col_name for col_name in list(gov_df) if 'institutions_include' in col_name]

In [73]:
# institution_columns

In [74]:
# gov_df[institution_columns].sum().sort_values()

In [75]:
# TODO: Institutions need cleanup, formatting is causing issues with encoding

In [76]:
# gov_df.head()

## Size

In [77]:
"""
Replace sizes:

100 - 1,000
1,001 - 10,000
20,000-30,000                  -> 10,001 - 100,000
33,000                         -> 10,001 - 100,000 
10,001 - 100,000
100,001 - 1 million members    -> 100,001 - 1,000,000
1 million - 10 million members -> 1,000,001 - 10,000,000
+10 million members            -> 10,000,000+
20 million - 30 million        -> 10,000,000+
Unknown
"""

'\nReplace sizes:\n\n100 - 1,000\n1,001 - 10,000\n20,000-30,000                  -> 10,001 - 100,000\n33,000                         -> 10,001 - 100,000 \n10,001 - 100,000\n100,001 - 1 million members    -> 100,001 - 1,000,000\n1 million - 10 million members -> 1,000,001 - 10,000,000\n+10 million members            -> 10,000,000+\n20 million - 30 million        -> 10,000,000+\nUnknown\n'

In [78]:
# Replace size values
gov_df['Size'] = gov_df['Size'].replace(['20,000-30,000','33,000'], '10,001 - 100,000')

In [79]:
gov_df['Size'] = gov_df['Size'].replace(['100,001 - 1 million members'], '100,001 - 1,000,000')

In [80]:
gov_df['Size'] = gov_df['Size'].replace(['1 million - 10 million members'], '1,000,001 - 10,000,000')

In [81]:
gov_df['Size'] = gov_df['Size'].replace(['+10 million members','20 million - 30 million'], '10,000,000+')

In [82]:
gov_df['Size'].value_counts()

Size
10,001 - 100,000          37
1,001 - 10,000            22
100,001 - 1,000,000       10
1,000,001 - 10,000,000     6
Unknown                    5
100 - 1,000                5
10,000,000+                4
Name: count, dtype: int64

In [83]:
size_order = [
    '100 - 1,000',
    '1,001 - 10,000',
    '10,001 - 100,000',
    '100,001 - 1,000,000',
    '1,000,001 - 10,000,000',
    '10,000,000+',
    'Unknown'
]

In [84]:
for size in size_order:
    col_name = 'Size: ' + size
    gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)

  gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)
  gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)
  gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)
  gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)
  gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)
  gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)
  gov_df[col_name] = np.where(gov_df['Size'] == size, 1, 0)


In [85]:
gov_df.head()

Unnamed: 0,Name,Institutions,Mechanism,Notes,Time span,Geography,Size,Source,Metanotes,Heterogeneity,FC comments,expert check,Time span: Start,Time span: End,SCCS,is_SCCS,enforcement_mechanisms_include_jury_judge,access_mechanisms_include_life_appointment,enforcement_mechanisms_include_judicial_review,decision_making_mechanisms_include_president_consul_chief_executive,decision_making_mechanisms_include_secret_ballot,access_mechanisms_include_patronage_for_office,access_mechanisms_include_enfranchisement,enforcement_mechanisms_include_tribunal_court_,decision_making_mechanisms_include_magistrate_official,enforcement_mechanisms_include_forced_labour,access_mechanisms_include_age_boundaries,decision_making_mechanisms_include_plurality_voting,access_mechanisms_include_female_participation,decision_making_mechanisms_include_word_of_mouth,access_mechanisms_include_payment_for_occupying_office,access_mechanisms_include_gerontocracy,decision_making_mechanisms_include_leader_first_among_equals,enforcement_mechanisms_include_fines,decision_making_mechanisms_include_coalition,decision_making_mechanisms_include_assembly_elite,decision_making_mechanisms_include_rule_of_law,decision_making_mechanisms_include_supermajority,decision_making_mechanisms_include_frequent_and_or_regular_meetings,access_mechanisms_include_open_political_unit,enforcement_mechanisms_include_graduated_sanctions,decision_making_mechanisms_include_council_local,decision_making_mechanisms_include_deliberation,decision_making_mechanisms_include_confederacy,decision_making_mechanisms_include_balance_of_power,decision_making_mechanisms_include_remunerated_position,enforcement_mechanisms_include_seizing_of_property,enforcement_mechanisms_include_capital_punishment,access_mechanisms_include_blood_relations,enforcement_mechanisms_include_reputational_risk,enforcement_mechanisms_include_suspension_expulsion_ostracism_proscription,enforcement_mechanisms_include_police,access_mechanisms_include_induction_rite_ceremony,enforcement_mechanisms_include_corporal_punishment_violence_against_perpetrator,enforcement_mechanisms_include_exit,decision_making_mechanisms_include_agenda_setting,decision_making_mechanisms_include_kingchief-as-figurehead,enforcement_mechanisms_include_arbitration,decision_making_mechanisms_include_unanimity,decision_making_mechanisms_include_bureaucracy,decision_making_mechanisms_include_checks_and_balances,access_mechanisms_include_lottery_random_selection,enforcement_mechanisms_include_self-help,access_mechanisms_include_election,decision_making_mechanisms_include_majority_voting,decision_making_mechanisms_include_board_committee,decision_making_mechanisms_include_mandate,decision_making_mechanisms_include_oath,access_mechanisms_include_matrilineality,decision_making_mechanisms_include_lobbying,access_mechanisms_include_meritocracy,decision_making_mechanisms_include_gathering,enforcement_mechanisms_include_destruction_of_property,decision_making_mechanisms_include_petition,mechanisms_include_child_participation,decision_making_mechanisms_include_assembly_central,decision_making_mechanisms_include_ratification_acclamation_only,enforcement_mechanisms_include_monitoring,access_mechanisms_include_co-optation,access_mechanisms_include_screening_process,decision_making_mechanisms_include_autocratic_leader_unbound,access_mechanisms_include_divine_right,enforcement_mechanisms_include_criticism_mocking_joking,decision_making_mechanisms_include_constitution,decision_making_mechanisms_include_council_central_,decision_making_mechanisms_include_quorum,decision_making_mechanisms_include_group_voting,access_mechanisms_include_dreaming,enforcement_mechanisms_include_litigation,decision_making_mechanisms_include_alliance,enforcement_mechanisms_include_mediation,access_mechanisms_include_popularity_,enforcement_mechanisms_include_civil_disobedience,decision_making_mechanisms_include_vote_by_proxy,enforcement_mechanisms_include_negotiation,decision_making_mechanisms_include_assembly_local,decision_making_mechanisms_include_autocratic_leader_chief_bound,decision_making_mechanisms_include_consensus,decision_making_mechanisms_include_power_fluidity,decision_making_mechanisms_include_representation,enforcement_mechanisms_include_scrutiny_of_officials,decision_making_mechanisms_include_public_meetings,access_mechanisms_include_heredity,decision_making_mechanisms_include_consent,access_mechanisms_include_property_requirement,decision_making_mechanisms_include_turnover_rotation,decision_making_mechanisms_include_dual-sex_political_system,decision_making_mechanisms_include_delegation,decision_making_mechanisms_include_vote_by_show_of_hands,enforcement_mechanisms_include_appeal,decision_making_mechanisms_include_consultation,decision_making_mechanisms_include_voting,enforcement_mechanisms_include_paying_damages_compensation,decision_making_mechanisms_include_veto,access_mechanisms_include_matriarchy,decision_making_mechanisms_include_temporary_position_of_power_term_limits,"Size: 100 - 1,000","Size: 1,001 - 10,000","Size: 10,001 - 100,000","Size: 100,001 - 1,000,000","Size: 1,000,001 - 10,000,000","Size: 10,000,000+",Size: Unknown
0,Aedui (Celtic Oppida),"Senatus,Elected magistrates,Assemblies","Election,Council (central) ,Assembly (central)...",There are many Celtic oppida throughout northe...,2nd-1st century BC,Europe North,"100,001 - 1,000,000","Collis, “Celtic” Oppida, in Hansen, A Comparat...",,,Edited March 23,,-200.0,0.0,,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,Akha,"Council of Elders,Village Founder-Leader (dzom...","Leader (first among equals),Gerontocracy,Blood...",see Institutions tab,20th century - Present,Asia Southeast,"10,001 - 100,000","Main: \nKammerer, Cornelia Ann (1985/86), Gate...",,"Informal inclusion,Gender groups,Informal powe...",Edited May 23,,1900.0,2023.0,,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0
2,Amalfi,"Prefetturii/ Conti/ Dogi,Sedile di congrega,Ta...","Election,Magistrate/official,Heredity,Constitu...","Camera, M, p. 29 (""La sua popolazione che nel ...",4th-12th century AD,Europe West,"10,001 - 100,000","Camera, M. Istoria della città e costiera di A...",,,Edited March 23 (could use more info on CGIs),,300.0,1200.0,,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0
3,Arab nomads,"shaykh or sayyid,shura or mashwara","Consensus,Consultation,Autocratic Leader/Chief...",Nomadic Bedouin society governed by rulers sel...,"1st-7th century AD\n(start date is arbitrary, ...",Middle East,Unknown,"Stasavage, David (2020), The Decline and Rise ...",,,Edited Feb 23 (could use additional sources fo...,,0.0,700.0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
4,Assur,"King,Alum,Limmum,Scribe,City hall","President/Consul/Chief Executive,Assembly (cen...",Independent city-state. Constitutional monarch...,19th century BC\n(1920-1800 BC),"Europe Southeast, Anatolia","1,001 - 10,000","Larsen, M. T. The Old Assyrian City-State in H...",,,Edited Mar 23,Giacomo CHECKED,-1920.0,-1800.0,,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0


## Geography 

In [86]:
list(gov_df)

['Name',
 'Institutions',
 'Mechanism',
 'Notes',
 'Time span',
 'Geography',
 'Size',
 'Source',
 'Metanotes',
 'Heterogeneity ',
 'FC comments',
 'expert check',
 'Time span: Start',
 'Time span: End',
 'SCCS',
 'is_SCCS',
 'enforcement_mechanisms_include_jury_judge',
 'access_mechanisms_include_life_appointment',
 'enforcement_mechanisms_include_judicial_review',
 'decision_making_mechanisms_include_president_consul_chief_executive',
 'decision_making_mechanisms_include_secret_ballot',
 'access_mechanisms_include_patronage_for_office',
 'access_mechanisms_include_enfranchisement',
 'enforcement_mechanisms_include_tribunal_court_',
 'decision_making_mechanisms_include_magistrate_official',
 'enforcement_mechanisms_include_forced_labour',
 'access_mechanisms_include_age_boundaries',
 'decision_making_mechanisms_include_plurality_voting',
 'access_mechanisms_include_female_participation',
 'decision_making_mechanisms_include_word_of_mouth',
 'access_mechanisms_include_payment_for_occup

In [87]:
geography_list = gov_df['Geography'].unique()

In [88]:
geography_list

array(['Europe North', 'Asia Southeast ', 'Europe West', 'Middle East',
       'Europe Southeast, Anatolia', 'Europe South',
       'Central America, Mexico', 'Middle East, Mesopotamia, Babylonia',
       'Africa North ', 'Africa Central', 'Asia, China', 'Europe East',
       'Europe North, Netherlands', 'Africa North',
       'Africa South, Botswana', 'Europe South, Italy',
       'Europe South, Greece', 'North America', 'Africa West, Nigeria',
       'Asia Southeast', 'Europe, North',
       'Africa West, Mid-western Nigeria', 'Africa West, Niger Delta',
       'Asia, India (northeast)', 'Asia, Japan',
       'Asia, North-eastern India', 'Africa South',
       'Africa North, Algeria', 'Central America', 'Asia Central',
       'Asia, Southwestern China', 'South America, Brazil',
       'North America, Louisiana', 'Asia Southeast, Bali', 'Asia, India',
       'Middle East MENA', 'Europe West, Switzerland',
       'South America, Patagonia', 'South America, Venezuela/Brazil',
       'Oc

In [89]:
for geography in geography_list:
    col_name = 'Geography: ' + geography
    gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)

  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] = np.where(gov_df['Geography'] == geography, 1, 0)
  gov_df[col_name] =

### Encode Regions

In [90]:
regions = ['Africa','Europe','Asia','Middle East','North America','Central America','South America','Oceania']

In [91]:
# Create broader regions 
conditions = [
    (gov_df['Geography'].str.contains('Africa')),
    (gov_df['Geography'].str.contains('Europe')),
    (gov_df['Geography'].str.contains('Asia')),
    (gov_df['Geography'].str.contains('Middle East')),
    (gov_df['Geography'].str.contains('North America')),
    (gov_df['Geography'].str.contains('Central America')),
    (gov_df['Geography'].str.contains('South America')),
    (gov_df['Geography'].str.contains('Oceania'))
]

In [92]:
gov_df['Region'] = np.select(conditions, regions)

  gov_df['Region'] = np.select(conditions, regions)


In [93]:
gov_df['Region'].value_counts()

Region
Europe             23
Asia               18
North America      16
Africa             14
Middle East         6
Central America     5
Oceania             5
South America       4
Name: count, dtype: int64

In [94]:
gov_df['Region'].value_counts().sum()

91

In [95]:
for region in regions:
    col_name = 'Region: ' + region
    gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)

  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)
  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)
  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)
  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)
  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)
  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)
  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)
  gov_df[col_name] = np.where(gov_df['Region'] == region, 1, 0)


## Time span

In [96]:
gov_df['Time span: Duration'] = gov_df['Time span: End'] - gov_df['Time span: Start']

  gov_df['Time span: Duration'] = gov_df['Time span: End'] - gov_df['Time span: Start']


# Write to CSV

### Check

In [97]:
gov_df.head()

Unnamed: 0,Name,Institutions,Mechanism,Notes,Time span,Geography,Size,Source,Metanotes,Heterogeneity,FC comments,expert check,Time span: Start,Time span: End,SCCS,is_SCCS,enforcement_mechanisms_include_jury_judge,access_mechanisms_include_life_appointment,enforcement_mechanisms_include_judicial_review,decision_making_mechanisms_include_president_consul_chief_executive,decision_making_mechanisms_include_secret_ballot,access_mechanisms_include_patronage_for_office,access_mechanisms_include_enfranchisement,enforcement_mechanisms_include_tribunal_court_,decision_making_mechanisms_include_magistrate_official,enforcement_mechanisms_include_forced_labour,access_mechanisms_include_age_boundaries,decision_making_mechanisms_include_plurality_voting,access_mechanisms_include_female_participation,decision_making_mechanisms_include_word_of_mouth,access_mechanisms_include_payment_for_occupying_office,access_mechanisms_include_gerontocracy,decision_making_mechanisms_include_leader_first_among_equals,enforcement_mechanisms_include_fines,decision_making_mechanisms_include_coalition,decision_making_mechanisms_include_assembly_elite,decision_making_mechanisms_include_rule_of_law,decision_making_mechanisms_include_supermajority,decision_making_mechanisms_include_frequent_and_or_regular_meetings,access_mechanisms_include_open_political_unit,enforcement_mechanisms_include_graduated_sanctions,decision_making_mechanisms_include_council_local,decision_making_mechanisms_include_deliberation,decision_making_mechanisms_include_confederacy,decision_making_mechanisms_include_balance_of_power,decision_making_mechanisms_include_remunerated_position,enforcement_mechanisms_include_seizing_of_property,enforcement_mechanisms_include_capital_punishment,access_mechanisms_include_blood_relations,enforcement_mechanisms_include_reputational_risk,enforcement_mechanisms_include_suspension_expulsion_ostracism_proscription,enforcement_mechanisms_include_police,access_mechanisms_include_induction_rite_ceremony,enforcement_mechanisms_include_corporal_punishment_violence_against_perpetrator,enforcement_mechanisms_include_exit,decision_making_mechanisms_include_agenda_setting,decision_making_mechanisms_include_kingchief-as-figurehead,enforcement_mechanisms_include_arbitration,decision_making_mechanisms_include_unanimity,decision_making_mechanisms_include_bureaucracy,decision_making_mechanisms_include_checks_and_balances,access_mechanisms_include_lottery_random_selection,enforcement_mechanisms_include_self-help,access_mechanisms_include_election,decision_making_mechanisms_include_majority_voting,decision_making_mechanisms_include_board_committee,decision_making_mechanisms_include_mandate,decision_making_mechanisms_include_oath,access_mechanisms_include_matrilineality,decision_making_mechanisms_include_lobbying,access_mechanisms_include_meritocracy,decision_making_mechanisms_include_gathering,enforcement_mechanisms_include_destruction_of_property,decision_making_mechanisms_include_petition,mechanisms_include_child_participation,decision_making_mechanisms_include_assembly_central,decision_making_mechanisms_include_ratification_acclamation_only,enforcement_mechanisms_include_monitoring,access_mechanisms_include_co-optation,access_mechanisms_include_screening_process,decision_making_mechanisms_include_autocratic_leader_unbound,access_mechanisms_include_divine_right,enforcement_mechanisms_include_criticism_mocking_joking,decision_making_mechanisms_include_constitution,decision_making_mechanisms_include_council_central_,decision_making_mechanisms_include_quorum,decision_making_mechanisms_include_group_voting,access_mechanisms_include_dreaming,enforcement_mechanisms_include_litigation,decision_making_mechanisms_include_alliance,enforcement_mechanisms_include_mediation,access_mechanisms_include_popularity_,enforcement_mechanisms_include_civil_disobedience,decision_making_mechanisms_include_vote_by_proxy,enforcement_mechanisms_include_negotiation,decision_making_mechanisms_include_assembly_local,decision_making_mechanisms_include_autocratic_leader_chief_bound,decision_making_mechanisms_include_consensus,decision_making_mechanisms_include_power_fluidity,decision_making_mechanisms_include_representation,enforcement_mechanisms_include_scrutiny_of_officials,decision_making_mechanisms_include_public_meetings,access_mechanisms_include_heredity,decision_making_mechanisms_include_consent,access_mechanisms_include_property_requirement,decision_making_mechanisms_include_turnover_rotation,decision_making_mechanisms_include_dual-sex_political_system,decision_making_mechanisms_include_delegation,decision_making_mechanisms_include_vote_by_show_of_hands,enforcement_mechanisms_include_appeal,decision_making_mechanisms_include_consultation,decision_making_mechanisms_include_voting,enforcement_mechanisms_include_paying_damages_compensation,decision_making_mechanisms_include_veto,access_mechanisms_include_matriarchy,decision_making_mechanisms_include_temporary_position_of_power_term_limits,"Size: 100 - 1,000","Size: 1,001 - 10,000","Size: 10,001 - 100,000","Size: 100,001 - 1,000,000","Size: 1,000,001 - 10,000,000","Size: 10,000,000+",Size: Unknown,Geography: Europe North,Geography: Asia Southeast,Geography: Europe West,Geography: Middle East,"Geography: Europe Southeast, Anatolia",Geography: Europe South,"Geography: Central America, Mexico","Geography: Middle East, Mesopotamia, Babylonia",Geography: Africa North,Geography: Africa Central,"Geography: Asia, China",Geography: Europe East,"Geography: Europe North, Netherlands",Geography: Africa North.1,"Geography: Africa South, Botswana","Geography: Europe South, Italy","Geography: Europe South, Greece",Geography: North America,"Geography: Africa West, Nigeria",Geography: Asia Southeast.1,"Geography: Europe, North","Geography: Africa West, Mid-western Nigeria","Geography: Africa West, Niger Delta","Geography: Asia, India (northeast)","Geography: Asia, Japan","Geography: Asia, North-eastern India",Geography: Africa South,"Geography: Africa North, Algeria",Geography: Central America,Geography: Asia Central,"Geography: Asia, Southwestern China","Geography: South America, Brazil","Geography: North America, Louisiana","Geography: Asia Southeast, Bali","Geography: Asia, India",Geography: Middle East MENA,"Geography: Europe West, Switzerland","Geography: South America, Patagonia","Geography: South America, Venezuela/Brazil","Geography: Oceania, New Caledonia","Geography: Oceania, New Zealand","Geography: Oceania, eastern Polynesia","Geography: Oceania, samoa","Geography: Oceania, Kiribati","Geography: Asia, South","Geography: Asia, East",Geography: Africa,"Geography: North America, Alaska","Geography: North America, Canada",Region,Region: Africa,Region: Europe,Region: Asia,Region: Middle East,Region: North America,Region: Central America,Region: South America,Region: Oceania,Time span: Duration
0,Aedui (Celtic Oppida),"Senatus,Elected magistrates,Assemblies","Election,Council (central) ,Assembly (central)...",There are many Celtic oppida throughout northe...,2nd-1st century BC,Europe North,"100,001 - 1,000,000","Collis, “Celtic” Oppida, in Hansen, A Comparat...",,,Edited March 23,,-200.0,0.0,,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Europe,0,1,0,0,0,0,0,0,200.0
1,Akha,"Council of Elders,Village Founder-Leader (dzom...","Leader (first among equals),Gerontocracy,Blood...",see Institutions tab,20th century - Present,Asia Southeast,"10,001 - 100,000","Main: \nKammerer, Cornelia Ann (1985/86), Gate...",,"Informal inclusion,Gender groups,Informal powe...",Edited May 23,,1900.0,2023.0,,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Asia,0,0,1,0,0,0,0,0,123.0
2,Amalfi,"Prefetturii/ Conti/ Dogi,Sedile di congrega,Ta...","Election,Magistrate/official,Heredity,Constitu...","Camera, M, p. 29 (""La sua popolazione che nel ...",4th-12th century AD,Europe West,"10,001 - 100,000","Camera, M. Istoria della città e costiera di A...",,,Edited March 23 (could use more info on CGIs),,300.0,1200.0,,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Europe,0,1,0,0,0,0,0,0,900.0
3,Arab nomads,"shaykh or sayyid,shura or mashwara","Consensus,Consultation,Autocratic Leader/Chief...",Nomadic Bedouin society governed by rulers sel...,"1st-7th century AD\n(start date is arbitrary, ...",Middle East,Unknown,"Stasavage, David (2020), The Decline and Rise ...",,,Edited Feb 23 (could use additional sources fo...,,0.0,700.0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Middle East,0,0,0,1,0,0,0,0,700.0
4,Assur,"King,Alum,Limmum,Scribe,City hall","President/Consul/Chief Executive,Assembly (cen...",Independent city-state. Constitutional monarch...,19th century BC\n(1920-1800 BC),"Europe Southeast, Anatolia","1,001 - 10,000","Larsen, M. T. The Old Assyrian City-State in H...",,,Edited Mar 23,Giacomo CHECKED,-1920.0,-1800.0,,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Europe,0,1,0,0,0,0,0,0,120.0


In [98]:
list(gov_df)

['Name',
 'Institutions',
 'Mechanism',
 'Notes',
 'Time span',
 'Geography',
 'Size',
 'Source',
 'Metanotes',
 'Heterogeneity ',
 'FC comments',
 'expert check',
 'Time span: Start',
 'Time span: End',
 'SCCS',
 'is_SCCS',
 'enforcement_mechanisms_include_jury_judge',
 'access_mechanisms_include_life_appointment',
 'enforcement_mechanisms_include_judicial_review',
 'decision_making_mechanisms_include_president_consul_chief_executive',
 'decision_making_mechanisms_include_secret_ballot',
 'access_mechanisms_include_patronage_for_office',
 'access_mechanisms_include_enfranchisement',
 'enforcement_mechanisms_include_tribunal_court_',
 'decision_making_mechanisms_include_magistrate_official',
 'enforcement_mechanisms_include_forced_labour',
 'access_mechanisms_include_age_boundaries',
 'decision_making_mechanisms_include_plurality_voting',
 'access_mechanisms_include_female_participation',
 'decision_making_mechanisms_include_word_of_mouth',
 'access_mechanisms_include_payment_for_occup

In [99]:
gov_df.select_dtypes(include=np.number).sum().sort_values()

Geography: Africa North, Algeria                                                       1.0
Geography: Asia, China                                                                 1.0
Geography: Africa Central                                                              1.0
Geography: Oceania, eastern Polynesia                                                  1.0
Geography: Africa North                                                                1.0
Geography: Middle East, Mesopotamia, Babylonia                                         1.0
Geography: Europe South                                                                1.0
Geography: Europe North                                                                1.0
Geography: Asia Central                                                                1.0
Geography: Asia, Southwestern China                                                    1.0
Geography: Oceania, New Zealand                                                        1.0

In [100]:
# Write prepped dataset to csv
gov_df.to_csv('../data/' + DATA_VERSION + '/communities_data_prepped.csv', index=False)