# Preliminary Data Analysis: Summary Statistics #

### Here is the code for some of the summary statistics on numeric variables ###

In [1]:
import os

import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import ttest_ind

In [2]:
# Location of the merged Myanmar election CSV. The default is the original
# author's local copy (from the cloned project repository on the desktop).
# Set the MYANMAR_CSV_PATH environment variable to point at the file on
# another machine instead of editing this cell.
MYANMAR_CSV_PATH = os.environ.get(
    'MYANMAR_CSV_PATH',
    '/Users/katesousa/Desktop/INFX575LA/myanmar_merged_v3.csv'
)

# Load the data into a DataFrame; every later cell derives from myanmar_df.
myanmar_df = pd.read_csv(MYANMAR_CSV_PATH)

In [3]:
# Get a better look at all of the variables, size, and data types
# print myanmar_df.shape
# print myanmar_df.columns
# print myanmar_df.dtypes

In [4]:
# Keep only the columns needed for the summary statistics by dropping
# everything listed below. The labels are grouped by their prefix.
unused_columns = [
    # mimu_* columns, plus the serial number
    'mimu_stregpcode', 'mimu_stregname',
    'mimu_dspcode', 'mimu_dsname',
    'mimu_twpcode', 'mimu_twname',
    'ser_num', 'mimu_plcode',
    # cand_* / party columns
    'cand_name', 'cand_party', 'cand_party_cleaned', 'party_type',
    # lu_* columns
    'lu_name', 'lu_state', 'lu_consnm', 'lu_par', 'lu_gen',
    'lu_byr', 'lu_age', 'lu_occ', 'lu_occrecode', 'lu_edu',
    'lu_edurecode', 'lu_deg', 'lu_hideg', 'lu_hidegrecode', 'lu_nat',
    'lu_rel', 'lu_wdvill', 'lu_urbrur', 'lu_vlnum', 'lu_fat',
    'lu_fatnat', 'lu_fatrel', 'lu_mot', 'lu_motnat', 'lu_motrel',
]

# drop() returns a new frame, so myanmar_df itself is left untouched.
myanmar_small_df = myanmar_df.drop(unused_columns, axis=1)

In [5]:
# Convert the vote-count / percentage columns to float64 so they can be
# aggregated numerically in the cells that follow.
float_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

recast_values = myanmar_small_df[float_columns].astype('float64')
myanmar_small_df[float_columns] = recast_values

In [6]:
# Collapse the data to one row per (election, state/region, constituency)
# by averaging every numeric vote column within each group.
constituency_keys = ['election', 'state_reg', 'cons_num']
vote_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

arranged_myanmar_df = (
    myanmar_small_df
    .groupby(constituency_keys)[vote_columns]
    .mean()
    .reset_index()  # turn the group keys back into ordinary columns
)

In [7]:
# arranged_myanmar_df

In [8]:
# Baseline: the average of each vote column within every election type,
# computed over the constituency-level rows of arranged_myanmar_df.
vote_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

avg_election_df = (
    arranged_myanmar_df
    .groupby('election')[vote_columns]
    .mean()
    .reset_index()  # keep 'election' as a regular column
)

In [9]:
# Display the per-election averages (one row per election type).
avg_election_df

Unnamed: 0,election,dstot_eligv,dstot_ticketv,dstot_earlyv,dstot_totalv,dstot_pctv,dstot_invv,dstot_lostv,dstot_invlostv,cand_pctvalv
0,Amyothar Hluttaw,204189.297619,133853.89881,8644.910714,142498.809524,68.595923,6765.952381,561.666667,7327.619048,0.215111
1,Nationality or Ethnic Affairs,154246.827586,95216.413793,8180.586207,103397.0,64.326897,4964.965517,618.586207,5583.551724,0.202627
2,Pyithu Hluttaw,106177.504644,69577.71517,4452.575851,74030.291022,70.720372,4371.4613,258.489164,4629.950464,0.20908
3,State and Region,53242.415873,34858.296825,2236.455556,37094.752381,70.751143,1617.142857,133.571429,1750.714286,0.217569


In [10]:
# Mean of each vote column for every (election, state/region) pair,
# computed over the constituency-level rows of arranged_myanmar_df.
election_state_keys = ['election', 'state_reg']
vote_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

avg_elecXstate_df = (
    arranged_myanmar_df
    .groupby(election_state_keys)[vote_columns]
    .mean()
    .reset_index()  # keep the group keys as regular columns
)

In [11]:
# Display the averages for each election and state/region combination.
avg_elecXstate_df

Unnamed: 0,election,state_reg,dstot_eligv,dstot_ticketv,dstot_earlyv,dstot_totalv,dstot_pctv,dstot_invv,dstot_lostv,dstot_invlostv,cand_pctvalv
0,Amyothar Hluttaw,Ayeyarwady,366968.166667,260199.0,16709.0,276908.0,75.601667,12736.916667,720.916667,13457.833333,0.20903
1,Amyothar Hluttaw,Bago,294226.583333,197581.5,13501.583333,211083.083333,71.66125,9211.083333,680.75,9891.833333,0.206369
2,Amyothar Hluttaw,Chin,22860.666667,16799.25,1268.333333,18067.583333,78.7575,709.75,28.0,737.75,0.213426
3,Amyothar Hluttaw,Kachin,73131.75,45343.333333,4510.333333,49853.666667,68.103333,2926.416667,662.5,3588.916667,0.153772
4,Amyothar Hluttaw,Kayah,14859.666667,10143.416667,842.083333,10985.5,74.034167,768.833333,20.666667,789.5,0.172559
5,Amyothar Hluttaw,Kayin,75414.416667,31298.25,3811.916667,35110.166667,50.169167,2327.083333,179.25,2506.333333,0.167773
6,Amyothar Hluttaw,Magway,252227.75,181836.583333,10989.666667,192826.25,76.5575,7457.333333,397.583333,7854.916667,0.229165
7,Amyothar Hluttaw,Mandalay,431444.916667,301498.916667,14521.666667,316020.583333,73.3575,12804.833333,500.833333,13305.666667,0.280557
8,Amyothar Hluttaw,Mon,128579.5,60489.416667,4507.0,64996.416667,50.658333,3969.0,437.833333,4406.833333,0.171957
9,Amyothar Hluttaw,Rakhine,122232.666667,78834.916667,4821.25,83656.166667,70.419167,6247.083333,457.416667,6704.5,0.211206


In [12]:
# Standard deviation of each vote column for every (election, state/region)
# pair, computed over the constituency-level rows of arranged_myanmar_df.
election_state_keys = ['election', 'state_reg']
vote_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

stddev_elecXstate_df = (
    arranged_myanmar_df
    .groupby(election_state_keys)[vote_columns]
    .std()
    .reset_index()  # keep the group keys as regular columns
)

In [13]:
# Display the standard deviations for each election and state/region combination.
stddev_elecXstate_df
# This has some NaN values. The author's guess is that some states have 0 or
# missing data in specific elections.
# NOTE(review): pandas' std() is the sample standard deviation (ddof=1 by
# default), so a group containing a single constituency would also produce
# NaN -- TODO confirm which case applies here.

Unnamed: 0,election,state_reg,dstot_eligv,dstot_ticketv,dstot_earlyv,dstot_totalv,dstot_pctv,dstot_invv,dstot_lostv,dstot_invlostv,cand_pctvalv
0,Amyothar Hluttaw,Ayeyarwady,69016.645902,50004.772363,4546.034816,53728.501272,5.299504,3788.529325,1221.197509,4439.371471,0.054988
1,Amyothar Hluttaw,Bago,50175.833566,38368.624878,4394.57826,42573.874015,6.449391,1912.13543,770.173549,1516.642214,0.050787
2,Amyothar Hluttaw,Chin,7046.589495,5503.237908,519.847239,5911.010396,5.151884,243.125155,28.587982,243.738356,0.070291
3,Amyothar Hluttaw,Kachin,40969.100876,25969.542907,2865.309069,28355.095955,6.465709,2065.675917,1884.467299,3790.91271,0.034785
4,Amyothar Hluttaw,Kayah,14785.119991,10518.612045,667.971346,11128.128631,13.037149,661.577293,25.684567,684.434538,0.045546
5,Amyothar Hluttaw,Kayin,51899.737085,21538.550505,2995.148242,24296.536786,13.262655,1466.910635,580.455953,1518.177516,0.065958
6,Amyothar Hluttaw,Magway,46784.067871,33846.843187,2478.614104,35400.465259,4.379703,3183.465645,461.12914,3415.047995,0.056466
7,Amyothar Hluttaw,Mandalay,117566.730995,85689.424604,3662.076642,86456.565007,5.128585,3288.739102,428.762879,3504.322534,0.086113
8,Amyothar Hluttaw,Mon,31909.122812,16843.261963,1418.659547,17689.697857,8.008953,2080.597248,595.786239,2087.827485,0.066556
9,Amyothar Hluttaw,Rakhine,73736.685568,49375.951186,2545.190513,51558.079166,6.288679,3917.796492,736.832713,4216.563088,0.068272


In [14]:
# Median of each vote column for every (election, state/region) pair,
# computed over the constituency-level rows of arranged_myanmar_df.
election_state_keys = ['election', 'state_reg']
vote_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

med_elecXstate_df = (
    arranged_myanmar_df
    .groupby(election_state_keys)[vote_columns]
    .median()
    .reset_index()  # keep the group keys as regular columns
)

In [15]:
# Display the medians for each election and state/region combination.
med_elecXstate_df

Unnamed: 0,election,state_reg,dstot_eligv,dstot_ticketv,dstot_earlyv,dstot_totalv,dstot_pctv,dstot_invv,dstot_lostv,dstot_invlostv,cand_pctvalv
0,Amyothar Hluttaw,Ayeyarwady,365970.5,253392.0,17293.5,269537.0,77.66,13352.0,325.0,13617.5,0.2
1,Amyothar Hluttaw,Bago,298693.5,204478.5,13315.0,217793.5,71.45,9804.0,409.0,10509.5,0.2
2,Amyothar Hluttaw,Chin,24946.5,17182.5,1304.0,18745.5,78.75,758.5,16.0,778.0,0.183342
3,Amyothar Hluttaw,Kachin,71153.0,42110.0,3858.0,47378.5,67.18,2245.0,59.0,2354.5,0.16665
4,Amyothar Hluttaw,Kayah,7697.5,4124.0,622.0,4676.5,75.705,547.5,7.5,554.5,0.166675
5,Amyothar Hluttaw,Kayin,81149.0,30624.5,3347.0,33971.5,45.915,2662.5,0.0,3046.5,0.16665
6,Amyothar Hluttaw,Magway,259049.5,192143.5,11107.0,204313.5,75.805,8078.0,234.0,8312.0,0.20001
7,Amyothar Hluttaw,Mandalay,393276.5,274823.5,13821.0,290514.5,72.29,13018.0,354.5,13285.0,0.25
8,Amyothar Hluttaw,Mon,125660.5,58952.0,4288.5,63705.0,47.945,3774.5,239.5,4213.5,0.154761
9,Amyothar Hluttaw,Rakhine,136088.0,87031.5,5268.5,92361.0,67.44,5819.0,252.5,6155.0,0.19999


In [16]:
# Maximum of each vote column for every (election, state/region) pair,
# computed over the constituency-level rows of arranged_myanmar_df.
election_state_keys = ['election', 'state_reg']
vote_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

max_elecXstate_df = (
    arranged_myanmar_df
    .groupby(election_state_keys)[vote_columns]
    .max()
    .reset_index()  # keep the group keys as regular columns
)

In [17]:
# Display the maxima for each election and state/region combination.
max_elecXstate_df

Unnamed: 0,election,state_reg,dstot_eligv,dstot_ticketv,dstot_earlyv,dstot_totalv,dstot_pctv,dstot_invv,dstot_lostv,dstot_invlostv,cand_pctvalv
0,Amyothar Hluttaw,Ayeyarwady,465625.0,344964.0,24590.0,369554.0,82.22,17611.0,4532.0,21348.0,0.333333
1,Amyothar Hluttaw,Bago,374571.0,276125.0,21940.0,298065.0,79.94,11195.0,2929.0,11981.0,0.316833
2,Amyothar Hluttaw,Chin,31762.0,23071.0,2423.0,25494.0,86.32,1124.0,78.0,1127.0,0.333333
3,Amyothar Hluttaw,Kachin,178579.0,109561.0,12338.0,121899.0,76.98,8396.0,6625.0,15021.0,0.2
4,Amyothar Hluttaw,Kayah,44221.0,30681.0,2243.0,32924.0,90.37,2139.0,68.0,2194.0,0.25
5,Amyothar Hluttaw,Kayin,164049.0,71466.0,11018.0,82484.0,81.05,4955.0,2019.0,4955.0,0.333367
6,Amyothar Hluttaw,Magway,320686.0,221895.0,16701.0,232868.0,82.26,11449.0,1503.0,11820.0,0.333333
7,Amyothar Hluttaw,Mandalay,654840.0,460931.0,22733.0,476383.0,82.74,19190.0,1756.0,19865.0,0.5
8,Amyothar Hluttaw,Mon,175938.0,92826.0,6359.0,96896.0,67.22,7066.0,2098.0,7432.0,0.3333
9,Amyothar Hluttaw,Rakhine,243019.0,172763.0,10339.0,183102.0,80.87,11805.0,2708.0,12194.0,0.333333


In [18]:
# Minimum of each vote column for every (election, state/region) pair,
# computed over the constituency-level rows of arranged_myanmar_df.
election_state_keys = ['election', 'state_reg']
vote_columns = [
    'dstot_eligv', 'dstot_ticketv', 'dstot_earlyv',
    'dstot_totalv', 'dstot_pctv', 'dstot_invv',
    'dstot_lostv', 'dstot_invlostv', 'cand_pctvalv',
]

min_elecXstate_df = (
    arranged_myanmar_df
    .groupby(election_state_keys)[vote_columns]
    .min()
    .reset_index()  # keep the group keys as regular columns
)

In [19]:
# Display the minima for each election and state/region combination.
min_elecXstate_df

Unnamed: 0,election,state_reg,dstot_eligv,dstot_ticketv,dstot_earlyv,dstot_totalv,dstot_pctv,dstot_invv,dstot_lostv,dstot_invlostv,cand_pctvalv
0,Amyothar Hluttaw,Ayeyarwady,209390.0,152960.0,9471.0,162431.0,66.21,6325.0,76.0,6470.0,0.125
1,Amyothar Hluttaw,Bago,198562.0,134020.0,6489.0,140509.0,62.285,4563.0,215.0,7447.0,0.142871
2,Amyothar Hluttaw,Chin,11484.0,8898.0,554.0,9648.0,68.81,351.0,0.0,364.0,0.111111
3,Amyothar Hluttaw,Kachin,16398.0,10580.0,2043.0,12623.0,55.89,577.0,0.0,588.0,0.083342
4,Amyothar Hluttaw,Kayah,3116.0,2252.0,192.0,2703.0,37.09,121.0,0.0,122.0,0.111111
5,Amyothar Hluttaw,Kayin,5810.0,3680.0,838.0,4709.0,37.19,120.0,0.0,121.0,0.09981
6,Amyothar Hluttaw,Magway,177798.0,122948.0,7637.0,130585.0,69.75,812.0,0.0,851.0,0.16665
7,Amyothar Hluttaw,Mandalay,306505.0,210246.0,10388.0,220634.0,66.71,8727.0,156.0,9175.0,0.166667
8,Amyothar Hluttaw,Mon,88647.0,34606.0,2507.0,37405.0,42.2,1141.0,0.0,1310.0,0.10001
9,Amyothar Hluttaw,Rakhine,11380.0,8058.0,982.0,9140.0,63.2,322.0,5.0,327.0,0.124988
