In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [3]:
pip install seaborn==0.11.0

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\ocnra\anaconda3\python.exe -m pip install --upgrade pip' command.


## I. Explore the data  
  
A. Study variable attributes 
 1. ~Identify variable name and survey item(s) it measures (Codebook available here: https://www.worldvaluessurvey.org/WVSDocumentationWV6.jsp (accessed on 5/25/2021))~  
 2. ~% missing for each variable~
 3. Quick descriptives (check for range of values, distribution shape, skew/outliers, potential errors, etc.)  
 4. Identify target variable (and drop duplicates) - don't forget to do feature engineering on the target variable; break out into varying levels of happiness  
  
B. Visualize the data (based on descriptives)
 1. Explore correlations between attributes
 2. Identify transformations that might be needed
 3. Identify extra data that may be useful (gini coefficient, GDP, etc.)
 4. Summarize findings
 
### Codebook notes:  
  
- Weights (See https://www.worldvaluessurvey.org/WVSContents.jsp for further details): 
    - `S018` and `S019` are weighting factors that transform N's to 1000 and 1500, respectively
    - these variables are useful for cross-country comparisons 
    - useful for EDA and descriptive analyses; should arguably be dropped for random forest algorithm, **right?**
    - **QUESTION:** are weights useful for PCA and logistic regression?
    - **QUESTION:** I see weights, but not specific population or sample size info - do I need this?
        - population data shouldn't be difficult to obtain based on N preserving weightings (`V258`) and this formula  
        
        $$Weight = S018/1000 * Population$$  
  
### Options for handling missing data:  
Advice from https://heartbeat.fritz.ai/data-handling-scenarios-part-2-working-with-missing-values-in-a-dataset-34b758cfc9fa and https://analyticsindiamag.com/5-ways-handle-missing-values-machine-learning-datasets/  
  
**Mean/Median (numerical) & Mode (categorical) imputation**  
1. pros: 
  - easy to do
  - can be integrated into production or for a future unknown dataset
2. cons: 
  - distorts the distribution of the dataset
  - distorts the variance and covariance of the dataset
  - for mode imputation, may lead to an over-representation of the most frequent label if the missing values are quite large
3. when this makes sense: 
  - mean imputation works best for normally distributed distributions
  - median is better for skewed distributions 
  - mode imputation for categorical data works best if the missing values are missing at random
  - best to use this method when the missing values are around 5% (or less) of the total data
  
**Systematic Random Sampling Imputation**  
1. pros: 
  - does not distort variance or distribution 
2. cons: 
  - when replacing missing values in the test set as well, the imputed values from the train set will need to be stored in memory
3. when this makes sense: 
  - can be applied to both numerical and categorical variables
  - used when the values are missing at random
  - when we want to be able to reproduce the same value every time the variable is used (by using a random state)
  
### Thinking ahead to future steps:
- items may need to be normalized or re-scaled so that the ranges are more similar
- items may need to be reverse-coded to assist with interpretability for linear regression
- retain and rename `C_COW_ALPHA` for country labels
- recode age variable `V242`; create age categories based on groupings identified here: https://www.cia.gov/the-world-factbook/field/age-structure/  
- `V74` and `V74B`: Schwartz benevolence value items; consolidate into one variable based on whichever has fewer missings  

In [4]:
wvs_w6 = pd.read_csv('../data/Evaluating_Happiness/w6_feature_selection.csv', low_memory=False)
wvs_w6.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85898 entries, 0 to 85897
Columns: 166 entries, V2 to V262
dtypes: float64(163), int64(2), object(1)
memory usage: 108.8+ MB


In [5]:
wvs_w6.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85898 entries, 0 to 85897
Data columns (total 166 columns):
 #   Column       Dtype  
---  ------       -----  
 0   V2           int64  
 1   C_COW_ALPHA  object 
 2   V4           float64
 3   V5           float64
 4   V6           float64
 5   V7           float64
 6   V8           float64
 7   V9           float64
 8   V10          float64
 9   V11          float64
 10  V12          float64
 11  V13          float64
 12  V14          float64
 13  V15          float64
 14  V16          float64
 15  V17          float64
 16  V18          float64
 17  V19          float64
 18  V20          float64
 19  V21          float64
 20  V22          float64
 21  V24          float64
 22  V25          float64
 23  V26          float64
 24  V27          float64
 25  V30          float64
 26  V32          float64
 27  V33          float64
 28  V34          float64
 29  V44          float64
 30  V45          float64
 31  V47          float64
 32  V

In [6]:
wvs_w6.head()

Unnamed: 0,V2,C_COW_ALPHA,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V24,V25,V26,V27,V30,V32,V33,V34,V44,V45,V47,V48,V49,V51,V52,V53,V54,V55,V56,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V82,V83,V84,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V113,V114,V115,V116,V117,V119,V120,V121,V122,V123,V124,V126,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V143,V144G,V147,V150,V151,V152,V153,V154,V155,V170,V171,V173,V174,V176,V177,V179,V180,V181,V182,V183,V184,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V202,V203,V204,V205,V207,V208,V209,V210,V211,V213,V214,V216,V225,V229,V230,V237,V238,V239,V240,V242,V248,V258,S018,S019,V262
0,12,ALG,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,2.0,2.0,4.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,2.0,2.0,4.0,3.0,3.0,3.0,2.0,3.0,4.0,4.0,3.0,4.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,2.0,5.0,1.0,,,10.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,3.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,,1.0,4.0,5.0,1.0,21.0,7.0,1.0,0.833333,1.25,2014
1,12,ALG,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,3.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,1.0,5.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,3.0,6.0,,2.0,3.0,6.0,2.0,24.0,7.0,1.0,0.833333,1.25,2014
2,12,ALG,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,4.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,1.0,5.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,4.0,1.0,4.0,5.0,1.0,1.0,3.0,2.0,4.0,2.0,3.0,2.0,1.0,4.0,6.0,2.0,26.0,5.0,1.0,0.833333,1.25,2014
3,12,ALG,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,8.0,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,3.0,1.0,2.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,2.0,5.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,1.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,4.0,4.0,5.0,2.0,28.0,6.0,1.0,0.833333,1.25,2014
4,12,ALG,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,6.0,6.0,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,2.0,5.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,3.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,2.0,3.0,7.0,2.0,35.0,3.0,1.0,0.833333,1.25,2014


In [7]:
# how many unique countries?
wvs_w6.C_COW_ALPHA.nunique()

57

#### Notes along the way:
There are 194 independent countries in the world; this data set contains 57 of them.

In [8]:
# calculate proportion missing for each feature
pct_missing = pd.DataFrame(wvs_w6.isna().sum())
pct_missing = pct_missing.reset_index().rename(columns = {'index':'variable', 0:'NA_count'})
pct_missing['NA_pct'] = ((pct_missing['NA_count'] / len(wvs_w6)) * 100).round(decimals=2)
pct_missing

Unnamed: 0,variable,NA_count,NA_pct
0,V2,0,0.0
1,C_COW_ALPHA,0,0.0
2,V4,316,0.37
3,V5,502,0.58
4,V6,958,1.12
5,V7,1502,1.75
6,V8,1433,1.67
7,V9,1255,1.46
8,V10,723,0.84
9,V11,302,0.35


In [9]:
wvs_w6.describe()

Unnamed: 0,V2,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V24,V25,V26,V27,V30,V32,V33,V34,V44,V45,V47,V48,V49,V51,V52,V53,V54,V55,V56,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V82,V83,V84,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V113,V114,V115,V116,V117,V119,V120,V121,V122,V123,V124,V126,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V143,V144G,V147,V150,V151,V152,V153,V154,V155,V170,V171,V173,V174,V176,V177,V179,V180,V181,V182,V183,V184,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V202,V203,V204,V205,V207,V208,V209,V210,V211,V213,V214,V216,V225,V229,V230,V237,V238,V239,V240,V242,V248,V258,S018,S019,V262
count,85898.0,85582.0,85396.0,84940.0,84396.0,84465.0,84643.0,85175.0,85596.0,85884.0,85892.0,85891.0,85881.0,85889.0,85885.0,85886.0,85880.0,85883.0,85879.0,85895.0,83595.0,85293.0,85233.0,85183.0,85090.0,85107.0,85008.0,85058.0,85878.0,84325.0,82573.0,83057.0,82824.0,81947.0,83370.0,82125.0,81469.0,84510.0,84076.0,85693.0,84259.0,85328.0,84201.0,81915.0,84505.0,82634.0,84619.0,82927.0,77327.0,82722.0,82512.0,82239.0,82990.0,83366.0,83863.0,83477.0,80373.0,37101.0,83133.0,82837.0,83635.0,83650.0,83853.0,84865.0,84930.0,84646.0,84952.0,83105.0,80722.0,83717.0,82810.0,83706.0,81419.0,85307.0,84733.0,84826.0,83656.0,79882.0,79646.0,83122.0,83155.0,83244.0,84002.0,83922.0,82685.0,82854.0,80943.0,81580.0,80733.0,79679.0,82030.0,78682.0,77130.0,79284.0,75354.0,82219.0,80256.0,82694.0,82662.0,80273.0,80824.0,81885.0,81336.0,83154.0,83737.0,84290.0,84646.0,82980.0,80608.0,78748.0,83872.0,77374.0,77818.0,78000.0,84123.0,81831.0,78908.0,76478.0,83956.0,84185.0,84559.0,83286.0,80662.0,79188.0,82809.0,82818.0,80446.0,84643.0,84201.0,84320.0,84342.0,83066.0,82790.0,81240.0,79483.0,82395.0,83226.0,83527.0,84236.0,84582.0,84266.0,81432.0,82875.0,83437.0,83329.0,84483.0,84504.0,84466.0,84460.0,83405.0,84393.0,81173.0,84717.0,84419.0,64985.0,79979.0,83479.0,82875.0,85858.0,85789.0,85173.0,85898.0,85898.0,85898.0,85898.0
mean,465.906983,1.096107,1.676847,1.881976,2.639319,1.510057,1.905769,1.836924,2.096184,1.489334,1.390944,1.282335,1.768983,1.317677,1.601816,1.599946,1.599965,1.65956,1.584147,1.730625,1.756158,0.522751,0.327948,0.23824,0.131308,0.189385,0.111331,0.165181,1.831598,2.043724,2.097175,1.604308,1.658662,2.439626,2.946408,2.569851,2.220329,7.064998,5.675615,2.735054,1.852965,5.87207,1.706049,2.562559,2.052742,2.496769,1.879093,2.737504,1.330829,2.28985,1.380369,1.507022,2.78201,3.773541,2.382016,3.2088,2.465816,2.378966,2.915016,3.727477,2.551085,2.540167,2.545788,2.162175,1.877629,1.930581,2.657053,5.281174,5.586432,4.464637,3.876054,4.229518,6.216055,1.21248,2.147192,2.074788,3.076372,2.763101,2.840231,2.149635,2.18801,2.60659,2.508952,2.421439,2.440406,2.591498,2.927888,2.731723,2.140996,2.51213,2.41636,2.37297,2.375172,2.335465,2.638626,6.30195,4.201505,8.004583,7.014178,4.499682,7.398607,5.976064,5.972657,7.899969,8.240646,1.833764,3.514224,1.364256,1.719631,1.722253,7.777244,2.41302,2.473091,2.56841,1.889459,3.16055,3.470992,3.543817,2.645243,3.003682,4.682872,4.579857,2.072847,2.018576,2.094398,2.080888,1.656962,3.437674,3.419852,3.343204,3.072668,7.609467,7.742976,5.514266,5.371564,4.730627,7.240466,2.790774,2.654803,1.814464,1.956388,3.250111,3.22073,4.527128,2.246229,2.058769,2.937601,1.962435,1.563071,1.793609,1.507495,2.074015,2.008204,3.412135,1.972809,2.082897,3.318895,4.811729,1.521547,41.949609,5.642328,0.999997,0.663577,0.995366,2012.07871
std,245.702172,0.349647,0.735412,0.834466,0.983355,0.790373,1.056163,0.721488,0.849496,0.499889,0.487965,0.450138,0.421486,0.465576,0.489527,0.489912,0.489908,0.47386,0.492871,0.443638,0.429401,0.785971,0.663317,0.574447,0.421065,0.512384,0.383687,0.486182,0.374225,0.898268,0.845715,0.782184,0.751456,0.993561,0.931448,0.982362,0.933066,2.246545,2.643275,2.192906,1.786344,2.474347,0.994482,1.035565,1.034149,1.045859,1.166229,1.159411,0.470515,0.850891,0.606461,0.695373,1.41126,1.548782,1.308145,1.535431,1.251423,1.190707,1.458757,1.60053,1.35894,1.302291,1.430469,1.512583,0.327716,0.254166,0.974167,2.964892,2.782168,2.913063,2.626389,2.88713,2.785229,0.513465,0.828651,0.803878,0.792447,0.862158,0.862016,1.028573,0.940216,0.873546,0.878791,0.943479,0.944941,0.957057,0.885295,0.935748,0.857668,0.876781,0.938552,0.872838,0.894017,0.893427,0.954545,2.972462,2.944424,2.476507,2.747213,3.116989,2.57721,3.02011,3.003711,2.580015,2.117877,0.858872,2.991445,0.585602,0.559616,0.562625,2.945356,1.060986,1.078669,1.011257,0.796553,0.905944,0.780825,0.765499,1.968297,2.000008,1.080719,1.226405,1.135002,1.100456,1.104554,1.102361,0.474727,0.873621,0.8747,0.921529,1.041904,2.276941,2.227096,2.812426,2.739253,2.883907,2.3616,2.569415,2.430227,1.8243,1.939336,3.023998,2.76222,3.091537,2.216045,2.043504,2.601847,1.904093,0.787344,0.795737,0.632406,0.959289,0.93103,2.156241,0.851664,0.929324,0.996747,2.097295,0.499538,16.591029,2.408012,0.42548,0.337663,0.506494,1.235757
min,12.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,16.0,1.0,0.050687,0.023785,0.035678,2010.0
25%,276.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,4.0,1.0,0.0,4.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,2.0,1.0,1.0,5.0,1.0,2.0,2.0,3.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,1.0,7.0,5.0,1.0,6.0,4.0,4.0,6.0,7.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,1.0,2.0,1.0,3.0,3.0,3.0,1.0,1.0,5.0,5.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,3.0,2.0,6.0,6.0,3.0,3.0,2.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,1.0,28.0,4.0,0.868382,0.409333,0.613999,2011.0
50%,434.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,1.0,2.0,3.0,3.0,3.0,2.0,7.0,6.0,1.0,2.0,6.0,1.0,3.0,2.0,3.0,1.0,3.0,1.0,3.0,1.0,1.0,3.0,4.0,2.0,3.0,2.0,2.0,3.0,4.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,5.0,5.0,4.0,3.0,4.0,6.0,1.0,2.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,7.0,4.0,9.0,8.0,4.0,8.0,6.0,6.0,9.0,9.0,2.0,3.0,1.0,2.0,2.0,10.0,2.0,3.0,3.0,2.0,3.0,4.0,4.0,1.0,5.0,5.0,5.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,4.0,3.0,8.0,8.0,5.0,5.0,5.0,8.0,1.0,1.0,1.0,1.0,1.0,2.0,5.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,5.0,2.0,40.0,6.0,1.0,0.661376,0.992063,2012.0
75%,702.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,3.0,2.0,2.0,3.0,4.0,3.0,3.0,9.0,8.0,6.0,3.0,8.0,2.0,3.0,3.0,3.0,3.0,4.0,2.0,3.0,2.0,2.0,4.0,5.0,3.0,4.0,3.0,3.0,4.0,5.0,3.0,3.0,3.0,3.0,2.0,2.0,3.0,8.0,8.0,7.0,5.0,6.0,8.0,1.0,3.0,2.0,4.0,3.0,4.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,9.0,6.0,10.0,10.0,7.0,10.0,9.0,9.0,10.0,10.0,2.0,5.0,2.0,2.0,2.0,10.0,3.0,3.0,3.0,2.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,3.0,3.0,3.0,3.0,2.0,4.0,4.0,4.0,4.0,10.0,10.0,8.0,7.0,7.0,9.0,4.0,4.0,2.0,2.0,5.0,5.0,7.0,3.0,2.0,5.0,2.0,2.0,2.0,2.0,3.0,3.0,5.0,2.0,3.0,4.0,6.0,2.0,54.0,7.0,1.0,0.833333,1.25,2013.0
max,887.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,10.0,10.0,6.0,8.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,3.0,3.0,3.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,5.0,2.0,2.0,4.0,10.0,10.0,10.0,10.0,10.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,4.0,9.0,3.0,4.0,4.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,2.0,4.0,4.0,4.0,4.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,5.0,4.0,4.0,4.0,4.0,8.0,4.0,4.0,5.0,10.0,2.0,102.0,9.0,22.790557,10.694771,16.042156,2016.0


### Remaining feature engineering/data prep steps:
- ~rename `C_COW_ALPHA`~
- ~recode age variable `V242` into groups~ 
    - 0-14 years (children) = _[excluded from analysis]_
    - 15-24 years (early working age) = _[group 1]_
    - 25-54 years (prime working age) = _[group 2]_
    - 55-64 years (mature working age) = _[group 3]_
    - 65 years and over (elderly) = _[group 4]_
- consolidate Schwartz variables `V74` and `V74B`

In [10]:
# how many non-missing age values?
wvs_w6.V242.notna().sum()

85789

In [11]:
# how many missing values for v242?
wvs_w6.V242.isna().sum()

109

#### Rename `C_COW_ALPHA`

In [12]:
# make all column names lowercase
wvs_w6.columns = wvs_w6.columns.str.lower()

# rename country column
wvs_w6.rename(columns = {'c_cow_alpha': 'country'},
              inplace = True)
wvs_w6.head()

Unnamed: 0,v2,country,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v24,v25,v26,v27,v30,v32,v33,v34,v44,v45,v47,v48,v49,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60,v61,v62,v63,v64,v65,v66,v67,v68,v69,v70,v71,v72,v73,v74,v74b,v75,v76,v77,v78,v79,v80,v82,v83,v84,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v107,v108,v109,v110,v111,v113,v114,v115,v116,v117,v119,v120,v121,v122,v123,v124,v126,v131,v132,v133,v134,v135,v136,v137,v138,v139,v140,v143,v144g,v147,v150,v151,v152,v153,v154,v155,v170,v171,v173,v174,v176,v177,v179,v180,v181,v182,v183,v184,v187,v188,v189,v190,v191,v192,v193,v194,v195,v196,v197,v198,v199,v200,v202,v203,v204,v205,v207,v208,v209,v210,v211,v213,v214,v216,v225,v229,v230,v237,v238,v239,v240,v242,v248,v258,s018,s019,v262
0,12,ALG,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,2.0,2.0,4.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,2.0,2.0,4.0,3.0,3.0,3.0,2.0,3.0,4.0,4.0,3.0,4.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,2.0,5.0,1.0,,,10.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,3.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,,1.0,4.0,5.0,1.0,21.0,7.0,1.0,0.833333,1.25,2014
1,12,ALG,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,3.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,1.0,5.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,3.0,6.0,,2.0,3.0,6.0,2.0,24.0,7.0,1.0,0.833333,1.25,2014
2,12,ALG,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,4.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,1.0,5.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,4.0,1.0,4.0,5.0,1.0,1.0,3.0,2.0,4.0,2.0,3.0,2.0,1.0,4.0,6.0,2.0,26.0,5.0,1.0,0.833333,1.25,2014
3,12,ALG,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,8.0,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,3.0,1.0,2.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,2.0,5.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,1.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,4.0,4.0,5.0,2.0,28.0,6.0,1.0,0.833333,1.25,2014
4,12,ALG,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,6.0,6.0,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,2.0,5.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,3.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,2.0,3.0,7.0,2.0,35.0,3.0,1.0,0.833333,1.25,2014


#### Engineer age groups based on responses to `v242`

In [13]:
# recode age variable into groups
bins = [14, 24, 54, 64, 120]
labels = [1, 2, 3, 4]
wvs_w6['v242g'] = pd.cut(wvs_w6['v242'], bins=bins, labels=labels)
wvs_w6['v242g'].value_counts().sort_index()

1    14593
2    50285
3    11084
4     9827
Name: v242g, dtype: int64

In [14]:
# get index for v242
wvs_w6.columns.get_loc('v242')

160

In [15]:
# remove and re-insert v242g into the dataframe
col_name = 'v242g'
v242g = wvs_w6.pop(col_name)
wvs_w6.insert(161, col_name, v242g)
wvs_w6.head()

Unnamed: 0,v2,country,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v24,v25,v26,v27,v30,v32,v33,v34,v44,v45,v47,v48,v49,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60,v61,v62,v63,v64,v65,v66,v67,v68,v69,v70,v71,v72,v73,v74,v74b,v75,v76,v77,v78,v79,v80,v82,v83,v84,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v107,v108,v109,v110,v111,v113,v114,v115,v116,v117,v119,v120,v121,v122,v123,v124,v126,v131,v132,v133,v134,v135,v136,v137,v138,v139,v140,v143,v144g,v147,v150,v151,v152,v153,v154,v155,v170,v171,v173,v174,v176,v177,v179,v180,v181,v182,v183,v184,v187,v188,v189,v190,v191,v192,v193,v194,v195,v196,v197,v198,v199,v200,v202,v203,v204,v205,v207,v208,v209,v210,v211,v213,v214,v216,v225,v229,v230,v237,v238,v239,v240,v242,v242g,v248,v258,s018,s019,v262
0,12,ALG,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,2.0,2.0,4.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,2.0,2.0,4.0,3.0,3.0,3.0,2.0,3.0,4.0,4.0,3.0,4.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,2.0,5.0,1.0,,,10.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,3.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,,1.0,4.0,5.0,1.0,21.0,1,7.0,1.0,0.833333,1.25,2014
1,12,ALG,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,3.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,1.0,5.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,3.0,6.0,,2.0,3.0,6.0,2.0,24.0,1,7.0,1.0,0.833333,1.25,2014
2,12,ALG,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,4.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,1.0,5.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,4.0,1.0,4.0,5.0,1.0,1.0,3.0,2.0,4.0,2.0,3.0,2.0,1.0,4.0,6.0,2.0,26.0,2,5.0,1.0,0.833333,1.25,2014
3,12,ALG,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,8.0,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,3.0,1.0,2.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,2.0,5.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,1.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,4.0,4.0,5.0,2.0,28.0,2,6.0,1.0,0.833333,1.25,2014
4,12,ALG,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,6.0,6.0,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,2.0,5.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,3.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,2.0,3.0,7.0,2.0,35.0,2,3.0,1.0,0.833333,1.25,2014


In [16]:
# check to see that ages are in the correct group
# anyone under 25-yrs-old
wvs_w6[wvs_w6['v242'] < 25.0].sample()

Unnamed: 0,v2,country,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v24,v25,v26,v27,v30,v32,v33,v34,v44,v45,v47,v48,v49,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60,v61,v62,v63,v64,v65,v66,v67,v68,v69,v70,v71,v72,v73,v74,v74b,v75,v76,v77,v78,v79,v80,v82,v83,v84,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v107,v108,v109,v110,v111,v113,v114,v115,v116,v117,v119,v120,v121,v122,v123,v124,v126,v131,v132,v133,v134,v135,v136,v137,v138,v139,v140,v143,v144g,v147,v150,v151,v152,v153,v154,v155,v170,v171,v173,v174,v176,v177,v179,v180,v181,v182,v183,v184,v187,v188,v189,v190,v191,v192,v193,v194,v195,v196,v197,v198,v199,v200,v202,v203,v204,v205,v207,v208,v209,v210,v211,v213,v214,v216,v225,v229,v230,v237,v238,v239,v240,v242,v242g,v248,v258,s018,s019,v262
22960,332,HAI,2.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,3.0,1.0,2.0,2.0,3.0,3.0,3.0,10.0,1.0,6.0,0.0,2.0,1.0,3.0,2.0,3.0,1.0,4.0,2.0,1.0,2.0,1.0,4.0,1.0,2.0,5.0,1.0,4.0,2.0,5.0,5.0,2.0,1.0,1.0,2.0,2.0,4.0,3.0,5.0,1.0,4.0,2.0,1.0,2.0,4.0,2.0,4.0,2.0,2.0,3.0,3.0,4.0,4.0,3.0,3.0,3.0,4.0,4.0,2.0,4.0,4.0,2.0,2.0,4.0,4.0,2.0,5.0,6.0,6.0,4.0,6.0,5.0,6.0,5.0,10.0,1.0,1.0,1.0,1.0,1.0,10.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,1.0,,1.0,1.0,2.0,1.0,4.0,2.0,1.0,8.0,8.0,5.0,8.0,5.0,2.0,4.0,4.0,5.0,4.0,6.0,5.0,4.0,6.0,8.0,9.0,7.0,2.0,2.0,2.0,3.0,1.0,7.0,,2.0,3.0,5.0,2.0,20.0,1,8.0,1.0,0.501002,0.751503,2016


In [17]:
# 25-yr-old
wvs_w6[wvs_w6['v242'] == 25.0].sample()

Unnamed: 0,v2,country,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v24,v25,v26,v27,v30,v32,v33,v34,v44,v45,v47,v48,v49,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60,v61,v62,v63,v64,v65,v66,v67,v68,v69,v70,v71,v72,v73,v74,v74b,v75,v76,v77,v78,v79,v80,v82,v83,v84,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v107,v108,v109,v110,v111,v113,v114,v115,v116,v117,v119,v120,v121,v122,v123,v124,v126,v131,v132,v133,v134,v135,v136,v137,v138,v139,v140,v143,v144g,v147,v150,v151,v152,v153,v154,v155,v170,v171,v173,v174,v176,v177,v179,v180,v181,v182,v183,v184,v187,v188,v189,v190,v191,v192,v193,v194,v195,v196,v197,v198,v199,v200,v202,v203,v204,v205,v207,v208,v209,v210,v211,v213,v214,v216,v225,v229,v230,v237,v238,v239,v240,v242,v242g,v248,v258,s018,s019,v262
34656,398,KZK,1.0,1.0,1.0,3.0,1.0,3.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,1.0,1.0,1.0,3.0,3.0,2.0,2.0,10.0,6.0,6.0,0.0,4.0,1.0,2.0,1.0,3.0,1.0,4.0,1.0,3.0,1.0,1.0,3.0,3.0,3.0,3.0,4.0,,3.0,3.0,4.0,3.0,5.0,3.0,2.0,2.0,3.0,10.0,6.0,5.0,1.0,1.0,10.0,1.0,1.0,1.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,2.0,2.0,1.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,1.0,1.0,10.0,10.0,1.0,10.0,6.0,10.0,10.0,10.0,3.0,5.0,2.0,2.0,2.0,6.0,,,,2.0,,,,5.0,5.0,5.0,5.0,1.0,2.0,4.0,4.0,2.0,4.0,4.0,4.0,3.0,10.0,10.0,6.0,6.0,1.0,10.0,1.0,1.0,1.0,1.0,1.0,6.0,6.0,6.0,6.0,10.0,1.0,2.0,4.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,5.0,1.0,25.0,2,5.0,1.243955,0.829304,1.243955,2011


In [18]:
# 55-yr-old
wvs_w6[wvs_w6['v242'] == 55.0].sample()

Unnamed: 0,v2,country,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v24,v25,v26,v27,v30,v32,v33,v34,v44,v45,v47,v48,v49,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60,v61,v62,v63,v64,v65,v66,v67,v68,v69,v70,v71,v72,v73,v74,v74b,v75,v76,v77,v78,v79,v80,v82,v83,v84,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v107,v108,v109,v110,v111,v113,v114,v115,v116,v117,v119,v120,v121,v122,v123,v124,v126,v131,v132,v133,v134,v135,v136,v137,v138,v139,v140,v143,v144g,v147,v150,v151,v152,v153,v154,v155,v170,v171,v173,v174,v176,v177,v179,v180,v181,v182,v183,v184,v187,v188,v189,v190,v191,v192,v193,v194,v195,v196,v197,v198,v199,v200,v202,v203,v204,v205,v207,v208,v209,v210,v211,v213,v214,v216,v225,v229,v230,v237,v238,v239,v240,v242,v242g,v248,v258,s018,s019,v262
15008,218,ECU,1.0,3.0,2.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,3.0,3.0,3.0,1.0,,3.0,3.0,3.0,5.0,8.0,1.0,3.0,5.0,2.0,4.0,2.0,3.0,4.0,1.0,1.0,1.0,1.0,1.0,4.0,5.0,1.0,2.0,1.0,2.0,3.0,6.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,4.0,3.0,4.0,2.0,3.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,3.0,3.0,3.0,3.0,1.0,3.0,3.0,3.0,3.0,1.0,3.0,3.0,2.0,3.0,6.0,5.0,6.0,8.0,9.0,6.0,8.0,7.0,7.0,9.0,1.0,1.0,1.0,2.0,2.0,10.0,2.0,1.0,3.0,3.0,1.0,3.0,4.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,2.0,4.0,3.0,4.0,3.0,9.0,10.0,5.0,8.0,8.0,9.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,,2.0,3.0,7.0,1.0,55.0,3,3.0,1.0,0.831947,1.24792,2013


In [19]:
# anyone 65-yrs-old and up
wvs_w6[wvs_w6['v242'] >= 65.0].sample(5)

Unnamed: 0,v2,country,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v24,v25,v26,v27,v30,v32,v33,v34,v44,v45,v47,v48,v49,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60,v61,v62,v63,v64,v65,v66,v67,v68,v69,v70,v71,v72,v73,v74,v74b,v75,v76,v77,v78,v79,v80,v82,v83,v84,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v107,v108,v109,v110,v111,v113,v114,v115,v116,v117,v119,v120,v121,v122,v123,v124,v126,v131,v132,v133,v134,v135,v136,v137,v138,v139,v140,v143,v144g,v147,v150,v151,v152,v153,v154,v155,v170,v171,v173,v174,v176,v177,v179,v180,v181,v182,v183,v184,v187,v188,v189,v190,v191,v192,v193,v194,v195,v196,v197,v198,v199,v200,v202,v203,v204,v205,v207,v208,v209,v210,v211,v213,v214,v216,v225,v229,v230,v237,v238,v239,v240,v242,v242g,v248,v258,s018,s019,v262
37267,422,LEB,1.0,1.0,2.0,4.0,1.0,3.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,3.0,3.0,2.0,1.0,4.0,3.0,2.0,6.0,4.0,1.0,4.0,10.0,2.0,1.0,1.0,2.0,1.0,,,3.0,1.0,2.0,3.0,6.0,3.0,1.0,6.0,1.0,2.0,5.0,6.0,2.0,1.0,1.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,10.0,7.0,2.0,2.0,2.0,4.0,3.0,4.0,2.0,2.0,3.0,4.0,3.0,2.0,1.0,4.0,,3.0,4.0,2.0,3.0,4.0,1.0,4.0,9.0,4.0,10.0,10.0,9.0,6.0,4.0,4.0,7.0,10.0,4.0,5.0,2.0,2.0,2.0,10.0,2.0,4.0,2.0,1.0,3.0,2.0,1.0,1.0,1.0,5.0,5.0,3.0,2.0,3.0,2.0,1.0,4.0,,4.0,4.0,4.0,10.0,5.0,6.0,5.0,1.0,1.0,1.0,1.0,3.0,2.0,4.0,3.0,7.0,1.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,4.0,2.0,3.0,3.0,2.0,1.0,82.0,4,2.0,1.0,0.833333,1.25,2013
30780,392,JPN,1.0,1.0,1.0,2.0,1.0,4.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,0.0,2.0,1.0,0.0,,0.0,0.0,2.0,2.0,,2.0,,3.0,3.0,,,7.0,,1.0,2.0,5.0,1.0,3.0,,,1.0,,,3.0,1.0,2.0,,5.0,,5.0,,3.0,4.0,4.0,4.0,3.0,4.0,5.0,2.0,2.0,2.0,2.0,5.0,1.0,2.0,2.0,,1.0,1.0,2.0,3.0,4.0,4.0,4.0,2.0,2.0,2.0,1.0,1.0,2.0,,,2.0,,2.0,2.0,1.0,,,5.0,1.0,8.0,10.0,,5.0,7.0,,7.0,10.0,2.0,0.0,2.0,,2.0,4.0,,3.0,,2.0,3.0,4.0,4.0,1.0,1.0,5.0,5.0,1.0,1.0,1.0,1.0,2.0,4.0,3.0,4.0,4.0,7.0,8.0,6.0,5.0,5.0,6.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,3.0,5.0,2.0,3.0,2.0,1.0,2.0,69.0,4,7.0,1.0,0.409333,0.613999,2010
5343,31,AZE,1.0,3.0,3.0,4.0,3.0,1.0,3.0,4.0,2.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,2.0,,2.0,3.0,1.0,1.0,3.0,1.0,1.0,7.0,1.0,2.0,3.0,3.0,1.0,1.0,4.0,2.0,3.0,2.0,1.0,4.0,1.0,1.0,1.0,2.0,,3.0,6.0,2.0,3.0,1.0,1.0,2.0,2.0,4.0,3.0,8.0,2.0,5.0,2.0,5.0,1.0,2.0,2.0,4.0,4.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,8.0,9.0,9.0,10.0,10.0,6.0,9.0,10.0,10.0,2.0,2.0,5.0,2.0,2.0,1.0,10.0,1.0,1.0,1.0,2.0,2.0,2.0,4.0,1.0,1.0,5.0,5.0,4.0,2.0,1.0,1.0,2.0,1.0,3.0,1.0,1.0,5.0,5.0,5.0,5.0,5.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,1.0,2.0,2.0,1.0,2.0,4.0,4.0,1.0,4.0,4.0,4.0,2.0,76.0,4,1.0,1.000046,0.99805,1.497075,2011
55759,642,ROM,1.0,1.0,1.0,3.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,2.0,1.0,1.0,3.0,4.0,4.0,1.0,8.0,10.0,1.0,0.0,8.0,3.0,2.0,2.0,1.0,4.0,3.0,1.0,3.0,1.0,1.0,6.0,6.0,3.0,6.0,1.0,,1.0,6.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,10.0,1.0,2.0,1.0,10.0,1.0,3.0,1.0,4.0,2.0,2.0,1.0,1.0,3.0,2.0,2.0,4.0,2.0,4.0,3.0,4.0,3.0,4.0,4.0,1.0,2.0,4.0,10.0,10.0,10.0,10.0,9.0,10.0,10.0,10.0,10.0,10.0,1.0,3.0,1.0,1.0,2.0,10.0,2.0,1.0,2.0,1.0,4.0,4.0,4.0,5.0,1.0,5.0,5.0,4.0,4.0,3.0,4.0,2.0,4.0,4.0,4.0,4.0,10.0,10.0,10.0,7.0,6.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,2.0,2.0,4.0,6.0,1.0,77.0,4,5.0,0.811434,0.539876,0.809814,2012
81218,858,URU,1.0,1.0,1.0,3.0,2.0,2.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,3.0,2.0,1.0,,4.0,3.0,4.0,9.0,5.0,2.0,3.0,6.0,,,,,4.0,2.0,1.0,3.0,1.0,1.0,2.0,5.0,2.0,1.0,2.0,,2.0,3.0,1.0,1.0,2.0,4.0,2.0,2.0,2.0,,2.0,5.0,3.0,2.0,8.0,1.0,1.0,1.0,3.0,2.0,2.0,3.0,3.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,9.0,2.0,10.0,9.0,1.0,10.0,10.0,10.0,2.0,0.0,1.0,2.0,2.0,8.0,3.0,3.0,2.0,2.0,3.0,4.0,4.0,1.0,5.0,1.0,5.0,1.0,4.0,1.0,1.0,,4.0,4.0,4.0,4.0,9.0,8.0,7.0,10.0,7.0,4.0,1.0,1.0,1.0,1.0,10.0,10.0,10.0,9.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,4.0,6.0,1.0,77.0,4,4.0,1.0,1.0,1.5,2011


In [20]:
# how many non-missing age values?
wvs_w6.v242g.notna().sum()

85789

In [21]:
# how many missing values for v242g (should be same as for V242)?
wvs_w6.v242g.isna().sum()

109

In [22]:
# remove original age variable 'v242'
#wvs_w6.drop(columns = ['v242'], axis = 1, inplace = True)

#### Consolidate Schwartz `v74` and `v74b` variables

In [23]:
# consolidate v74 and v74b
wvs_w6[['country', 'v74', 'v74b']].sample()

Unnamed: 0,country,v74,v74b
68796,SPN,,1.0


In [24]:
schw_bene = wvs_w6[['country', 'v74', 'v74b']]
schw_bene.sample(10)

Unnamed: 0,country,v74,v74b
46357,NTH,2.0,1.0
44675,MOR,,2.0
21812,GHA,2.0,
83483,YEM,1.0,1.0
28704,IND,3.0,
28945,IND,6.0,
58122,RUS,3.0,
59402,RWA,5.0,4.0
77104,TUR,1.0,
35259,KZK,1.0,


In [25]:
# add columns indicating whether respondent was more likely to relate more to helping society, helping neighbors,
# or helping both equally
schw_bene['help_neighbor'] = schw_bene['v74'] > schw_bene['v74b']
schw_bene['help_society'] = schw_bene['v74b'] > schw_bene['v74']
schw_bene['help_equally'] = schw_bene['v74'] == schw_bene['v74b']
schw_bene.sample(50)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schw_bene['help_neighbor'] = schw_bene['v74'] > schw_bene['v74b']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schw_bene['help_society'] = schw_bene['v74b'] > schw_bene['v74']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schw_bene['help_equally'] = schw_bene['v74'] == schw_bene['v74b']


Unnamed: 0,country,v74,v74b,help_neighbor,help_society,help_equally
4851,AZE,3.0,,False,False,False
46518,NTH,3.0,3.0,False,False,True
66647,SAF,3.0,3.0,False,False,True
84526,ZIM,2.0,,False,False,False
57547,RUS,2.0,,False,False,False
20126,GMY,,2.0,False,False,False
83426,YEM,5.0,6.0,False,True,False
17831,GRG,2.0,3.0,False,True,False
115,ALG,1.0,1.0,False,False,True
69497,SPN,,2.0,False,False,False


In [26]:
# count up total True values for each column by country to get a quick view of patterns by country
schw_bene['missing_entirely'] = schw_bene.v74.isna() & schw_bene.v74b.isna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schw_bene['missing_entirely'] = schw_bene.v74.isna() & schw_bene.v74b.isna()


In [27]:
schw_bene.sample(50)

Unnamed: 0,country,v74,v74b,help_neighbor,help_society,help_equally,missing_entirely
72328,THI,4.0,3.0,True,False,False,False
58482,RUS,2.0,,False,False,False,False
42111,MEX,2.0,,False,False,False,False
12364,CHN,2.0,3.0,False,True,False,False
57335,RUS,3.0,,False,False,False,False
57660,RUS,,,False,False,False,True
8176,BRA,3.0,1.0,True,False,False,False
5043,AZE,3.0,,False,False,False,False
59361,RWA,1.0,1.0,False,False,True,False
41086,MAL,3.0,,False,False,False,False


In [28]:
# how many cases (regardless of country) have missing data in both columns?
schw_bene.missing_entirely.sum()

1931

In [29]:
# total cases
schw_bene.missing_entirely.count()

85898

In [30]:
# how many cases by country have missing data in both columns?
schw_bene.groupby(['country']).sum()

Unnamed: 0_level_0,v74,v74b,help_neighbor,help_society,help_equally,missing_entirely
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALG,2786.0,2728.0,276.0,237.0,595.0,60.0
ARG,2594.0,0.0,0.0,0.0,0.0,16.0
ARM,2454.0,0.0,0.0,0.0,0.0,3.0
AUL,3928.0,0.0,0.0,0.0,0.0,29.0
AZE,2888.0,0.0,0.0,0.0,0.0,0.0
BLR,4506.0,0.0,0.0,0.0,0.0,15.0
BRA,2910.0,2652.0,381.0,168.0,932.0,3.0
CHL,1955.0,0.0,0.0,0.0,0.0,75.0
CHN,5846.0,5402.0,525.0,226.0,1416.0,125.0
COL,2812.0,2784.0,269.0,284.0,955.0,1.0


In [31]:
schw_groupby = schw_bene.groupby(['country']).sum()
schw_groupby.reset_index()
schw_groupby

Unnamed: 0_level_0,v74,v74b,help_neighbor,help_society,help_equally,missing_entirely
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALG,2786.0,2728.0,276.0,237.0,595.0,60.0
ARG,2594.0,0.0,0.0,0.0,0.0,16.0
ARM,2454.0,0.0,0.0,0.0,0.0,3.0
AUL,3928.0,0.0,0.0,0.0,0.0,29.0
AZE,2888.0,0.0,0.0,0.0,0.0,0.0
BLR,4506.0,0.0,0.0,0.0,0.0,15.0
BRA,2910.0,2652.0,381.0,168.0,932.0,3.0
CHL,1955.0,0.0,0.0,0.0,0.0,75.0
CHN,5846.0,5402.0,525.0,226.0,1416.0,125.0
COL,2812.0,2784.0,269.0,284.0,955.0,1.0


In [32]:
# countries whose respondents valued helping neighbor and helping society (v74 = v74b) most often
schw_groupby.loc[(schw_groupby['help_equally'] > schw_groupby['help_neighbor']) & \
                 (schw_groupby['help_equally'] > schw_groupby['help_society'])]

Unnamed: 0_level_0,v74,v74b,help_neighbor,help_society,help_equally,missing_entirely
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALG,2786.0,2728.0,276.0,237.0,595.0,60.0
BRA,2910.0,2652.0,381.0,168.0,932.0,3.0
CHN,5846.0,5402.0,525.0,226.0,1416.0,125.0
COL,2812.0,2784.0,269.0,284.0,955.0,1.0
ECU,2669.0,2884.0,238.0,396.0,566.0,1.0
GRG,2414.0,2191.0,286.0,110.0,793.0,3.0
IRQ,2604.0,2541.0,376.0,326.0,485.0,1.0
JOR,2395.0,2197.0,322.0,199.0,672.0,3.0
JPN,8407.0,7766.0,833.0,231.0,1092.0,147.0
LEB,2862.0,2829.0,344.0,321.0,514.0,4.0


In [33]:
help_equal_df = pd.DataFrame(schw_groupby.loc[(schw_groupby['help_equally'] > schw_groupby['help_neighbor']) & \
                 (schw_groupby['help_equally'] > schw_groupby['help_society'])])
help_equal_list = help_equal_df.reset_index().country.tolist()
print(help_equal_list)
print(len(help_equal_list))

['ALG', 'BRA', 'CHN', 'COL', 'ECU', 'GRG', 'IRQ', 'JOR', 'JPN', 'LEB', 'LIB', 'NTH', 'PAK', 'PSE', 'RWA', 'SAF', 'SIN', 'THI', 'TUN', 'YEM']
20


In [34]:
# countries whose respondents valued helping neighbor over helping society (v74 > v74b) most often
help_neighbor_df = pd.DataFrame(schw_groupby.loc[(schw_groupby['help_neighbor'] > schw_groupby['help_society'])])
help_neighbor_list = help_neighbor_df.reset_index().country.tolist()
print(help_neighbor_list)
print(len(help_neighbor_list))

['ALG', 'BRA', 'CHN', 'GRG', 'HKG', 'IRQ', 'JOR', 'JPN', 'LEB', 'LIB', 'NTH', 'PSE', 'RWA', 'SAF', 'TUN', 'YEM']
16


In [35]:
# countries whose respondents valued helping society over helping neighbor (v74b > v74) most often
help_society_df = pd.DataFrame(schw_groupby.loc[(schw_groupby['help_society'] > schw_groupby['help_neighbor'])])
help_society_list = help_society_df.reset_index().country.tolist()
print(help_society_list)
print(len(help_society_list))

['COL', 'ECU', 'HAI', 'PAK', 'SIN', 'THI']
6


In [36]:
len(schw_groupby)

57

### Approach to missings for v74 and v74b should vary by situation:  
  
**For 20 countries in help_equal_list:**  
I am using trends in the data to assume that for the most part, people tend to answer items v74 and v74b in the same way. To handle missing values for these countries:
- step 1: fill v74 or v74b NaN values with values from the other column if those data are available
- step 2: if data are missing for v74 AND v74b columns, proceed to options below

**For 16 countries in help_neighbor_list:**  
For the most part, people tended to show a willingness to help close neighbors more often than help society  
- impute median of v74b (although this skews the distribution of responses)  
  
**For 6 countries in help_society_list:**  
For the most part, people tended to show a willingness to help help society more often than help close neighbors  
- impute median of v74 (although this skews the distribution of responses)  

In [37]:
wvs_w6.head()

Unnamed: 0,v2,country,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v24,v25,v26,v27,v30,v32,v33,v34,v44,v45,v47,v48,v49,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60,v61,v62,v63,v64,v65,v66,v67,v68,v69,v70,v71,v72,v73,v74,v74b,v75,v76,v77,v78,v79,v80,v82,v83,v84,v96,v97,v98,v99,v100,v101,v102,v103,v104,v105,v106,v107,v108,v109,v110,v111,v113,v114,v115,v116,v117,v119,v120,v121,v122,v123,v124,v126,v131,v132,v133,v134,v135,v136,v137,v138,v139,v140,v143,v144g,v147,v150,v151,v152,v153,v154,v155,v170,v171,v173,v174,v176,v177,v179,v180,v181,v182,v183,v184,v187,v188,v189,v190,v191,v192,v193,v194,v195,v196,v197,v198,v199,v200,v202,v203,v204,v205,v207,v208,v209,v210,v211,v213,v214,v216,v225,v229,v230,v237,v238,v239,v240,v242,v242g,v248,v258,s018,s019,v262
0,12,ALG,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,2.0,2.0,4.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,2.0,2.0,4.0,3.0,3.0,3.0,2.0,3.0,4.0,4.0,3.0,4.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,2.0,5.0,1.0,,,10.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,3.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,,1.0,4.0,5.0,1.0,21.0,1,7.0,1.0,0.833333,1.25,2014
1,12,ALG,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,3.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,1.0,5.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,3.0,6.0,,2.0,3.0,6.0,2.0,24.0,1,7.0,1.0,0.833333,1.25,2014
2,12,ALG,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,4.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,1.0,5.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,4.0,1.0,4.0,5.0,1.0,1.0,3.0,2.0,4.0,2.0,3.0,2.0,1.0,4.0,6.0,2.0,26.0,2,5.0,1.0,0.833333,1.25,2014
3,12,ALG,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,6.0,8.0,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,3.0,1.0,2.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,2.0,5.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,1.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,4.0,4.0,5.0,2.0,28.0,2,6.0,1.0,0.833333,1.25,2014
4,12,ALG,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,6.0,6.0,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,2.0,5.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,3.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,2.0,3.0,7.0,2.0,35.0,2,3.0,1.0,0.833333,1.25,2014


In [42]:
# # set a grey background (use sns.set_theme() if seaborn version 0.11.0 or above) 
# sns.set(style="darkgrid")

# fig, axs = plt.subplots(2, 2, figsize=(7, 7))

# sns.barplot(data=wvs_w6, x="v4", color="skyblue", ax=axs[0, 0])
# sns.barplot(data=wvs_w6, x="v5", color="olive", ax=axs[0, 1])
# sns.barplot(data=wvs_w6, x="v6", color="gold", ax=axs[1, 0])
# sns.barplot(data=wvs_w6, x="v7", color="teal", ax=axs[1, 1])

# plt.show()