In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.options.display.max_columns = None
pd.options.display.max_rows = None

## I. Explore the data  
  
A. Study variable attributes 
 1. Identify variable name and survey item(s) it measures (Codebook available here: https://www.worldvaluessurvey.org/WVSDocumentationWV6.jsp (accessed on 5/25/2021))  
 2. % missing for each variable
 3. Quick descriptives (check for range of values, distribution shape, skew/outliers, potential errors, etc.)  
 4. Identify target variable (and drop duplicates) - don't forget to do feature engineering on the target variable; break out into varying levels of happiness  
  
B. Visualize the data (based on descriptives)
 1. Explore correlations between attributes
 2. Identify transformations that might be needed
 3. Identify extra data that may be useful (gini coefficient, GDP, etc.)
 4. Summarize findings

In [3]:
wvs_w6 = pd.read_csv('../data/Evaluating_Happiness/wvs_w6.csv', low_memory=False)

In [4]:
wvs_w6.shape

(89565, 442)

In [5]:
wvs_w6.head()

Unnamed: 0,V1,V2,V2A,cow,C_COW_ALPHA,B_COUNTRY_ALPHA,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V44_ES,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V56_NZ,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V90,V91,V92,V93,V94,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125_00,V125_01,V125_02,V125_03,V125_04,V125_05,V125_06,V125_07,V125_08,V125_09,V125_10,V125_11,V125_12,V125_13,V125_14,V125_15,V125_16,V125_17,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V144G,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V160A,V160B,V160C,V160D,V160E,V160F,V160G,V160H,V160I,V160J,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V203A,V204,V205,V206,V207,V207A,V208,V209,V210,V211,V212,V213,V214,V215_01,V215_02,V215_03,V215_04,V215_05,V215_06,V215_07,V215_08,V215_10,V215_11,V215_12,V215_13,V215_14,V215_15,V215_16,V215_17,V215_18,V216,V217,V218,V219,V220,V221,V222,V223,V224,V218_ESMA,V217_ESMA,V219_ESMA,V220_ESMA,V221_ESMA,V222_ESMA,V223_ESMA,V224_ESMA,V225,V226,V227,V228,V228_LOCAL,V228_2,V228A,V228B,V228C,V228D,V228E,V228F,V228G,V228H,V228I,V228J,V228K,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V241,V242,X003R2,X003R,V243,V243_AU,V244,V244_AU,V245,V246,V247,V248,V248_CS,V249,V250,V251,V252,V253,V253_CS,V254,V255,N_REGION_ISO,V256,V256B,V256C,V257,V258,V258A,S018,S019,V260,V261,V262,V265,Y001,Y001_1,Y001_2,Y001_3,Y001_4,Y001_5,Y002,Y003,MN_35A,MN_163A,MN_163B,MN_163C,MN_228L,MN_228M,MN_228N,MN_228O,MN_228P,MN_228Q,MN_228R,MN_228S1,MN_228S2,MN_228S3,MN_228S4,MN_228S5,MN_228S6,MN_228S7,MN_228S8,MN_229A,MN_229B,MN_230A,MN_233A,MN_233B,MN_234A,MN_237A,MN_237B1,MN_237B2,MN_237B3,MN_237B4,MN_237B5,MN_237B6,MN_237B7,MN_237C1,MN_237C2,MN_237C3,MN_237C4,MN_237C5,MN_237C6,MN_249A1,MN_249A2,MN_249A3,sacsecval,secvalwgt,resemaval,weightb,I_AUTHORITY,I_NATIONALISM,I_DEVOUT,defiance,WEIGHT1A,I_RELIGIMP,I_RELIGBEL,I_RELIGPRAC,disbelief,WEIGHT2A,I_NORM1,I_NORM2,I_NORM3,relativism,WEIGHT3A,I_TRUSTARMY,I_TRUSTPOLICE,I_TRUSTCOURTS,scepticism,WEIGHT4A,I_INDEP,I_IMAGIN,I_NONOBED,autonomy,WEIGHT1B,I_WOMJOB,I_WOMPOL,I_WOMEDU,equality,WEIGHT2B,I_HOMOLIB,I_ABORTLIB,I_DIVORLIB,choice,WEIGHT3B,I_VOICE1,I_VOICE2,I_VOI2_00,voice,WEIGHT4B
0,6,12,12,615,ALG,DZA,1,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,1.0,2.0,2.0,4.0,,,,,,,,,,,5.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,3.0,2.0,2.0,4.0,3.0,3.0,2.0,3.0,2.0,3.0,4.0,4.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,4.0,1.0,2.0,2.0,2.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,5.0,2.0,2.0,50000000.0,5.0,3.0,3.0,1.0,1.0,1.0,,,10.0,1.0,2.0,2.0,1.0,2.0,7.0,4.0,3.0,2.0,3.0,1.0,9.0,1.0,2.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,1.0,2.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,3.0,4.0,2.0,2.0,3.0,4.0,2.0,4.0,,,,,,,,,2.0,3.0,3.0,12005.0,12005.0,,3.0,4.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,2.0,1.0,6.0,,,,,,2.0,1.0,1.0,4.0,5.0,1.0,1993.0,21.0,1.0,1.0,2.0,,2.0,,1.0,1.0,110.0,7.0,,24.0,1.0,2.0,2.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,2.0,1,0,0,1,0,1.0,0.0,0.0,2.0,1.0,2.0,3.0,5.0,4.0,2.0,2.0,3.0,2.0,2.0,4.0,2.0,3.0,3.0,3.0,2.0,3.0,,,,,,,2.0,,,,,,,,,,,,,,1.0,2.0,1.0,0.221111,1.0,0.164352,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.333333,0.111111,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,0.33,0.33,0.44,1.0,1.0,0.0,0.0,0.333333,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.222222,0.074074,1.0,0.0,0.5,0.25,0.25,1.0
1,6,12,12,615,ALG,DZA,2,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,1.0,,1.0,1.0,2.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,3.0,,,,,,,,,,,5.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,3.0,1.0,3.0,2.0,1.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,4.0,3.0,1.0,50000000.0,5.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,1.0,4.0,6.0,4.0,4.0,1.0,2.0,3.0,8.0,2.0,3.0,2.0,1.0,2.0,2.0,4.0,4.0,2.0,3.0,2.0,4.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,4.0,3.0,4.0,2.0,3.0,3.0,,,,,,,,,3.0,2.0,3.0,12003.0,12003.0,,3.0,4.0,4.0,3.0,4.0,3.0,3.0,4.0,4.0,1.0,1.0,6.0,,,,,,2.0,2.0,2.0,3.0,6.0,2.0,1990.0,24.0,1.0,1.0,2.0,,2.0,,1.0,1.0,110.0,7.0,,25.0,1.0,1.0,2.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,2.0,0,1,0,0,1,2.0,-1.0,0.0,2.0,3.0,2.0,3.0,5.0,4.0,6.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,,,,,,,2.0,,,,,,,,,,,,,,2.0,,,0.276389,1.0,0.11,1.0,0.5,0.33,0.33,0.386667,1.0,0.33,0.0,0.166667,0.165556,1.0,1.0,0.0,0.0,0.333333,1.0,0.0,0.66,0.0,0.22,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.33,0.11,1.0,0.0,0.0,0.0,0.0,1.0,0.66,0.0,0.33,0.33,1.0
2,6,12,12,615,ALG,DZA,3,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,,1.0,,2.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,,,3.0,3.0,3.0,,,,,,6.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,3.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,2.0,1.0,3.0,2.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,7.0,2.0,1.0,50000000.0,5.0,2.0,4.0,1.0,1.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,7.0,4.0,4.0,0.0,2.0,2.0,6.0,2.0,3.0,1.0,2.0,4.0,2.0,3.0,2.0,4.0,3.0,3.0,2.0,4.0,3.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,,1.0,4.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,3.0,2.0,,,,,2.0,,,,,,,,,,,,,4.0,3.0,4.0,2.0,2.0,4.0,3.0,1.0,1.0,,,,,,,,,2.0,2.0,3.0,,,,4.0,4.0,3.0,4.0,2.0,3.0,2.0,4.0,4.0,2.0,1.0,3.0,2.0,7.0,6.0,5.0,2.0,1.0,,1.0,4.0,6.0,2.0,1988.0,26.0,1.0,2.0,2.0,,2.0,,1.0,1.0,500.0,5.0,,18.0,1.0,1.0,1.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,1.0,0,0,1,0,0,2.0,-2.0,0.0,1.0,3.0,2.0,3.0,6.0,4.0,5.0,1.0,2.0,2.0,2.0,3.0,2.0,4.0,3.0,2.0,4.0,4.0,1.0,1.0,1.0,2.0,3.0,3.0,2.0,,,,,,,,,,,,,,1.0,2.0,1.0,0.304722,1.0,0.152361,1.0,0.5,0.0,0.0,0.166667,1.0,0.0,0.0,0.166667,0.055556,1.0,1.0,0.0,0.0,0.333333,1.0,0.33,1.0,0.66,0.663333,1.0,0.0,1.0,0.0,0.333333,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.333333,0.111111,1.0,0.33,0.0,0.165,0.165,1.0
3,6,12,12,615,ALG,DZA,4,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,,2.0,,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,6.0,8.0,,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,,,,,,,,,,,6.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,3.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,2.0,3.0,1.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,8.0,3.0,2.0,50000000.0,5.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,2.0,3.0,7.0,6.0,4.0,1.0,3.0,3.0,7.0,3.0,1.0,2.0,3.0,3.0,2.0,3.0,4.0,1.0,2.0,4.0,2.0,2.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,4.0,4.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,1.0,2.0,3.0,4.0,3.0,3.0,,,,,,,,,2.0,2.0,3.0,,,,2.0,4.0,3.0,3.0,2.0,3.0,2.0,2.0,2.0,1.0,3.0,1.0,1.0,8.0,5.0,6.0,2.0,1.0,,4.0,4.0,5.0,2.0,1986.0,28.0,1.0,2.0,2.0,,2.0,,1.0,1.0,500.0,6.0,,14.0,2.0,2.0,1.0,1.0,,12003.0,1.0,12005.0,12105.0,,,500.0,1.0,1.0,0.833333,1.25,5,,2014,,1.0,0,0,0,0,1,1.0,-1.0,0.0,2.0,2.0,2.0,3.0,7.0,5.0,6.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,4.0,3.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,,,,,,,,,,,,,,1.0,2.0,2.0,0.263056,1.0,0.096667,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.166667,0.055556,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,1.0,0.33,0.663333,1.0,0.0,0.0,0.0,0.0,1.0,0.5,0.33,0.33,0.386667,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,6,12,12,615,ALG,DZA,5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,,1.0,,1.0,1.0,1.0,2.0,1.0,3.0,2.0,2.0,6.0,6.0,,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,2.0,,,,,,,,,,,5.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,3.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,2.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,7.0,3.0,2.0,50000000.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,4.0,4.0,3.0,1.0,3.0,2.0,8.0,2.0,3.0,3.0,3.0,3.0,2.0,4.0,3.0,3.0,2.0,4.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,3.0,3.0,4.0,2.0,2.0,4.0,3.0,2.0,3.0,,,,,,,,,2.0,3.0,3.0,2.0,2.0,,3.0,3.0,4.0,2.0,3.0,3.0,4.0,3.0,3.0,1.0,2.0,3.0,2.0,5.0,6.0,9.0,2.0,2.0,1.0,2.0,3.0,7.0,2.0,1979.0,35.0,2.0,3.0,2.0,,2.0,,1.0,1.0,500.0,3.0,,8.0,1.0,1.0,1.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,2.0,0,1,0,0,1,2.0,1.0,0.0,2.0,3.0,3.0,3.0,4.0,7.0,4.0,2.0,2.0,3.0,2.0,3.0,1.0,4.0,3.0,1.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,,,,,,,,,,,,,,1.0,2.0,2.0,0.220833,1.0,0.239352,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,0.66,0.33,0.55,1.0,0.0,0.0,1.0,0.333333,1.0,0.0,0.0,0.66,0.22,1.0,0.0,0.0,0.222222,0.074074,1.0,0.66,0.0,0.33,0.33,1.0


In [6]:
wvs_w6.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89565 entries, 0 to 89564
Columns: 442 entries, V1 to WEIGHT4B
dtypes: float64(428), int64(12), object(2)
memory usage: 302.0+ MB


In [7]:
wvs_w6.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89565 entries, 0 to 89564
Data columns (total 442 columns):
 #   Column           Dtype  
---  ------           -----  
 0   V1               int64  
 1   V2               int64  
 2   V2A              int64  
 3   cow              int64  
 4   C_COW_ALPHA      object 
 5   B_COUNTRY_ALPHA  object 
 6   V3               int64  
 7   V4               float64
 8   V5               float64
 9   V6               float64
 10  V7               float64
 11  V8               float64
 12  V9               float64
 13  V10              float64
 14  V11              float64
 15  V12              float64
 16  V13              float64
 17  V14              float64
 18  V15              float64
 19  V16              float64
 20  V17              float64
 21  V18              float64
 22  V19              float64
 23  V20              float64
 24  V21              float64
 25  V22              float64
 26  V23              float64
 27  V24            

### Codebook notes:  
  
- Weights (See https://www.worldvaluessurvey.org/WVSContents.jsp for further details): 
    - S018 and S019 are weighting factors that transform N's to 1000 and 1500, respectively
    - these variables are useful for cross-country comparisons 
    - useful for EDA and descriptive analyses; should arguably be dropped for random forest algorithm, **right?**
    - **QUESTION:** are weights useful for PCA and linear regression?
    - **QUESTION:** I see weights, but not specific population or sample size info - do I need this?
        - sample size shouldn't be too difficult to obtain based on max V3 values for each country
        - population data shouldn't be difficult to obtain based on N preserving weightings (V258) and this formula  
        
        $$Weight = S018/1000 * Population$$  

In [8]:
# sample sizes for each participating country (if needed)
wvs_w6.groupby('C_COW_ALPHA')['V3'].count()

C_COW_ALPHA
ALG    1200
ARG    1030
ARM    1100
AUL    1477
AZE    1002
BLR    1535
BRA    1486
CHL    1000
CHN    2300
COL    1512
CYP    1000
ECU    1202
EGY    1523
EST    1533
GHA    1552
GMY    2046
GRG    1202
HAI    1996
HKG    1000
IND    4078
IRQ    1200
JOR    1200
JPN    2443
KUW    1303
KYR    1500
KZK    1500
LEB    1200
LIB    2131
MAL    1300
MEX    2000
MOR    1200
NEW     841
NIG    1759
NTH    1902
PAK    1200
PER    1210
PHI    1200
POL     966
PSE    1000
QAT    1060
ROK    1200
ROM    1503
RUS    2500
RWA    1527
SAF    3531
SIN    1972
SLV    1069
SPN    1189
SWD    1206
TAW    1238
THI    1200
TRI     999
TUN    1205
TUR    1605
UKR    1500
URU    1000
USA    2232
UZB    1500
YEM    1000
ZIM    1500
Name: V3, dtype: int64

In [9]:
# how many columns with null values?
wvs_w6.isna().sum().count()

442

In [10]:
# another approach for finding the number of columns with null values
#sum(map(any, wvs_w6.isnull()))

In [11]:
# how many null values in each column?
wvs_w6.isna().sum()

V1                     0
V2                     0
V2A                    0
cow                    0
C_COW_ALPHA            0
B_COUNTRY_ALPHA        0
V3                     0
V4                   341
V5                   545
V6                  1035
V7                  1620
V8                  1519
V9                  1348
V10                  758
V11                  326
V12                   14
V13                    6
V14                    7
V15                   17
V16                    9
V17                   13
V18                   12
V19                   18
V20                   15
V21                   19
V22                    3
V23                  586
V24                 2388
V25                  840
V26                  923
V27                  993
V28                 3211
V29                 3129
V30                 1118
V31                 2223
V32                 1055
V33                 1214
V34                 1197
V35                 9696
V36                 3979


In [12]:
# calculate proportion missing
pct_missing = pd.DataFrame(wvs_w6.isna().sum())
pct_missing = pct_missing.reset_index().rename(columns = {'index':'variable', 0:'NA_count'})
pct_missing

Unnamed: 0,variable,NA_count
0,V1,0
1,V2,0
2,V2A,0
3,cow,0
4,C_COW_ALPHA,0
5,B_COUNTRY_ALPHA,0
6,V3,0
7,V4,341
8,V5,545
9,V6,1035


In [13]:
pct_missing['NA_pct'] = (pct_missing['NA_count'] / len(wvs_w6)) * 100
pct_missing['NA_pct'] = pct_missing['NA_pct'].round(decimals=2)
pct_missing

Unnamed: 0,variable,NA_count,NA_pct
0,V1,0,0.0
1,V2,0,0.0
2,V2A,0,0.0
3,cow,0,0.0
4,C_COW_ALPHA,0,0.0
5,B_COUNTRY_ALPHA,0,0.0
6,V3,0,0.0
7,V4,341,0.38
8,V5,545,0.61
9,V6,1035,1.16


In [14]:
# how many columns contain missing values?
print(np.count_nonzero(wvs_w6.isna().sum() != 0), 'columns contain missing values')

# how many columns do not?
print(np.count_nonzero(wvs_w6.isna().sum() == 0), 'columns DO NOT contain missing values')

414 columns contain missing values
28 columns DO NOT contain missing values


#### Observations so far:
This data set contains 28 columns without any missing information, and 414 columns (out of 442) that contain at least 1 missing value.

#### Next steps:
- figure out if these missing values matter for my analyses
- decide what to do about missing values

In [15]:
# list columns containing missing data
wvs_w6.columns[wvs_w6.isna().any()].tolist()

['V4',
 'V5',
 'V6',
 'V7',
 'V8',
 'V9',
 'V10',
 'V11',
 'V12',
 'V13',
 'V14',
 'V15',
 'V16',
 'V17',
 'V18',
 'V19',
 'V20',
 'V21',
 'V22',
 'V23',
 'V24',
 'V25',
 'V26',
 'V27',
 'V28',
 'V29',
 'V30',
 'V31',
 'V32',
 'V33',
 'V34',
 'V35',
 'V36',
 'V37',
 'V38',
 'V39',
 'V40',
 'V41',
 'V42',
 'V43',
 'V44',
 'V44_ES',
 'V45',
 'V46',
 'V47',
 'V48',
 'V49',
 'V50',
 'V51',
 'V52',
 'V53',
 'V54',
 'V55',
 'V56',
 'V56_NZ',
 'V57',
 'V58',
 'V59',
 'V60',
 'V61',
 'V62',
 'V63',
 'V64',
 'V65',
 'V66',
 'V67',
 'V68',
 'V69',
 'V70',
 'V71',
 'V72',
 'V73',
 'V74',
 'V74B',
 'V75',
 'V76',
 'V77',
 'V78',
 'V79',
 'V80',
 'V81',
 'V82',
 'V83',
 'V84',
 'V85',
 'V86',
 'V87',
 'V88',
 'V89',
 'V90',
 'V91',
 'V92',
 'V93',
 'V94',
 'V95',
 'V96',
 'V97',
 'V98',
 'V99',
 'V100',
 'V101',
 'V102',
 'V103',
 'V104',
 'V105',
 'V106',
 'V107',
 'V108',
 'V109',
 'V110',
 'V111',
 'V112',
 'V113',
 'V114',
 'V115',
 'V116',
 'V117',
 'V118',
 'V119',
 'V120',
 'V121',
 'V122',


In [16]:
# view variables containing missing values and % missing
pct_missing[pct_missing['NA_count'] != 0]

Unnamed: 0,variable,NA_count,NA_pct
7,V4,341,0.38
8,V5,545,0.61
9,V6,1035,1.16
10,V7,1620,1.81
11,V8,1519,1.7
12,V9,1348,1.51
13,V10,758,0.85
14,V11,326,0.36
15,V12,14,0.02
16,V13,6,0.01


In [17]:
# create list of variables where pct missing is 50% or higher
half_missing = pct_missing[pct_missing['NA_pct'] >= 50.00]['variable'].tolist()
half_missing

['V44_ES',
 'V56_NZ',
 'V74B',
 'V90',
 'V91',
 'V92',
 'V93',
 'V94',
 'V125_00',
 'V125_01',
 'V125_02',
 'V125_03',
 'V125_04',
 'V125_05',
 'V125_06',
 'V125_07',
 'V125_08',
 'V125_09',
 'V125_10',
 'V125_11',
 'V125_12',
 'V125_13',
 'V125_14',
 'V125_15',
 'V160A',
 'V160B',
 'V160C',
 'V160D',
 'V160E',
 'V160F',
 'V160G',
 'V160H',
 'V160I',
 'V160J',
 'V207A',
 'V215_01',
 'V215_02',
 'V215_03',
 'V215_04',
 'V215_05',
 'V215_06',
 'V215_07',
 'V215_08',
 'V215_10',
 'V215_11',
 'V215_12',
 'V215_13',
 'V215_14',
 'V215_15',
 'V215_16',
 'V215_17',
 'V215_18',
 'V218_ESMA',
 'V217_ESMA',
 'V219_ESMA',
 'V220_ESMA',
 'V221_ESMA',
 'V222_ESMA',
 'V223_ESMA',
 'V224_ESMA',
 'V228_2',
 'V243_AU',
 'V244_AU',
 'V248_CS',
 'V253_CS',
 'V256B',
 'V256C',
 'V265',
 'MN_35A',
 'MN_163A',
 'MN_163B',
 'MN_163C',
 'MN_228L',
 'MN_228M',
 'MN_228N',
 'MN_228O',
 'MN_228P',
 'MN_228Q',
 'MN_228R',
 'MN_228S1',
 'MN_228S2',
 'MN_228S3',
 'MN_228S4',
 'MN_228S5',
 'MN_228S6',
 'MN_228S7',
 

### Options for handling missing data:  
Advice from https://heartbeat.fritz.ai/data-handling-scenarios-part-2-working-with-missing-values-in-a-dataset-34b758cfc9fa and https://analyticsindiamag.com/5-ways-handle-missing-values-machine-learning-datasets/  
  
**Drop rows and columns with missing data**  
1. pros: 
  - quick and easy approach 
2. cons: 
  - could mean losing important information about the data along with the missing values
  - when using your model in production, the model will not automatically know how to handle missing data
3. when this makes sense: 
  - rows containing NULL values (missing values) are around 5% (or less) of the total data
  - NULL values in columns are significantly more than the other values present; it wouldn’t make sense to keep these, as they hold little or no descriptive information about the data  
  
**Mean/Median (numerical) & Mode (categorical) imputation**  
1. pros: 
  - easy to do
  - can be integrated into production or for a future unknown dataset
2. cons: 
  - distorts the distribution of the dataset
  - distorts the variance and covariance of the dataset
  - for mode imputation, may lead to an over-representation of the most frequent label if the missing values are quite large
3. when this makes sense: 
  - mean imputation works best for normally distributed distributions
  - median is better for skewed distributions 
  - mode imputation for categorical data works best if the missing values are missing at random
  - best to use this method when the missing values are around 5% (or less) of the total data
  
**Systematic Random Sampling Imputation**  
1. pros: 
  - does not distort variance or distribution 
2. cons: 
  - when replacing missing values in the test set as well, the imputed values from the train set will need to be stored in memory
3. when this makes sense: 
  - can be applied to both numerical and categorical variables
  - used when the values are missing at random
  - when we want to be able to reproduce the same value every time the variable is used (by using a random state)
  
### Remaining Questions:
Is "stratified" mean/median imputation an option?

In [18]:
# first step in droping columns with 50% or more missing observations
half_missing

['V44_ES',
 'V56_NZ',
 'V74B',
 'V90',
 'V91',
 'V92',
 'V93',
 'V94',
 'V125_00',
 'V125_01',
 'V125_02',
 'V125_03',
 'V125_04',
 'V125_05',
 'V125_06',
 'V125_07',
 'V125_08',
 'V125_09',
 'V125_10',
 'V125_11',
 'V125_12',
 'V125_13',
 'V125_14',
 'V125_15',
 'V160A',
 'V160B',
 'V160C',
 'V160D',
 'V160E',
 'V160F',
 'V160G',
 'V160H',
 'V160I',
 'V160J',
 'V207A',
 'V215_01',
 'V215_02',
 'V215_03',
 'V215_04',
 'V215_05',
 'V215_06',
 'V215_07',
 'V215_08',
 'V215_10',
 'V215_11',
 'V215_12',
 'V215_13',
 'V215_14',
 'V215_15',
 'V215_16',
 'V215_17',
 'V215_18',
 'V218_ESMA',
 'V217_ESMA',
 'V219_ESMA',
 'V220_ESMA',
 'V221_ESMA',
 'V222_ESMA',
 'V223_ESMA',
 'V224_ESMA',
 'V228_2',
 'V243_AU',
 'V244_AU',
 'V248_CS',
 'V253_CS',
 'V256B',
 'V256C',
 'V265',
 'MN_35A',
 'MN_163A',
 'MN_163B',
 'MN_163C',
 'MN_228L',
 'MN_228M',
 'MN_228N',
 'MN_228O',
 'MN_228P',
 'MN_228Q',
 'MN_228R',
 'MN_228S1',
 'MN_228S2',
 'MN_228S3',
 'MN_228S4',
 'MN_228S5',
 'MN_228S6',
 'MN_228S7',
 

In [19]:
len(half_missing)

110

#### Re-insert variables that shouldn't be dropped
- V74B (Schwartz value scale item)
- V125_00 - V125_15; use these to create a "confidence in political/economic union" variable (V125_C)
- V207A (euthanasia justifiable); curious to see if this could be imputed
- V215_01 - V215_18; use these to create a "sense of belonging to political/economic union" variable (V215_B)

In [20]:
# create a for loop to re-insert variables into the list that shouldn't be dropped (at this point)
# original list: high_missings_list
# new list
vars_to_keep = ['V56_NZ', 'V74B', 'V125_00', 'V125_01', 'V125_02', 'V125_03', 'V125_04', 'V125_05', 'V125_06', 'V125_07', 'V125_08',
 'V125_09', 'V125_10', 'V125_11', 'V125_12', 'V125_13', 'V125_14', 'V125_15', 'V207A', 'V215_01', 'V215_02', 'V215_03', 'V215_04',
 'V215_05', 'V215_06', 'V215_07', 'V215_08', 'V215_10', 'V215_11', 'V215_12', 'V215_13', 'V215_14', 'V215_15', 'V215_16',
 'V215_17', 'V215_18']

vars_to_drop = []

for variable in half_missing:
    if variable not in vars_to_keep:
        
        vars_to_drop.append(variable)

print(vars_to_drop)

['V44_ES', 'V90', 'V91', 'V92', 'V93', 'V94', 'V160A', 'V160B', 'V160C', 'V160D', 'V160E', 'V160F', 'V160G', 'V160H', 'V160I', 'V160J', 'V218_ESMA', 'V217_ESMA', 'V219_ESMA', 'V220_ESMA', 'V221_ESMA', 'V222_ESMA', 'V223_ESMA', 'V224_ESMA', 'V228_2', 'V243_AU', 'V244_AU', 'V248_CS', 'V253_CS', 'V256B', 'V256C', 'V265', 'MN_35A', 'MN_163A', 'MN_163B', 'MN_163C', 'MN_228L', 'MN_228M', 'MN_228N', 'MN_228O', 'MN_228P', 'MN_228Q', 'MN_228R', 'MN_228S1', 'MN_228S2', 'MN_228S3', 'MN_228S4', 'MN_228S5', 'MN_228S6', 'MN_228S7', 'MN_228S8', 'MN_229A', 'MN_229B', 'MN_230A', 'MN_233A', 'MN_233B', 'MN_234A', 'MN_237A', 'MN_237B1', 'MN_237B2', 'MN_237B3', 'MN_237B4', 'MN_237B5', 'MN_237B6', 'MN_237B7', 'MN_237C1', 'MN_237C2', 'MN_237C3', 'MN_237C4', 'MN_237C5', 'MN_237C6', 'MN_249A1', 'MN_249A2', 'MN_249A3']


In [21]:
len(vars_to_drop)

74

In [22]:
# view original imported data set
wvs_w6.head()

Unnamed: 0,V1,V2,V2A,cow,C_COW_ALPHA,B_COUNTRY_ALPHA,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V44_ES,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V56_NZ,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V90,V91,V92,V93,V94,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125_00,V125_01,V125_02,V125_03,V125_04,V125_05,V125_06,V125_07,V125_08,V125_09,V125_10,V125_11,V125_12,V125_13,V125_14,V125_15,V125_16,V125_17,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V144G,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V160A,V160B,V160C,V160D,V160E,V160F,V160G,V160H,V160I,V160J,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V203A,V204,V205,V206,V207,V207A,V208,V209,V210,V211,V212,V213,V214,V215_01,V215_02,V215_03,V215_04,V215_05,V215_06,V215_07,V215_08,V215_10,V215_11,V215_12,V215_13,V215_14,V215_15,V215_16,V215_17,V215_18,V216,V217,V218,V219,V220,V221,V222,V223,V224,V218_ESMA,V217_ESMA,V219_ESMA,V220_ESMA,V221_ESMA,V222_ESMA,V223_ESMA,V224_ESMA,V225,V226,V227,V228,V228_LOCAL,V228_2,V228A,V228B,V228C,V228D,V228E,V228F,V228G,V228H,V228I,V228J,V228K,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V241,V242,X003R2,X003R,V243,V243_AU,V244,V244_AU,V245,V246,V247,V248,V248_CS,V249,V250,V251,V252,V253,V253_CS,V254,V255,N_REGION_ISO,V256,V256B,V256C,V257,V258,V258A,S018,S019,V260,V261,V262,V265,Y001,Y001_1,Y001_2,Y001_3,Y001_4,Y001_5,Y002,Y003,MN_35A,MN_163A,MN_163B,MN_163C,MN_228L,MN_228M,MN_228N,MN_228O,MN_228P,MN_228Q,MN_228R,MN_228S1,MN_228S2,MN_228S3,MN_228S4,MN_228S5,MN_228S6,MN_228S7,MN_228S8,MN_229A,MN_229B,MN_230A,MN_233A,MN_233B,MN_234A,MN_237A,MN_237B1,MN_237B2,MN_237B3,MN_237B4,MN_237B5,MN_237B6,MN_237B7,MN_237C1,MN_237C2,MN_237C3,MN_237C4,MN_237C5,MN_237C6,MN_249A1,MN_249A2,MN_249A3,sacsecval,secvalwgt,resemaval,weightb,I_AUTHORITY,I_NATIONALISM,I_DEVOUT,defiance,WEIGHT1A,I_RELIGIMP,I_RELIGBEL,I_RELIGPRAC,disbelief,WEIGHT2A,I_NORM1,I_NORM2,I_NORM3,relativism,WEIGHT3A,I_TRUSTARMY,I_TRUSTPOLICE,I_TRUSTCOURTS,scepticism,WEIGHT4A,I_INDEP,I_IMAGIN,I_NONOBED,autonomy,WEIGHT1B,I_WOMJOB,I_WOMPOL,I_WOMEDU,equality,WEIGHT2B,I_HOMOLIB,I_ABORTLIB,I_DIVORLIB,choice,WEIGHT3B,I_VOICE1,I_VOICE2,I_VOI2_00,voice,WEIGHT4B
0,6,12,12,615,ALG,DZA,1,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,1.0,2.0,2.0,4.0,,,,,,,,,,,5.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,3.0,2.0,2.0,4.0,3.0,3.0,2.0,3.0,2.0,3.0,4.0,4.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,4.0,1.0,2.0,2.0,2.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,5.0,2.0,2.0,50000000.0,5.0,3.0,3.0,1.0,1.0,1.0,,,10.0,1.0,2.0,2.0,1.0,2.0,7.0,4.0,3.0,2.0,3.0,1.0,9.0,1.0,2.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,1.0,2.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,3.0,4.0,2.0,2.0,3.0,4.0,2.0,4.0,,,,,,,,,2.0,3.0,3.0,12005.0,12005.0,,3.0,4.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,2.0,1.0,6.0,,,,,,2.0,1.0,1.0,4.0,5.0,1.0,1993.0,21.0,1.0,1.0,2.0,,2.0,,1.0,1.0,110.0,7.0,,24.0,1.0,2.0,2.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,2.0,1,0,0,1,0,1.0,0.0,0.0,2.0,1.0,2.0,3.0,5.0,4.0,2.0,2.0,3.0,2.0,2.0,4.0,2.0,3.0,3.0,3.0,2.0,3.0,,,,,,,2.0,,,,,,,,,,,,,,1.0,2.0,1.0,0.221111,1.0,0.164352,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.333333,0.111111,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,0.33,0.33,0.44,1.0,1.0,0.0,0.0,0.333333,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.222222,0.074074,1.0,0.0,0.5,0.25,0.25,1.0
1,6,12,12,615,ALG,DZA,2,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,1.0,,1.0,1.0,2.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,3.0,,,,,,,,,,,5.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,3.0,1.0,3.0,2.0,1.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,4.0,3.0,1.0,50000000.0,5.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,1.0,4.0,6.0,4.0,4.0,1.0,2.0,3.0,8.0,2.0,3.0,2.0,1.0,2.0,2.0,4.0,4.0,2.0,3.0,2.0,4.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,4.0,3.0,4.0,2.0,3.0,3.0,,,,,,,,,3.0,2.0,3.0,12003.0,12003.0,,3.0,4.0,4.0,3.0,4.0,3.0,3.0,4.0,4.0,1.0,1.0,6.0,,,,,,2.0,2.0,2.0,3.0,6.0,2.0,1990.0,24.0,1.0,1.0,2.0,,2.0,,1.0,1.0,110.0,7.0,,25.0,1.0,1.0,2.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,2.0,0,1,0,0,1,2.0,-1.0,0.0,2.0,3.0,2.0,3.0,5.0,4.0,6.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,,,,,,,2.0,,,,,,,,,,,,,,2.0,,,0.276389,1.0,0.11,1.0,0.5,0.33,0.33,0.386667,1.0,0.33,0.0,0.166667,0.165556,1.0,1.0,0.0,0.0,0.333333,1.0,0.0,0.66,0.0,0.22,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.33,0.11,1.0,0.0,0.0,0.0,0.0,1.0,0.66,0.0,0.33,0.33,1.0
2,6,12,12,615,ALG,DZA,3,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,,1.0,,2.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,,,3.0,3.0,3.0,,,,,,6.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,3.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,2.0,1.0,3.0,2.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,7.0,2.0,1.0,50000000.0,5.0,2.0,4.0,1.0,1.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,7.0,4.0,4.0,0.0,2.0,2.0,6.0,2.0,3.0,1.0,2.0,4.0,2.0,3.0,2.0,4.0,3.0,3.0,2.0,4.0,3.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,,1.0,4.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,3.0,2.0,,,,,2.0,,,,,,,,,,,,,4.0,3.0,4.0,2.0,2.0,4.0,3.0,1.0,1.0,,,,,,,,,2.0,2.0,3.0,,,,4.0,4.0,3.0,4.0,2.0,3.0,2.0,4.0,4.0,2.0,1.0,3.0,2.0,7.0,6.0,5.0,2.0,1.0,,1.0,4.0,6.0,2.0,1988.0,26.0,1.0,2.0,2.0,,2.0,,1.0,1.0,500.0,5.0,,18.0,1.0,1.0,1.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,1.0,0,0,1,0,0,2.0,-2.0,0.0,1.0,3.0,2.0,3.0,6.0,4.0,5.0,1.0,2.0,2.0,2.0,3.0,2.0,4.0,3.0,2.0,4.0,4.0,1.0,1.0,1.0,2.0,3.0,3.0,2.0,,,,,,,,,,,,,,1.0,2.0,1.0,0.304722,1.0,0.152361,1.0,0.5,0.0,0.0,0.166667,1.0,0.0,0.0,0.166667,0.055556,1.0,1.0,0.0,0.0,0.333333,1.0,0.33,1.0,0.66,0.663333,1.0,0.0,1.0,0.0,0.333333,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.333333,0.111111,1.0,0.33,0.0,0.165,0.165,1.0
3,6,12,12,615,ALG,DZA,4,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,,2.0,,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,6.0,8.0,,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,,,,,,,,,,,6.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,3.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,2.0,3.0,1.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,8.0,3.0,2.0,50000000.0,5.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,2.0,3.0,7.0,6.0,4.0,1.0,3.0,3.0,7.0,3.0,1.0,2.0,3.0,3.0,2.0,3.0,4.0,1.0,2.0,4.0,2.0,2.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,4.0,4.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,1.0,2.0,3.0,4.0,3.0,3.0,,,,,,,,,2.0,2.0,3.0,,,,2.0,4.0,3.0,3.0,2.0,3.0,2.0,2.0,2.0,1.0,3.0,1.0,1.0,8.0,5.0,6.0,2.0,1.0,,4.0,4.0,5.0,2.0,1986.0,28.0,1.0,2.0,2.0,,2.0,,1.0,1.0,500.0,6.0,,14.0,2.0,2.0,1.0,1.0,,12003.0,1.0,12005.0,12105.0,,,500.0,1.0,1.0,0.833333,1.25,5,,2014,,1.0,0,0,0,0,1,1.0,-1.0,0.0,2.0,2.0,2.0,3.0,7.0,5.0,6.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,4.0,3.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,,,,,,,,,,,,,,1.0,2.0,2.0,0.263056,1.0,0.096667,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.166667,0.055556,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,1.0,0.33,0.663333,1.0,0.0,0.0,0.0,0.0,1.0,0.5,0.33,0.33,0.386667,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,6,12,12,615,ALG,DZA,5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,,1.0,,1.0,1.0,1.0,2.0,1.0,3.0,2.0,2.0,6.0,6.0,,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,2.0,,,,,,,,,,,5.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,3.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,2.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,7.0,3.0,2.0,50000000.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,4.0,4.0,3.0,1.0,3.0,2.0,8.0,2.0,3.0,3.0,3.0,3.0,2.0,4.0,3.0,3.0,2.0,4.0,3.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,3.0,3.0,4.0,2.0,2.0,4.0,3.0,2.0,3.0,,,,,,,,,2.0,3.0,3.0,2.0,2.0,,3.0,3.0,4.0,2.0,3.0,3.0,4.0,3.0,3.0,1.0,2.0,3.0,2.0,5.0,6.0,9.0,2.0,2.0,1.0,2.0,3.0,7.0,2.0,1979.0,35.0,2.0,3.0,2.0,,2.0,,1.0,1.0,500.0,3.0,,8.0,1.0,1.0,1.0,1.0,,12003.0,1.0,12005.0,12105.0,,,110.0,1.0,1.0,0.833333,1.25,5,,2014,,2.0,0,1,0,0,1,2.0,1.0,0.0,2.0,3.0,3.0,3.0,4.0,7.0,4.0,2.0,2.0,3.0,2.0,3.0,1.0,4.0,3.0,1.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,,,,,,,,,,,,,,1.0,2.0,2.0,0.220833,1.0,0.239352,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,0.66,0.33,0.55,1.0,0.0,0.0,1.0,0.333333,1.0,0.0,0.0,0.66,0.22,1.0,0.0,0.0,0.222222,0.074074,1.0,0.66,0.0,0.33,0.33,1.0


In [23]:
# drop variables in the vars_to_drop list
cleaning_w6 = wvs_w6.drop(columns=vars_to_drop)

In [24]:
# check out current freq of missing values
cleaning_w6.isna().sum()

V1                     0
V2                     0
V2A                    0
cow                    0
C_COW_ALPHA            0
B_COUNTRY_ALPHA        0
V3                     0
V4                   341
V5                   545
V6                  1035
V7                  1620
V8                  1519
V9                  1348
V10                  758
V11                  326
V12                   14
V13                    6
V14                    7
V15                   17
V16                    9
V17                   13
V18                   12
V19                   18
V20                   15
V21                   19
V22                    3
V23                  586
V24                 2388
V25                  840
V26                  923
V27                  993
V28                 3211
V29                 3129
V30                 1118
V31                 2223
V32                 1055
V33                 1214
V34                 1197
V35                 9696
V36                 3979


In [25]:
# peek at data to ensure that correct columns are being dropped so far
cleaning_w6.head()

Unnamed: 0,V1,V2,V2A,cow,C_COW_ALPHA,B_COUNTRY_ALPHA,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V56_NZ,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125_00,V125_01,V125_02,V125_03,V125_04,V125_05,V125_06,V125_07,V125_08,V125_09,V125_10,V125_11,V125_12,V125_13,V125_14,V125_15,V125_16,V125_17,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V144G,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V203A,V204,V205,V206,V207,V207A,V208,V209,V210,V211,V212,V213,V214,V215_01,V215_02,V215_03,V215_04,V215_05,V215_06,V215_07,V215_08,V215_10,V215_11,V215_12,V215_13,V215_14,V215_15,V215_16,V215_17,V215_18,V216,V217,V218,V219,V220,V221,V222,V223,V224,V225,V226,V227,V228,V228_LOCAL,V228A,V228B,V228C,V228D,V228E,V228F,V228G,V228H,V228I,V228J,V228K,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V241,V242,X003R2,X003R,V243,V244,V245,V246,V247,V248,V249,V250,V251,V252,V253,V254,V255,N_REGION_ISO,V256,V257,V258,V258A,S018,S019,V260,V261,V262,Y001,Y001_1,Y001_2,Y001_3,Y001_4,Y001_5,Y002,Y003,sacsecval,secvalwgt,resemaval,weightb,I_AUTHORITY,I_NATIONALISM,I_DEVOUT,defiance,WEIGHT1A,I_RELIGIMP,I_RELIGBEL,I_RELIGPRAC,disbelief,WEIGHT2A,I_NORM1,I_NORM2,I_NORM3,relativism,WEIGHT3A,I_TRUSTARMY,I_TRUSTPOLICE,I_TRUSTCOURTS,scepticism,WEIGHT4A,I_INDEP,I_IMAGIN,I_NONOBED,autonomy,WEIGHT1B,I_WOMJOB,I_WOMPOL,I_WOMEDU,equality,WEIGHT2B,I_HOMOLIB,I_ABORTLIB,I_DIVORLIB,choice,WEIGHT3B,I_VOICE1,I_VOICE2,I_VOI2_00,voice,WEIGHT4B
0,6,12,12,615,ALG,DZA,1,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,1.0,2.0,2.0,4.0,,,,,,5.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,3.0,2.0,2.0,4.0,3.0,3.0,2.0,3.0,2.0,3.0,4.0,4.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,4.0,1.0,2.0,2.0,2.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,5.0,2.0,2.0,50000000.0,5.0,3.0,3.0,1.0,1.0,1.0,,,10.0,1.0,2.0,2.0,1.0,2.0,7.0,4.0,3.0,2.0,3.0,1.0,9.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,1.0,2.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,3.0,4.0,2.0,2.0,3.0,4.0,2.0,4.0,2.0,3.0,3.0,12005.0,12005.0,3.0,4.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,2.0,1.0,6.0,,,,,,2.0,1.0,1.0,4.0,5.0,1.0,1993.0,21.0,1.0,1.0,2.0,2.0,1.0,1.0,110.0,7.0,24.0,1.0,2.0,2.0,1.0,12003.0,1.0,12005.0,12105.0,110.0,1.0,1.0,0.833333,1.25,5,,2014,2.0,1,0,0,1,0,1.0,0.0,0.221111,1.0,0.164352,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.333333,0.111111,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,0.33,0.33,0.44,1.0,1.0,0.0,0.0,0.333333,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.222222,0.074074,1.0,0.0,0.5,0.25,0.25,1.0
1,6,12,12,615,ALG,DZA,2,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,,1.0,1.0,2.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,3.0,,,,,,5.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,3.0,1.0,3.0,2.0,1.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,4.0,3.0,1.0,50000000.0,5.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,1.0,4.0,6.0,4.0,4.0,1.0,2.0,3.0,8.0,2.0,3.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,4.0,3.0,4.0,2.0,3.0,3.0,3.0,2.0,3.0,12003.0,12003.0,3.0,4.0,4.0,3.0,4.0,3.0,3.0,4.0,4.0,1.0,1.0,6.0,,,,,,2.0,2.0,2.0,3.0,6.0,2.0,1990.0,24.0,1.0,1.0,2.0,2.0,1.0,1.0,110.0,7.0,25.0,1.0,1.0,2.0,1.0,12003.0,1.0,12005.0,12105.0,110.0,1.0,1.0,0.833333,1.25,5,,2014,2.0,0,1,0,0,1,2.0,-1.0,0.276389,1.0,0.11,1.0,0.5,0.33,0.33,0.386667,1.0,0.33,0.0,0.166667,0.165556,1.0,1.0,0.0,0.0,0.333333,1.0,0.0,0.66,0.0,0.22,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.33,0.11,1.0,0.0,0.0,0.0,0.0,1.0,0.66,0.0,0.33,0.33,1.0
2,6,12,12,615,ALG,DZA,3,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,,2.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,,,3.0,3.0,3.0,6.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,3.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,2.0,1.0,3.0,2.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,7.0,2.0,1.0,50000000.0,5.0,2.0,4.0,1.0,1.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,7.0,4.0,4.0,0.0,2.0,2.0,6.0,2.0,3.0,1.0,2.0,4.0,2.0,2.0,2.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,,1.0,4.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,3.0,2.0,,,,,2.0,,,,,,,,,,,,,4.0,3.0,4.0,2.0,2.0,4.0,3.0,1.0,1.0,2.0,2.0,3.0,,,4.0,4.0,3.0,4.0,2.0,3.0,2.0,4.0,4.0,2.0,1.0,3.0,2.0,7.0,6.0,5.0,2.0,1.0,,1.0,4.0,6.0,2.0,1988.0,26.0,1.0,2.0,2.0,2.0,1.0,1.0,500.0,5.0,18.0,1.0,1.0,1.0,1.0,12003.0,1.0,12005.0,12105.0,110.0,1.0,1.0,0.833333,1.25,5,,2014,1.0,0,0,1,0,0,2.0,-2.0,0.304722,1.0,0.152361,1.0,0.5,0.0,0.0,0.166667,1.0,0.0,0.0,0.166667,0.055556,1.0,1.0,0.0,0.0,0.333333,1.0,0.33,1.0,0.66,0.663333,1.0,0.0,1.0,0.0,0.333333,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.333333,0.111111,1.0,0.33,0.0,0.165,0.165,1.0
3,6,12,12,615,ALG,DZA,4,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,6.0,8.0,,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,,,,,,6.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,3.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,2.0,3.0,1.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,8.0,3.0,2.0,50000000.0,5.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,2.0,3.0,7.0,6.0,4.0,1.0,3.0,3.0,7.0,3.0,1.0,2.0,3.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,4.0,4.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,1.0,2.0,3.0,4.0,3.0,3.0,2.0,2.0,3.0,,,2.0,4.0,3.0,3.0,2.0,3.0,2.0,2.0,2.0,1.0,3.0,1.0,1.0,8.0,5.0,6.0,2.0,1.0,,4.0,4.0,5.0,2.0,1986.0,28.0,1.0,2.0,2.0,2.0,1.0,1.0,500.0,6.0,14.0,2.0,2.0,1.0,1.0,12003.0,1.0,12005.0,12105.0,500.0,1.0,1.0,0.833333,1.25,5,,2014,1.0,0,0,0,0,1,1.0,-1.0,0.263056,1.0,0.096667,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.166667,0.055556,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,1.0,0.33,0.663333,1.0,0.0,0.0,0.0,0.0,1.0,0.5,0.33,0.33,0.386667,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,6,12,12,615,ALG,DZA,5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,,1.0,1.0,1.0,2.0,1.0,3.0,2.0,2.0,6.0,6.0,,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,2.0,,,,,,5.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,3.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,2.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,7.0,3.0,2.0,50000000.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,4.0,4.0,3.0,1.0,3.0,2.0,8.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,3.0,3.0,4.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,2.0,3.0,3.0,4.0,2.0,3.0,3.0,4.0,3.0,3.0,1.0,2.0,3.0,2.0,5.0,6.0,9.0,2.0,2.0,1.0,2.0,3.0,7.0,2.0,1979.0,35.0,2.0,3.0,2.0,2.0,1.0,1.0,500.0,3.0,8.0,1.0,1.0,1.0,1.0,12003.0,1.0,12005.0,12105.0,110.0,1.0,1.0,0.833333,1.25,5,,2014,2.0,0,1,0,0,1,2.0,1.0,0.220833,1.0,0.239352,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.333333,1.0,0.66,0.66,0.33,0.55,1.0,0.0,0.0,1.0,0.333333,1.0,0.0,0.0,0.66,0.22,1.0,0.0,0.0,0.222222,0.074074,1.0,0.66,0.0,0.33,0.33,1.0


In [26]:
# original shape for comparison
wvs_w6.shape

(89565, 442)

In [27]:
# current shape so far
cleaning_w6.shape

(89565, 368)

In [28]:
# Columns to drop
# drop a handful of item scales at the end of the data set since these come from items that appear earlier in the survey
# also dropping their associated weights
cleaning_w6 = cleaning_w6.drop(['Y001_1', 'Y001_2', 'Y001_3', 'Y001_4', 'Y001_5', 'Y001', 'Y002', 'Y003', 'sacsecval',
 'secvalwgt', 'resemaval', 'weightb', 'I_AUTHORITY', 'I_NATIONALISM', 'I_DEVOUT', 'defiance', 'I_RELIGIMP', 'I_RELIGBEL',
 'I_RELIGPRAC', 'disbelief', 'I_NORM1', 'I_NORM2', 'I_NORM3', 'relativism', 'I_TRUSTARMY', 'I_TRUSTPOLICE', 'I_TRUSTCOURTS',
 'scepticism', 'I_INDEP', 'I_IMAGIN', 'I_NONOBED',  'autonomy',  'I_WOMJOB', 'I_WOMPOL', 'I_WOMEDU', 'equality',
 'I_HOMOLIB', 'I_ABORTLIB', 'I_DIVORLIB', 'choice', 'I_VOICE1', 'I_VOICE2', 'I_VOI2_00', 'voice', 'WEIGHT1A',  'WEIGHT2A', 
 'WEIGHT3A',  'WEIGHT4A', 'WEIGHT1B',  'WEIGHT2B',  'WEIGHT3B', 'WEIGHT4B'], axis=1)

In [29]:
cleaning_w6.shape

(89565, 316)

In [30]:
# dropping interviewer observations and other items with high proportion of missings
cleaning_w6 = cleaning_w6.drop(['V228', # items assessing country-specific political party preferences
 'V228_LOCAL', 'V228A', 'V228B', 'V228C', 'V228D', 'V228E', 'V228F', 'V228G', 'V228H', 'V228I', 'V228J', 'V228K',

# keep these employment, income, and savings items
# 'V230', 'V231', 'V232', 'V233', 'V234', 'V236', 'V237', 'V239' (# scale of incomes)
                                
 'V241', # year of birth
                                
# these X003 items are age groups; drop them since I'll use and scale the actual age values (from V242) anyway
 'X003R2', 'X003R',
                                
# keep these immigrant status items
# 'V243', 'V244', 'V245', 'V246', 'V247'
# keeping highest education level ('V248') 
                                
 'V249',
# 'V250', # respondent lives with parents
# items assessing whether respondent was interested; would be nice/interesting to include, but missings here would be
# difficult to impute
 'V251',
 'V252',
 'V253', # size of town; interesting, but arguably hard to impute missing values
 'V254', # ethnic group
 'V255', # literacy; difficult to impute
                                
# region where interview was conducted
 'N_REGION_ISO',
 'V256', 
 'V257', # interview language
 'V260', # questionnaire version
 'V261'], # date of interview
axis=1)

In [31]:
print(cleaning_w6.shape)

(89565, 289)


In [32]:
cleaning_w6.head()

Unnamed: 0,V1,V2,V2A,cow,C_COW_ALPHA,B_COUNTRY_ALPHA,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V56_NZ,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125_00,V125_01,V125_02,V125_03,V125_04,V125_05,V125_06,V125_07,V125_08,V125_09,V125_10,V125_11,V125_12,V125_13,V125_14,V125_15,V125_16,V125_17,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V144G,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V203A,V204,V205,V206,V207,V207A,V208,V209,V210,V211,V212,V213,V214,V215_01,V215_02,V215_03,V215_04,V215_05,V215_06,V215_07,V215_08,V215_10,V215_11,V215_12,V215_13,V215_14,V215_15,V215_16,V215_17,V215_18,V216,V217,V218,V219,V220,V221,V222,V223,V224,V225,V226,V227,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V242,V243,V244,V245,V246,V247,V248,V250,V258,V258A,S018,S019,V262
0,6,12,12,615,ALG,DZA,1,1.0,1.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,,6.0,0.0,10.0,2.0,3.0,1.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,1.0,2.0,2.0,4.0,,,,,,5.0,8.0,7.0,6.0,8.0,7.0,5.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,3.0,2.0,3.0,2.0,2.0,4.0,3.0,3.0,2.0,3.0,2.0,3.0,4.0,4.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,4.0,1.0,2.0,2.0,2.0,3.0,8.0,5.0,6.0,9.0,3.0,4.0,7.0,6.0,7.0,5.0,2.0,2.0,50000000.0,5.0,3.0,3.0,1.0,1.0,1.0,,,10.0,1.0,2.0,2.0,1.0,2.0,7.0,4.0,3.0,2.0,3.0,1.0,9.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,1.0,5.0,1.0,2.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,7.0,8.0,3.0,5.0,6.0,9.0,6.0,6.0,1.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,6.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,3.0,4.0,2.0,2.0,3.0,4.0,2.0,4.0,2.0,3.0,3.0,6.0,,,,,,2.0,1.0,1.0,4.0,5.0,1.0,21.0,2.0,2.0,1.0,1.0,110.0,7.0,1.0,1.0,1.0,0.833333,1.25,2014
1,6,12,12,615,ALG,DZA,2,1.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,,1.0,1.0,2.0,2.0,1.0,2.0,3.0,2.0,6.0,8.0,,6.0,0.0,10.0,2.0,1.0,2.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,3.0,,,,,,5.0,7.0,5.0,5.0,4.0,4.0,6.0,1.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0,,,,4.0,,,,,,,,,,,,,0.0,0.0,3.0,1.0,3.0,2.0,1.0,2.0,8.0,8.0,8.0,9.0,2.0,6.0,4.0,2.0,4.0,4.0,3.0,1.0,50000000.0,5.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,10.0,1.0,1.0,1.0,1.0,4.0,6.0,4.0,4.0,1.0,2.0,3.0,8.0,2.0,3.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,8.0,4.0,6.0,4.0,8.0,3.0,4.0,7.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,3.0,5.0,1.0,2.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,4.0,3.0,4.0,2.0,3.0,3.0,3.0,2.0,3.0,6.0,,,,,,2.0,2.0,2.0,3.0,6.0,2.0,24.0,2.0,2.0,1.0,1.0,110.0,7.0,1.0,1.0,1.0,0.833333,1.25,2014
2,6,12,12,615,ALG,DZA,3,1.0,3.0,2.0,4.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,,2.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,8.0,,6.0,0.0,6.0,2.0,4.0,1.0,2.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,4.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,,,3.0,3.0,3.0,6.0,7.0,7.0,7.0,5.0,7.0,5.0,1.0,3.0,3.0,4.0,4.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,2.0,2.0,3.0,4.0,3.0,2.0,3.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,2.0,1.0,3.0,2.0,2.0,2.0,7.0,4.0,8.0,3.0,3.0,6.0,9.0,5.0,6.0,7.0,2.0,1.0,50000000.0,5.0,2.0,4.0,1.0,1.0,1.0,2.0,1.0,6.0,2.0,3.0,1.0,2.0,2.0,7.0,4.0,4.0,0.0,2.0,2.0,6.0,2.0,3.0,1.0,2.0,4.0,2.0,2.0,2.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,2.0,3.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,4.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,,1.0,4.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,3.0,2.0,,,,,2.0,,,,,,,,,,,,,4.0,3.0,4.0,2.0,2.0,4.0,3.0,1.0,1.0,2.0,2.0,3.0,3.0,2.0,7.0,6.0,5.0,2.0,1.0,,1.0,4.0,6.0,2.0,26.0,2.0,2.0,1.0,1.0,500.0,5.0,1.0,1.0,1.0,0.833333,1.25,2014
3,6,12,12,615,ALG,DZA,4,1.0,1.0,3.0,4.0,3.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,6.0,8.0,,6.0,0.0,6.0,2.0,1.0,3.0,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,3.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,,,,,,6.0,9.0,5.0,6.0,4.0,6.0,8.0,1.0,3.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,3.0,4.0,2.0,3.0,3.0,4.0,2.0,2.0,2.0,3.0,1.0,2.0,4.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,3.0,2.0,7.0,9.0,5.0,5.0,7.0,3.0,8.0,7.0,8.0,8.0,3.0,2.0,50000000.0,5.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,10.0,2.0,3.0,4.0,2.0,3.0,7.0,6.0,4.0,1.0,3.0,3.0,7.0,3.0,1.0,2.0,3.0,3.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,3.0,3.0,4.0,4.0,2.0,2.0,3.0,3.0,3.0,6.0,6.0,3.0,5.0,5.0,7.0,4.0,6.0,1.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,2.0,2.0,3.0,1.0,2.0,3.0,4.0,3.0,3.0,2.0,2.0,3.0,1.0,1.0,8.0,5.0,6.0,2.0,1.0,,4.0,4.0,5.0,2.0,28.0,2.0,2.0,1.0,1.0,500.0,6.0,2.0,1.0,1.0,0.833333,1.25,2014
4,6,12,12,615,ALG,DZA,5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,,1.0,1.0,1.0,2.0,1.0,3.0,2.0,2.0,6.0,6.0,,1.0,3.0,4.0,2.0,1.0,2.0,3.0,4.0,2.0,1.0,2.0,1.0,1.0,2.0,5.0,1.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,1.0,2.0,2.0,2.0,2.0,,,,,,5.0,8.0,4.0,7.0,4.0,6.0,6.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,3.0,3.0,3.0,2.0,3.0,3.0,2.0,4.0,3.0,2.0,,,,3.0,,,,,,,,,,,,,0.0,0.0,3.0,2.0,3.0,4.0,2.0,2.0,8.0,4.0,7.0,3.0,3.0,8.0,6.0,5.0,6.0,7.0,3.0,2.0,50000000.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,2.0,3.0,2.0,2.0,2.0,4.0,4.0,3.0,1.0,3.0,2.0,8.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,5.0,5.0,5.0,5.0,5.0,2.0,3.0,3.0,4.0,4.0,4.0,2.0,3.0,3.0,3.0,3.0,6.0,2.0,4.0,4.0,6.0,6.0,6.0,5.0,7.0,1.0,1.0,1.0,,1.0,3.0,,1.0,1.0,4.0,5.0,1.0,1.0,2.0,2.0,2.0,,,,,2.0,,,,,,,,,,,,,3.0,3.0,4.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,2.0,5.0,6.0,9.0,2.0,2.0,1.0,2.0,3.0,7.0,2.0,35.0,2.0,2.0,1.0,1.0,500.0,3.0,1.0,1.0,1.0,0.833333,1.25,2014


In [33]:
cleaning_w6.describe()

Unnamed: 0,V1,V2,V2A,cow,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V56_NZ,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125_00,V125_01,V125_02,V125_03,V125_04,V125_05,V125_06,V125_07,V125_08,V125_09,V125_10,V125_11,V125_12,V125_13,V125_14,V125_15,V125_16,V125_17,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V144G,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V203A,V204,V205,V206,V207,V207A,V208,V209,V210,V211,V212,V213,V214,V215_01,V215_02,V215_03,V215_04,V215_05,V215_06,V215_07,V215_08,V215_10,V215_11,V215_12,V215_13,V215_14,V215_15,V215_16,V215_17,V215_18,V216,V217,V218,V219,V220,V221,V222,V223,V224,V225,V226,V227,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V242,V243,V244,V245,V246,V247,V248,V250,V258,V258A,S018,S019,V262
count,89565.0,89565.0,89565.0,89565.0,89565.0,89224.0,89020.0,88530.0,87945.0,88046.0,88217.0,88807.0,89239.0,89551.0,89559.0,89558.0,89548.0,89556.0,89552.0,89553.0,89547.0,89550.0,89546.0,89562.0,88979.0,87177.0,88725.0,88642.0,88572.0,86354.0,86436.0,88447.0,87342.0,88510.0,88351.0,88368.0,79869.0,85586.0,88024.0,85583.0,88019.0,84284.0,86724.0,85591.0,86724.0,89545.0,87949.0,81958.0,86109.0,86599.0,86372.0,85517.0,85467.0,86960.0,85653.0,84909.0,88117.0,86839.0,783.0,89321.0,87898.0,88917.0,86284.0,83948.0,86628.0,84730.0,86753.0,85039.0,80660.0,86291.0,86099.0,85781.0,86588.0,86970.0,87474.0,87079.0,83963.0,39887.0,86721.0,86433.0,87229.0,87239.0,87444.0,88450.0,83017.0,88510.0,88214.0,88568.0,78941.0,76604.0,78775.0,78248.0,69604.0,68199.0,86680.0,84267.0,87312.0,86411.0,87307.0,84963.0,88110.0,87502.0,87610.0,86435.0,82609.0,82376.0,86638.0,85168.0,86782.0,87546.0,78395.0,87487.0,86227.0,86387.0,83212.0,85068.0,83910.0,84193.0,83097.0,85542.0,82017.0,80238.0,82736.0,16200.0,4549.0,2694.0,1683.0,1940.0,4090.0,3693.0,10023.0,10984.0,5475.0,643.0,1077.0,3104.0,1059.0,1192.0,6760.0,89486.0,89559.0,78727.0,81514.0,80596.0,79608.0,82999.0,85771.0,83763.0,86199.0,86191.0,82246.0,84314.0,85442.0,84888.0,86728.0,87335.0,82553.0,85430.0,87889.0,88313.0,88292.0,84720.0,83840.0,84916.0,79847.0,77586.0,84131.0,82224.0,84679.0,80868.0,79806.0,81437.0,77151.0,81017.0,81365.0,80997.0,82868.0,81527.0,80928.0,82169.0,82351.0,83615.0,80683.0,82907.0,79820.0,78730.0,87721.0,85276.0,84613.0,82079.0,79575.0,76070.0,87467.0,87705.0,84874.0,88102.0,86791.0,84029.0,82574.0,86323.0,86323.0,82033.0,81254.0,83899.0,88242.0,87795.0,87915.0,87917.0,86673.0,86379.0,84827.0,83034.0,85969.0,86820.0,87105.0,87816.0,88175.0,84362.0,87834.0,82179.0,64335.0,84917.0,86965.0,77072.0,86881.0,41253.0,88091.0,88110.0,88064.0,88034.0,83823.0,86066.0,87145.0,15730.0,1112.0,1189.0,2916.0,1724.0,7650.0,4147.0,5515.0,1722.0,11516.0,10622.0,5646.0,6461.0,1051.0,2113.0,1221.0,0.0,83887.0,85354.0,85078.0,85515.0,85262.0,85013.0,84826.0,84974.0,85242.0,88318.0,83277.0,86232.0,88038.0,68158.0,73377.0,73173.0,73212.0,76199.0,85633.0,66325.0,83241.0,86883.0,86311.0,89474.0,89385.0,80830.0,80736.0,78665.0,80429.0,86800.0,88766.0,87836.0,89565.0,89565.0,89565.0,89565.0,89565.0
mean,6.0,471.966147,486.231943,494.832155,39943.26,1.094537,1.673961,1.888693,2.624129,1.516605,1.887856,1.854291,2.094499,1.485835,1.396242,1.287121,1.77,1.317031,1.605894,1.604871,1.590349,1.659296,1.583019,1.731281,6.834219,1.752985,0.513959,0.328806,0.237547,0.183431,0.185386,0.130677,0.16369,0.19286,0.110729,0.163419,0.134921,1.163765,1.81421,1.572018,1.747077,1.481681,1.807677,1.306399,1.728483,1.830376,2.028676,1.472852,2.096157,1.611554,1.654089,2.480595,2.419811,2.94126,2.552975,2.208894,7.071337,5.693248,1.761175,2.720749,1.858017,5.884454,1.700246,2.564754,2.051173,2.496542,1.874759,2.74193,1.330846,2.292464,1.3799,1.498164,2.780027,3.772163,2.381336,3.210544,2.462859,2.363427,2.908557,3.723532,2.54115,2.537294,2.531975,2.153488,1.53224,1.877178,1.931224,2.639881,2.322773,2.619302,2.411184,2.570162,2.605281,5.654482,5.280676,5.602976,4.451713,3.84267,4.192928,6.237645,1.208092,2.135003,2.059879,3.07055,2.76212,2.845671,2.14182,2.182686,2.613941,2.50899,2.715275,2.412107,2.428833,2.585655,2.932173,2.733531,2.588309,2.146912,2.514928,2.414159,2.386566,2.391473,2.332008,2.670556,2.456364,2.817001,3.063577,2.47268,2.852812,2.725968,2.382021,2.587673,2.322922,2.427683,2.640669,2.598905,2.749764,2.744128,3.232692,0.031279,0.034045,2.656039,2.632026,2.352958,3.08214,1.681382,6.306712,4.233623,8.006926,6.998004,4.4804,7.405354,5.982269,6.006809,7.880696,8.253232,5.997444,2.378251,1.833688,50864050.0,3.40605,4.107731,3.71448,1.365903,1.142222,1.378818,1.71435,1.713794,7.754496,2.384342,2.466957,2.566303,2.1418,5.369873,6.909334,5.67284,6.937443,2.764704,2.47887,3.04711,6.036247,2.337475,2.928857,3.163014,2.597557,2.536911,1.901814,3.148905,2.906043,3.472191,3.536877,3.361154,2.659277,3.009737,4.736456,4.685319,4.582399,2.078663,2.020297,2.094448,2.083906,2.236344,2.565191,1.650222,3.442692,3.409283,3.348678,3.086161,7.624116,7.747022,5.49878,5.378423,4.736696,7.253893,2.798301,2.641762,1.815991,2.271674,1.957192,3.274218,2.757597,3.22847,4.550589,4.563823,2.247292,3.310402,2.082914,2.948735,1.972611,1.556035,2.002732,1.78606,1.504848,2.244692,2.186151,2.121951,2.05144,2.008701,1.820784,2.19701,1.976609,2.588269,2.447725,1.816889,2.073858,2.017025,2.501427,1.17558,1.588043,,2.07441,2.80295,3.684537,1.551833,2.528899,2.839754,3.637705,3.30254,2.012846,2.007462,1.632251,1.595081,3.400668,1.986869,5.124589,4.619518,6.438398,1.6843,1.56407,1.25749,2.080705,3.313939,4.825017,1.522509,41.938804,1.920661,1.92006,1.042636,1.014037,2159.070265,5.650294,1.70043,0.999997,1.005937,0.669904,1.004856,2012.112198
std,0.0,245.114798,251.387109,251.203268,191843.1,0.346876,0.734792,0.837273,0.98591,0.796788,1.054521,0.741204,0.85171,0.499802,0.489118,0.452421,0.420835,0.465322,0.488661,0.488881,0.491772,0.473948,0.493062,0.443296,2.274908,0.431278,0.781936,0.664859,0.574433,0.487573,0.489744,0.420481,0.477121,0.517943,0.383026,0.48417,0.443735,0.370064,0.38894,0.494789,0.434689,0.499667,0.394128,0.461,0.444744,0.375304,0.90039,0.74715,0.845553,0.784604,0.753147,0.91738,0.997887,0.936599,0.987174,0.932282,2.251519,2.647946,0.426638,2.188875,1.789133,2.485472,0.992579,1.035385,1.034761,1.046136,1.166069,1.160827,0.47052,0.850311,0.606197,0.691646,1.412249,1.549026,1.307573,1.53361,1.24921,1.188178,1.457765,1.60135,1.357345,1.30127,1.429862,1.50874,0.562384,0.328235,0.253074,0.976105,0.778201,0.599865,0.71325,0.639849,0.593384,2.358175,2.977975,2.794246,2.919882,2.62259,2.88445,2.78805,0.508802,0.829647,0.805362,0.79549,0.862962,0.865,1.028706,0.938031,0.874388,0.881314,0.897022,0.945028,0.947158,0.957196,0.88299,0.936093,0.896902,0.863057,0.879427,0.939982,0.87925,0.900419,0.898237,0.819945,0.720749,0.925117,0.966809,1.084742,0.820887,0.878706,0.913339,0.947188,0.791473,0.741921,1.016563,0.985495,0.996538,0.886961,0.912909,0.289185,0.33297,0.959279,1.053171,0.93269,0.950176,0.779901,2.976814,2.960424,2.479094,2.754898,3.116436,2.575625,3.027969,3.003679,2.590198,2.114488,2.535919,0.87321,0.860794,31131380.0,3.014599,2.189,2.695868,0.586407,0.34928,0.485096,0.557491,0.561546,2.960739,1.068442,1.082328,1.014164,0.884948,2.378603,1.976169,2.660349,2.43349,1.11632,1.181345,1.055321,2.758472,0.911224,0.871462,0.811289,0.902446,0.889714,0.813611,0.911989,1.049022,0.780578,0.771827,0.960368,1.970775,1.999988,0.992336,1.076899,1.223123,1.138784,1.102335,1.106842,1.105828,1.172436,1.179069,0.476902,0.871608,0.885282,0.919773,1.038908,2.269153,2.223732,2.815202,2.745192,2.886767,2.356384,2.580357,2.420803,1.821476,2.185223,1.939492,3.038461,2.497447,2.765436,3.091655,3.36375,2.217978,2.978405,2.065761,2.609065,1.909986,0.784855,0.881643,0.794068,0.631844,0.886573,0.618109,0.654766,0.847615,0.991218,0.90807,0.902914,0.870045,0.87282,1.071946,0.782593,0.755517,0.704709,0.955559,0.419607,0.772364,,0.96294,1.593819,1.297602,1.129369,1.654349,1.820169,1.674299,1.778908,1.370702,0.929785,0.782287,0.779044,2.149471,0.876479,3.059375,2.832743,2.759067,0.464797,0.495881,0.437255,0.924673,0.998554,2.109361,0.499496,16.552421,0.270269,0.271201,0.202037,0.117645,1736.196124,2.421468,0.458072,0.446249,0.489467,0.350799,0.526198,1.241979
min,6.0,12.0,12.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10000000.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,16.0,1.0,1.0,1.0,1.0,20.0,1.0,1.0,0.050687,0.0213,0.023785,0.035678,2010.0
25%,6.0,276.0,332.0,349.0,391.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,6.0,4.0,2.0,1.0,0.0,4.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,3.0,3.0,2.0,1.0,1.0,5.0,1.0,2.0,2.0,3.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,0.0,0.0,2.0,2.0,2.0,2.0,1.0,4.0,1.0,7.0,5.0,1.0,6.0,4.0,4.0,6.0,7.0,4.0,2.0,1.0,20000000.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,1.0,2.0,2.0,4.0,6.0,4.0,5.0,2.0,2.0,2.0,4.0,2.0,2.0,3.0,2.0,2.0,1.0,3.0,2.0,3.0,3.0,3.0,1.0,1.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,3.0,2.0,6.0,6.0,3.0,3.0,2.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,,1.0,1.0,3.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,5.0,1.0,1.0,1.0,1.0,3.0,3.0,1.0,28.0,2.0,2.0,1.0,1.0,1240.0,4.0,1.0,0.868382,0.87026,0.409333,0.613999,2011.0
50%,6.0,434.0,484.0,560.0,812.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,7.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,3.0,2.0,3.0,3.0,2.0,7.0,6.0,2.0,1.0,2.0,6.0,1.0,3.0,2.0,3.0,1.0,3.0,1.0,3.0,1.0,1.0,3.0,4.0,2.0,3.0,2.0,2.0,3.0,4.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,5.0,5.0,5.0,4.0,3.0,4.0,7.0,1.0,2.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,0.0,0.0,3.0,3.0,2.0,3.0,2.0,7.0,4.0,9.0,8.0,4.0,8.0,6.0,6.0,9.0,9.0,6.0,2.0,2.0,50000000.0,3.0,4.0,3.0,1.0,1.0,1.0,2.0,2.0,9.0,2.0,3.0,3.0,2.0,5.0,7.0,6.0,7.0,3.0,3.0,3.0,6.0,2.0,3.0,3.0,3.0,3.0,2.0,3.0,3.0,4.0,4.0,4.0,1.0,5.0,5.0,5.0,5.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,4.0,4.0,4.0,3.0,8.0,8.0,5.0,5.0,5.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,5.0,4.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,,2.0,2.0,4.0,1.0,2.0,2.0,5.0,4.0,1.0,2.0,1.0,1.0,3.0,2.0,5.0,5.0,7.0,2.0,2.0,1.0,2.0,3.0,5.0,2.0,40.0,2.0,2.0,1.0,1.0,1580.0,6.0,2.0,1.0,1.0,0.661376,0.992063,2012.0
75%,6.0,702.0,710.0,705.0,1356.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,8.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,3.0,2.0,2.0,3.0,3.0,4.0,3.0,3.0,9.0,8.0,2.0,6.0,3.0,8.0,2.0,3.0,3.0,3.0,3.0,4.0,2.0,3.0,2.0,2.0,4.0,5.0,3.0,4.0,3.0,3.0,4.0,5.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,7.0,8.0,8.0,7.0,5.0,6.0,8.0,1.0,3.0,2.0,4.0,3.0,4.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,4.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,4.0,0.0,0.0,3.0,4.0,3.0,4.0,2.0,9.0,6.0,10.0,10.0,7.0,10.0,9.0,9.0,10.0,10.0,8.0,3.0,2.0,70000000.0,5.0,6.0,7.0,2.0,1.0,2.0,2.0,2.0,10.0,3.0,3.0,3.0,3.0,7.0,8.0,8.0,9.0,4.0,3.0,4.0,8.0,3.0,4.0,4.0,3.0,3.0,2.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,3.0,3.0,3.0,3.0,3.0,4.0,2.0,4.0,4.0,4.0,4.0,10.0,10.0,8.0,7.0,7.0,9.0,4.0,4.0,2.0,3.0,2.0,5.0,4.0,5.0,7.0,8.0,3.0,5.0,2.0,5.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,1.0,2.0,,3.0,5.0,5.0,2.0,4.0,5.0,5.0,5.0,2.0,3.0,2.0,2.0,5.0,2.0,8.0,7.0,9.0,2.0,2.0,2.0,3.0,4.0,6.0,2.0,54.0,2.0,2.0,1.0,1.0,3580.0,8.0,2.0,1.0,1.0,0.833333,1.25,2013.0
max,6.0,887.0,901.0,920.0,1394604.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,10.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,10.0,10.0,2.0,6.0,8.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,3.0,3.0,3.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,5.0,3.0,2.0,2.0,4.0,3.0,3.0,3.0,3.0,3.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,4.0,4.0,100000000.0,9.0,7.0,8.0,3.0,2.0,2.0,4.0,4.0,10.0,4.0,4.0,4.0,4.0,10.0,10.0,10.0,10.0,4.0,4.0,4.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,4.0,4.0,4.0,4.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,,4.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,3.0,3.0,8.0,4.0,10.0,10.0,10.0,2.0,2.0,2.0,4.0,5.0,10.0,2.0,102.0,2.0,2.0,2.0,2.0,9900.0,9.0,2.0,22.790557,22.790557,10.694771,16.042156,2016.0


#### Additional variables that may need to be dropped:
- 'V258A' (sample weights for "splitted samples"; note, Germany is the only country in this data set that appears to have a splitted sample); when using weights, use 'V258' instead
- 'V2A' (use 'V2' instead)
- 'C_COW_ALPHA'
- 'V144' and 'V144G' (religious affiliation and groups; I can't find the codes for these values, so I'm not sure how to interpret these)
- 'V1' (wave; there shouldn't be any variation here as these are all wave 6 data)
- 'V3' (interview number; unnecessary identifier)
- 'V23' (satisfaction with life question; redundant because using V10 as target variable instead)
- 'V262' (survey year)
- 'V247' (language spoken at home)

#### Variables to un-drop:
- Return V56_NZ to the data set (**done!**)
    - Next, use V56_NZ to fill V56 missing values if 'C_COW_ALPHA' = 'NEW'? (justify why or why not)
  
#### Thinking ahead to future steps:
- items may need to be normalized or re-scaled so that the ranges are more similar
- items may need to be reverse-coded to assist with interpretability for linear regression
- as mentioned earlier, create a new variable for V125 and V215 items
- retain and rename B_COUNTRY_ALPHA for country labels
- recode age variable 'V242'; create age categories based on groupings identified here: https://www.cia.gov/the-world-factbook/field/age-structure/

In [34]:
# what's the difference between V2 and V2A for countries that have splitted samples? Germany is the only one I'm seeing
# for this wave; there's variation in V2A values, but not in V2, COW_ALPHA or COUNTRY_ALPHA distinctions
# may not be worth it to hang on to splitted samples for this survey wave
cleaning_w6[cleaning_w6['C_COW_ALPHA'] == 'GMY'].sample(50)

Unnamed: 0,V1,V2,V2A,cow,C_COW_ALPHA,B_COUNTRY_ALPHA,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V56_NZ,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V74B,V75,V76,V77,V78,V79,V80,V81,V82,V83,V84,V85,V86,V87,V88,V89,V95,V96,V97,V98,V99,V100,V101,V102,V103,V104,V105,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125_00,V125_01,V125_02,V125_03,V125_04,V125_05,V125_06,V125_07,V125_08,V125_09,V125_10,V125_11,V125_12,V125_13,V125_14,V125_15,V125_16,V125_17,V126,V127,V128,V129,V130,V131,V132,V133,V134,V135,V136,V137,V138,V139,V140,V141,V142,V143,V144,V144G,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V160,V161,V162,V163,V164,V165,V166,V167,V168,V169,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V203,V203A,V204,V205,V206,V207,V207A,V208,V209,V210,V211,V212,V213,V214,V215_01,V215_02,V215_03,V215_04,V215_05,V215_06,V215_07,V215_08,V215_10,V215_11,V215_12,V215_13,V215_14,V215_15,V215_16,V215_17,V215_18,V216,V217,V218,V219,V220,V221,V222,V223,V224,V225,V226,V227,V229,V230,V231,V232,V233,V234,V235,V236,V237,V238,V239,V240,V242,V243,V244,V245,V246,V247,V248,V250,V258,V258A,S018,S019,V262
21402,6,276,900,255,GMY,DEU,447,1.0,1.0,3.0,3.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,3.0,3.0,1.0,1.0,3.0,3.0,3.0,3.0,1.0,5.0,7.0,,1.0,2.0,5.0,1.0,3.0,2.0,4.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,4.0,,1.0,3.0,5.0,2.0,4.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,5.0,1.0,10.0,1.0,5.0,5.0,5.0,1.0,3.0,2.0,4.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,,,,,,,,,,,,,,,,0.0,0.0,,2.0,3.0,4.0,1.0,8.0,4.0,10.0,10.0,1.0,,4.0,1.0,10.0,10.0,7.0,1.0,3.0,50000000.0,5.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,10.0,1.0,1.0,1.0,2.0,4.0,8.0,8.0,7.0,4.0,2.0,4.0,8.0,2.0,3.0,4.0,4.0,2.0,1.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,4.0,2.0,4.0,4.0,4.0,4.0,,4.0,4.0,4.0,4.0,10.0,10.0,5.0,,8.0,10.0,5.0,1.0,1.0,,1.0,5.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,,,,,,,,,,,,,,,,,2.0,1.0,3.0,1.0,1.0,1.0,3.0,2.0,1.0,2.0,2.0,3.0,1.0,2.0,8.0,7.0,5.0,2.0,1.0,,4.0,4.0,4.0,1.0,43.0,2.0,2.0,2.0,2.0,4370.0,3.0,2.0,1.222688,1.7776,0.597599,0.896399,2013
21482,6,276,901,255,GMY,DEU,1638,1.0,1.0,2.0,4.0,2.0,3.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,8.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,3.0,1.0,2.0,4.0,3.0,4.0,3.0,3.0,7.0,10.0,,1.0,3.0,7.0,3.0,4.0,2.0,3.0,3.0,2.0,2.0,1.0,3.0,1.0,3.0,4.0,2.0,4.0,4.0,,3.0,6.0,4.0,4.0,3.0,3.0,3.0,2.0,2.0,3.0,1.0,2.0,1.0,1.0,2.0,,1.0,10.0,1.0,5.0,5.0,7.0,1.0,1.0,1.0,2.0,2.0,2.0,4.0,3.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,4.0,2.0,2.0,1.0,3.0,,,,,,,,,,,,,,,,0.0,0.0,3.0,4.0,3.0,4.0,2.0,10.0,1.0,10.0,9.0,1.0,7.0,10.0,1.0,10.0,7.0,8.0,2.0,3.0,100000020.0,0.0,7.0,8.0,2.0,2.0,2.0,1.0,1.0,1.0,3.0,4.0,4.0,3.0,10.0,10.0,8.0,10.0,4.0,2.0,2.0,10.0,2.0,4.0,2.0,4.0,4.0,1.0,4.0,3.0,3.0,3.0,4.0,1.0,1.0,5.0,5.0,5.0,3.0,3.0,2.0,3.0,3.0,1.0,2.0,4.0,4.0,4.0,4.0,3.0,5.0,5.0,5.0,7.0,5.0,1.0,1.0,1.0,,1.0,5.0,1.0,1.0,5.0,7.0,1.0,,1.0,1.0,1.0,2.0,,2.0,2.0,3.0,,,,,,,,,,,,,,,,,2.0,2.0,4.0,1.0,1.0,1.0,2.0,2.0,1.0,3.0,,,1.0,2.0,7.0,5.0,5.0,2.0,2.0,1.0,2.0,3.0,5.0,2.0,48.0,2.0,2.0,1.0,1.0,1530.0,5.0,2.0,0.460452,0.1709,0.22505,0.337574,2013
21770,6,276,900,255,GMY,DEU,876,1.0,2.0,2.0,2.0,4.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,8.0,1.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,3.0,1.0,3.0,1.0,3.0,2.0,1.0,3.0,3.0,1.0,5.0,6.0,,1.0,3.0,9.0,3.0,1.0,2.0,3.0,2.0,1.0,1.0,3.0,3.0,1.0,3.0,5.0,3.0,3.0,,2.0,2.0,6.0,5.0,3.0,1.0,5.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,7.0,8.0,1.0,6.0,7.0,2.0,8.0,1.0,1.0,2.0,3.0,2.0,3.0,2.0,1.0,3.0,3.0,3.0,1.0,2.0,2.0,2.0,2.0,3.0,2.0,4.0,3.0,2.0,4.0,3.0,4.0,,,,,,,,,,,,,,,,0.0,0.0,2.0,3.0,2.0,4.0,1.0,9.0,1.0,10.0,8.0,3.0,10.0,1.0,1.0,7.0,10.0,3.0,2.0,2.0,10000000.0,1.0,2.0,3.0,1.0,1.0,2.0,1.0,2.0,9.0,3.0,2.0,1.0,2.0,5.0,8.0,2.0,6.0,3.0,2.0,1.0,7.0,2.0,4.0,4.0,4.0,3.0,1.0,4.0,3.0,3.0,4.0,4.0,1.0,5.0,5.0,5.0,5.0,4.0,4.0,3.0,2.0,4.0,1.0,1.0,4.0,4.0,4.0,4.0,8.0,8.0,7.0,9.0,2.0,8.0,1.0,1.0,1.0,,2.0,7.0,5.0,1.0,5.0,9.0,2.0,,1.0,1.0,1.0,2.0,3.0,1.0,1.0,4.0,,,,,,,,,,,,,,,,,2.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,1.0,1.0,4.0,3.0,4.0,7.0,9.0,1.0,1.0,,1.0,2.0,7.0,1.0,72.0,2.0,2.0,1.0,1.0,1530.0,3.0,2.0,2.927821,4.2567,1.430998,2.146497,2013
21839,6,276,901,255,GMY,DEU,1729,1.0,2.0,2.0,3.0,3.0,4.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,7.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,1.0,1.0,1.0,4.0,4.0,2.0,4.0,2.0,3.0,9.0,4.0,,1.0,2.0,8.0,3.0,1.0,1.0,3.0,1.0,4.0,2.0,1.0,1.0,1.0,5.0,4.0,2.0,4.0,,2.0,3.0,6.0,3.0,5.0,4.0,1.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,5.0,3.0,5.0,10.0,3.0,4.0,5.0,1.0,2.0,2.0,3.0,,3.0,4.0,2.0,3.0,3.0,3.0,3.0,2.0,4.0,4.0,3.0,3.0,2.0,3.0,4.0,3.0,,2.0,3.0,,,,,,,,,,,,,,,,0.0,0.0,2.0,4.0,2.0,4.0,1.0,6.0,1.0,10.0,8.0,1.0,10.0,3.0,1.0,10.0,9.0,5.0,2.0,3.0,100000020.0,0.0,7.0,8.0,3.0,2.0,2.0,1.0,1.0,1.0,4.0,,3.0,2.0,6.0,9.0,6.0,8.0,4.0,2.0,2.0,7.0,3.0,4.0,1.0,4.0,3.0,1.0,4.0,4.0,4.0,4.0,4.0,1.0,1.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,2.0,1.0,4.0,4.0,4.0,4.0,7.0,8.0,3.0,2.0,4.0,7.0,1.0,1.0,1.0,,1.0,6.0,2.0,5.0,5.0,6.0,2.0,,1.0,3.0,2.0,2.0,3.0,2.0,2.0,4.0,,,,,,,,,,,,,,,,,2.0,1.0,2.0,1.0,1.0,1.0,5.0,5.0,2.0,1.0,1.0,1.0,4.0,2.0,9.0,9.0,7.0,1.0,1.0,,1.0,3.0,6.0,1.0,74.0,2.0,2.0,1.0,1.0,1530.0,9.0,2.0,0.166135,0.0616,0.0812,0.1218,2013
20754,6,276,901,255,GMY,DEU,1085,2.0,2.0,2.0,3.0,2.0,3.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,8.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,1.0,2.0,1.0,3.0,3.0,4.0,3.0,,,5.0,7.0,,6.0,0.0,9.0,3.0,1.0,2.0,4.0,4.0,1.0,1.0,2.0,1.0,2.0,3.0,3.0,4.0,3.0,,5.0,3.0,4.0,3.0,3.0,4.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,4.0,5.0,6.0,5.0,4.0,5.0,5.0,1.0,2.0,2.0,3.0,,2.0,3.0,2.0,3.0,3.0,2.0,1.0,1.0,3.0,3.0,3.0,2.0,1.0,2.0,3.0,2.0,2.0,2.0,2.0,,,,,,,,,,,,,,,,0.0,0.0,2.0,4.0,2.0,4.0,2.0,8.0,1.0,10.0,8.0,8.0,10.0,6.0,3.0,10.0,8.0,7.0,1.0,1.0,100000020.0,0.0,6.0,8.0,3.0,2.0,2.0,1.0,2.0,2.0,4.0,4.0,4.0,2.0,4.0,8.0,5.0,8.0,3.0,3.0,3.0,8.0,3.0,4.0,4.0,3.0,4.0,1.0,4.0,2.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,3.0,2.0,4.0,4.0,4.0,4.0,1.0,4.0,4.0,4.0,3.0,9.0,9.0,2.0,2.0,2.0,9.0,2.0,2.0,2.0,,1.0,7.0,9.0,9.0,9.0,10.0,3.0,,3.0,3.0,1.0,1.0,3.0,2.0,2.0,2.0,,,,,,,,,,,,,,,,,4.0,3.0,3.0,2.0,2.0,1.0,1.0,1.0,2.0,3.0,1.0,1.0,1.0,2.0,7.0,6.0,5.0,2.0,1.0,,1.0,3.0,4.0,1.0,33.0,2.0,2.0,1.0,1.0,1530.0,9.0,2.0,0.211454,0.0785,0.10335,0.155025,2013
21889,6,276,900,255,GMY,DEU,992,4.0,1.0,1.0,1.0,1.0,4.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,10.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,3.0,4.0,1.0,1.0,2.0,1.0,1.0,10.0,1.0,,6.0,0.0,7.0,2.0,3.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,6.0,2.0,4.0,1.0,6.0,,3.0,1.0,5.0,6.0,5.0,3.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,3.0,1.0,5.0,1.0,6.0,1.0,3.0,4.0,4.0,2.0,3.0,2.0,4.0,4.0,4.0,1.0,3.0,2.0,4.0,4.0,3.0,4.0,4.0,4.0,2.0,4.0,3.0,4.0,4.0,3.0,2.0,4.0,,,,,,,,,,,,,,,,0.0,0.0,2.0,1.0,1.0,3.0,2.0,6.0,4.0,7.0,4.0,8.0,7.0,5.0,9.0,5.0,5.0,6.0,3.0,3.0,100000020.0,0.0,7.0,8.0,2.0,1.0,2.0,1.0,2.0,3.0,4.0,4.0,2.0,1.0,7.0,10.0,10.0,5.0,3.0,4.0,4.0,10.0,4.0,4.0,3.0,1.0,4.0,2.0,4.0,3.0,3.0,3.0,4.0,5.0,5.0,5.0,5.0,5.0,2.0,4.0,3.0,2.0,4.0,2.0,1.0,4.0,4.0,4.0,4.0,10.0,7.0,1.0,6.0,9.0,4.0,6.0,5.0,2.0,,5.0,6.0,7.0,5.0,10.0,10.0,6.0,,5.0,3.0,3.0,2.0,1.0,4.0,2.0,3.0,,,,,,,,,,,,,,,,,1.0,5.0,4.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,2.0,2.0,3.0,5.0,2.0,1.0,,1.0,4.0,4.0,1.0,33.0,2.0,2.0,1.0,1.0,1530.0,5.0,2.0,3.068586,4.4613,1.499798,2.249696,2013
21683,6,276,900,255,GMY,DEU,608,1.0,2.0,2.0,3.0,1.0,4.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,7.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,3.0,3.0,1.0,2.0,3.0,3.0,4.0,4.0,3.0,8.0,7.0,,1.0,1.0,7.0,3.0,1.0,4.0,2.0,3.0,1.0,2.0,2.0,1.0,3.0,2.0,4.0,4.0,4.0,4.0,,3.0,5.0,3.0,3.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,3.0,2.0,3.0,2.0,3.0,5.0,5.0,5.0,3.0,5.0,8.0,1.0,1.0,1.0,3.0,3.0,3.0,3.0,2.0,3.0,3.0,3.0,2.0,2.0,3.0,4.0,3.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,3.0,,,,,,,,,,,,,,,,0.0,0.0,3.0,4.0,4.0,4.0,1.0,6.0,1.0,10.0,10.0,1.0,10.0,2.0,1.0,10.0,10.0,8.0,1.0,2.0,80400035.0,8.0,4.0,8.0,2.0,1.0,2.0,2.0,2.0,3.0,4.0,2.0,3.0,3.0,4.0,7.0,5.0,10.0,4.0,3.0,3.0,10.0,3.0,3.0,4.0,3.0,3.0,1.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,4.0,2.0,3.0,3.0,4.0,3.0,2.0,4.0,4.0,4.0,4.0,5.0,8.0,2.0,2.0,2.0,9.0,1.0,1.0,1.0,,1.0,6.0,6.0,6.0,6.0,10.0,6.0,,1.0,1.0,1.0,3.0,3.0,1.0,2.0,3.0,,,,,,,,,,,,,,,,,2.0,1.0,3.0,1.0,1.0,5.0,5.0,3.0,1.0,3.0,1.0,1.0,1.0,2.0,3.0,8.0,6.0,1.0,1.0,,1.0,3.0,5.0,1.0,52.0,2.0,2.0,1.0,1.0,1530.0,3.0,2.0,1.188315,1.7276,0.580799,0.871199,2013
21842,6,276,901,255,GMY,DEU,1782,2.0,1.0,2.0,2.0,4.0,4.0,3.0,3.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,2.0,2.0,5.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,3.0,1.0,4.0,3.0,3.0,4.0,3.0,2.0,6.0,5.0,,5.0,2.0,7.0,3.0,1.0,2.0,4.0,2.0,1.0,2.0,3.0,1.0,1.0,1.0,3.0,2.0,3.0,,2.0,3.0,5.0,4.0,3.0,4.0,4.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,5.0,4.0,7.0,7.0,3.0,5.0,3.0,1.0,2.0,2.0,3.0,2.0,3.0,4.0,2.0,3.0,3.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,1.0,3.0,4.0,2.0,2.0,3.0,3.0,,,,,,,,,,,,,,,,0.0,0.0,3.0,4.0,1.0,4.0,1.0,9.0,1.0,10.0,7.0,3.0,10.0,6.0,1.0,10.0,10.0,7.0,1.0,3.0,100000020.0,0.0,7.0,8.0,3.0,2.0,2.0,1.0,2.0,1.0,4.0,4.0,4.0,2.0,6.0,9.0,8.0,8.0,3.0,2.0,2.0,8.0,2.0,4.0,3.0,4.0,3.0,2.0,4.0,4.0,3.0,4.0,4.0,1.0,1.0,5.0,5.0,5.0,4.0,4.0,3.0,2.0,4.0,2.0,2.0,4.0,4.0,4.0,4.0,8.0,9.0,1.0,2.0,4.0,7.0,1.0,2.0,1.0,,1.0,4.0,3.0,6.0,5.0,8.0,3.0,,1.0,1.0,2.0,2.0,3.0,1.0,2.0,3.0,,,,,,,,,,,,,,,,,2.0,1.0,3.0,1.0,2.0,4.0,3.0,2.0,2.0,2.0,1.0,1.0,4.0,1.0,8.0,8.0,7.0,2.0,1.0,,1.0,2.0,5.0,2.0,72.0,2.0,2.0,1.0,1.0,1530.0,9.0,2.0,0.222093,0.0824,0.10855,0.162825,2013
21148,6,276,900,255,GMY,DEU,637,1.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,5.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,3.0,1.0,,3.0,3.0,3.0,3.0,1.0,5.0,5.0,,1.0,1.0,2.0,1.0,3.0,3.0,2.0,4.0,1.0,2.0,1.0,1.0,1.0,2.0,5.0,2.0,4.0,,3.0,2.0,6.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,3.0,3.0,3.0,5.0,6.0,8.0,3.0,5.0,6.0,5.0,1.0,2.0,2.0,3.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,,,,,,,,,,,,,,,,0.0,0.0,2.0,2.0,2.0,4.0,1.0,8.0,3.0,10.0,9.0,1.0,10.0,6.0,1.0,10.0,10.0,10.0,2.0,1.0,80400035.0,8.0,2.0,3.0,1.0,1.0,1.0,1.0,2.0,10.0,1.0,1.0,3.0,2.0,3.0,7.0,4.0,3.0,2.0,2.0,1.0,3.0,2.0,4.0,3.0,,,1.0,4.0,4.0,3.0,4.0,4.0,1.0,1.0,5.0,5.0,1.0,,,3.0,2.0,3.0,2.0,1.0,4.0,4.0,4.0,4.0,5.0,5.0,7.0,8.0,8.0,5.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,5.0,1.0,1.0,,1.0,1.0,1.0,2.0,2.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,3.0,5.0,5.0,1.0,5.0,1.0,5.0,5.0,2.0,1.0,1.0,2.0,5.0,1.0,1.0,4.0,5.0,2.0,2.0,2.0,4.0,3.0,3.0,2.0,64.0,2.0,2.0,1.0,1.0,1530.0,3.0,2.0,0.877733,1.2761,0.428999,0.643499,2013
21413,6,276,901,255,GMY,DEU,1445,1.0,1.0,1.0,3.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,3.0,1.0,1.0,3.0,2.0,2.0,1.0,3.0,8.0,2.0,,1.0,0.0,5.0,3.0,1.0,2.0,4.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,3.0,2.0,3.0,5.0,,1.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,1.0,2.0,7.0,1.0,7.0,3.0,3.0,8.0,8.0,1.0,1.0,1.0,2.0,,2.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,3.0,4.0,3.0,2.0,4.0,3.0,2.0,,2.0,3.0,,,,,,,,,,,,,,,,0.0,0.0,3.0,2.0,1.0,3.0,1.0,10.0,1.0,10.0,10.0,1.0,10.0,10.0,1.0,10.0,9.0,4.0,3.0,1.0,100000020.0,0.0,7.0,8.0,3.0,2.0,2.0,,2.0,1.0,4.0,,4.0,4.0,5.0,10.0,9.0,8.0,4.0,4.0,4.0,9.0,1.0,3.0,1.0,3.0,1.0,1.0,4.0,3.0,4.0,4.0,4.0,1.0,5.0,1.0,5.0,1.0,1.0,,1.0,1.0,1.0,1.0,2.0,4.0,4.0,4.0,3.0,8.0,9.0,10.0,5.0,1.0,8.0,1.0,1.0,1.0,,3.0,3.0,5.0,2.0,5.0,10.0,1.0,,1.0,1.0,1.0,2.0,2.0,2.0,2.0,3.0,,,,,,,,,,,,,,,,,1.0,1.0,4.0,1.0,1.0,1.0,2.0,2.0,1.0,3.0,2.0,2.0,1.0,3.0,5.0,3.0,8.0,2.0,1.0,,3.0,4.0,4.0,1.0,46.0,2.0,2.0,1.0,1.0,1530.0,5.0,2.0,0.558864,0.2074,0.27315,0.409724,2013


In [35]:
# how many respondents from various countries did not respond to V56?
# are these values missing at random, or en masse for some samples?
cleaning_w6[~cleaning_w6.V56.notnull()]['C_COW_ALPHA'].value_counts()

NEW    841
JPN    216
CHN    179
RUS    147
NTH    116
MOR     88
LIB     87
UZB     75
YEM     73
KUW     63
SPN     60
ALG     57
ARM     49
TAW     45
PER     42
URU     41
GRG     37
SAF     36
POL     34
EST     32
ROM     32
TUN     31
CHL     31
USA     27
ARG     26
IND     25
AUL     25
BRA     24
SWD     23
GMY     19
COL     18
TUR     16
SLV     15
MEX     13
IRQ     13
HAI      9
ECU      8
QAT      8
CYP      7
PSE      7
TRI      6
JOR      5
ROK      5
THI      4
BLR      3
LEB      2
KYR      2
HKG      2
SIN      2
Name: C_COW_ALPHA, dtype: int64

In [36]:
# how many respondents from various countries responded to V56_NZ?
wvs_w6[wvs_w6.V56_NZ.notnull()]['C_COW_ALPHA'].value_counts()

# V56 is missing entirely for repondents from New Zeland; all responses to V56_NZ are from New Zeland
# responses have a different range/scale, but could possibly be meaningfully combined into the same variable...
# justify why or why not do this

NEW    783
Name: C_COW_ALPHA, dtype: int64

In [37]:
# what to do about age?
cleaning_w6.V242.value_counts()

# create age groups based on CIA.gov categories

25.0     2383
30.0     2362
23.0     2309
20.0     2252
24.0     2215
22.0     2147
40.0     2125
26.0     2112
35.0     2092
27.0     2069
28.0     2064
18.0     2062
19.0     1999
32.0     1983
21.0     1981
29.0     1911
31.0     1886
45.0     1854
42.0     1821
36.0     1815
50.0     1805
38.0     1804
33.0     1787
34.0     1660
37.0     1633
39.0     1606
43.0     1604
41.0     1532
46.0     1524
44.0     1511
47.0     1494
55.0     1486
52.0     1463
48.0     1438
49.0     1414
60.0     1405
51.0     1373
54.0     1288
53.0     1231
56.0     1220
58.0     1213
62.0     1157
57.0     1147
65.0     1024
63.0     1008
61.0      994
59.0      967
64.0      898
70.0      839
66.0      822
68.0      723
67.0      675
72.0      647
69.0      602
73.0      597
71.0      560
74.0      511
75.0      488
76.0      432
77.0      363
78.0      297
80.0      262
79.0      257
81.0      232
82.0      173
83.0      164
84.0      153
85.0      137
17.0       56
86.0       48
87.0       42
89.0  