# Wealth Index Calculation

In [1]:
# Importing libraries
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('clean5.csv')     # Readimg cleaned csv

In [3]:
data.columns     # Displaying columns

Index(['Unnamed: 0', 'district', 'rural', 'stratum_code', 'age',
       'marital_status', 'delivered_any_baby', 'born_alive_female',
       'born_alive_male', 'born_alive_total',
       ...
       'no_of_times_conceived', 'age_at_first_conception',
       'is_injectable_contraceptive', 'health_prob_afters_fp_use',
       'counselled_for_menstrual_hyg', 'aware_abt_haf', 'aware_abt_ort_ors',
       'aware_abt_ort_ors_zinc', 'aware_abt_danger_signs_new_born',
       'iscoveredbyhealthscheme'],
      dtype='object', length=101)

In [4]:
data.drop('Unnamed: 0', axis = 1,inplace = True)   # Dropping extra columns

In [5]:
# Displaying unique values in each column of dataset
for i in data.columns:
    print(i , data[i].unique())

district [13  6  5  1  4 11  3  7 10  9  2  8 12]
rural [1 2]
stratum_code [2 1 0]
age [ 42  47  32  20  27  29  26  30  40  39  38  28  22  19  34  25  23  37
  36  33  31  43  35  41  21  44  24  46  49  45  18  48  17  16  15 548]
marital_status [3 4 5 6 7 2 8]
delivered_any_baby [ 1  2 -1]
born_alive_female [ 2  3  1  0  4  6  5  7  8  9 11 10 -1 12 15 32 20 13 47 21 54 41 14 31
 30 22 63 65 61 45]
born_alive_male [ 1  3  2  0  6  4  5  7 10  8  9 11 -1 12 32 14 41 20 21 13 22 23 44 47
 45 31]
born_alive_total [ 3  6  4  1  5  2  7 10  0  9  8 13 11 12 15 14 -1 20 32 36 23 24 34 35
 16 50 59 43 45 53 17 22 21 33 46 47 52 63 66 62]
surviving_female [ 2  3  1  0  4  5  6  7  8  9 10 -1 12 11]
surviving_male [ 0  3  2  1  4  6  5  9  7  8 -1 20 10]
surviving_total [ 2  6  3  1  4  5  0  8  7 10  9 12 11 13 -1 14 23 21]
mother_age_when_baby_was_born [18 17 19 20 21 22 16 -1 23 24 25 27 32 15 26 28 29 34 30 31 14 35 33 13
 40 39 36 38 41 37 42 43 46 44 45 47 48  3 49 12]
outcome_pregnan

In [6]:
# Selecting columns for calculation of AHS Wealth Index. These 21 columns are selected as per previous AHS results.
cols_for_wealth_index = ['house_structure', 'drinking_water_source', 'toilet_used', 'household_have_electricity', 'lighting_source', 'cooking_fuel', 'kitchen_availability', 'is_radio', 'is_television', 'is_computer', 'is_telephone', 'is_washing_machine', 'is_refrigerator', 'is_sewing_machine', 'is_bicycle', 'is_scooter', 'is_car', 'is_tractor', 'is_water_pump', 'cart', 'land_possessed']
for i in cols_for_wealth_index:
    print(i)

house_structure
drinking_water_source
toilet_used
household_have_electricity
lighting_source
cooking_fuel
kitchen_availability
is_radio
is_television
is_computer
is_telephone
is_washing_machine
is_refrigerator
is_sewing_machine
is_bicycle
is_scooter
is_car
is_tractor
is_water_pump
cart
land_possessed


### Data Cleaning

Converting selected columns to bool type

In [7]:
data['house_structure'] = data['house_structure'].replace([2,3,4], 0)
data['house_structure'].unique()

array([0, 1], dtype=int64)

In [8]:
data['drinking_water_source'] = data['drinking_water_source'].replace([2,3,4,5], 1)
data['drinking_water_source'] = data['drinking_water_source'].replace([6,7,8,9], 0)
data['drinking_water_source'].unique()

array([1, 0], dtype=int64)

In [9]:
data['toilet_used'] = data['toilet_used'].replace([2,3,4,5,6],1)
data['toilet_used'] = data['toilet_used'].replace([7,8,9],0)
data['toilet_used'].unique()

array([1, 0], dtype=int64)

In [10]:
data['household_have_electricity'] = data['household_have_electricity'].replace(2,0)
data['household_have_electricity'].unique()

array([1, 0], dtype=int64)

In [11]:
data['lighting_source'] = data['lighting_source'].replace(3,1)
data['lighting_source'] = data['lighting_source'].replace([2,4,5,6],0)
data['lighting_source'].unique()

array([1, 0], dtype=int64)

In [12]:
data['cooking_fuel'] = data['cooking_fuel'].replace([1,2,3,4,5,9],0)
data['cooking_fuel'] = data['cooking_fuel'].replace([6,7,8],1)
data['cooking_fuel'].unique()

array([1, 0], dtype=int64)

In [13]:
data['kitchen_availability'] = data['kitchen_availability'].replace([2,3,4,5],0)
data['kitchen_availability'].unique()

array([0, 1], dtype=int64)

In [14]:
data[['is_radio', 'is_television']] = data[['is_radio', 'is_television']].replace(2,0)

In [15]:
data['is_computer'] = data['is_computer'].replace(2,1)
data['is_computer'] = data['is_computer'].replace(3,0)
data['is_computer'].unique()

array([0, 1], dtype=int64)

In [16]:
data['is_telephone'] = data['is_telephone'].replace([2,3],1)
data['is_telephone'] = data['is_telephone'].replace(4,0)
data['is_telephone'].unique()

array([1, 0], dtype=int64)

In [17]:
data[['is_washing_machine', 'is_refrigerator', 'is_sewing_machine', 'is_bicycle', 'is_scooter', 'is_car', 'is_tractor', 'is_water_pump']] = data[['is_washing_machine', 'is_refrigerator', 'is_sewing_machine', 'is_bicycle', 'is_scooter', 'is_car', 'is_tractor', 'is_water_pump']].replace(2,0)

In [18]:
data['cart'] = data['cart'].replace([2,3],1)
data['cart'] = data['cart'].replace(4,0)
data['cart'].unique()

array([0, 1], dtype=int64)

In [19]:
# Converting 'land_possessed' as per previous AHS guidlines
data['land_possessed'] = data['land_possessed'].replace(6,0)
data['land_possessed'] = data['land_possessed'].replace(2,49)
data['land_possessed'] = data['land_possessed'].replace(3,250)
data['land_possessed'] = data['land_possessed'].replace(4,700)
data['land_possessed'] = data['land_possessed'].replace(5,1000)
data['land_possessed'].unique()

array([   0,    1,   49,  700,  250, 1000], dtype=int64)

## PCA

In [20]:
# Importing librarires for PCA
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline

In [21]:
x = data[cols_for_wealth_index]          # Creating copy dataset for applying PCA on selected columns

In [22]:
# Normalizing 'land_possessed' column.
x['land_possessed'] = ((x['land_possessed'] - x['land_possessed'].mean())/x['land_possessed'].std())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [23]:
x.head()

Unnamed: 0,house_structure,drinking_water_source,toilet_used,household_have_electricity,lighting_source,cooking_fuel,kitchen_availability,is_radio,is_television,is_computer,...,is_washing_machine,is_refrigerator,is_sewing_machine,is_bicycle,is_scooter,is_car,is_tractor,is_water_pump,cart,land_possessed
0,0,1,1,1,1,1,0,1,1,0,...,0,1,1,0,1,0,0,0,0,-0.376887
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,-0.376887
2,0,1,0,1,1,0,0,1,1,0,...,0,0,0,1,0,0,0,0,0,-0.376887
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-0.376887
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-0.376887


### PCA Projection to 1-D

In [24]:
pca = PCA(n_components=1)        # Running PCA for Single Component

In [25]:
principalComponents = pca.fit_transform(x)

In [26]:
principalDf = pd.DataFrame(data = principalComponents)

In [27]:
principalDf

Unnamed: 0,0
0,0.868532
1,-1.344183
2,-0.360148
3,-1.478582
4,-1.478582
...,...
905972,0.856038
905973,0.292126
905974,-1.973313
905975,1.126807


In [28]:
# Percentage of variance explained by each of the selected components.
# This means principalDf represents 27.166% of variance of dataset
pca.explained_variance_ratio_

array([0.27166759])

In [29]:
# Principal axes in feature space, representing the directions of maximum variance in the data
z = pca.components_
print(z)

[[0.27312352 0.21700541 0.34685541 0.22654657 0.22385735 0.31511422
  0.27188659 0.18733346 0.34629766 0.06754594 0.27772584 0.13700616
  0.2707688  0.24879792 0.13439891 0.18154306 0.05823307 0.01229433
  0.01126893 0.00985664 0.19458829]]


In [30]:
# Multiplying each column with their respective feature score
j = 0
for i in cols_for_wealth_index:
    x[i] = x[i]*z[0][j]
    j += 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [31]:
# Calculating Wealth Index by adding scores for all selected columns.
data['Wealth_Index'] = x.sum(axis = 1)

In [32]:
cols_for_wealth_index.append('Wealth_Index')

In [33]:
# Displaying Wealth Index along with selected variables
data[cols_for_wealth_index]

Unnamed: 0,house_structure,drinking_water_source,toilet_used,household_have_electricity,lighting_source,cooking_fuel,kitchen_availability,is_radio,is_television,is_computer,...,is_refrigerator,is_sewing_machine,is_bicycle,is_scooter,is_car,is_tractor,is_water_pump,cart,land_possessed,Wealth_Index
0,0,1,1,1,1,1,0,1,1,0,...,1,1,0,1,0,0,0,0,0,2.768508
1,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0.555792
2,0,1,0,1,1,0,0,1,1,0,...,0,0,1,0,0,0,0,0,0,1.539827
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.421394
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.421394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
905972,1,1,1,1,1,1,1,0,1,0,...,0,1,0,0,0,0,0,0,49,2.756014
905973,1,1,1,1,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,49,2.192102
905974,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-0.073338
905975,1,1,1,1,1,1,1,0,1,0,...,1,1,0,0,0,0,0,0,49,3.026783


In [34]:
# Displaying Dataset with Wealth Index
data

Unnamed: 0,district,rural,stratum_code,age,marital_status,delivered_any_baby,born_alive_female,born_alive_male,born_alive_total,surviving_female,...,age_at_first_conception,is_injectable_contraceptive,health_prob_afters_fp_use,counselled_for_menstrual_hyg,aware_abt_haf,aware_abt_ort_ors,aware_abt_ort_ors_zinc,aware_abt_danger_signs_new_born,iscoveredbyhealthscheme,Wealth_Index
0,13,1,2,42,3,1,2,1,3,2,...,18,2,2,2,1,1,2,2,2,2.768508
1,13,1,2,47,3,1,3,3,6,3,...,17,2,-1,2,1,1,2,2,2,0.555792
2,13,1,2,32,3,1,2,2,4,1,...,19,1,-1,2,1,1,2,2,2,1.539827
3,13,1,2,20,3,1,1,0,1,1,...,17,2,-1,1,1,1,2,2,1,0.421394
4,13,1,2,27,3,1,0,3,3,0,...,19,1,2,1,1,1,2,2,1,0.421394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
905972,2,2,0,36,3,1,2,4,6,2,...,19,1,-1,2,1,1,1,1,2,2.756014
905973,2,2,0,39,3,1,2,1,3,2,...,15,2,-1,2,1,1,2,1,2,2.192102
905974,2,2,0,41,3,1,1,2,3,1,...,22,-1,-1,2,2,2,2,2,2,-0.073338
905975,2,2,0,38,3,1,1,1,2,1,...,22,2,-1,2,1,1,2,1,2,3.026783


In [35]:
data.to_csv('akash.csv')