# Sectorial need calculations

Calculate the needs of each household for each sector based on the answers to the survey.
In this notebook Protection index is computed

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

In [None]:
df = pd.read_excel('../../data/raw/reach_nga_msna_clean_dataset_final.xlsx',sheet_name='clean_hh_data') 
# Remove respondents who refuse to take the interview
df = df[df['Consent'] == 'Yes, agrees to interview']

### Create useful functions

In [None]:
# Create alphabet
# alphabet: list of length 26 with all characters in capital letters
alphabet = []
for letter in range(97,123):
    alphabet.append(chr(letter).upper())
    
# Create list of excel's sheet column headers
# columns_index: list with all column headers in excel (A, B, .. AA, AB..) in order
columns_index = []
for letter1 in alphabet:
    columns_index.append(letter1)
for letter1 in alphabet:
    for letter2 in alphabet:
        columns_index.append(letter1+letter2)
for letter1 in alphabet:
    for letter2 in alphabet:
        for letter3 in alphabet:
            columns_index.append(letter1+letter2+letter3)
            
# Create dictionary that matches excel's headers and pandas column names
# index2name: dictionary where keys are excel headers (RG), values are pandas column names
index2name = {}
for i, index in enumerate(columns_index):
    index2name[index] = df. columns[i]
    if index == 'AJK':
        break
        
def add_column_range_names(columns, column_ranges):
    """Add the names of the columns in your range of interest to the columns list
    columns: list of names of columns of interest
    column_ranges: list of pairs of initial and final indices of ranges of columns of interest"""
    for i, column_range in enumerate(column_ranges):
        initial_index = columns_index.index(column_range[0])
        final_index = columns_index.index(column_range[1])
        for ii in range(initial_index, final_index+1):
            columns.append(index2name[columns_index[ii]])
    return columns

#def reset_df(df)

### 1) Protection sector (MARCO)

<table style="width:100%">
  <tr>
    <th>Indicator</th>
    <th>Dataset header</th> 
    <th>Weighting</th>
  </tr>
  <tr>
    <td>HH is located in ward where explosive incidents were reported</td>
    <td>'SECURITY INCIDENT TYPE - Presence of landmines / UXOs'</td>
    <td>2</td>
  </tr>
  <tr>
    <td>HH has experienced a security incident in previous 3 months</td>
    <td>'Has anyone in your household experienced any security incidents in the last thre'</td>
    <td>2</td>
  </tr>
  <tr>
    <td>HH adult members do not have any legal documentation</td>
    <td>Of the adults in the household, how many have some form of legal documentation?</td>
    <td>3</td>
  </tr>
  <tr>
    <td>HH experiences movement restrictions</td>
    <td>'Has anyone in your household experienced movement restrictions in your area in t'</td>
    <td>2</td>
  </tr>
  <tr>
    <td>HH has members that are missing / detained</td>
    <td>'Is there any member of your household who is missing or being detained?'</td>
    <td>2</td>
  </tr>
</table>

</body>
</html>


In [None]:
# Define the columns of interest
Protection_columns = ['SECURITY INCIDENT TYPE - Presence of landmines / UXOs',
               'Has anyone in your household experienced any security incidents in the last thre.1',
               'Of the adults in the household, how many have some form of legal documentation?',
               'Has anyone in your household experienced movement restrictions in your area in t',
               'Is there any member of your household who is missing or being detained?']
for i in range (len(Protection_columns)):
    print(Protection_columns[i])
# Define the column ranges of interest
# list containing the pairs of initial and last columns to be included in the tage
#Protection_column_ranges = [['YH', 'ZZ']]

# Add column names for the ranges to your list of columns of interest
#Protection_columns = add_column_range_names(Protection_columns, Protection_column_ranges)

In [5]:
df_Protection = []
df_Protection = df[Protection_columns].copy()


##### Question 1)SECURITY INCIDENT TYPE - Presence of landmines / UXOs

In [6]:
question = 'SECURITY INCIDENT TYPE - Presence of landmines / UXOs'

In [7]:
np.unique(list(df_Protection[question]))

array(['No', 'Yes', 'nan'], dtype='<U32')

In [8]:
#Run this only once
df_Protection['index_Protection'] = 0
df_Protection
df_Protection['index_Protection'] = (df_Protection[question] == 'Yes')*2
df_Protection['index_Protection'].sum()

132

##### Question 2)Has anyone in your household experienced any security incidents in the last thre.1

In [9]:
question = 'Has anyone in your household experienced any security incidents in the last thre.1'

In [10]:
np.unique(list(df_Protection[question]))

array([ 1.,  2., 98., 99.])

In [11]:

print(((df_Protection[question] == 1)*2).sum())
df_Protection['index_Protection'] = df_Protection['index_Protection']+(df_Protection[question] == 1)*2
df_Protection['index_Protection'].sum()

1268


1400

##### Question 3)Of the adults in the household, how many have some form of legal documentation?

In [12]:
question = 'Of the adults in the household, how many have some form of legal documentation?'

In [13]:
np.unique(list(df_Protection[question]))

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 15.])

In [14]:
#Run this only once

df_Protection['index_Protection'] = df_Protection['index_Protection']+(df_Protection[question] == 0)*2
print(((df_Protection[question] == 0)*2).sum())
df_Protection['index_Protection'].sum()

4614


6014

##### Question 4)''Has anyone in your household experienced movement restrictions in your area in t'

In [15]:
question = 'Has anyone in your household experienced movement restrictions in your area in t'


In [16]:
np.unique(list(df_Protection[question]))

array(['Don’t know', 'No movement restriction', 'No response',
       'Yes, but only during the evening and nighttime',
       'Yes, but only if there are multiple household members',
       'Yes, complete movement restrictions',
       'Yes, from 5-10km outside of the camp or community'], dtype='<U53')

In [17]:


df_Protection['index_Protection'] = df_Protection['index_Protection']+((df_Protection[question]== 'Yes, but only during the evening and nighttime')*2 +
                            (df_Protection[question]== 'Yes, from 5-10km outside of the camp or community')*2 +
                            (df_Protection[question]== 'Yes, but only if there are multiple household members')*2 +
                            (df_Protection[question]== 'Yes, complete movement restrictions')*2
                          )
df_Protection['index_Protection'].sum()

10938

##### Question 5)Is there any member of your household who is missing or being detained?

In [18]:
question = 'Is there any member of your household who is missing or being detained?'

In [19]:
np.unique(list(df_Protection[question]))

array(["Don't know", 'No', 'No response', 'Yes'], dtype='<U11')

In [20]:
print(((df_Protection[question] == 'Yes')*2).sum())
df_Protection['index_Protection'] = df_Protection['index_Protection']+(df_Protection[question] == 'Yes')*2
df_Protection['index_Protection'].sum()

784


11722

In [21]:
#Be sure that at the beginning this was empty
df_Protection


Unnamed: 0,SECURITY INCIDENT TYPE - Presence of landmines / UXOs,Has anyone in your household experienced any security incidents in the last thre.1,"Of the adults in the household, how many have some form of legal documentation?",Has anyone in your household experienced movement restrictions in your area in t,Is there any member of your household who is missing or being detained?,index_Protection
1,,2.0,1.0,No movement restriction,No,0
2,,2.0,1.0,No movement restriction,No,0
3,,2.0,1.0,No movement restriction,No,0
4,,2.0,1.0,No movement restriction,No,0
5,,2.0,1.0,No movement restriction,No,0
6,,2.0,2.0,No movement restriction,No,0
7,,2.0,1.0,No movement restriction,No,0
8,,2.0,3.0,No movement restriction,No,0
9,No,1.0,4.0,"Yes, but only during the evening and nighttime",No,4
10,,2.0,3.0,No movement restriction,No,0


In [22]:
(df_Protection['index_Protection']>4).sum()

280

In [37]:
severity = pd.read_csv('severity.csv')
severity['Protection'] = df_Protection['index_Protection'].values
severity.to_csv('severity.csv')