In [47]:
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, precision_recall_curve,f1_score, fbeta_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [48]:
df_train = pd.read_csv('pre_processed_train.csv')
df_test = pd.read_csv('pre_processed_test.csv')

In [49]:
df_train.drop(columns='Unnamed: 0', inplace = True)
df_test.drop(columns='Unnamed: 0', inplace = True)

In [50]:
#Get rid of population outliers
df_train = df_train[~(df_train.population > 12000)]
df_test = df_test[~(df_test.population > 12000)]

In [51]:
# 1 represents non-functional or needs repair water pumps. 0 represents functional water pumps.
def change_target(row):
    if row['status_group'] == 3:
        return 0
    else:
        return 1
    
df_train['status_group'] = df_train.apply(lambda row: change_target(row), axis = 1)
df_test['status_group'] = df_test.apply(lambda row: change_target(row), axis = 1)

In [52]:
# Drop columns with strong correlations

df_train.drop(columns='extraction_type_class_handpump', inplace = True)
df_test.drop(columns='extraction_type_class_handpump', inplace = True)

df_train.drop(columns='scheme_management_Private operator', inplace = True)
df_test.drop(columns='scheme_management_Private operator', inplace = True)

df_train.drop(columns='scheme_management_Other', inplace = True)
df_test.drop(columns='scheme_management_Other', inplace = True)

df_train.drop(columns='scheme_management_Parastatal', inplace = True)
df_test.drop(columns='scheme_management_Parastatal', inplace = True)

df_train.drop(columns='scheme_management_VWS', inplace = True)
df_test.drop(columns='scheme_management_VWS', inplace = True)

df_train.drop(columns='scheme_management_WUG', inplace = True)
df_test.drop(columns='scheme_management_WUG', inplace = True)

df_train.drop(columns='scheme_management_Water Board', inplace = True)
df_test.drop(columns='scheme_management_Water Board', inplace = True)

df_train.drop(columns='scheme_management_Water authority', inplace = True)
df_test.drop(columns='scheme_management_Water authority', inplace = True)

df_train.drop(columns='waterpoint_type_group_other', inplace = True)
df_test.drop(columns='waterpoint_type_group_other', inplace = True)

df_train.head()

Unnamed: 0,amount_tsh,gps_height,population,public_meeting,permit,status_group,extraction_type_class_motorpump,extraction_type_class_other,extraction_type_class_rope pump,extraction_type_class_submersible,...,source_type_spring,management_group_other,management_group_parastatal,management_group_unknown,management_group_user-group,construction_year_1970s,construction_year_1980s,construction_year_2000s,construction_year_2010s,construction_year_Unknown
0,6000.0,1390,109,1,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,0,1
1,25.0,686,250,1,1,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
2,0.0,263,58,1,1,1,0,0,0,1,...,0,0,0,0,1,0,1,0,0,0
3,20.0,0,1,1,1,0,0,0,0,1,...,0,0,0,0,1,0,0,1,0,0
4,0.0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1


# Build Model

In [53]:
X = df_train.drop(columns=['status_group', 'construction_year_1970s', 'construction_year_1980s', 
                  'construction_year_2000s', 'construction_year_2010s', 'construction_year_Unknown'])
y = df_train['status_group']

In [54]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

In [55]:
#Scale since there are multiple features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test) # Scale test features

In [46]:
# Find the best C value without polynomial features
C = [.001, .1, 1]

for c in C:
    # Regularize features. L1 shrinks coefficients to 0
    logistic_model = LogisticRegression(penalty='l1', solver='saga', C=c, max_iter=500) 

    #Fit model on training data
    logistic_model.fit(X_train, y_train) 
    
    print("C is:", c)
    for feature, coef in zip(X.columns, logistic_model.coef_[0]):
        print(feature, ':', f'{coef:.2f}')
    print('Training accuracy:', logistic_model.score(X_train, y_train))
    print('Test accuracy:', logistic_model.score(X_test, y_test))
    print('')

C is: 0.001
amount_tsh : 0.00
gps_height : -0.03
population : 0.00
public_meeting : -0.00
permit : 0.00
extraction_type_class_motorpump : 0.05
extraction_type_class_other : 0.37
extraction_type_class_rope pump : 0.00
extraction_type_class_submersible : 0.00
extraction_type_class_wind-powered : 0.00
waterpoint_type_group_communal standpipe : 0.00
waterpoint_type_group_dam : 0.00
waterpoint_type_group_hand pump : -0.02
waterpoint_type_group_improved spring : 0.00
payment_type_monthly : 0.00
payment_type_never pay : 0.18
payment_type_on failure : 0.00
payment_type_other : 0.00
payment_type_per bucket : 0.00
payment_type_unknown : 0.00
quality_group_fluoride : 0.00
quality_group_good : 0.00
quality_group_milky : 0.00
quality_group_salty : 0.00
quality_group_unknown : 0.04
quantity_enough : -0.39
quantity_insufficient : -0.05
quantity_seasonal : -0.03
quantity_unknown : 0.00
source_type_dam : 0.00
source_type_other : 0.00
source_type_rainwater harvesting : 0.00
source_type_river/lake : 0.00

In [12]:
# Find the best C value with polynomial features
C = [.001, .1, 1]

p = PolynomialFeatures(degree=2)
X_train_poly = p.fit_transform(X_train)

for c in C:
    # Regularize features. L1 shrinks coefficients to 0
    logistic_model = LogisticRegression(penalty='l1', solver='liblinear', C=c) 

    #Fit model on training data
    logistic_model.fit(X_train_poly, y_train) 
    
    print("C is:", c)
    myList = list(zip(p.get_feature_names(X.columns),logistic_model.coef_[0]))
    for element in myList:
        print(element[0], ": ", round(element[1], 2))
    print('Training accuracy:', logistic_model.score(X_train_poly, y_train))
    print('Test accuracy:', logistic_model.score(p.transform(X_test), y_test))
    print('')

C is: 0.001
1 :  0.0
amount_tsh :  0.0
gps_height :  0.0
population :  0.0
public_meeting :  0.0
permit :  0.0
extraction_type_class_motorpump :  0.0
extraction_type_class_other :  0.0
extraction_type_class_rope pump :  0.0
extraction_type_class_submersible :  0.0
extraction_type_class_wind-powered :  0.0
waterpoint_type_group_communal standpipe :  0.0
waterpoint_type_group_dam :  0.0
waterpoint_type_group_hand pump :  0.0
waterpoint_type_group_improved spring :  0.0
payment_type_monthly :  0.0
payment_type_never pay :  0.14
payment_type_on failure :  0.0
payment_type_other :  0.0
payment_type_per bucket :  0.0
payment_type_unknown :  0.0
quality_group_fluoride :  0.0
quality_group_good :  0.0
quality_group_milky :  0.0
quality_group_salty :  0.0
quality_group_unknown :  0.0
quantity_enough :  -0.49
quantity_insufficient :  0.0
quantity_seasonal :  0.0
quantity_unknown :  0.0
source_type_dam :  0.0
source_type_other :  0.0
source_type_rainwater harvesting :  0.0
source_type_river/lake 

waterpoint_type_group_communal standpipe quality_group_unknown :  0.0
waterpoint_type_group_communal standpipe quantity_enough :  0.0
waterpoint_type_group_communal standpipe quantity_insufficient :  0.0
waterpoint_type_group_communal standpipe quantity_seasonal :  0.0
waterpoint_type_group_communal standpipe quantity_unknown :  0.0
waterpoint_type_group_communal standpipe source_type_dam :  0.0
waterpoint_type_group_communal standpipe source_type_other :  0.0
waterpoint_type_group_communal standpipe source_type_rainwater harvesting :  0.0
waterpoint_type_group_communal standpipe source_type_river/lake :  0.0
waterpoint_type_group_communal standpipe source_type_shallow well :  0.0
waterpoint_type_group_communal standpipe source_type_spring :  0.0
waterpoint_type_group_communal standpipe management_group_other :  0.0
waterpoint_type_group_communal standpipe management_group_parastatal :  0.0
waterpoint_type_group_communal standpipe management_group_unknown :  0.0
waterpoint_type_group_c

quality_group_milky quantity_seasonal :  0.0
quality_group_milky quantity_unknown :  0.0
quality_group_milky source_type_dam :  0.0
quality_group_milky source_type_other :  0.0
quality_group_milky source_type_rainwater harvesting :  0.0
quality_group_milky source_type_river/lake :  0.0
quality_group_milky source_type_shallow well :  0.0
quality_group_milky source_type_spring :  0.0
quality_group_milky management_group_other :  0.0
quality_group_milky management_group_parastatal :  0.0
quality_group_milky management_group_unknown :  0.0
quality_group_milky management_group_user-group :  0.0
quality_group_milky construction_year_1970s :  0.0
quality_group_milky construction_year_1980s :  0.0
quality_group_milky construction_year_2000s :  0.0
quality_group_milky construction_year_2010s :  0.0
quality_group_milky construction_year_Unknown :  0.0
quality_group_salty^2 :  0.0
quality_group_salty quality_group_unknown :  0.0
quality_group_salty quantity_enough :  0.0
quality_group_salty quant

Test accuracy: 0.7238871473354231

C is: 0.1
1 :  0.0
amount_tsh :  -0.06
gps_height :  0.05
population :  -0.05
public_meeting :  0.0
permit :  0.0
extraction_type_class_motorpump :  0.0
extraction_type_class_other :  0.0
extraction_type_class_rope pump :  0.0
extraction_type_class_submersible :  0.0
extraction_type_class_wind-powered :  0.0
waterpoint_type_group_communal standpipe :  0.0
waterpoint_type_group_dam :  0.0
waterpoint_type_group_hand pump :  -0.21
waterpoint_type_group_improved spring :  0.0
payment_type_monthly :  0.0
payment_type_never pay :  0.19
payment_type_on failure :  0.0
payment_type_other :  0.0
payment_type_per bucket :  0.0
payment_type_unknown :  0.0
quality_group_fluoride :  0.0
quality_group_good :  0.0
quality_group_milky :  0.0
quality_group_salty :  0.0
quality_group_unknown :  0.0
quantity_enough :  -1.18
quantity_insufficient :  0.0
quantity_seasonal :  0.0
quantity_unknown :  0.0
source_type_dam :  0.0
source_type_other :  0.0
source_type_rainwater h

extraction_type_class_wind-powered quality_group_fluoride :  0.0
extraction_type_class_wind-powered quality_group_good :  0.0
extraction_type_class_wind-powered quality_group_milky :  0.01
extraction_type_class_wind-powered quality_group_salty :  -0.01
extraction_type_class_wind-powered quality_group_unknown :  0.0
extraction_type_class_wind-powered quantity_enough :  -0.02
extraction_type_class_wind-powered quantity_insufficient :  0.0
extraction_type_class_wind-powered quantity_seasonal :  -0.02
extraction_type_class_wind-powered quantity_unknown :  0.01
extraction_type_class_wind-powered source_type_dam :  0.01
extraction_type_class_wind-powered source_type_other :  0.0
extraction_type_class_wind-powered source_type_rainwater harvesting :  0.0
extraction_type_class_wind-powered source_type_river/lake :  -0.01
extraction_type_class_wind-powered source_type_shallow well :  0.01
extraction_type_class_wind-powered source_type_spring :  0.0
extraction_type_class_wind-powered management_g

quality_group_fluoride source_type_spring :  -0.04
quality_group_fluoride management_group_other :  0.01
quality_group_fluoride management_group_parastatal :  0.0
quality_group_fluoride management_group_unknown :  0.0
quality_group_fluoride management_group_user-group :  0.0
quality_group_fluoride construction_year_1970s :  0.0
quality_group_fluoride construction_year_1980s :  0.0
quality_group_fluoride construction_year_2000s :  0.0
quality_group_fluoride construction_year_2010s :  -0.02
quality_group_fluoride construction_year_Unknown :  -0.02
quality_group_good^2 :  0.0
quality_group_good quality_group_milky :  0.0
quality_group_good quality_group_salty :  -0.02
quality_group_good quality_group_unknown :  0.0
quality_group_good quantity_enough :  0.08
quality_group_good quantity_insufficient :  0.05
quality_group_good quantity_seasonal :  -0.03
quality_group_good quantity_unknown :  -0.03
quality_group_good source_type_dam :  -0.0
quality_group_good source_type_other :  0.02
quality

Test accuracy: 0.7549843260188088

C is: 1
1 :  0.17
amount_tsh :  0.0
gps_height :  0.11
population :  -0.06
public_meeting :  0.0
permit :  0.0
extraction_type_class_motorpump :  0.0
extraction_type_class_other :  0.0
extraction_type_class_rope pump :  0.0
extraction_type_class_submersible :  0.0
extraction_type_class_wind-powered :  0.0
waterpoint_type_group_communal standpipe :  -0.07
waterpoint_type_group_dam :  0.0
waterpoint_type_group_hand pump :  -0.46
waterpoint_type_group_improved spring :  0.0
payment_type_monthly :  -0.01
payment_type_never pay :  0.1
payment_type_on failure :  0.0
payment_type_other :  0.0
payment_type_per bucket :  -0.02
payment_type_unknown :  0.0
quality_group_fluoride :  0.0
quality_group_good :  0.16
quality_group_milky :  0.0
quality_group_salty :  0.0
quality_group_unknown :  0.0
quantity_enough :  -1.75
quantity_insufficient :  -0.58
quantity_seasonal :  -0.31
quantity_unknown :  0.0
source_type_dam :  0.0
source_type_other :  0.0
source_type_rain

extraction_type_class_submersible payment_type_other :  -0.1
extraction_type_class_submersible payment_type_per bucket :  -0.03
extraction_type_class_submersible payment_type_unknown :  -0.04
extraction_type_class_submersible quality_group_fluoride :  -0.17
extraction_type_class_submersible quality_group_good :  0.15
extraction_type_class_submersible quality_group_milky :  0.0
extraction_type_class_submersible quality_group_salty :  0.04
extraction_type_class_submersible quality_group_unknown :  -0.09
extraction_type_class_submersible quantity_enough :  0.02
extraction_type_class_submersible quantity_insufficient :  0.1
extraction_type_class_submersible quantity_seasonal :  0.0
extraction_type_class_submersible quantity_unknown :  -0.0
extraction_type_class_submersible source_type_dam :  0.03
extraction_type_class_submersible source_type_other :  0.03
extraction_type_class_submersible source_type_rainwater harvesting :  0.14
extraction_type_class_submersible source_type_river/lake :  0

payment_type_unknown quality_group_fluoride :  -0.04
payment_type_unknown quality_group_good :  -0.01
payment_type_unknown quality_group_milky :  -0.01
payment_type_unknown quality_group_salty :  0.03
payment_type_unknown quality_group_unknown :  0.04
payment_type_unknown quantity_enough :  0.06
payment_type_unknown quantity_insufficient :  0.0
payment_type_unknown quantity_seasonal :  0.02
payment_type_unknown quantity_unknown :  0.01
payment_type_unknown source_type_dam :  0.03
payment_type_unknown source_type_other :  -0.03
payment_type_unknown source_type_rainwater harvesting :  -0.17
payment_type_unknown source_type_river/lake :  -0.11
payment_type_unknown source_type_shallow well :  -0.19
payment_type_unknown source_type_spring :  -0.07
payment_type_unknown management_group_other :  -0.0
payment_type_unknown management_group_parastatal :  0.05
payment_type_unknown management_group_unknown :  0.0
payment_type_unknown management_group_user-group :  0.07
payment_type_unknown constru

Test accuracy: 0.7551097178683386

C is: 10
1 :  0.22
amount_tsh :  0.33
gps_height :  0.16
population :  -0.06
public_meeting :  0.0
permit :  -0.02
extraction_type_class_motorpump :  0.12
extraction_type_class_other :  0.0
extraction_type_class_rope pump :  0.0
extraction_type_class_submersible :  0.0
extraction_type_class_wind-powered :  0.06
waterpoint_type_group_communal standpipe :  -0.19
waterpoint_type_group_dam :  0.0
waterpoint_type_group_hand pump :  -0.48
waterpoint_type_group_improved spring :  0.0
payment_type_monthly :  -0.09
payment_type_never pay :  -0.07
payment_type_on failure :  0.0
payment_type_other :  0.0
payment_type_per bucket :  -0.16
payment_type_unknown :  0.0
quality_group_fluoride :  0.0
quality_group_good :  0.2
quality_group_milky :  -0.09
quality_group_salty :  0.0
quality_group_unknown :  0.0
quantity_enough :  -1.92
quantity_insufficient :  -0.48
quantity_seasonal :  -0.34
quantity_unknown :  -0.02
source_type_dam :  -0.05
source_type_other :  0.53
so

extraction_type_class_rope pump quantity_seasonal :  -0.19
extraction_type_class_rope pump quantity_unknown :  0.0
extraction_type_class_rope pump source_type_dam :  0.0
extraction_type_class_rope pump source_type_other :  0.0
extraction_type_class_rope pump source_type_rainwater harvesting :  0.24
extraction_type_class_rope pump source_type_river/lake :  0.0
extraction_type_class_rope pump source_type_shallow well :  0.37
extraction_type_class_rope pump source_type_spring :  0.14
extraction_type_class_rope pump management_group_other :  -0.14
extraction_type_class_rope pump management_group_parastatal :  -0.07
extraction_type_class_rope pump management_group_unknown :  0.0
extraction_type_class_rope pump management_group_user-group :  -0.03
extraction_type_class_rope pump construction_year_1970s :  0.0
extraction_type_class_rope pump construction_year_1980s :  0.0
extraction_type_class_rope pump construction_year_2000s :  0.0
extraction_type_class_rope pump construction_year_2010s :  

payment_type_other source_type_shallow well :  -0.09
payment_type_other source_type_spring :  -0.29
payment_type_other management_group_other :  -0.07
payment_type_other management_group_parastatal :  -0.12
payment_type_other management_group_unknown :  0.0
payment_type_other management_group_user-group :  -0.11
payment_type_other construction_year_1970s :  -0.0
payment_type_other construction_year_1980s :  0.02
payment_type_other construction_year_2000s :  -0.01
payment_type_other construction_year_2010s :  0.04
payment_type_other construction_year_Unknown :  0.07
payment_type_per bucket^2 :  0.05
payment_type_per bucket payment_type_unknown :  0.0
payment_type_per bucket quality_group_fluoride :  0.06
payment_type_per bucket quality_group_good :  0.07
payment_type_per bucket quality_group_milky :  -0.21
payment_type_per bucket quality_group_salty :  0.1
payment_type_per bucket quality_group_unknown :  0.15
payment_type_per bucket quantity_enough :  0.17
payment_type_per bucket quanti

Training accuracy: 0.7704388714733542
Test accuracy: 0.7531034482758621



In [155]:
logistic_model = LogisticRegression(penalty='l1', solver='liblinear', C=.005)

logistic_model.fit(X_train, y_train) #Fit model on training data

preds = logistic_model.predict(X_test) # Make predictions using test features



In [156]:
def make_confusion_matrix(model, threshold=.5):
    # Predict class 1 if probability of being in class 1 is greater than threshold
    # (model.predict(X_test) does this automatically with a threshold of 0.5)
    fig, ax = plt.subplots(figsize=(8,6))
    y_predict = (model.predict_proba(X_test)[:, 1] >= threshold)
    functional_confusion = confusion_matrix(y_test, y_predict)

    #Print the sns map
    group_names = ['True Neg','False Po','False Neg','True Pos']
    group_counts = ['{0:0.0f}'.format(value) for value in
            functional_confusion.flatten()]
    group_percentages = ['{0:.2%}'.format(value) for value in
            functional_confusion.flatten()/np.sum(functional_confusion)]
    labels = [f'{v1}\n{v2}\n{v3}' for v1, v2, v3 in
          zip(group_names,group_counts,group_percentages)]
    labels = np.asarray(labels).reshape(2,2)
    xlabels = ['functional', 'non-functional']
    ylabels = ['functional', 'non-functional']
    sns.heatmap(functional_confusion, annot=labels, fmt='', cmap='Blues', 
                xticklabels=xlabels, yticklabels=ylabels)
    ax.set_ylim([2,0])
    plt.xlabel('prediction')
    plt.ylabel('actual')
    
    #Princt various scores/metrics
    print("Precision: {:6.4f},   Recall: {:6.4f}".format(precision_score(y_test, y_predict), 
                                                     recall_score(y_test, y_predict)))
    print("F1 Score: ", f1_score(y_test, y_predict))
    print("ROC AUC score : ", roc_auc_score(y_test, model.predict_proba(X_test)[:,1]))
    
    print("Accuracy score: ", accuracy_score(y_test, y_predict))
    
    print('Training accuracy:', model.score(X_train, y_train))
    print('Test accuracy:', model.score(X_test, y_test))


In [157]:
# Let's see how our confusion matrix changes with changes to the cutoff! 

from ipywidgets import interactive, FloatSlider

interactive(lambda threshold: make_confusion_matrix(logistic_model, threshold), threshold=(0.0,1.0,0.02))

interactive(children=(FloatSlider(value=0.5, description='threshold', max=1.0, step=0.02), Output()), _dom_cla…

In [158]:
from math import e

In [159]:
list_coef = list(zip(X.columns, logistic_model.coef_[0]))
list_coef

[('amount_tsh', -0.0942432593224147),
 ('gps_height', -0.07737272140033416),
 ('population', 0.0),
 ('public_meeting', -0.10544213056344014),
 ('permit', -0.06533379742757844),
 ('extraction_type_class_motorpump', 0.10479641443812097),
 ('extraction_type_class_other', 0.386608424897655),
 ('extraction_type_class_rope pump', 0.0),
 ('extraction_type_class_submersible', 0.057363400639336426),
 ('extraction_type_class_wind-powered', 0.0),
 ('waterpoint_type_group_communal standpipe', -0.26152328856773516),
 ('waterpoint_type_group_dam', -0.009292994984575347),
 ('waterpoint_type_group_hand pump', -0.4518066800685296),
 ('waterpoint_type_group_improved spring', -0.106786785758562),
 ('payment_type_monthly', 0.0),
 ('payment_type_never pay', 0.2595363005222685),
 ('payment_type_on failure', 0.0),
 ('payment_type_other', 0.0),
 ('payment_type_per bucket', -0.14678072624142452),
 ('payment_type_unknown', 0.1015919673897067),
 ('quality_group_fluoride', -0.014098359580379334),
 ('quality_group

In [160]:
new_tuple = ()
for itup in list_coef:
    new_tuple += (round(1 - e**itup[1], 2),)
    
interpretable = list(zip(X.columns, new_tuple))

sorted_interp = sorted(interpretable, key = lambda x: abs((x[1])), reverse = True)
print(sorted_interp)

sorted_interp_feat = []
for element in sorted_interp:
    sorted_interp_feat.append(element[0])

new_tuple = ()
for itup in sorted_interp:
    new_tuple += (str(itup[1]) + '%',)
    
perc_interp = list(zip(sorted_interp_feat, new_tuple))
perc_interp

[('quantity_enough', 0.72), ('quantity_insufficient', 0.58), ('extraction_type_class_other', -0.47), ('quantity_seasonal', 0.44), ('waterpoint_type_group_hand pump', 0.36), ('payment_type_never pay', -0.3), ('waterpoint_type_group_communal standpipe', 0.23), ('source_type_spring', 0.15), ('payment_type_per bucket', 0.14), ('extraction_type_class_motorpump', -0.11), ('payment_type_unknown', -0.11), ('source_type_rainwater harvesting', 0.11), ('public_meeting', 0.1), ('waterpoint_type_group_improved spring', 0.1), ('amount_tsh', 0.09), ('gps_height', 0.07), ('permit', 0.06), ('extraction_type_class_submersible', -0.06), ('quality_group_unknown', -0.06), ('source_type_other', 0.05), ('quality_group_salty', -0.03), ('quantity_unknown', 0.03), ('source_type_shallow well', 0.02), ('waterpoint_type_group_dam', 0.01), ('quality_group_fluoride', 0.01), ('quality_group_milky', 0.01), ('source_type_dam', -0.01), ('management_group_other', 0.01), ('management_group_parastatal', 0.01), ('management

[('quantity_enough', '0.72%'),
 ('quantity_insufficient', '0.58%'),
 ('extraction_type_class_other', '-0.47%'),
 ('quantity_seasonal', '0.44%'),
 ('waterpoint_type_group_hand pump', '0.36%'),
 ('payment_type_never pay', '-0.3%'),
 ('waterpoint_type_group_communal standpipe', '0.23%'),
 ('source_type_spring', '0.15%'),
 ('payment_type_per bucket', '0.14%'),
 ('extraction_type_class_motorpump', '-0.11%'),
 ('payment_type_unknown', '-0.11%'),
 ('source_type_rainwater harvesting', '0.11%'),
 ('public_meeting', '0.1%'),
 ('waterpoint_type_group_improved spring', '0.1%'),
 ('amount_tsh', '0.09%'),
 ('gps_height', '0.07%'),
 ('permit', '0.06%'),
 ('extraction_type_class_submersible', '-0.06%'),
 ('quality_group_unknown', '-0.06%'),
 ('source_type_other', '0.05%'),
 ('quality_group_salty', '-0.03%'),
 ('quantity_unknown', '0.03%'),
 ('source_type_shallow well', '0.02%'),
 ('waterpoint_type_group_dam', '0.01%'),
 ('quality_group_fluoride', '0.01%'),
 ('quality_group_milky', '0.01%'),
 ('source_