In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from datetime import datetime
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, Float, MetaData
import pymongo
import json
# from config import password


In [2]:
# pull in percent of safe water csv
safe_water_file = "data/percent_safe_water.csv"
safe_water_df = pd.read_csv(safe_water_file)
safe_water_df.head(5)


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,Unnamed: 65
0,Aruba,ABW,People using safely managed drinking water ser...,SH.H2O.SMDW.ZS,,,,,,,...,,,,,,,,,,
1,Afghanistan,AFG,People using safely managed drinking water ser...,SH.H2O.SMDW.ZS,,,,,,,...,,,,,,,,,,
2,Angola,AGO,People using safely managed drinking water ser...,SH.H2O.SMDW.ZS,,,,,,,...,,,,,,,,,,
3,Albania,ALB,People using safely managed drinking water ser...,SH.H2O.SMDW.ZS,,,,,,,...,65.901394,67.893339,69.903702,69.9445,69.984329,70.02315,,,,
4,Andorra,AND,People using safely managed drinking water ser...,SH.H2O.SMDW.ZS,,,,,,,...,90.640001,90.64,90.639998,90.640002,90.64,90.639997,,,,


In [3]:
#select columns for analysis
safe_water_df = safe_water_df[['Country Name', 'Country Code','2017']]
safe_water_df

Unnamed: 0,Country Name,Country Code,2017
0,Aruba,ABW,
1,Afghanistan,AFG,
2,Angola,AGO,
3,Albania,ALB,70.023150
4,Andorra,AND,90.639997
5,Arab World,ARB,
6,United Arab Emirates,ARE,
7,Argentina,ARG,
8,Armenia,ARM,86.476355
9,American Samoa,ASM,12.575083


In [4]:
#convert strings to lower case for PG Admin connection
safe_water_df.columns = safe_water_df.columns.str.lower()
safe_water_df.columns = safe_water_df.columns.str.replace(' ','_')
safe_water_df

Unnamed: 0,country_name,country_code,2017
0,Aruba,ABW,
1,Afghanistan,AFG,
2,Angola,AGO,
3,Albania,ALB,70.023150
4,Andorra,AND,90.639997
5,Arab World,ARB,
6,United Arab Emirates,ARE,
7,Argentina,ARG,
8,Armenia,ARM,86.476355
9,American Samoa,ASM,12.575083


In [5]:
#drop NaN columns
safe_water_df = safe_water_df.dropna()
safe_water_df

Unnamed: 0,country_name,country_code,2017
3,Albania,ALB,70.023150
4,Andorra,AND,90.639997
8,Armenia,ARM,86.476355
9,American Samoa,ASM,12.575083
12,Austria,AUT,98.906201
13,Azerbaijan,AZE,73.564331
15,Belgium,BEL,99.523189
18,Bangladesh,BGD,55.441226
19,Bulgaria,BGR,96.945935
20,Bahrain,BHR,98.983262


In [6]:
#check data types (need to convert object to float)
safe_water_df.dtypes

country_name     object
country_code     object
2017            float64
dtype: object

In [8]:
# pull in death by risk factor
cause_of_death_file = "data/number_of_deaths_by_risk_factor.csv"
cod_df = pd.read_csv(cause_of_death_file)
cod_df

Unnamed: 0,Entity,Code,Year,Deaths - Unsafe water source - Sex: Both - Age: All Ages (Number),Deaths - Unsafe sanitation - Sex: Both - Age: All Ages (Number),Deaths - No access to handwashing facility - Sex: Both - Age: All Ages (Number),Deaths - Household air pollution from solid fuels - Sex: Both - Age: All Ages (Number),Deaths - Non-exclusive breastfeeding - Sex: Both - Age: All Ages (Number),Deaths - Discontinued breastfeeding - Sex: Both - Age: All Ages (Number),Deaths - Child wasting - Sex: Both - Age: All Ages (Number),...,Deaths - Air pollution - Sex: Both - Age: All Ages (Number),Deaths – Outdoor air pollution (all ages) (IHME),Deaths - Diet low in fiber - Sex: Both - Age: All Ages (Number),Deaths - Diet high in sodium - Sex: Both - Age: All Ages (Number),Deaths - Diet low in legumes - Sex: Both - Age: All Ages (Number),Deaths - Diet low in calcium - Sex: Both - Age: All Ages (Number),Deaths - Diet high in red meat - Sex: Both - Age: All Ages (Number),Deaths - Diet low in whole grains - Sex: Both - Age: All Ages (Number),Deaths - Diet low in nuts and seeds - Sex: Both - Age: All Ages (Number),Deaths - Diet low in seafood omega-3 fatty acids - Sex: Both - Age: All Ages (Number)
0,Afghanistan,AFG,1990,7554.049543,5887.747628,5412.314513,22388.497233,3221.138842,156.097553,22778.849249,...,26598.006727,4383.83,2864.234512,2737.197934,3418.965194,181.287701,2.204843e-01,11381.377345,7299.867330,6456.565238
1,Afghanistan,AFG,1991,7359.676749,5732.770160,5287.891103,22128.758206,3150.559597,151.539851,22292.691113,...,26379.532218,4426.36,2974.479775,2741.184956,3457.398250,184.251584,1.847140e-01,11487.832390,7386.764303,6511.344276
2,Afghanistan,AFG,1992,7650.437822,5954.804987,5506.657363,22873.768789,3331.349048,156.609194,23102.197940,...,27263.127914,4568.91,3168.591633,2798.560245,3586.490291,191.318450,1.513539e-01,11866.235572,7640.628526,6703.095594
3,Afghanistan,AFG,1993,10270.731383,7986.736613,7104.620351,25599.756284,4477.006100,206.834451,27902.669960,...,30495.561499,5080.29,3401.045056,2853.301679,3746.266658,199.767293,1.093919e-01,12335.961682,7968.311853,6946.196824
4,Afghanistan,AFG,1994,11409.177112,8863.010065,8051.515953,28013.167200,5102.622054,233.930571,32929.005932,...,33323.161401,5499.23,3599.709735,2880.025765,3869.288402,206.619714,5.910348e-02,12672.950191,8244.368430,7138.030745
5,Afghanistan,AFG,1995,12676.647424,9840.848680,8770.686294,29062.618787,5402.660379,262.793340,35631.997028,...,34511.845284,5641.29,3732.598460,2892.221692,3935.209932,211.137298,3.992213e-02,12854.592679,8375.861883,7225.040846
6,Afghanistan,AFG,1996,12154.942315,9426.896445,8610.686857,29407.321601,5263.644458,253.668181,36114.593233,...,34952.101583,5739.07,3877.026009,2905.798225,4019.906785,215.778490,2.629072e-02,13089.157280,8537.651676,7336.080630
7,Afghanistan,AFG,1997,12329.131710,9553.555998,8722.943079,29674.398315,5271.771800,258.134146,36749.117453,...,35306.566595,5828.94,4017.069696,2919.683894,4104.371202,220.586247,1.709040e-02,13326.500377,8694.523034,7446.826074
8,Afghanistan,AFG,1998,12133.609541,9390.042030,8621.884918,29807.453214,5165.923779,254.708078,36569.465573,...,35360.168867,5751.54,4137.050160,2925.391235,4173.597768,224.793817,1.072458e-02,13516.547571,8818.249272,7532.322210
9,Afghanistan,AFG,1999,11990.396328,9268.489611,8502.729990,29484.612215,5044.307573,251.898824,36124.046557,...,35040.349506,5756.43,4211.727042,2936.213764,4201.930953,229.298613,7.500705e-03,13584.239432,8867.619444,7553.269512


In [9]:
#select only year 2017 (most recent data)
cod_df = cod_df[cod_df.Year == 2017]
cod_df

Unnamed: 0,Entity,Code,Year,Deaths - Unsafe water source - Sex: Both - Age: All Ages (Number),Deaths - Unsafe sanitation - Sex: Both - Age: All Ages (Number),Deaths - No access to handwashing facility - Sex: Both - Age: All Ages (Number),Deaths - Household air pollution from solid fuels - Sex: Both - Age: All Ages (Number),Deaths - Non-exclusive breastfeeding - Sex: Both - Age: All Ages (Number),Deaths - Discontinued breastfeeding - Sex: Both - Age: All Ages (Number),Deaths - Child wasting - Sex: Both - Age: All Ages (Number),...,Deaths - Air pollution - Sex: Both - Age: All Ages (Number),Deaths – Outdoor air pollution (all ages) (IHME),Deaths - Diet low in fiber - Sex: Both - Age: All Ages (Number),Deaths - Diet high in sodium - Sex: Both - Age: All Ages (Number),Deaths - Diet low in legumes - Sex: Both - Age: All Ages (Number),Deaths - Diet low in calcium - Sex: Both - Age: All Ages (Number),Deaths - Diet high in red meat - Sex: Both - Age: All Ages (Number),Deaths - Diet low in whole grains - Sex: Both - Age: All Ages (Number),Deaths - Diet low in nuts and seeds - Sex: Both - Age: All Ages (Number),Deaths - Diet low in seafood omega-3 fatty acids - Sex: Both - Age: All Ages (Number)
27,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,1.939738e+04,2422.369175,94.118135,1.457527e+04,...,2.605420e+04,6872.06,4889.502247,3.457084e+03,4384.887948,325.722066,9.994555e-04,1.488496e+04,9.074388e+03,7.775130e+03
55,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,7.368815e+02,8.173119,0.071829,4.061321e+01,...,1.638571e+03,919.59,428.785803,1.668336e+03,429.775831,23.218640,2.710364e+00,2.206717e+03,1.130544e+03,8.457940e+02
83,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,5.811236e+01,126.552097,7.723609,6.354547e+02,...,1.322228e+04,13315.30,3004.104009,5.828760e+03,2761.644707,370.146859,8.200700e-03,1.610308e+04,1.148331e+04,8.810311e+03
111,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,7.550280e+00,0.118482,0.003936,2.285860e+00,...,2.217993e+01,14.63,2.679124,8.519993e+00,4.836963,1.089152,3.212988e-01,2.563875e+01,1.752416e+01,8.137023e+00
139,Andean Latin America,,2017,1.257987e+03,733.649228,1578.129626,5.773893e+03,344.102079,20.477721,3.615349e+03,...,1.768615e+04,12011.09,3337.535571,9.758082e+03,2169.986257,1033.264841,5.867989e+01,1.096710e+04,8.049420e+03,4.788967e+03
167,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.506900e-01,0.003816,0.000652,1.546603e-01,...,2.332316e+01,23.67,8.737433,1.695782e+01,7.155082,2.530075,8.273237e-01,2.327595e+01,1.333294e+01,8.509188e+00
195,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,7.129378e+03,2363.336774,212.088470,1.596958e+04,...,1.072838e+04,3724.05,642.995792,1.894787e+03,107.732281,232.904059,2.226318e+00,3.972962e+03,2.617705e+03,1.626438e+03
223,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,1.390788e+00,0.210435,0.012582,2.980396e+00,...,3.137556e+01,29.99,7.213135,1.297270e+01,3.928267,2.406587,7.822828e-02,3.218534e+01,2.072595e+01,8.597258e+00
251,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,1.240409e+03,76.817928,5.073310,1.298508e+03,...,1.701134e+04,15930.98,6235.515037,1.079348e+04,5768.980094,1884.661162,7.921396e+02,1.726954e+04,1.311967e+04,5.032258e+03
279,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.815417e+02,7.747662,0.284809,3.458214e+01,...,2.629636e+03,2189.15,780.643428,1.961583e+03,1091.331943,78.165252,4.955998e+00,2.855040e+03,2.159566e+03,1.325420e+03


In [10]:
#sum all deaths
cod_sum = cod_df.sum(axis=1)
cod_sum

27      2.815395e+05
55      3.314386e+04
83      2.377406e+05
111     2.456121e+03
139     2.576397e+05
167     2.510520e+03
195     1.838618e+05
223     2.607276e+03
251     3.331364e+05
279     4.578739e+04
307     1.812263e+05
335     1.515151e+05
363     9.225982e+04
391     1.311772e+05
419     4.584233e+03
447     6.921886e+03
475     1.116940e+06
503     4.850639e+03
531     2.081458e+05
559     1.055575e+05
587     3.964786e+03
615     7.176054e+04
643     2.545581e+03
671     6.357799e+03
699     6.398792e+04
727     6.115725e+04
755     1.886840e+04
783     1.332175e+06
811     4.046794e+03
839     1.778403e+05
            ...     
5655    1.789879e+05
5683    7.370305e+04
5711    3.026320e+05
5739    4.661359e+05
5767    1.088343e+04
5795    4.759062e+04
5823    2.910738e+03
5851    1.804459e+04
5879    1.371411e+06
5907    9.722451e+04
5935    5.050023e+05
5963    5.810853e+04
5991    1.694526e+05
6019    1.234861e+06
6047    3.441496e+04
6075    5.263808e+05
6103    2.902

In [11]:
#add in sum column (https://www.geeksforgeeks.org/adding-new-column-to-existing-dataframe-in-pandas/)
cod_df['sum'] = cod_sum
cod_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Entity,Code,Year,Deaths - Unsafe water source - Sex: Both - Age: All Ages (Number),Deaths - Unsafe sanitation - Sex: Both - Age: All Ages (Number),Deaths - No access to handwashing facility - Sex: Both - Age: All Ages (Number),Deaths - Household air pollution from solid fuels - Sex: Both - Age: All Ages (Number),Deaths - Non-exclusive breastfeeding - Sex: Both - Age: All Ages (Number),Deaths - Discontinued breastfeeding - Sex: Both - Age: All Ages (Number),Deaths - Child wasting - Sex: Both - Age: All Ages (Number),...,Deaths – Outdoor air pollution (all ages) (IHME),Deaths - Diet low in fiber - Sex: Both - Age: All Ages (Number),Deaths - Diet high in sodium - Sex: Both - Age: All Ages (Number),Deaths - Diet low in legumes - Sex: Both - Age: All Ages (Number),Deaths - Diet low in calcium - Sex: Both - Age: All Ages (Number),Deaths - Diet high in red meat - Sex: Both - Age: All Ages (Number),Deaths - Diet low in whole grains - Sex: Both - Age: All Ages (Number),Deaths - Diet low in nuts and seeds - Sex: Both - Age: All Ages (Number),Deaths - Diet low in seafood omega-3 fatty acids - Sex: Both - Age: All Ages (Number),sum
27,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,1.939738e+04,2422.369175,94.118135,1.457527e+04,...,6872.06,4889.502247,3.457084e+03,4384.887948,325.722066,9.994555e-04,1.488496e+04,9.074388e+03,7.775130e+03,2.815395e+05
55,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,7.368815e+02,8.173119,0.071829,4.061321e+01,...,919.59,428.785803,1.668336e+03,429.775831,23.218640,2.710364e+00,2.206717e+03,1.130544e+03,8.457940e+02,3.314386e+04
83,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,5.811236e+01,126.552097,7.723609,6.354547e+02,...,13315.30,3004.104009,5.828760e+03,2761.644707,370.146859,8.200700e-03,1.610308e+04,1.148331e+04,8.810311e+03,2.377406e+05
111,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,7.550280e+00,0.118482,0.003936,2.285860e+00,...,14.63,2.679124,8.519993e+00,4.836963,1.089152,3.212988e-01,2.563875e+01,1.752416e+01,8.137023e+00,2.456121e+03
139,Andean Latin America,,2017,1.257987e+03,733.649228,1578.129626,5.773893e+03,344.102079,20.477721,3.615349e+03,...,12011.09,3337.535571,9.758082e+03,2169.986257,1033.264841,5.867989e+01,1.096710e+04,8.049420e+03,4.788967e+03,2.576397e+05
167,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.506900e-01,0.003816,0.000652,1.546603e-01,...,23.67,8.737433,1.695782e+01,7.155082,2.530075,8.273237e-01,2.327595e+01,1.333294e+01,8.509188e+00,2.510520e+03
195,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,7.129378e+03,2363.336774,212.088470,1.596958e+04,...,3724.05,642.995792,1.894787e+03,107.732281,232.904059,2.226318e+00,3.972962e+03,2.617705e+03,1.626438e+03,1.838618e+05
223,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,1.390788e+00,0.210435,0.012582,2.980396e+00,...,29.99,7.213135,1.297270e+01,3.928267,2.406587,7.822828e-02,3.218534e+01,2.072595e+01,8.597258e+00,2.607276e+03
251,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,1.240409e+03,76.817928,5.073310,1.298508e+03,...,15930.98,6235.515037,1.079348e+04,5768.980094,1884.661162,7.921396e+02,1.726954e+04,1.311967e+04,5.032258e+03,3.331364e+05
279,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.815417e+02,7.747662,0.284809,3.458214e+01,...,2189.15,780.643428,1.961583e+03,1091.331943,78.165252,4.955998e+00,2.855040e+03,2.159566e+03,1.325420e+03,4.578739e+04


In [12]:
#select columns for analysis
cod_df = cod_df[['Entity', 'Code','Year', 'Deaths - Unsafe water source - Sex: Both - Age: All Ages (Number)', 'Deaths - Unsafe sanitation - Sex: Both - Age: All Ages (Number)', 'Deaths - No access to handwashing facility - Sex: Both - Age: All Ages (Number)', 'sum']]
cod_df

Unnamed: 0,Entity,Code,Year,Deaths - Unsafe water source - Sex: Both - Age: All Ages (Number),Deaths - Unsafe sanitation - Sex: Both - Age: All Ages (Number),Deaths - No access to handwashing facility - Sex: Both - Age: All Ages (Number),sum
27,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,2.815395e+05
55,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,3.314386e+04
83,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,2.377406e+05
111,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,2.456121e+03
139,Andean Latin America,,2017,1.257987e+03,733.649228,1578.129626,2.576397e+05
167,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.510520e+03
195,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,1.838618e+05
223,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,2.607276e+03
251,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,3.331364e+05
279,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.578739e+04


In [13]:
#rename columns 
cod_df = cod_df.rename(columns={'Deaths - Unsafe water source - Sex: Both - Age: All Ages (Number)': 'unsafe_water_source', 'Deaths - Unsafe sanitation - Sex: Both - Age: All Ages (Number)':'unsafe_sanitation', 'Deaths - No access to handwashing facility - Sex: Both - Age: All Ages (Number)':'no_access_handwashing'})
cod_df

Unnamed: 0,Entity,Code,Year,unsafe_water_source,unsafe_sanitation,no_access_handwashing,sum
27,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,2.815395e+05
55,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,3.314386e+04
83,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,2.377406e+05
111,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,2.456121e+03
139,Andean Latin America,,2017,1.257987e+03,733.649228,1578.129626,2.576397e+05
167,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.510520e+03
195,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,1.838618e+05
223,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,2.607276e+03
251,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,3.331364e+05
279,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.578739e+04


In [14]:
cod_df = cod_df.dropna()
cod_df

Unnamed: 0,Entity,Code,Year,unsafe_water_source,unsafe_sanitation,no_access_handwashing,sum
27,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,2.815395e+05
55,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,3.314386e+04
83,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,2.377406e+05
111,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,2.456121e+03
167,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.510520e+03
195,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,1.838618e+05
223,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,2.607276e+03
251,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,3.331364e+05
279,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.578739e+04
335,Australia,AUS,2017,2.669516e+01,5.672185,57.881022,1.515151e+05


In [15]:
#convert strings to lower case for PG Admin connection
cod_df.columns = cod_df.columns.str.lower()
cod_df

Unnamed: 0,entity,code,year,unsafe_water_source,unsafe_sanitation,no_access_handwashing,sum
27,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,2.815395e+05
55,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,3.314386e+04
83,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,2.377406e+05
111,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,2.456121e+03
167,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.510520e+03
195,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,1.838618e+05
223,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,2.607276e+03
251,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,3.331364e+05
279,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.578739e+04
335,Australia,AUS,2017,2.669516e+01,5.672185,57.881022,1.515151e+05


In [16]:
#find percentage of deaths (https://stackoverflow.com/questions/36619631/how-to-divide-two-columns-element-wise-in-a-pandas-dataframe)
cod_df["unsafe_water_perct"] = (cod_df['unsafe_water_source']/cod_df['sum'])*100
cod_df["unsafe_sanitation_perct"] = (cod_df['unsafe_sanitation']/cod_df['sum'])*100
cod_df["no_handwashing_perct"] = (cod_df['no_access_handwashing']/cod_df['sum'])*100
cod_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,entity,code,year,unsafe_water_source,unsafe_sanitation,no_access_handwashing,sum,unsafe_water_perct,unsafe_sanitation_perct,no_handwashing_perct
27,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,2.815395e+05,1.867109,1.343723,1.476244
55,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,3.314386e+04,0.012345,0.004821,0.013581
83,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,2.377406e+05,0.079892,0.024779,0.080647
111,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,2.456121e+03,0.029317,0.007296,0.011961
167,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.510520e+03,0.001076,0.000736,0.008218
195,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,1.838618e+05,7.971706,5.229775,5.295125
223,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,2.607276e+03,0.039915,0.012600,0.068199
251,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,3.331364e+05,0.080189,0.020603,0.162451
279,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.578739e+04,0.006651,0.009564,0.027660
335,Australia,AUS,2017,2.669516e+01,5.672185,57.881022,1.515151e+05,0.017619,0.003744,0.038201


In [17]:
#check data types (need to convert object to float)
cod_df.dtypes

entity                      object
code                        object
year                         int64
unsafe_water_source        float64
unsafe_sanitation          float64
no_access_handwashing      float64
sum                        float64
unsafe_water_perct         float64
unsafe_sanitation_perct    float64
no_handwashing_perct       float64
dtype: object

In [18]:
# Creating our engine for PostGRES database

rds_connection_string = (f'postgres:0000@localhost:5432/water_data')
engine = create_engine(f'postgresql://{rds_connection_string}')

In [19]:
meta = MetaData()
sanitation = Table(
   'sanitation', meta, 
   Column('entity', String), 
   Column('code', String, primary_key = True), 
   Column('year', Integer),
   Column('unsafe_water_source', Float),
   Column('unsafe_sanitation', Float),
   Column('no_access_handwashing', Float),
   Column('sum', Float),
   Column('unsafe_water_perct', Float),
   Column('no_handwashing_perct', Float),
   Column('unsafe_sanitation_perct', Float)
)
meta.create_all(engine)

In [20]:
engine.table_names()

['sanitation']

In [21]:
#uploading the data to the SQL database sanitation
cod_df.to_sql(name='sanitation', con=engine, if_exists='append', index=False)
conn = engine.connect()

In [22]:
test= pd.read_sql("""select * from sanitation""", conn)

In [23]:
test

Unnamed: 0,entity,code,year,unsafe_water_source,unsafe_sanitation,no_access_handwashing,sum,unsafe_water_perct,no_handwashing_perct,unsafe_sanitation_perct
0,Afghanistan,AFG,2017,5.256649e+03,3783.111117,4156.209013,2.815395e+05,1.867109,1.476244,1.343723
1,Albania,ALB,2017,4.091664e+00,1.597907,4.501301,3.314386e+04,0.012345,0.013581,0.004821
2,Algeria,DZA,2017,1.899361e+02,58.910622,191.730462,2.377406e+05,0.079892,0.080647,0.024779
3,American Samoa,ASM,2017,7.200565e-01,0.179205,0.293769,2.456121e+03,0.029317,0.011961,0.007296
4,Andorra,AND,2017,2.702152e-02,0.018488,0.206306,2.510520e+03,0.001076,0.008218,0.000736
5,Angola,AGO,2017,1.465692e+04,9615.556653,9735.710078,1.838618e+05,7.971706,5.295125,5.229775
6,Antigua and Barbuda,ATG,2017,1.040682e+00,0.328517,1.778131,2.607276e+03,0.039915,0.068199,0.012600
7,Argentina,ARG,2017,2.671392e+02,68.635807,541.182586,3.331364e+05,0.080189,0.162451,0.020603
8,Armenia,ARM,2017,3.045523e+00,4.379285,12.664885,4.578739e+04,0.006651,0.027660,0.009564
9,Australia,AUS,2017,2.669516e+01,5.672185,57.881022,1.515151e+05,0.017619,0.038201,0.003744
