**Exploratory Data Analysis of Fetal Deaths in the USA, 2014-2017**

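This notebook explores fetal death counts in the United States for 2014-2017, loaded from the tab-separated file `us_fetal_deaths_2014-2017.txt`. Each row gives the number of fetal deaths for one combination of mother's age, father's age, mother's pre-pregnancy BMI category, and cause of death. The goal is to clean these fields and examine how fetal death counts are distributed across them.

**TODO:** document the source of the dataset and the date it was retrieved.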

***Preliminary Wrangling***

In [1]:
# import all packages and set plots to be embedded inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

%matplotlib inline

In [3]:
# Load the tab-separated fetal-deaths file into a DataFrame.
# The separator is spelled as the "\t" escape instead of a literal tab
# character: a literal tab is invisible in the source and silently breaks
# parsing if an editor or formatter converts tabs to spaces.
df = pd.read_csv('us_fetal_deaths_2014-2017.txt', sep='\t')
df.head()

Unnamed: 0,Notes,Age of Mother Year,Age of Mother Year Code,Age of Father Year,Age of Father Year Code,Mother's Pre-pregnancy BMI,Mother's Pre-pregnancy BMI Code,Cause of Death,Cause of Death Code,Fetal Deaths
0,,12 years and under,12,Unknown or Not Stated,99.0,Normal 18.5-24.9,2.0,Other preterm infants,P07.3,1.0
1,,12 years and under,12,Unknown or Not Stated,99.0,Overweight 25.0-29.9,3.0,Newborn affected by other forms of placental s...,P02.1,1.0
2,,12 years and under,12,Unknown or Not Stated,99.0,Overweight 25.0-29.9,3.0,Fetal death of unspecified cause,P95,1.0
3,,12 years and under,12,Unknown or Not Stated,99.0,Overweight 25.0-29.9,3.0,Not Reported,NR,2.0
4,,12 years and under,12,Unknown or Not Stated,99.0,Obesity II 35.0-39.9,5.0,Not Reported,NR,1.0


In [6]:
# Remove 'Notes' and the four '... Code' columns; each code column has a
# descriptive label column alongside it, and those label columns are kept.
df = df.drop(
    columns=[
        'Notes',
        'Age of Mother Year Code',
        'Age of Father Year Code',
        "Mother's Pre-pregnancy BMI Code",
        "Cause of Death Code",
    ]
)
df.head()

Unnamed: 0,Age of Mother Year,Age of Father Year,Mother's Pre-pregnancy BMI,Cause of Death,Fetal Deaths
0,12 years and under,Unknown or Not Stated,Normal 18.5-24.9,Other preterm infants,1.0
1,12 years and under,Unknown or Not Stated,Overweight 25.0-29.9,Newborn affected by other forms of placental s...,1.0
2,12 years and under,Unknown or Not Stated,Overweight 25.0-29.9,Fetal death of unspecified cause,1.0
3,12 years and under,Unknown or Not Stated,Overweight 25.0-29.9,Not Reported,2.0
4,12 years and under,Unknown or Not Stated,Obesity II 35.0-39.9,Not Reported,1.0


In [8]:
# Rename the remaining columns to short snake_case names.
# An explicit old -> new mapping is used instead of positional assignment to
# df.columns, so a change in column order can never attach the wrong label
# to a column without being noticed.
column_renames = {
    'Age of Mother Year': 'mother_age',
    'Age of Father Year': 'father_age',
    "Mother's Pre-pregnancy BMI": 'mother_BMI',
    'Cause of Death': 'death_cause',
    'Fetal Deaths': 'death_count',
}
df = df.rename(columns=column_renames)

# Fail loudly if the frame does not end up with exactly the expected columns
# (rename() ignores missing keys by default, which also keeps this cell
# safe to re-run).
assert list(df.columns) == list(column_renames.values()), df.columns.tolist()
df.head()

Unnamed: 0,mother_age,father_age,mother_BMI,death_cause,death_count
0,12 years and under,Unknown or Not Stated,Normal 18.5-24.9,Other preterm infants,1.0
1,12 years and under,Unknown or Not Stated,Overweight 25.0-29.9,Newborn affected by other forms of placental s...,1.0
2,12 years and under,Unknown or Not Stated,Overweight 25.0-29.9,Fetal death of unspecified cause,1.0
3,12 years and under,Unknown or Not Stated,Overweight 25.0-29.9,Not Reported,2.0
4,12 years and under,Unknown or Not Stated,Obesity II 35.0-39.9,Not Reported,1.0


In [9]:
# List the distinct values found in the mother_age column (NaN is included)
df['mother_age'].unique()

array(['12 years and under', '13 years', '14 years', '15 years',
       '16 years', '17 years', '18 years', '19 years', '20 years',
       '21 years', '22 years', '23 years', '24 years', '25 years',
       '26 years', '27 years', '28 years', '29 years', '30 years',
       '31 years', '32 years', '33 years', '34 years', '35 years',
       '36 years', '37 years', '38 years', '39 years', '40 years',
       '41 years', '42 years', '43 years', '44 years', '45 years',
       '46 years', '47 years', '48 years', '49 years',
       '50 years and over', nan], dtype=object)

In [10]:
# List the distinct values found in the father_age column (NaN is included)
df['father_age'].unique()

array(['Unknown or Not Stated', '13 years', '14 years', '15 years',
       '16 years', '17 years', '18 years', '20 years', 'Not Reported',
       '19 years', '21 years', '23 years', '24 years', '26 years',
       '28 years', '30 years', '35 years', '22 years', '25 years',
       '27 years', '29 years', '34 years', '36 years', '37 years',
       '31 years', '32 years', '33 years', '38 years', '39 years',
       '40 years', '44 years', '49 years', '50 years', '41 years',
       '42 years', '58 years', '59 years', '43 years', '45 years',
       '47 years', '64 years', '69 years', '9 years', '46 years',
       '48 years', '53 years', '52 years', '55 years', '57 years',
       '51 years', '61 years', '75 years', '54 years', '67 years',
       '60 years', '63 years', '56 years', '62 years', '73 years',
       '65 years', '70 years', '66 years', '72 years', '71 years',
       '78 years', '77 years', '68 years', nan], dtype=object)

In [11]:
# List the distinct BMI categories found in the mother_BMI column (NaN is included)
df['mother_BMI'].unique()

array(['Normal 18.5-24.9', 'Overweight 25.0-29.9', 'Obesity II 35.0-39.9',
       'Underweight <18.5', 'Unknown or Not Stated',
       'Obesity I 30.0-34.9', 'Not Reported',
       'Extreme Obesity III > 39.9', nan], dtype=object)

In [13]:
# Count the distinct death_cause values; nunique() leaves NaN out of the count by default
df['death_cause'].nunique()

433