### This is the notebook for LBW Data labelling

## 1. Imports

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from lbw.load_data import get_data

## 2. Dataset

In [2]:
# Load the LBW dataset from its CSV file via the project loader
path = '../data/lbw.csv'
lbw_df = get_data(path=path)

# Report the frame's shape, i.e. a (rows, columns) tuple, then preview the first five rows
print('Number of lbw data samples are', lbw_df.shape)
lbw_df.head(5)

Number of lbw data samples are (50000, 10)


Unnamed: 0,Black,Married,Boy,MomAge,MomSmoke,CigsPerDay,MomWtGain,Visit,MomEdLevel,weightcoded
0,0,1,1,-3,0,0,-16,1,0,0
1,0,1,0,1,0,0,2,3,2,0
2,0,1,1,0,0,0,-3,3,0,0
3,0,1,1,-1,0,0,-5,3,2,1
4,0,1,1,-6,0,0,-20,3,0,0


## 3. Add labels to features

In [3]:
def add_feature_labels(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Return a labelled copy of the coded LBW frame; ``raw_df`` itself is not modified.

    The ``weightcoded`` column is renamed to ``InfantWeight`` and the coded
    columns are replaced by readable labels:

    * InfantWeight: 1 -> 'low', otherwise 'normal'
    * Black:        1 -> 'black mother', otherwise 'white mother'
    * Boy:          1 -> 'baby boy', otherwise 'baby girl'
    * MomSmoke:     1 -> 'smoking mother', otherwise 'non smoking mother'
    * Visit:        0/1/2 -> '<n>-prenatal visit', otherwise '3-prenatal visit'
    * MomEdLevel:   0 -> 'Never', 1 -> 'Primary', 2 -> 'Secondary', otherwise 'Tertiary'
    * MomAge:       24 is added to remove negative values and make age fall in
                    the range (15 - 45), the female reproductive age.

    Column order is preserved. Every code that is not listed explicitly
    (missing values included) receives the column's "otherwise" label.
    """
    def decode(column, labels, fallback):
        # dict.get keeps the catch-all branch: any code absent from `labels`
        # (NaN included) is given `fallback`.
        return raw_df[column].map(lambda code: labels.get(code, fallback))

    return raw_df.rename(columns={'weightcoded': 'InfantWeight'}).assign(
        InfantWeight=decode('weightcoded', {1: 'low'}, 'normal'),
        Black=decode('Black', {1: 'black mother'}, 'white mother'),
        Boy=decode('Boy', {1: 'baby boy'}, 'baby girl'),
        MomSmoke=decode('MomSmoke', {1: 'smoking mother'}, 'non smoking mother'),
        Visit=decode(
            'Visit',
            {0: '0-prenatal visit', 1: '1-prenatal visit', 2: '2-prenatal visit'},
            '3-prenatal visit',
        ),
        MomEdLevel=decode(
            'MomEdLevel',
            {0: 'Never', 1: 'Primary', 2: 'Secondary'},
            'Tertiary',
        ),
        MomAge=raw_df['MomAge'] + 24,
    )


# Label the frame only while it still carries the raw `weightcoded` column.
# Re-running this cell is then a no-op instead of overwriting the labels with
# the fallback values and shifting MomAge by another 24.
if 'weightcoded' in lbw_df.columns:
    lbw_df = add_feature_labels(lbw_df)
elif 'InfantWeight' not in lbw_df.columns:
    # Neither the raw nor the labelled target column exists: fail loudly.
    raise KeyError("expected a 'weightcoded' or 'InfantWeight' column in lbw_df")

## 4. View the labelled dataset

In [5]:
# Display the labelled frame; the rendered table below is truncated to its first and last rows.
lbw_df

Unnamed: 0,Black,Married,Boy,MomAge,MomSmoke,CigsPerDay,MomWtGain,Visit,MomEdLevel,InfantWeight
0,white mother,1,baby boy,21,non smoking mother,0,-16,1-prenatal visit,Never,normal
1,white mother,1,baby girl,25,non smoking mother,0,2,3-prenatal visit,Secondary,normal
2,white mother,1,baby boy,24,non smoking mother,0,-3,3-prenatal visit,Never,normal
3,white mother,1,baby boy,23,non smoking mother,0,-5,3-prenatal visit,Secondary,low
4,white mother,1,baby boy,18,non smoking mother,0,-20,3-prenatal visit,Never,normal
...,...,...,...,...,...,...,...,...,...,...
49995,white mother,1,baby boy,26,non smoking mother,0,5,3-prenatal visit,Primary,normal
49996,white mother,0,baby girl,27,non smoking mother,0,-1,3-prenatal visit,Tertiary,normal
49997,black mother,0,baby girl,34,non smoking mother,0,5,3-prenatal visit,Never,normal
49998,white mother,1,baby girl,40,non smoking mother,0,6,3-prenatal visit,Secondary,normal


## 5. Save the dataset

In [4]:
# Write the labelled frame to CSV next to the raw data.
# The row index is written as the first column (pandas' default, spelled out here).
labelled_csv_path = '../data/labelled_lbw_data.csv'
lbw_df.to_csv(labelled_csv_path, index=True)