# Data Preparation for Classification of Gestational Term

In [1]:
import pandas as pd
import numpy as np

The main dataframe that contains all necessary data must first be loaded.

In [2]:
# Read the main dataframe from its CSV export.
main_csv_path = 'C:/Users/Nefeli/Desktop/biomed_project_data/mainDf_int.csv'
mainDf = pd.read_csv(main_csv_path)

According to the [ACOG definition of term pregnancy](https://www.acog.org/clinical/clinical-guidance/committee-opinion/articles/2013/11/definition-of-term-pregnancy), term pregnancies can be classified by gestational age in weeks as follows:

- ET: Early Term -> weeks >= 37 and <= 38
- FT: Full Term -> weeks >= 39 and <= 40
- LT: Late Term -> weeks = 41
- P: Postterm -> weeks >= 42

In [3]:
def gestTermClassAssigner_4(x):
    """Map a gestational age in weeks to one of four term classes.

    The classes use half-open intervals so that every age from 37 weeks
    upwards gets a label. Fractional ages such as 38.5 or 41.5 weeks are
    therefore classified instead of falling between integer bounds.

    Parameters
    ----------
    x : float
        Gestational age in weeks.

    Returns
    -------
    str or float
        'ET' (early term) for 37 <= x < 39,
        'FT' (full term) for 39 <= x < 41,
        'LT' (late term) for 41 <= x < 42,
        'P' (postterm) for x >= 42,
        and np.nan for ages below 37 weeks or NaN input.
    """
    if 37.0 <= x < 39.0:
        return 'ET'
    elif 39.0 <= x < 41.0:
        return 'FT'
    elif 41.0 <= x < 42.0:
        return 'LT'
    elif x >= 42.0:
        return 'P'
    else:
        # Below 37 weeks, or NaN (every comparison above is False for NaN).
        return np.nan

In [4]:
# Pre-create the four-class label column, one None placeholder per row.
n_rows = len(mainDf)
mainDf['Gest_Term_4'] = [None] * n_rows

In [5]:
# Label every row with its four-class term category derived from 'Gest.weeks'.
gest_weeks = mainDf['Gest.weeks']
mainDf['Gest_Term_4'] = gest_weeks.apply(gestTermClassAssigner_4)

In [6]:
# Tally how many rows fall into each of the four term classes.
y_labels, counts = np.unique(mainDf['Gest_Term_4'], return_counts=True)
for summary in (y_labels, counts):
    print(summary)

['ET' 'FT' 'LT' 'P']
[ 21 119  72  10]


All four classes do appear, but there is a prominent class imbalance. To find a good midway point between interpretability and class balance, the 'FT' class can remain the same and the remaining classes can be merged into 'OT' (other). The two categories will therefore be full-term pregnancies and 'other' pregnancies, where some type of pathological phenomenon may exist.

- OT: Other -> weeks >= 37 and <= 38
- FT: Full Term -> weeks >= 39 and <= 40
- OT: Other -> weeks = 41
- OT: Other -> weeks >= 42

In [7]:
def gestTermClassAssigner(x):
    """Map a gestational age in weeks to a binary term class.

    Full term is kept as its own class and every other age from 37 weeks
    upwards is merged into 'OT'. The full-term interval is half-open, so
    fractional ages such as 38.5 or 40.5 weeks are classified instead of
    falling between integer bounds.

    Parameters
    ----------
    x : float
        Gestational age in weeks.

    Returns
    -------
    str or float
        'FT' (full term) for 39 <= x < 41,
        'OT' (other) for 37 <= x < 39 or x >= 41,
        and np.nan for ages below 37 weeks or NaN input.
    """
    if 39.0 <= x < 41.0:
        return 'FT'
    elif x >= 37.0:
        # Everything from 37 weeks upwards that is not full term.
        return 'OT'
    else:
        # Below 37 weeks, or NaN (every comparison above is False for NaN).
        return np.nan

In [8]:
# Pre-create the binary label column, one None placeholder per row.
n_rows = len(mainDf)
mainDf['Gest_Term'] = [None] * n_rows

In [9]:
# Label every row with its binary term category derived from 'Gest.weeks'.
gest_weeks = mainDf['Gest.weeks']
mainDf['Gest_Term'] = gest_weeks.apply(gestTermClassAssigner)

In [10]:
# Tally how many rows fall into each of the two merged term classes.
y_labels, counts = np.unique(mainDf['Gest_Term'], return_counts=True)
for summary in (y_labels, counts):
    print(summary)

['FT' 'OT']
[119 103]


In [11]:
# Drop the column the labels were derived from and the four-class label,
# so that 'Gest_Term' is the only gestational-term column left.
to_drop = ['Gest.weeks', 'Gest_Term_4']
termDf = mainDf.drop(labels=to_drop, axis='columns').copy()
termDf.head(3)

Unnamed: 0,pH,BDecf,pCO2,BE,Apgar1,Apgar5,Weight(g),Sex,Age,Gravidity,...,FHR_II_ffill_total_power,FHR_II_ffill_vlf,FHR_II_ffill_haar_stdev,FHR_II_ffill_haar_mean,FHR_II_ffill_samp_entr,FHR_II_ffill_bub_entr,diff_nni20,diff_lf_hf,diff_haar_std,Gest_Term
0,7.14,8.14,7.7,-10.5,6.0,8.0,2660.0,2.0,32.0,1.0,...,368.077564,210.991854,1.549748,-0.000147,0.031682,0.179575,16,3.059011,0.437721,OT
1,7.0,7.92,12.0,-12.0,8.0,8.0,2900.0,2.0,23.0,1.0,...,573.335415,388.247905,3.125196,-0.01974,0.053499,0.15447,2,0.020674,1.175809,OT
2,7.2,3.03,8.3,-5.6,7.0,9.0,3770.0,1.0,31.0,1.0,...,285.519025,149.841842,2.459169,0.030445,0.052023,0.205526,8,1.917875,1.699915,FT


In [12]:
# Write the binary-label dataframe to CSV without the index column.
term_csv_path = 'C:/Users/Nefeli/Desktop/biomed_project_data/termDf_bin.csv'
termDf.to_csv(term_csv_path, index=False)