# Cardiotocographic Classification using TensorFlow and Keras
##### Classification of cardiotocograms (recordings of fetal heart rate and uterine contractions), implemented with a PySpark Pipeline

The dataset comes from the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/cardiotocography

In [1]:
#Pipeline dependencies
from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import HashingTF, Tokenizer
from pyspark.sql import SparkSession

#Data manipulation, analysis and plotting tools
import pandas as pd
import numpy as np
import matplotlib as plt

#Machine Learning libraries
import sklearn as sk
import tensorflow as tf

In [2]:
#Build the Spark session for this notebook, or reuse one that is already active.
#'local[4]' runs Spark on this machine with 4 worker threads (no external cluster).
spark = (
    SparkSession.builder
    .appName('cardiotocography_pipeline')
    .master('local[4]')
    .getOrCreate()
)

In [5]:
#Load the CTG dataset from CSV and display its column layout.
#No header or schema options are set, so the columns get positional
#names (_c0, _c1, ...) and are all typed as string, as the schema output shows.
df = spark.read.format("csv").load("CTG_data.csv")
df.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)
 |-- _c3: string (nullable = true)
 |-- _c4: string (nullable = true)
 |-- _c5: string (nullable = true)
 |-- _c6: string (nullable = true)
 |-- _c7: string (nullable = true)
 |-- _c8: string (nullable = true)
 |-- _c9: string (nullable = true)
 |-- _c10: string (nullable = true)
 |-- _c11: string (nullable = true)
 |-- _c12: string (nullable = true)
 |-- _c13: string (nullable = true)
 |-- _c14: string (nullable = true)
 |-- _c15: string (nullable = true)
 |-- _c16: string (nullable = true)
 |-- _c17: string (nullable = true)
 |-- _c18: string (nullable = true)
 |-- _c19: string (nullable = true)
 |-- _c20: string (nullable = true)
 |-- _c21: string (nullable = true)
 |-- _c22: string (nullable = true)
 |-- _c23: string (nullable = true)
 |-- _c24: string (nullable = true)
 |-- _c25: string (nullable = true)
 |-- _c26: string (nullable = true)
 |-- _c27: string (nullable = tru