In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

import numpy as np
import json

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

In [2]:
# Pull a 10,000-row sample of the public natality table (births after 2000),
# keeping the regression target (weight_pounds) and the four feature columns.
# NOTE(review): there is no ORDER BY, so the rows behind LIMIT are presumably
# not guaranteed to be the same from one run to the next -- confirm if exact
# reproducibility of the sample matters.
query="""
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""
# Client() is built with no arguments, so project/credentials are expected to
# come from the environment.
bq_client = bigquery.Client()
query_job = bq_client.query(query)
df = query_job.to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,7.568469,True,22,1,46.0
1,8.807467,True,39,1,42.0
2,8.313632,True,23,1,35.0
3,8.000575,False,27,1,40.0
4,6.563162,False,29,1,39.0


In [3]:
# Summary statistics for the frame; a per-column `count` lower than the number
# of rows fetched indicates missing values in that column.
df.describe()


Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,9991.0,10000.0,10000.0,9888.0
mean,7.278609,27.3653,1.0303,38.681634
std,1.354406,6.235699,0.183808,2.622498
min,0.500449,12.0,1.0,19.0
25%,6.624891,22.0,1.0,38.0
50%,7.374463,27.0,1.0,39.0
75%,8.124034,32.0,1.0,40.0
max,12.936726,51.0,4.0,47.0


In [4]:
# Check how the rows are split between the two `is_male` values.
df['is_male'].value_counts()


True     5190
False    4810
Name: is_male, dtype: int64

In [5]:
# Discard every row that has a missing value, then shuffle the remaining rows
# with a fixed seed so the row order is repeatable.
df = shuffle(df.dropna(), random_state=2)

In [6]:
# Regression target: the birth weight column.
labels = df['weight_pounds']

# Feature frame: every remaining column, with the `is_male` flag cast to int
# so that all model inputs are numeric.
data = (
    df
    .drop(columns=['weight_pounds'])
    .assign(is_male=lambda frame: frame['is_male'].astype(int))
)

In [7]:
# Short aliases for the feature frame and the target series.
x, y = data, labels

# Hold out part of the data for evaluation (test_size is left at the
# scikit-learn default). The split is seeded so that "Restart & Run All"
# produces the same train/test partition every time; without random_state the
# partition -- and therefore every downstream metric and prediction -- changed
# on each run. The seed value matches the one already used for shuffle().
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

In [8]:
# Small fully connected regression network:
#   input (one unit per feature column) -> 64 ReLU -> 32 ReLU -> 1 output.
# The last layer is given no activation argument, so it emits a single
# unbounded value (the predicted weight).
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(x_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))



In [9]:
# Configure training: minimise mean squared error with RMSprop (default
# hyper-parameters) and report both MAE and MSE while fitting.
model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mae', 'mse'],
    optimizer=tf.keras.optimizers.RMSprop(),
)

In [10]:
# Train for 10 epochs, setting aside a 0.1 fraction of the training data for
# validation. The call is the cell's last expression, so the returned History
# object is what gets displayed below the epoch log.
model.fit(x_train, y_train, epochs=10, validation_split=0.1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fc849d6de10>

In [11]:
# Number of held-out rows to spot-check.
num_examples = 10

# Take the first `num_examples` rows of the test features (by position) and
# run them through the trained model.
sample_features = x_test.iloc[:num_examples]
predictions = model.predict(sample_features)



In [12]:
# Print each spot-check prediction next to the true weight for the same
# (positional) test row, with a blank line between pairs.
for row_pos in range(num_examples):
    predicted_weight = predictions[row_pos][0]  # model output has one value per row
    actual_weight = y_test.iloc[row_pos]
    print('Predicted val: ', predicted_weight)
    print('Actual val: ', actual_weight)
    print()



Predicted val:  7.0532465
Actual val:  8.24969784404

Predicted val:  8.297029
Actual val:  7.3744626639

Predicted val:  8.078236
Actual val:  9.7775013197

Predicted val:  6.9599485
Actual val:  6.87401332916

Predicted val:  7.5341306
Actual val:  6.3118345610599995

Predicted val:  7.314255
Actual val:  7.50012615324

Predicted val:  7.8596883
Actual val:  7.84184265934

Predicted val:  5.98059
Actual val:  5.5997414548

Predicted val:  7.5318003
Actual val:  6.93794738514

Predicted val:  7.853184
Actual val:  8.68841774542

