# MEST DAY 3

## Morning Session

### Activation Functions
Recall that $\hat{y} = w.x$
* Sigmoid $\sigma(\hat{y}) = \frac{1}{1 + e^{-\hat{y}}} $
* Hyperbolic Tangent
* Rectified Linear Unit

### Playground
* Gradient Descent
* Learning Rates

* http://playground.tensorflow.org/

In [None]:
!pip install tensorflow==2.0.0-beta0

In [1]:
import tensorflow as tf
import numpy as np

In [None]:
print(tf.__version__)

In [None]:
a = np.random.normal(0, 1, (5, 2))
b = np.random.normal(0, 1, (2, 3))

c = tf.matmul(a,b)
print(c)

In [None]:
print(c.numpy())

In [None]:
x = np.random.rand(10, 3)
w = np.random.randn(1, x.shape[1])
b = np.random.randn(x.shape[0], 1)

y_pred = tf.matmul(x, w, transpose_b=True) + b
print(y_pred)

### 0D Tensor

In [None]:
print(tf.add(1,1))

### 1D Tensor

In [None]:
_a = np.ones(5)
_a = tf.multiply(_a, 1)
print(_a)

In [None]:
print(_a.shape)

### 2D Tensor

In [None]:
_b = np.ones([2, 5])
_b = tf.multiply(_b, 1)
print(_b)
print(_b.shape)

### 3D Tensor

In [None]:
from tensorflow import keras

file_path = keras.utils.get_file('cat.jpg', 'https://www.petmd.com/sites/default/files/what-does-it-mean-when-cat-wags-tail.jpg')
img_raw = tf.io.read_file(file_path)
print(repr(img_raw)[:100]+"...")

In [None]:
img_tensor = tf.image.decode_image(img_raw)

print(img_tensor.shape)
print(img_tensor.dtype)

In [None]:
print(img_tensor)

In [None]:
img_final = tf.image.resize(img_tensor, [192, 192])
img_final = img_final/255.0
print(img_final.shape)
print(img_final.numpy().min())
print(img_final.numpy().max())

### Define a Linear Regression Model

$\hat{y} = w.x + b$

$L(y, \hat{y})$

In [11]:
class Model(object):
  def __init__(self):
    self.W = None
    self.b = None
    
  def __call__(self, x):
    if self.W == None:
      self.W = tf.Variable(tf.random.normal(shape=(1, x.shape[1])))
    if self.b == None:
      self.b = tf.Variable(tf.random.normal(shape=(x.shape[0], 1)))
    return tf.matmul(x, self.W, transpose_b=True) + self.b

In [12]:
model = Model()
output = model(tf.constant([3.0, 3.1, 1.9, 2.0, 2.5, 2.9], shape=(3,2)))
print(output)

tf.Tensor(
[[6.9633856]
 [3.3688776]
 [6.135957 ]], shape=(3, 1), dtype=float32)


### Define a Loss Function

In [4]:
@tf.function
def loss(y_pred, y):
  return tf.reduce_mean(tf.square(y-y_pred))

### Define a training function

In [5]:
def train(model, x, y, alpha):
  x = tf.convert_to_tensor(x, np.float32) 
  y = tf.convert_to_tensor(y, np.float32)
  with tf.GradientTape() as t:
    t.watch(x)
    current_loss = loss(model(x), y)
  #print(current_loss)
  dW, db = t.gradient(current_loss, [model.W, model.b])
  #print(dW, db)
  model.W.assign_sub(alpha * dW)
  model.b.assign_sub(alpha * db)

In [2]:
import pandas as pd

#from google.colab import drive
#drive.mount('/content/gdrive')

df = pd.read_csv('./boston/train.csv', index_col='ID')

In [3]:
train_df = df.sample(frac=0.8,random_state=0)
test_df = df.drop(train_df.index)

columns = ['nox', 'rm', 'chas', 'dis', 'ptratio', 'lstat', 'rad']

X_train = train_df[columns].values
X_test = test_df[columns].values
y_train = train_df[['medv']].values
y_test = test_df[['medv']].values

In [4]:
df[columns].head()

Unnamed: 0_level_0,nox,rm,chas,dis,ptratio,lstat,rad
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0.538,6.575,0,4.09,15.3,4.98,1
2,0.469,6.421,0,4.9671,17.8,9.14,2
4,0.458,6.998,0,6.0622,18.7,2.94,3
5,0.458,7.147,0,6.0622,18.7,5.33,3
7,0.524,6.012,0,5.5605,15.2,12.43,5


In [5]:
df[['medv']].head()

Unnamed: 0_level_0,medv
ID,Unnamed: 1_level_1
1,24.0
2,21.6
4,33.4
5,36.2
7,22.9


In [13]:
model = Model()
train(model, X_train, y_train, alpha=0.1)

### Logistic Regression

In [None]:
class LogisticModel(object):
  def __init__(self):
    self.W = None
    self.b = None
    
  def __call__(self, x):
    if self.W == None:
      self.W = tf.Variable(tf.random.normal(shape=(1, x.shape[1])))
    if self.b == None:
      self.b = tf.Variable(tf.random.normal(shape=(x.shape[0], 1)))
    y = tf.matmul(x, self.W, transpose_b=True) + self.b
    return tf.math.sigmoid(y)

### Train for 10 epochs

In [14]:
print(model.W)

<tf.Variable 'Variable:0' shape=(1, 7) dtype=float32, numpy=
array([[ 1.0251641 , 23.1939    , -0.73740137, 14.599553  , 66.53072   ,
        39.247177  , 26.141218  ]], dtype=float32)>


In [15]:
epochs = 10
model = Model()
for i in range(epochs):
  train(model, X_train, y_train, alpha=0.1)

ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.

In [None]:
print(model.W)

## Tensorflow with Keras API

In [6]:
import tensorflow as tf
from tensorflow import keras

In [8]:
# define a neural network
model = keras.Sequential([
    keras.layers.Dense(10, input_shape=(X_train.shape[1],), activation='relu'),
    keras.layers.Dense(1)
])
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 10)                80        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
# define a linear model with a regression output
model = keras.Sequential([
    keras.layers.Dense(1, input_shape=(X_train.shape[1],))
])
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1)                 8         
Total params: 8
Trainable params: 8
Non-trainable params: 0
_________________________________________________________________
None


In [9]:
adam = keras.optimizers.Adam(0.001)
model.compile(optimizer=adam, loss='mse')

In [10]:
model.fit(X_train, y_train, epochs=100, validation_split=0.1)

Train on 239 samples, validate on 27 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x62eb47710>

In [11]:
y_pred = model.predict(X_test)

In [12]:
comp = pd.DataFrame({'actuals': y_test.reshape(-1), 'predicted': y_pred.reshape(-1)})

In [13]:
comp.head(n=10)

Unnamed: 0,actuals,predicted
0,18.2,24.604994
1,24.7,23.737633
2,20.0,22.628204
3,19.7,23.315868
4,20.5,23.58149
5,23.3,26.026188
6,18.7,24.974365
7,33.0,27.869051
8,24.2,24.386507
9,20.8,22.61606


### Can we engineer new features?

In [52]:
# we need a new way of getting data into the model
def df_to_dataset(df, columns, shuffle=True, batch_size=64):
  df = df.copy()
  labels = df.pop('medv')
  features_df = df[columns]
  ds = tf.data.Dataset.from_tensor_slices( (dict(features_df), labels) )
  if shuffle:
    ds = ds.shuffle(buffer_size=len(df))
  ds = ds.batch(batch_size)
  return ds

In [53]:
from sklearn.model_selection import train_test_split
train, val = train_test_split(df, test_size=0.1)
train_ds = df_to_dataset(train, columns)
val_ds = df_to_dataset(val, columns)

In [54]:
df[columns].describe()

Unnamed: 0,nox,rm,chas,dis,ptratio,lstat,rad
count,333.0,333.0,333.0,333.0,333.0,333.0,333.0
mean,0.557144,6.265619,0.06006,3.709934,18.448048,12.515435,9.633634
std,0.114955,0.703952,0.237956,1.981123,2.151821,7.067781,8.742174
min,0.385,3.561,0.0,1.1296,12.6,1.73,1.0
25%,0.453,5.884,0.0,2.1224,17.4,7.18,4.0
50%,0.538,6.202,0.0,3.0923,19.0,10.97,5.0
75%,0.631,6.595,0.0,5.1167,20.2,16.42,24.0
max,0.871,8.725,1.0,10.7103,21.2,37.97,24.0


In [55]:
feature_columns = []

# numeric columns
for _col in columns:
  feature_columns.append(tf.feature_column.numeric_column(_col))
  
# bucketize number of rooms
rm_buckets = tf.feature_column.bucketized_column(tf.feature_column.numeric_column('rm'), boundaries=[1, 2, 3, 4, 5, 6, 7, 8, 9])
rad_buckets = tf.feature_column.bucketized_column(tf.feature_column.numeric_column('rad'), boundaries=[1, 5, 10])
nox_buckets = tf.feature_column.bucketized_column(tf.feature_column.numeric_column('nox'), boundaries=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

feature_columns.append(rm_buckets)
feature_columns.append(rad_buckets)
feature_columns.append(nox_buckets)

# cross rooms and nox


In [65]:
featuresLayer = keras.layers.DenseFeatures(feature_columns)
model = keras.Sequential([
    featuresLayer,
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mse')

model.fit(train_ds, epochs=200, validation_data=val_ds)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<tensorflow.python.keras.callbacks.History at 0x1a3ad07ed0>

In [66]:
y_preds = model.predict(val_ds)

In [67]:
_c = pd.DataFrame({'actuals': val['medv'], 'predicted':y_preds.reshape(-1)})

In [68]:
_c.head(n=10)

Unnamed: 0_level_0,actuals,predicted
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
345,31.2,29.459932
226,50.0,12.835381
353,18.6,17.685524
15,18.2,14.355935
482,23.7,30.157345
478,12.0,36.57254
254,42.8,20.001629
154,19.4,12.120832
117,21.2,23.368683
171,17.4,32.473309


$$

### TensorFlow Estimators

In [74]:
def train_fn():
  df = train_df.copy()
  labels = df.pop('medv')
  features_df = df[columns]
  ds = tf.data.Dataset.from_tensor_slices( (dict(features_df), labels) )
  ds = ds.shuffle(1000).batch(64).repeat(5)
  
  return ds

def val_fn():
  df = test_df.copy()
  labels = df.pop('medv')
  features_df = df[columns]
  ds = tf.data.Dataset.from_tensor_slices( (dict(features_df), labels) )
  ds = ds.batch(64).repeat(1)
  
  return ds

In [72]:
estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/xm/rvswt1sx4rdf56_wpqz5rt4c0000gn/T/tmp71smebtj', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a3c504b10>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [76]:
estimator.train(input_fn=train_fn, steps=None)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running loca

<tensorflow_estimator.python.estimator.canned.linear.LinearRegressorV2 at 0x1a3c4b56d0>

## Afternoon
### Pytorch 1.0
* You should probably restart the runtime at this point

In [None]:
! pip install torch torchvision

In [None]:
import torch

In [None]:
a = torch.rand(2,2)
b = torch.rand(2,2)
c = a + b
print(c)

In [None]:
d = torch.add(a, b)
print(d)

In [None]:
# a += 5
print(a)
a.add_(5)
print(a)

In [None]:
print(a.shape)

In [None]:
print(a * b)
print(a)
a.mul(b)
print(a)
a.mul_(b)
print(a)

In [None]:
a = torch.rand(10000, 10000)
b = torch.rand(10000, 10000)
a.matmul(b)

### Move to GPU

In [None]:
print(torch.cuda.is_available())

In [None]:
a = a.cuda()
b = b.cuda()
a.matmul(b)

In [None]:
features = torch.randn( (1,5))
weights = torch.randn_like(features)
bias = torch.randn( (1,1))

print(torch.sum(features * weights + bias) )

In [None]:
print(torch.mm(features, weights.view(5,1)) + bias)

### Gradients

In [None]:
x = torch.autograd.Variable(torch.ones(2, 2), requires_grad=True)

In [None]:
y = x.mean()
print(y)

In [None]:
print(y.backward())

In [None]:
print(x.grad)

In [None]:
print(x.grad_fn)

In [None]:
print(x.data)

In [None]:
print(y.grad_fn)

### Import Data

In [None]:
import pandas as pd

from google.colab import drive
drive.mount('/content/gdrive')

df = pd.read_csv('/content/gdrive/My Drive/boston/train.csv', index_col='ID')

In [None]:
train_df = df.sample(frac=0.8,random_state=0)
test_df = df.drop(train_df.index)

columns = ['nox', 'rm', 'chas', 'dis', 'ptratio', 'lstat', 'rad']

X_train = train_df[columns].values
X_test = test_df[columns].values
y_train = train_df[['medv']].values
y_test = test_df[['medv']].values

### Simple Linear Model

In [None]:
class SimpleModel():
  def __init__(self, x, y, lr=0.001):
    self.X = torch.autograd.Variable(torch.from_numpy(x).type(torch.FloatTensor))
    self.y = torch.autograd.Variable(torch.from_numpy(y).type(torch.FloatTensor))
    self.W = torch.autograd.Variable(torch.randn(x.shape[1], 1), requires_grad=True)
    self.b = torch.autograd.Variable(torch.randn(1, 1), requires_grad=True)
    self.alpha = lr
    
  def pred(self):
    return torch.matmul(self.X, self.W) + self.b
  
  def loss_fn(self):
    loss = (self.y - self.pred()).pow(2).sum()/self.X.shape[0]
    for p in [self.W, self.b]:
      if not p.grad is None:
        p.grad.data.zero_()
    loss.backward()
    return loss.data.item()
  
  def optimize(self):
    self.W.data -= self.alpha * self.W.grad.data
    self.b.data -= self.alpha * self.b.grad.data
    
  def train(self, epochs=100):
    for i in range(epochs):
      l = self.loss_fn()
      if i%10 == 0:
        print('Step:{} -- Current Loss: {}'.format(i, l))
      self.optimize()

In [None]:
model = SimpleModel(X_train, y_train)

In [None]:
model.train(50)