# AMMI Day 7

## Morning Session

### Activation Functions
Recall that $\hat{y} = w.x$
* Sigmoid $\sigma(\hat{y}) = \frac{1}{1 + e^{-\hat{y}}} $
* Hyperbolic Tangent
* Rectified Linear Unit

### Playground
* Gradient Descent
* Learning Rates

* http://playground.tensorflow.org/

In [0]:
!pip install tensorflow==2.0.0-beta0

Collecting tensorflow==2.0.0-beta0
[?25l  Downloading https://files.pythonhosted.org/packages/6c/19/0d0c7f240db7bcd6b83783b9a89a67f38584d100e23ad5ae93114be92232/tensorflow-2.0.0b0-cp36-cp36m-manylinux1_x86_64.whl (87.9MB)
[K     |████████████████████████████████| 87.9MB 35.1MB/s 
Collecting tb-nightly<1.14.0a20190604,>=1.14.0a20190603 (from tensorflow==2.0.0-beta0)
[?25l  Downloading https://files.pythonhosted.org/packages/a4/96/571b875cd81dda9d5dfa1422a4f9d749e67c0a8d4f4f0b33a4e5f5f35e27/tb_nightly-1.14.0a20190603-py3-none-any.whl (3.1MB)
[K     |████████████████████████████████| 3.1MB 47.3MB/s 
Collecting tf-estimator-nightly<1.14.0.dev2019060502,>=1.14.0.dev2019060501 (from tensorflow==2.0.0-beta0)
[?25l  Downloading https://files.pythonhosted.org/packages/32/dd/99c47dd007dcf10d63fd895611b063732646f23059c618a373e85019eb0e/tf_estimator_nightly-1.14.0.dev2019060501-py2.py3-none-any.whl (496kB)
[K     |████████████████████████████████| 501kB 47.7MB/s 
Installing collected package

In [0]:
import tensorflow as tf
import numpy as np

In [0]:
print(tf.__version__)

2.0.0-beta0


In [0]:
a = np.random.normal(0, 1, (5, 2))
b = np.random.normal(0, 1, (2, 3))

c = tf.matmul(a,b)
print(c)

tf.Tensor(
[[-0.77965303 -1.68241699  0.57305434]
 [ 0.71512504  0.92490428 -0.10120611]
 [-0.41558034  0.33548567 -0.54045347]
 [ 0.74015119 -1.52071728  1.59630656]
 [-1.04718139  1.08824247 -1.52856885]], shape=(5, 3), dtype=float64)


In [0]:
print(c.numpy())

[[-0.77965303 -1.68241699  0.57305434]
 [ 0.71512504  0.92490428 -0.10120611]
 [-0.41558034  0.33548567 -0.54045347]
 [ 0.74015119 -1.52071728  1.59630656]
 [-1.04718139  1.08824247 -1.52856885]]


In [0]:
x = np.random.rand(10, 3)
w = np.random.randn(1, x.shape[1])
b = np.random.randn(x.shape[0], 1)

y_pred = tf.matmul(x, w, transpose_b=True) + b
print(y_pred)

tf.Tensor(
[[-1.33219625]
 [ 1.05405803]
 [-0.51153602]
 [-0.18726801]
 [ 0.21037553]
 [ 0.23572902]
 [-1.4108347 ]
 [-0.36349185]
 [-0.92623722]
 [-1.36084031]], shape=(10, 1), dtype=float64)


### 0D Tensor

In [0]:
print(tf.add(1,1))

tf.Tensor(2, shape=(), dtype=int32)


### 1D Tensor

In [0]:
_a = np.ones(5)
_a = tf.multiply(_a, 1)
print(_a)
print(_a.shape)

tf.Tensor([1. 1. 1. 1. 1.], shape=(5,), dtype=float64)
(5,)


### 2D Tensor

In [0]:
_b = np.ones([2, 5])
_b = tf.multiply(_b, 1)
print(_b)
print(_b.shape)

tf.Tensor(
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]], shape=(2, 5), dtype=float64)
(2, 5)


### 3D Tensor

In [0]:
from tensorflow import keras

file_path = keras.utils.get_file('cat.jpg', 'https://www.petmd.com/sites/default/files/what-does-it-mean-when-cat-wags-tail.jpg')
img_raw = tf.io.read_file(file_path)
print(repr(img_raw)[:100]+"...")

<tf.Tensor: id=1299, shape=(), dtype=string, numpy=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x0...


In [0]:
img_tensor = tf.image.decode_image(img_raw)

print(img_tensor.shape)
print(img_tensor.dtype)

(428, 590, 3)
<dtype: 'uint8'>


In [0]:
print(img_tensor)

tf.Tensor(
[[[253 253 253]
  [253 253 253]
  [253 253 253]
  ...
  [253 253 253]
  [253 253 253]
  [253 253 253]]

 [[253 253 253]
  [253 253 253]
  [254 254 254]
  ...
  [254 254 254]
  [254 254 254]
  [254 254 254]]

 [[254 254 254]
  [254 254 254]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 ...

 [[254 254 254]
  [254 254 254]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[254 254 254]
  [254 254 254]
  [254 254 254]
  ...
  [254 254 254]
  [254 254 254]
  [254 254 254]]

 [[253 253 253]
  [253 253 253]
  [254 254 254]
  ...
  [254 254 254]
  [254 254 254]
  [255 255 255]]], shape=(428, 590, 3), dtype=uint8)


In [0]:
img_final = tf.image.resize(img_tensor, [192, 192])
img_final = img_final/255.0
print(img_final.shape)
print(img_final.numpy().min())
print(img_final.numpy().max())

(192, 192, 3)
0.0
1.0


### Define a Linear Regression Model

$\hat{y} = w.x + b$

$L(y, \hat{y})$

In [0]:
class Model(object):
  def __init__(self):
    self.W = None
    self.b = None
    
  def __call__(self, x):
    if self.W == None:
      self.W = tf.Variable(tf.random.normal(shape=(1, x.shape[1])))
    if self.b == None:
      self.b = tf.Variable(tf.random.normal(shape=(x.shape[0], 1)))
    return tf.matmul(x, self.W, transpose_b=True) + self.b

In [0]:
model = Model()
output = model(tf.constant([3.0, 3.1, 1.9, 2.0, 2.5, 2.9], shape=(3,2)))
print(output)

tf.Tensor(
[[-1.1354551]
 [-2.6164463]
 [ 1.3604654]], shape=(3, 1), dtype=float32)


### Define a Loss Function

In [0]:
@tf.function
def loss(y_pred, y):
  return tf.reduce_mean(tf.square(y-y_pred))

### Define a training function

In [0]:
def train(model, x, y, alpha):
  x = tf.convert_to_tensor(x, np.float32) 
  y = tf.convert_to_tensor(y, np.float32)
  with tf.GradientTape() as t:
    t.watch(x)
    current_loss = loss(model(x), y)
  #print(current_loss)
  dW, db = t.gradient(current_loss, [model.W, model.b])
  #print(dW, db)
  model.W.assign_sub(alpha * dW)
  model.b.assign_sub(alpha * db)

In [0]:
import pandas as pd

from google.colab import drive
drive.mount('/content/gdrive')

df = pd.read_csv('/content/gdrive/My Drive/boston/train.csv', index_col='ID')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
train_df = df.sample(frac=0.8,random_state=0)
test_df = df.drop(train_df.index)

columns = ['nox', 'rm', 'chas', 'dis', 'ptratio', 'lstat', 'rad']

X_train = train_df[columns].values
X_test = test_df[columns].values
y_train = train_df[['medv']].values
y_test = test_df[['medv']].values

In [0]:
model = Model()
train(model, X_train, y_train, alpha=0.1)

### Logistic Regression

In [0]:
class LogisticModel(object):
  def __init__(self):
    self.W = None
    self.b = None
    
  def __call__(self, x):
    if self.W == None:
      self.W = tf.Variable(tf.random.normal(shape=(1, x.shape[1])))
    if self.b == None:
      self.b = tf.Variable(tf.random.normal(shape=(x.shape[0], 1)))
    y = tf.matmul(x, self.W, transpose_b=True) + self.b
    return tf.math.sigmoid(y)

### Train for 10 epochs

In [0]:
epochs = 10
model = Model()
for i in range(epochs):
  train(model, X_train, y_train, alpha=0.1)

In [0]:
print(model.W)

<tf.Variable 'Variable:0' shape=(1, 7) dtype=float32, numpy=
array([[-3.9231674e+18, -4.2280822e+19, -3.9986613e+17, -2.3798404e+19,
        -1.2810750e+20, -9.5477420e+19, -7.6335363e+19]], dtype=float32)>


## Tensorflow with Keras API

In [0]:
import tensorflow as tf
from tensorflow import keras

In [0]:
model = keras.Sequential([
    keras.layers.Dense(50, input_shape=(7,), activation='relu'),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1)
])
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                400       
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2550      
_________________________________________________________________
dropout (Dropout)            (None, 50)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 51        
Total params: 5,551
Trainable params: 5,551
Non-trainable params: 0
_________________________________________________________________
None


In [0]:
adam = keras.optimizers.Adam(0.001)
model.compile(optimizer=adam, loss='mse')

In [0]:
model.fit(X_train, y_train, epochs=2000, validation_split=0.1)

### Can we engineer new features?

In [0]:
# we need a new way of getting data into the model
def df_to_dataset(df, columns, shuffle=True, batch_size=64):
  df = df.copy()
  labels = df.pop('medv')
  features_df = df[columns]
  ds = tf.data.Dataset.from_tensor_slices( (dict(features_df), labels) )
  if shuffle:
    ds = ds.shuffle(buffer_size=len(df))
  ds = ds.batch(batch_size)
  return ds

In [0]:
from sklearn.model_selection import train_test_split
train, val = train_test_split(df, test_size=0.1)
train_ds = df_to_dataset(train, columns)
val_ds = df_to_dataset(val, columns)

In [0]:
df[columns].describe()

Unnamed: 0,nox,rm,chas,dis,ptratio,lstat,rad
count,333.0,333.0,333.0,333.0,333.0,333.0,333.0
mean,0.557144,6.265619,0.06006,3.709934,18.448048,12.515435,9.633634
std,0.114955,0.703952,0.237956,1.981123,2.151821,7.067781,8.742174
min,0.385,3.561,0.0,1.1296,12.6,1.73,1.0
25%,0.453,5.884,0.0,2.1224,17.4,7.18,4.0
50%,0.538,6.202,0.0,3.0923,19.0,10.97,5.0
75%,0.631,6.595,0.0,5.1167,20.2,16.42,24.0
max,0.871,8.725,1.0,10.7103,21.2,37.97,24.0


In [0]:
feature_columns = []

# numeric columns
for _col in columns:
  feature_columns.append(tf.feature_column.numeric_column(_col))
  
# bucketize number of rooms
rm_buckets = tf.feature_column.bucketized_column(tf.feature_column.numeric_column('rm'), boundaries=[1, 2, 3, 4, 5, 6, 7, 8, 9])
rad_buckets = tf.feature_column.bucketized_column(tf.feature_column.numeric_column('rad'), boundaries=[1, 5, 10])
nox_buckets = tf.feature_column.bucketized_column(tf.feature_column.numeric_column('nox'), boundaries=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

feature_columns.append(rm_buckets)
feature_columns.append(rad_buckets)
feature_columns.append(nox_buckets)

# cross rooms and nox


In [0]:
featuresLayer = keras.layers.DenseFeatures(feature_columns)
model = keras.Sequential([
    featuresLayer,
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mse')

model.fit(train_ds, epochs=50, validation_data=val_ds)

$$

### TensorFlow Estimators

In [0]:
def train_fn():
  df = train_df.copy()
  labels = df.pop('medv')
  features_df = df[columns]
  ds = tf.data.Dataset.from_tensor_slices( (dict(features_df), labels) )
  ds = ds.shuffle(1000).batch(64).repeat(5)
  
  return ds

def val_fn():
  df = test_df.copy()
  labels = df.pop('medv')
  features_df = df[columns]
  ds = tf.data.Dataset.from_tensor_slices( (dict(features_df), labels) )
  ds = ds.batch(64).repeat(1)
  
  return ds

In [0]:
estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpphudyalz', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0971a85390>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [0]:
estimator.train(input_fn=input_fn, steps=None)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpphudyalz/model.ckpt.
INFO:tensorflow:loss = 609.0302, step = 0
INFO:tensorflow:Saving checkpoints for 25 into /tmp/tmpphudyalz/model.ckpt.
INFO:tensorflow:Loss for final step: 35.415752.


<tensorflow_estimator.python.estimator.canned.linear.LinearRegressorV2 at 0x7f09e455fa58>

## Afternoon
### Pytorch 1.0
* You should probably restart the runtime at this point

In [0]:
! pip install torch torchvision

In [0]:
import torch

In [0]:
a = torch.rand(2,2)
b = torch.rand(2,2)
c = a + b
print(c)

tensor([[1.0091, 1.5567],
        [0.2449, 1.7678]])


In [0]:
d = torch.add(a, b)
print(d)

tensor([[0.9202, 0.5588],
        [1.1854, 1.7655]])


In [0]:
# a += 5
print(a)
a.add_(5)
print(a)

tensor([[0.9367, 0.6118],
        [0.2269, 0.9910]])
tensor([[5.9367, 5.6118],
        [5.2269, 5.9910]])


In [0]:
print(a.shape)

torch.Size([2, 2])


In [0]:
print(a * b)
print(a)
a.mul(b)
print(a)
a.mul_(b)
print(a)

tensor([[3.1084e-02, 5.0109e+00],
        [1.6914e-03, 3.6155e+00]])
tensor([[0.4296, 5.3028],
        [0.0940, 4.6541]])
tensor([[0.4296, 5.3028],
        [0.0940, 4.6541]])
tensor([[3.1084e-02, 5.0109e+00],
        [1.6914e-03, 3.6155e+00]])


In [0]:
a = torch.rand(10000, 10000)
b = torch.rand(10000, 10000)
a.matmul(b)

tensor([[2469.5039, 2511.7175, 2515.6650,  ..., 2511.0210, 2503.0608,
         2492.7117],
        [2475.8333, 2529.4609, 2542.2546,  ..., 2565.6462, 2542.2822,
         2505.0024],
        [2453.8730, 2516.9917, 2526.4529,  ..., 2515.3352, 2509.9707,
         2495.4531],
        ...,
        [2500.7844, 2571.1865, 2576.1082,  ..., 2552.0007, 2547.2495,
         2534.3420],
        [2477.8733, 2544.0698, 2540.1116,  ..., 2526.7979, 2537.8604,
         2491.5720],
        [2459.9749, 2544.2688, 2524.2395,  ..., 2510.9868, 2512.9731,
         2485.2451]])

### Move to GPU

In [0]:
print(torch.cuda.is_available())

True


In [0]:
a = a.cuda()
b = b.cuda()
a.matmul(b)

tensor([[2469.5054, 2511.7136, 2515.6689,  ..., 2511.0144, 2503.0620,
         2492.7112],
        [2475.8306, 2529.4595, 2542.2600,  ..., 2565.6470, 2542.2781,
         2505.0000],
        [2453.8689, 2516.9963, 2526.4497,  ..., 2515.3303, 2509.9648,
         2495.4561],
        ...,
        [2500.7859, 2571.1890, 2576.1123,  ..., 2552.0056, 2547.2485,
         2534.3398],
        [2477.8738, 2544.0691, 2540.1091,  ..., 2526.8013, 2537.8616,
         2491.5718],
        [2459.9756, 2544.2671, 2524.2366,  ..., 2510.9856, 2512.9712,
         2485.2458]], device='cuda:0')

In [0]:
features = torch.randn( (1,5))
weights = torch.randn_like(features)
bias = torch.randn( (1,1))

print(torch.sum(features * weights + bias) )

tensor(-10.5711)


In [0]:
print(torch.mm(features, weights.view(5,1)) + bias)

tensor([[-4.1206]])


### Gradients

In [0]:
x = torch.autograd.Variable(torch.ones(2, 2), requires_grad=True)

In [0]:
y = x.mean()
print(y)

tensor(1., grad_fn=<MeanBackward0>)


In [0]:
print(y.backward())

None


In [0]:
print(x.grad)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])


In [0]:
print(x.grad_fn)

None


In [0]:
print(x.data)

tensor([[1., 1.],
        [1., 1.]])


In [0]:
print(y.grad_fn)

<MeanBackward0 object at 0x7f09732910b8>


### Import Data

In [0]:
import pandas as pd

from google.colab import drive
drive.mount('/content/gdrive')

df = pd.read_csv('/content/gdrive/My Drive/boston/train.csv', index_col='ID')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
train_df = df.sample(frac=0.8,random_state=0)
test_df = df.drop(train_df.index)

columns = ['nox', 'rm', 'chas', 'dis', 'ptratio', 'lstat', 'rad']

X_train = train_df[columns].values
X_test = test_df[columns].values
y_train = train_df[['medv']].values
y_test = test_df[['medv']].values

### Simple Linear Model

In [0]:
class SimpleModel():
  def __init__(self, x, y, lr=0.001):
    self.X = torch.autograd.Variable(torch.from_numpy(x).type(torch.FloatTensor))
    self.y = torch.autograd.Variable(torch.from_numpy(y).type(torch.FloatTensor))
    self.W = torch.autograd.Variable(torch.randn(x.shape[1], 1), requires_grad=True)
    self.b = torch.autograd.Variable(torch.randn(1, 1), requires_grad=True)
    self.alpha = lr
    
  def pred(self):
    return torch.matmul(self.X, self.W) + self.b
  
  def loss_fn(self):
    loss = (self.y - self.pred()).pow(2).sum()/self.X.shape[0]
    for p in [self.W, self.b]:
      if not p.grad is None:
        p.grad.data.zero_()
    loss.backward()
    return loss.data.item()
  
  def optimize(self):
    self.W.data -= self.alpha * self.W.grad.data
    self.b.data -= self.alpha * self.b.grad.data
    
  def train(self, epochs=100):
    for i in range(epochs):
      l = self.loss_fn()
      if i%10 == 0:
        print('Step:{} -- Current Loss: {}'.format(i, l))
      self.optimize()

In [0]:
model = SimpleModel(X_train, y_train)

In [0]:
model.train(50)

Step:0 -- Current Loss: 2725.74658203125
Step:10 -- Current Loss: 162.29730224609375
Step:20 -- Current Loss: 92.99630737304688
Step:30 -- Current Loss: 74.25244903564453
Step:40 -- Current Loss: 67.72014617919922
