In [1]:
import sys
from typing import Callable, NamedTuple

from absl import app
from absl import flags
import numpy as np
import pandas as pd
import tensorflow as tf

tf.enable_v2_behavior()

## Data

In [2]:
orthodont_data = pd.read_csv('../hw4/orthodont.csv')
orthodont_data = orthodont_data.set_index('Subject')
orthodont_data.head(8)

Unnamed: 0_level_0,distance,age,Sex
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M01,26.0,8,Male
M01,25.0,10,Male
M01,29.0,12,Male
M01,31.0,14,Male
M02,21.5,8,Male
M02,22.5,10,Male
M02,23.0,12,Male
M02,26.5,14,Male


In [3]:
def make_covariates(data_frame):    
    age = (data_frame['age'] - 8).values
    is_female = (data_frame['Sex'] == 'Female').values.astype(np.float64)
    return np.column_stack((
        np.ones(len(data_frame)),
        age,
        is_female,
        age*is_female,        
    ))

def make_response(data_frame):
    return data_frame['distance'].values

X = tf.convert_to_tensor(
    [make_covariates(orthodont_data.loc[i]) for i in np.unique(orthodont_data.index)],
    tf.float32)
y = tf.expand_dims(tf.convert_to_tensor(
    [make_response(orthodont_data.loc[i]) for i in np.unique(orthodont_data.index)],
    tf.float32), -1)

## Cluster Correlation Structure

In [4]:
def make_covariance_homoscedastic(log_variance):
    """Makes diagonal homoscedastic covariance structure."""
    return tf.exp(log_variance)*tf.eye(4)

In [5]:
def make_covariance_exchangeable(log_covariance_params):
    """Makes heteroscedastic, exchangeble covariance structure."""
    standard_errors = tf.exp(log_covariance_params[:-1])  # First entries are standard errors.
    rho = tf.exp(log_covariance_params[-1])  # Last entry is correlation.
    correlation = tf.ones((4, 4), dtype=tf.float32)*rho + tf.eye(4)*(1. - rho)
    return correlation*standard_errors*tf.expand_dims(standard_errors, -1)

## REML Loss

In [6]:
def solve_beta(X, y, weights):
    projected_X = tf.reduce_sum(tf.matmul(tf.tensordot(tf.transpose(X, [0, 2, 1]), weights, 1), X), 0)
    projected_y = tf.reduce_sum(tf.matmul(tf.tensordot(tf.transpose(X, [0, 2, 1]), weights, 1), y), 0)
    return tf.linalg.cholesky_solve(tf.linalg.cholesky(projected_X), projected_y)

def loss_fn(X, y, covariance):
    weights = tf.linalg.cholesky_solve(tf.linalg.cholesky(covariance), tf.eye(4))        
    beta = solve_beta(X, y, weights)     
    residuals = y - tf.tensordot(X, beta, 1)
    weighted_squared_error = tf.matmul(
        tf.tensordot(tf.transpose(residuals, [0, 2, 1]), weights, 1), residuals)
    loss = tf.reduce_mean(weighted_squared_error) + tf.linalg.logdet(covariance)    
    reml_loss = tf.reduce_sum(tf.matmul(tf.tensordot(tf.transpose(X, [0, 2, 1]), weights, 1), X), 0)
    return loss + tf.linalg.logdet(reml_loss) / tf.cast(tf.shape(y)[0], tf.float32)

## Optimization

Minimizes REML loss with Newton-Raphson algorithm

In [7]:
class CovarianceSpec(NamedTuple('CovarianceSpec', [
    ('initial_params', np.array),
    ('make_covariance', Callable[[tf.Tensor], tf.Tensor]),
])):
    """Encapsulates covariance parameters."""

def fit(X, y, covariance_spec):
    covariance_params = tf.Variable(covariance_spec.initial_params)
    for i in range(16):
        with tf.GradientTape(persistent=True) as outer_tape:
            with tf.GradientTape() as inner_tape:
                loss = loss_fn(X, y, covariance_spec.make_covariance(covariance_params))
            gradients = inner_tape.gradient(loss, covariance_params)
        hessian = outer_tape.jacobian(gradients, covariance_params, experimental_use_pfor=False)            
        covariance_params.assign_add(tf.reshape(
            tf.linalg.cholesky_solve(tf.linalg.cholesky(hessian), -tf.expand_dims(gradients, -1)),
            covariance_params.shape))
    return covariance_params

In [8]:
log_variance = fit(
    X, y,
    CovarianceSpec(initial_params=[0.], make_covariance=make_covariance_homoscedastic))
tf.sqrt(tf.exp(log_variance)).numpy()

W0218 23:35:25.567388 140393872258816 deprecation.py:323] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/math_grad.py:80: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
W0218 23:35:25.593166 140393872258816 deprecation.py:323] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_grad.py:425: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.


array([2.256949], dtype=float32)

In [9]:
def ml_covariance(X, weights):
    covariance = tf.reduce_sum(tf.matmul(tf.tensordot(tf.transpose(X, [0, 2, 1]), weights, 1), X), 0)
    return tf.linalg.cholesky_solve(tf.linalg.cholesky(covariance), tf.eye(tf.shape(covariance)[0]))

def sandwich_covariance(X, y, weights):
    bread = ml_covariance(X, weights)
    left_meat = tf.tensordot(tf.transpose(X, [0, 2, 1]), weights, 1)
    right_meat =  tf.transpose(left_meat, [0, 2, 1])    
    residuals = y - tf.tensordot(X, solve_beta(X, y, weights), 1)
    residuals = tf.matmul(residuals, tf.transpose(residuals, [0, 2, 1]))
    meat = tf.reduce_sum(tf.matmul(tf.matmul(left_meat, residuals), right_meat), 0)    
    return tf.matmul(tf.matmul(bread, meat), bread)

sandwich_covariance_estimate = sandwich_covariance(
    X, y,
    tf.linalg.cholesky_solve(tf.linalg.cholesky(
        make_covariance_homoscedastic(log_variance)), tf.eye(X.shape[-1])))

In [10]:
sandwich_covariance_estimate.numpy()

array([[ 0.28468183, -0.02929138, -0.28468183,  0.02929138],
       [-0.02929133,  0.00967223,  0.02929132, -0.00967223],
       [-0.2846818 ,  0.02929136,  0.5987651 , -0.02658853],
       [ 0.02929131, -0.00967222, -0.02658838,  0.01365793]],
      dtype=float32)

## REML Exchangeable Test

Should agree with numbers from Chapter 2, slides 73 and 75.

In [11]:
log_exchangeable_covariance_params = fit(
    X, y,
    CovarianceSpec(initial_params=[0., 0., 0., 0., -1.],
                   make_covariance=make_covariance_exchangeable))
tf.exp(log_exchangeable_covariance_params).numpy()

array([2.3867779 , 2.058272  , 2.4678187 , 2.19673   , 0.63528943],
      dtype=float32)

In [12]:
solve_beta(X, y, tf.linalg.cholesky_solve(
    tf.linalg.cholesky(
        make_covariance_exchangeable(log_exchangeable_covariance_params)),
    tf.eye(X.shape[-1])))        

<tf.Tensor: id=87523, shape=(4, 1), dtype=float32, numpy=
array([[22.485374  ],
       [ 0.79431295],
       [-1.2507197 ],
       [-0.3155596 ]], dtype=float32)>

In [13]:
tf.sqrt(tf.linalg.diag_part(
    ml_covariance(X, tf.linalg.cholesky_solve(
        tf.linalg.cholesky(
            make_covariance_exchangeable(log_exchangeable_covariance_params)),
        tf.eye(X.shape[-1]))))).numpy()

array([0.5308524 , 0.07701091, 0.8316859 , 0.12065291], dtype=float32)

In [21]:
tf.flags.FLAGS.get('ml_covariance', False)
#tf.flags.FLAGS.
#sys.argv

AttributeError: get