In [None]:
import tensorflow as tf
import numpy as np
import scipy.sparse.linalg
from sklearn.datasets import fetch_california_housing
from IPython.display import clear_output, Image, display, HTML

###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Return a copy of graph_def with large constant payloads stripped.

    Every node of ``graph_def`` is copied into a fresh ``tf.GraphDef``. For
    ``Const`` nodes whose serialized ``tensor_content`` is longer than
    ``max_const_size`` bytes, the content is replaced by a short
    ``<stripped N bytes>`` placeholder.

    Args:
        graph_def: Graph definition whose ``node`` entries are copied.
        max_const_size: Largest ``tensor_content`` length (in bytes) that is
            kept verbatim.

    Returns:
        The new, stripped ``tf.GraphDef``; ``graph_def`` itself is not
        modified.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a protobuf bytes field: assigning a text
                # string raises TypeError on Python 3, so encode explicitly.
                placeholder = "<stripped %d bytes>" % size
                tensor.tensor_content = placeholder.encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inline through an embedded TensorBoard view.

    Args:
        graph_def: A graph definition, or any object exposing
            ``as_graph_def()`` (which is called to obtain the definition).
        max_const_size: Not used by this function, because constant
            stripping through ``strip_consts`` is not applied here.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()

    # The graph is rendered as-is; large constants are not stripped.
    pbtxt_literal = repr(str(graph_def))
    # Random suffix so each rendered graph gets its own element id.
    element_id = 'graph' + str(np.random.rand())

    page_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    page = page_template.format(data=pbtxt_literal, id=element_id)

    frame_template = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """
    # Double quotes must be escaped because the page is embedded inside the
    # double-quoted srcdoc attribute of the iframe.
    escaped_page = page.replace('"', '&quot;')
    display(HTML(frame_template.format(escaped_page)))
###### Do not modify  here ######

###### Implement Data Preprocess here ######

def detect_outliers(X, num_std=2):
    """Drop every sample that has at least one outlying feature value.

    A feature value is an inlier when it lies strictly inside
    ``mean - num_std * std < value < mean + num_std * std`` for its column.
    A sample is kept only when all of its features are inliers.

    Args:
        X: Dataset object exposing ``data`` (2-D, samples x features) and
            ``target`` (one entry per sample) attributes. It is modified in
            place.
        num_std: Half-width of the accepted band, in standard deviations.

    Returns:
        The same object ``X``, whose ``data`` and ``target`` have been
        replaced by float64 arrays holding only the retained samples.
        ``data`` stays two-dimensional even when no sample is retained.

    Raises:
        ValueError: If ``X.data`` is not two-dimensional.
    """
    data = np.asarray(X.data)
    if data.ndim != 2:
        raise ValueError(
            "X.data must be 2-D (samples x features), got %d-D" % data.ndim
        )

    center = data.mean(axis=0)
    spread = data.std(axis=0)
    lower = center - num_std * spread
    upper = center + num_std * spread

    # One boolean per sample: True when every feature is strictly in-band.
    keep = np.all((data > lower) & (data < upper), axis=1)

    X.data = np.asarray(data[keep], dtype=np.float64)
    X.target = np.asarray(np.asarray(X.target)[keep], dtype=np.float64)
    return X

def features_normalize(X):
    """Standardize each feature column to zero mean and unit variance.

    Args:
        X: Array of shape (samples, features).

    Returns:
        A new array of the same shape where every column has had its mean
        subtracted and been divided by its standard deviation. Columns with
        zero standard deviation (constant features) are mapped to 0 instead
        of NaN.
    """
    X = np.asarray(X)
    means = X.mean(axis=0)
    stddevs = X.std(axis=0)
    # A constant column has std 0; dividing by it would yield NaN/inf and
    # poison any later linear algebra, so scale such columns by 1 instead.
    scale = np.where(stddevs == 0, 1.0, stddevs)
    return (X - means) / scale

housing = fetch_california_housing()

# detect outliers
housing = detect_outliers(housing)

# feature normalization
housing.data = features_normalize(housing.data)

# assign ground-truth X, Y and add biases 1 to X
n, k = housing.data.shape
X = np.c_[np.ones((n,1)), housing.data]
Y = housing.target.reshape(n,1)

# split into training set (first 90% of the rows) and testing set (the rest)
n_train = int(n*0.9)
# derive the test size from the same split index: int(n*0.1) can be one row
# short of the actual test slice whenever n is not a multiple of 10
n_test = n - n_train
X_train = tf.constant(X[:n_train], dtype=tf.float32, name="X_train")
Y_train = tf.constant(Y[:n_train], dtype=tf.float32, name="Y_train")
X_train_T = tf.transpose(X_train, name="X_train_T")
X_test = tf.constant(X[n_train:], dtype=tf.float32, name="X_test")
Y_test = tf.constant(Y[n_train:], dtype=tf.float32, name="Y_test")

# setup tf variables theta, Y_hat, and error rate.
theta = tf.Variable(np.zeros((k+1,1), dtype=np.float32), name="theta")
Y_hat = tf.Variable(np.zeros((n_test,1), dtype=np.float32), name="Y_hat")
e = tf.Variable(0.0, name="error_rate")

# setup tf ops
# normal equation: theta = (X^T X)^-1 X^T Y
cal_theta = tf.assign(theta, tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(X_train_T,X_train)), X_train_T), Y_train))
# prediction on the held-out rows
cal_Y_hat = tf.assign(Y_hat, tf.matmul(X_test, theta))
# mean of |Y_test - Y_hat| / Y_test
cal_e = tf.assign(e, tf.reduce_mean(tf.divide(tf.abs(tf.subtract(Y_test, Y_hat)), Y_test)))

# the initializer only covers variables that already exist when it is
# created, so it must be built after theta, Y_hat and e
init = tf.global_variables_initializer()


###### Implement Data Preprocess here ######

###### Start TF session ######
with tf.Session() as sess:
    # Run the pipeline stages strictly in order: initialization, fitting
    # theta, predicting on the test set, then computing the error rate.
    for stage in (init, cal_theta, cal_Y_hat, cal_e):
        sess.run(stage)

    theta_value = sess.run(theta)
    print("theta:", theta_value)
    error_rate = sess.run(e)
    print("Error rate:", error_rate)

    # Visualize the graph that was just executed.
    default_graph_def = tf.get_default_graph().as_graph_def()
    show_graph(default_graph_def)
    '''
    Graph Explanation:
    It starts from a global initialization, which is a unique substructure that contains only one node.
    Then, we calculate theta through a series of computations defined by the normal equation.
    In the testing phase, Y_hat is predicted by X_test*theta.
    Finally, we evaluate the error rate e as the mean of abs(Y_test-Y_hat)/Y_test.
    '''
###### Start TF session ######
