# Multi-feature Linear Regression with Tensorflow
### The following code provides the basic skeleton for performing multivariable regression using Tensorflow.
### The code is a skeleton and should be adapted and tuned for a real dataset!
The code can be seen as an introduction to the usage of tf.Variable, tf.placeholder, GradientDescentOptimizer, tf.Session etc., as a starting point for a more complex classification or regression algorithm.

I first implement a simple Regression for a small dataset.
Then I implement the batch feeding for larger datasets.
Lastly, I use the tf.estimator API.

In [22]:
import tensorflow as tf
import numpy as np
np.set_printoptions(precision=3)
import matplotlib.pyplot as plt
%matplotlib inline

### Generating the data
The following cell should be modified to input real data

In [2]:
# Synthetic data set: every feature column is the same 0..10 ramp with its own
# uniform noise in [-1, 1); the target is a noisy 0..100 ramp.

num_features = 4
D = 30  # number of data points = length of the vectors

ramp = np.linspace(0, 10, D)
X_data = np.empty((D, num_features))
for col in range(num_features):
    # Fresh noise per column so the features are correlated but not identical
    X_data[:, col] = ramp + np.random.uniform(-1, 1, D)

y_noise = np.random.uniform(-1, 1, D)
y_data = np.linspace(0, 100, D) + y_noise

### Initializing the parameters
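With $y = m \cdot x + b$, we have `num_features` $+ 1$ parameters: $m$, a vector with one entry per feature, and $b$, a scalar. (Note that `D` in the code is the number of data points, not the number of features.)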

In [3]:
# Trainable parameters of the linear model y = m . x + b.
# m holds one weight per feature, initialised to ones.
m = tf.Variable(initial_value=tf.ones([num_features], tf.float64))
# The bias is stored in float64 as well, so that it has the same precision as
# the weights and the (float64) data instead of being kept in float32 and cast
# up every time it is used.
b = tf.Variable(1.0, dtype=tf.float64)

### Defining the Loss function 

In [4]:
# Sum of squared residuals over the whole data set.
# Built as one vectorized expression (the same form the batch section uses)
# instead of adding one graph sub-expression per data point in a Python loop.
y_hat = tf.tensordot(X_data, m, 1) + tf.cast(b, tf.float64)  # one prediction per row of X_data
error = tf.reduce_sum(tf.square(y_hat - y_data))

### Hyperparameters

In [5]:
# Gradient-descent step size; should be adjusted for a larger range of
# features in the dataset
lr = 0.0001

# Training stops once the relative difference between two consecutive losses
# falls below this threshold
stopping = 1e-08

### Tell Tensorflow what to minimize

In [6]:
# Plain gradient descent with step size lr; `train` is the op that applies one
# update of the variables towards a lower `error`.
optimizer = tf.train.GradientDescentOptimizer(lr)
train = optimizer.minimize(loss=error)

### Training

In [7]:
max_steps = 10000
# Report progress about ten times per run. Integer arithmetic keeps the modulo
# test exact when max_steps is not a multiple of 10, and max() guards against
# max_steps < 10.
report_every = max(1, max_steps // 10)

# Op that initializes the tf.Variables, for us m and b
init = tf.global_variables_initializer()

# Training
print('Tensorflow implementation:')
with tf.Session() as sess:
    sess.run(init)

    loss_last = sess.run(error)
    print('initial:\t m: {:20}\t b:{:.3f}\t loss:{:.4f}'
          .format(str(sess.run(m)), sess.run(b), loss_last))

    i = 0              # number of completed gradient steps
    converged = False  # set once the stopping criterion is met
    diverged = False   # set when the loss becomes NaN

    while i < max_steps and not converged:
        sess.run(train)
        i += 1

        # Evaluate the loss once per step and reuse the value below
        loss = sess.run(error)

        if np.isnan(loss):
            print('Loss function became NAN, consider decreasing learning rate')
            diverged = True
            break

        # Stop when the relative change between consecutive losses is at most
        # `stopping`. Written as a product so that a zero loss does not cause
        # a division by zero.
        converged = bool(np.abs(loss_last - loss) <= stopping * np.abs(loss_last))
        loss_last = loss

        # Print some information; i is the number of steps actually taken
        if i % report_every == 0:
            print('step:{:4d}\t m: {:20}\t b:{:.3f}\t loss:{:.4f}'
                  .format(i, str(sess.run(m)), sess.run(b), loss))

    print()
    final_slope = sess.run(m)
    final_intercept = sess.run(b)
    final_loss = sess.run(error)
    if converged:
        print('Total no. of steps: {:d}'.format(i))
        print('Final parameters m: {:20}\t b:{:.3f}\t loss:{:.4f}'
              .format(str(final_slope), final_intercept, final_loss))
    else:
        # A NaN abort has already printed its own message above, so the
        # non-convergence hint is only shown when the step budget ran out.
        if not diverged:
            print('Loss function did not converge according to the set criteria after {:d} steps'.format(max_steps))
            print('Consider increasing max_steps or learning rate variable (lr)')
        print('Obtained parameters m: {:20}\t b:{:.3f}\t loss:{:.4f}'
              .format(str(final_slope), final_intercept, final_loss))

Tensorflow implementation:
initial:	 m: [1. 1. 1. 1.]       	 b:1.000	 loss:34709.9886
step:1000	 m: [3.575 2.187 2.026 1.944]	 b:1.548	 loss:306.0207
step:2000	 m: [3.863 2.143 1.869 1.84 ]	 b:1.653	 loss:304.5272
step:3000	 m: [3.939 2.136 1.814 1.819]	 b:1.688	 loss:304.4099

Total no. of steps: 3910
Final parameters m: [3.958 2.135 1.797 1.816]	 b:1.700	 loss:304.4000


In [8]:
# Cross-check the TensorFlow result against scikit-learn's LinearRegression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

reg = LinearRegression()
reg.fit(X_data, y_data)
sk_slope, sk_intercept = reg.coef_, reg.intercept_

# mean_squared_error is an average over the D points; multiplying by D turns
# it into a sum of squares, which is what the TensorFlow loss above reports.
sk_loss = D * mean_squared_error(y_data, reg.predict(X_data))

print('sklearn implementation: m={:5}  \t b={:.3f} \t loss={:.3f}'
      .format(str(sk_slope), sk_intercept, sk_loss))

sklearn implementation: m=[3.967 2.135 1.787 1.816]  	 b=1.707 	 loss=304.399


In [9]:
# Plot the data and both fitted lines; only drawn for a single feature
if num_features == 1:
    tf_line = final_slope * X_data + final_intercept   # TensorFlow fit
    sk_line = sk_slope * X_data + sk_intercept         # sklearn fit

    plt.plot(X_data, y_data, '+')
    plt.plot(X_data, tf_line, 'r')
    plt.plot(X_data, sk_line, 'b')

    # Legend entries follow the order of the plot calls above
    legend_labels = ('Data', 'Our Tensorflow implementation', 'from sklearn')
    plt.legend(legend_labels, loc='upper left')

# Feeding in Batches

In [10]:
# Larger synthetic data set for mini-batch training: two noisy 0..10 ramps as
# features, and a target that is a noisy 0..10 ramp shifted by a random
# integer offset b0 drawn from {0, 1, 2}.

num_features = 2
D = 10000  # number of data points = length of the vectors

ramp = np.linspace(0, 10, D)
X_data = np.empty((D, num_features))
for col in range(num_features):
    # Fresh noise per column
    X_data[:, col] = ramp + np.random.uniform(-1, 1, D)

b0 = np.random.randint(3)
y_noise = np.random.uniform(-1, 1, D)
y_data = b0 + ramp + y_noise

In [11]:
# Trainable parameters for the mini-batch model: one weight per feature
# (initialised to ones) and a scalar bias (initialised to 2.0).
m = tf.Variable(initial_value=tf.ones([num_features], tf.float64))
# Keep the bias in float64 like the weights, rather than storing it in float32
# and casting it up wherever it is used.
b = tf.Variable(2.0, dtype=tf.float64)

In [12]:
batch_size = 10
# Placeholders that receive one mini-batch per training step via feed_dict:
# a (batch_size, num_features) block of inputs and the matching targets.
X_batch = tf.placeholder(tf.float64, shape=(batch_size, num_features), name='Xbatch')
# Note the trailing comma: (batch_size) is just an int, (batch_size,) is the
# intended one-element shape tuple.
y_batch = tf.placeholder(tf.float64, shape=(batch_size,), name='ybatch')

In [13]:
# Gradient-descent step size; should be adjusted for a larger range of
# features in the dataset
lr = 1e-4

# Loss for one mini-batch: linear prediction, residual, summed squares
y_hat = tf.tensordot(X_batch, m, axes=1) + tf.cast(b, tf.float64)
residual = y_batch - y_hat
error = tf.reduce_sum(tf.square(residual))

# Optimizer and the op that performs a single update step
optimizer = tf.train.GradientDescentOptimizer(lr)
train = optimizer.minimize(loss=error)

In [14]:
# Number of mini-batch gradient steps. Each step draws one random batch, so
# this counts updates rather than full passes over the data.
epochs = 1000
# Report about ten times per run. Integer arithmetic keeps the modulo test
# exact when epochs is not a multiple of 10; max() guards against epochs < 10.
report_every = max(1, epochs // 10)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # Defined up front so the summary below also works when epochs == 0
    errorstep = float('nan')

    for i in range(epochs):

        # Draw batch_size row indices uniformly at random (with replacement)
        rand_ind = np.random.randint(X_data.shape[0], size=batch_size)
        feed = {X_batch: X_data[rand_ind, :], y_batch: y_data[rand_ind]}

        # m and b are fetched in the same run() call as the update op, so they
        # are only used for progress reporting: they may be the values from
        # before this step's update.
        _, mstep, bstep, errorstep = sess.run([train, m, b, error], feed_dict=feed)

        if np.isnan(errorstep):
            print('Loss function became NAN, consider decreasing learning rate')
            break

        if i % report_every == 0:
            # The loss is summed over the batch; divide to report a per-sample value
            print('epochs:{:4d}\t m: {:20}\t b:{:.3f}\t  loss:{:.4f}'
                  .format(i+1, str(mstep), bstep, errorstep/batch_size))

    print()
    # Read the parameters in a separate run() call after the last update, so
    # the reported final values are guaranteed to include it.
    final_slope, final_intercept = sess.run([m, b])

    print('Final parameters m: {:20}\t b:{:.3f}\t  loss:{:.4f}'.format(str(final_slope), final_intercept, errorstep/batch_size))


epochs:   1	 m: [0.866 0.869]       	 b:1.984	  loss:66.5434
epochs: 101	 m: [0.505 0.505]       	 b:1.930	  loss:0.3689
epochs: 201	 m: [0.501 0.5  ]       	 b:1.940	  loss:0.8182
epochs: 301	 m: [0.5   0.499]       	 b:1.950	  loss:0.9044
epochs: 401	 m: [0.517 0.515]       	 b:1.960	  loss:0.3440
epochs: 501	 m: [0.494 0.497]       	 b:1.963	  loss:0.5353
epochs: 601	 m: [0.504 0.505]       	 b:1.971	  loss:0.4950
epochs: 701	 m: [0.5   0.495]       	 b:1.975	  loss:0.8292
epochs: 801	 m: [0.502 0.497]       	 b:1.981	  loss:0.3708
epochs: 901	 m: [0.492 0.493]       	 b:1.988	  loss:0.6541

Final parameters m: [0.503 0.501]       	 b:1.994	  loss:0.4100


In [15]:
# Cross-check the mini-batch result against scikit-learn's LinearRegression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

reg = LinearRegression()
reg.fit(X_data, y_data)
sk_slope, sk_intercept = reg.coef_, reg.intercept_

# Mean squared error over the full data set, comparable to the per-sample
# loss printed by the mini-batch training loop.
sk_loss = mean_squared_error(y_data, reg.predict(X_data))

print('sklearn implementation: m={:5}  \t b={:.3f} \t loss={:.3f}'
      .format(str(sk_slope), sk_intercept, sk_loss))

sklearn implementation: m=[0.504 0.475]  	 b=2.090 	 loss=0.498


## Using tf.estimator API

In [16]:
# Raise TensorFlow's logging threshold to WARN so that lower-severity messages
# are not printed.
# Comment this line out if you want to see the full output.
tf.logging.set_verbosity(tf.logging.WARN)

In [23]:
# One numeric feature column named 'x' holding all num_features inputs
x_column = tf.feature_column.numeric_column('x', shape=[num_features])
feat_cols = [x_column]

estimator = tf.estimator.LinearRegressor(feature_columns=feat_cols)

# Input function that serves shuffled batches of 10 from the numpy arrays;
# num_epochs=None means it does not stop by itself, so the number of batches
# is set by the `steps` argument below.
input_func = tf.estimator.inputs.numpy_input_fn(
    x={'x': X_data},
    y=y_data,
    batch_size=10,
    num_epochs=None,
    shuffle=True)

# Train for 1000 batches, then evaluate on 1000 batches from the same
# (training) input function.
estimator.train(input_fn=input_func, steps=1000)
train_metrics = estimator.evaluate(input_fn=input_func, steps=1000)



In [24]:
# Show the metrics returned by estimator.evaluate
print(train_metrics)

{'average_loss': 0.50244284, 'label/mean': 7.016204, 'loss': 5.0244284, 'prediction/mean': 7.036784, 'global_step': 1000}


In [25]:
# List the names of the variables stored in the trained estimator
estimator.get_variable_names()

['global_step',
 'linear/linear_model/bias_weights',
 'linear/linear_model/bias_weights/part_0/Ftrl',
 'linear/linear_model/bias_weights/part_0/Ftrl_1',
 'linear/linear_model/x/weights',
 'linear/linear_model/x/weights/part_0/Ftrl',
 'linear/linear_model/x/weights/part_0/Ftrl_1']

In [26]:
# Read the fitted parameters back from the estimator by variable name
m_estimator = estimator.get_variable_value('linear/linear_model/x/weights')  # weights of feature column 'x'
b_estimator = estimator.get_variable_value('linear/linear_model/bias_weights')  # bias term

In [27]:
# The weights are transposed (.T) so they print on a single row
print('From tf.estimator: m=',m_estimator.T,'\t b=',b_estimator)

From tf.estimator: m= [[0.503 0.496]] 	 b= [2.012]
