# Function Definition

In [None]:
### cleanUp
cleanUp <- function() {
  # CLEANUP Drops the objects visible to rm()/ls() and scrolls the output.
  #
  #   Takes no arguments. Returns the value of cat(), i.e. NULL.
  #
  #   Note: ls() and rm() default to the current frame, so when they are
  #   called here they only see objects local to this function call. The
  #   caller's workspace is left untouched.
  rm(list = ls())

  # Emit 50 newlines so that earlier output is pushed out of view.
  cat(rep("\n", 50))
}

In [None]:
### featureNormalize
featureNormalize <- function(X) {
  #FEATURENORMALIZE Normalizes the features in X
  #   FEATURENORMALIZE(X) returns a normalized version of X where
  #   the mean value of each feature is 0 and the standard deviation
  #   is 1. This is often a good preprocessing step to do when
  #   working with learning algorithms.
  #
  #   X is a matrix or data frame where each column is a feature and each
  #   row is an example; every column is normalized separately.
  #
  #   Returns a list with:
  #     X_norm  same container type and shape as X, holding the
  #             normalized values
  #     mu      numeric vector with the mean of each column of X
  #     sigma   numeric vector with the standard deviation (sd) of each
  #             column of X

  # seq_len() yields an empty index when X has no columns, so the loops
  # below are skipped instead of running over c(1, 0).
  feature_idx <- seq_len(ncol(X))

  # Per-feature mean and standard deviation.
  mu <- vapply(feature_idx, function(p) mean(X[, p]), numeric(1))
  sigma <- vapply(feature_idx, function(p) sd(X[, p]), numeric(1))

  # Normalize one whole column at a time.
  X_norm <- X
  for (p in feature_idx) {
    if (sigma[p] != 0) {
      X_norm[, p] <- (X[, p] - mu[p]) / sigma[p]
    } else {
      # sigma[p] == 0 means every value in the column equals mu[p], so the
      # centered values are all zero. Assign a plain vector (not a 1 x n
      # matrix) so the assignment also works for data frame columns.
      X_norm[, p] <- rep(0, nrow(X))
    }
  }

  list(X_norm = X_norm, mu = mu, sigma = sigma)
}

In [None]:
### computeCostMulti
computeCostMulti <- function(X, y, theta) {
  #COMPUTECOSTMULTI Compute cost for linear regression with multiple variables
  #   J <- COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the
  #   parameter for linear regression to fit the data points in X and y
  #
  #   X      numeric matrix, one row per training example
  #   y      vector of targets, one entry per row of X
  #   theta  parameter vector, one entry per column of X
  #
  #   Returns J = sum((X %*% theta - y)^2) / (2 * m) as a 1 x 1 matrix
  #   (the result of a matrix product), where m = length(y).

  # Number of training examples
  m <- length(y)

  # Residuals of the current fit
  dif <- X %*% theta - y

  # Half the mean squared residual
  J <- (t(dif) %*% dif) / (2 * m)
  J
}



In [None]:
### gradientDescentMulti
gradientDescentMulti <- function(X, y, theta, alpha, num_iters) {
  #GRADIENTDESCENTMULTI Performs gradient descent to learn theta
  #   theta <- GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by
  #   taking num_iters gradient steps with learning rate alpha
  #
  #   X          numeric matrix, one row per training example
  #   y          vector of targets, one entry per row of X
  #   theta      initial parameter vector, one entry per column of X
  #   alpha      learning rate
  #   num_iters  number of gradient steps to take
  #
  #   Returns a list with:
  #     theta      the parameters after the last step
  #     J_history  the cost (from computeCostMulti) after each step

  # Number of training examples
  m <- length(y)
  J_history <- rep(0, num_iters)

  # seq_len() gives an empty loop when num_iters is 0 (1:0 would run twice).
  for (iter in seq_len(num_iters)) {
    # Gradient of the cost with respect to every theta_j at once:
    # t(X) %*% (X %*% theta - y) / m. Computing it from the current theta
    # before assigning makes the update simultaneous for all coordinates.
    # as.vector() drops the matrix shape and any dimnames so that theta
    # keeps the shape it was passed in with.
    gradient <- as.vector(crossprod(X, X %*% theta - y)) / m

    theta <- theta - alpha * gradient

    # Save the cost J in every iteration
    J_history[iter] <- computeCostMulti(X, y, theta)
  }

  list(theta = theta, J_history = J_history)
}



In [None]:
### normalEqn
normalEqn <- function(X, y) {
  #NORMALEQN Computes the closed-form solution to linear regression
  #   NORMALEQN(X,y) computes the closed-form solution to linear
  #   regression using the normal equations.
  #
  #   X  numeric design matrix, one row per training example
  #   y  vector of targets, one entry per row of X
  #
  #   Returns theta = pinv(t(X) %*% X) %*% t(X) %*% y as a one-column
  #   matrix with one row per column of X.

  # Only fall back to loading pinv from "pinv.R" when no pinv function is
  # already defined; this avoids a file dependency when it is.
  if (!exists("pinv", mode = "function")) {
    source("pinv.R")
  }

  theta <- pinv(t(X) %*% X) %*% t(X) %*% y
  theta
}



In [None]:
### Inverse
pinv <-
  function (X, tol = max(dim(X)) * max(X) * .Machine$double.eps)
  {
    # PINV Pseudoinverse of a matrix computed from its singular value
    #   decomposition.
    #
    #   X    numeric or complex matrix (anything else that is numeric or
    #        complex is first converted with as.matrix)
    #   tol  relative cut-off: singular values that are not greater than
    #        tol * (largest singular value) are treated as zero
    #
    #   Returns a matrix with the dimensions of t(X). Stops with an error
    #   when X has more than two dimensions or is neither numeric nor
    #   complex.
    if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X)))
      stop("'X' must be a numeric or complex matrix")
    if (!is.matrix(X))
      X <- as.matrix(X)
    Xsvd <- svd(X)
    if (is.complex(X))
      Xsvd$u <- Conj(Xsvd$u)

    # Flag each singular value that is large enough to be inverted.
    keep <- Xsvd$d > max(tol * Xsvd$d[1L], 0)

    if (all(keep))
      # Full rank: invert every singular value.
      Xsvd$v %*% (1 / Xsvd$d * t(Xsvd$u))
    else if (!any(keep))
      # Nothing above the cut-off: the pseudoinverse is the zero matrix.
      array(0, dim(X)[2L:1L])
    else
      # Rank deficient: use only the retained singular triplets so that
      # no division by a (near-)zero singular value takes place.
      Xsvd$v[, keep, drop = FALSE] %*%
        ((1 / Xsvd$d[keep]) * t(Xsvd$u[, keep, drop = FALSE]))
  }

# Linear Regression

In [None]:
# Call cleanUp (a bare `cleanUp` would only print the function).
cleanUp()

training_data <- '../../data/house_price_training_data.txt'
prediction_data <- '../../data/house_price_predict.txt'


## ---------------- Part 1: Feature Normalization ----------------
cat('Loading data ...\n')
## Load Data
data <-
  read.table(training_data, sep = ',')
ncols <- ncol(data)
# The last column is the target y; every column before it is a feature.
# seq_len(ncols - 1) spells the range out explicitly (1:ncols-1 parses as
# (1:ncols) - 1), and drop = FALSE keeps X two-dimensional even when there
# is only a single feature column.
X <- data[, seq_len(ncols - 1), drop = FALSE]
y <- data[, ncols]
m <- length(y)


# Print out some data points
cat('First 10 examples from the dataset: \n')
# head() shows at most 10 rows and does not pad with NA rows when the
# dataset is shorter than that.
temp <- cbind("X = [", head(X, 10), "], y =", head(y, 10))
names(temp) <- NULL
print(temp)

## Scale features and set them to zero mean - Normalize Features

In [None]:
# Normalize every feature column (zero mean, unit standard deviation) and
# keep the per-feature statistics that featureNormalize reports.
fN <- featureNormalize(X)
X <- fN[["X_norm"]]
mu <- fN[["mu"]]
sigma <- fN[["sigma"]]

# Prepend the intercept column of ones and convert to a numeric matrix
X <- as.matrix(cbind(rep(1,m),X))

## Gradient Descent

In [None]:
# Choose some alpha value
alpha <- 1 # modified from 0.01
num_iters <- 50 # modified from 100

# Init Theta and Run Gradient Descent
# One parameter per column of the design matrix X (intercept + features).
theta <- rep(0, ncol(X))
# Here we can test different learning parameter alpha
gDM <- gradientDescentMulti(X, y, theta, alpha, num_iters)
theta <- gDM$theta
J_history <- gDM$J_history
rm(gDM)

## Plot the convergence graph - Gradient Descent

In [None]:
# Plot the convergence graph: cost J after each gradient descent iteration.
# seq_along() keeps x and y the same length even for an empty history.
plot(seq_along(J_history), J_history, type="l", col="blue", lwd=2, cex=.1,
     xlab="Number of Iterations", ylab="Cost J")

# Display gradient descent's result
cat('Theta computed from gradient descent: \n')
print(theta)

## Predict price of a 1650 sq-ft, 3 br house - Gradient Descent

In [None]:
# Estimate the price of a 1650 sq-ft, 3 br house
# Recall that the first column of X is all-ones. Thus, it does
# not need to be normalized.

prediction_data <- read.table(prediction_data, sep = ',')

# theta was learned on normalized features, so the feature columns of the
# prediction input have to be normalized with the same mu and sigma before
# they are multiplied by theta.
# NOTE(review): this assumes the prediction file holds raw (unnormalized)
# feature values preceded by a leading column of ones -- confirm against
# the data file.
x_pred <- data.matrix(prediction_data)
feature_cols <- seq_along(mu) + 1
# featureNormalize maps zero-variance features to 0; dividing by 1 instead
# of 0 here avoids NaN for such a feature.
sigma_safe <- ifelse(sigma == 0, 1, sigma)
x_pred[, feature_cols] <- sweep(
  sweep(x_pred[, feature_cols, drop = FALSE], 2, mu, "-"),
  2, sigma_safe, "/"
)

price <- x_pred %*% theta
cat(sprintf('Predicted price of a 1650 sq-ft, 3 br house (using gradient descent):\n $%f\n', price))

#price <- cbind(1, (1650-mu[1])/sigma[1], (3-mu[2])/sigma[2]) %*% theta
#price

## Solving with normal equations

In [None]:
cat('Solving with normal equations...\n')
## Load Data
# Reload the raw data: the normal equations are solved on unnormalized
# features.
data <- read.table(training_data,sep =',')
ncols <- ncol(data)
# The last column is the target y; every column before it is a feature.
# seq_len(ncols - 1) spells the range out explicitly (1:ncols-1 parses as
# (1:ncols) - 1), and drop = FALSE keeps X two-dimensional even when there
# is only a single feature column.
X <- data[, seq_len(ncols - 1), drop = FALSE]
y <- data[, ncols]
m <- length(y)

# Add intercept term to X
X <- cbind(rep(1,m),X)
X <- as.matrix(X)
# Calculate the parameters from the normal equation
theta <- normalEqn(X, y)

# Display normal equation's result
cat('Theta computed from the normal equations: \n')
print(theta)

## Predict price of a 1650 sq-ft, 3 br house - Normal Equation

In [None]:
# Multiply the prediction rows by the normal-equation theta and report
# the resulting price.
price <- data.matrix(prediction_data) %*% theta
cat(
  sprintf(
    fmt = 'Predicted price of a 1650 sq-ft, 3 br house (using normal equations):\n $%f\n',
    price
  )
)