Permalink
Browse files

Added learning theory code.

  • Loading branch information...
1 parent 1fe972b commit 635dff5f6b14cbcf04aebc8be8efb2a734474813 @smc77 committed Nov 9, 2011
Showing with 683 additions and 0 deletions.
  1. +78 −0 r/stanford.machine.learning.Rcheck/00check.log
  2. +12 −0 r/stanford.machine.learning.Rcheck/00install.out
  3. +35 −0 r/stanford.machine.learning.Rcheck/stanford.ml-Ex.R
  4. +44 −0 r/stanford.machine.learning.Rcheck/stanford.ml-Ex.Rout
  5. BIN r/stanford.machine.learning.Rcheck/stanford.ml-Ex.ps
  6. +14 −0 r/stanford.machine.learning.Rcheck/stanford.ml/DESCRIPTION
  7. +3 −0 r/stanford.machine.learning.Rcheck/stanford.ml/INDEX
  8. BIN r/stanford.machine.learning.Rcheck/stanford.ml/Meta/Rd.rds
  9. BIN r/stanford.machine.learning.Rcheck/stanford.ml/Meta/demo.rds
  10. BIN r/stanford.machine.learning.Rcheck/stanford.ml/Meta/hsearch.rds
  11. BIN r/stanford.machine.learning.Rcheck/stanford.ml/Meta/links.rds
  12. BIN r/stanford.machine.learning.Rcheck/stanford.ml/Meta/package.rds
  13. +18 −0 r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml
  14. BIN r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml.rdb
  15. BIN r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml.rdx
  16. +2 −0 r/stanford.machine.learning.Rcheck/stanford.ml/demo/classification.R
  17. 0 r/stanford.machine.learning.Rcheck/stanford.ml/demo/linear.algebra.R
  18. +90 −0 r/stanford.machine.learning.Rcheck/stanford.ml/demo/multivariate.regression.R
  19. +95 −0 r/stanford.machine.learning.Rcheck/stanford.ml/demo/univariate.regression.R
  20. +2 −0 r/stanford.machine.learning.Rcheck/stanford.ml/help/AnIndex
  21. BIN r/stanford.machine.learning.Rcheck/stanford.ml/help/aliases.rds
  22. BIN r/stanford.machine.learning.Rcheck/stanford.ml/help/paths.rds
  23. BIN r/stanford.machine.learning.Rcheck/stanford.ml/help/stanford.ml.rdb
  24. BIN r/stanford.machine.learning.Rcheck/stanford.ml/help/stanford.ml.rdx
  25. +28 −0 r/stanford.machine.learning.Rcheck/stanford.ml/html/00Index.html
  26. +57 −0 r/stanford.machine.learning.Rcheck/stanford.ml/html/R.css
  27. +96 −0 r/stanford.machine.learning/demo/classification.R
  28. +109 −0 r/stanford.machine.learning/demo/regularization.R
  29. BIN r/stanford.ml_0.1.tar.gz
@@ -0,0 +1,78 @@
+* using log directory 'C:/Programming/MachineLearningLectures/r/stanford.machine.learning.Rcheck'
+* using R version 2.13.0 (2011-04-13)
+* using platform: i386-pc-mingw32 (32-bit)
+* using session charset: ISO8859-1
+* checking for file 'stanford.machine.learning/DESCRIPTION' ... OK
+* checking extension type ... Package
+* this is package 'stanford.ml' version '0.1'
+* checking package dependencies ... OK
+* checking if this is a source package ... OK
+* checking for executable files ... OK
+* checking whether package 'stanford.ml' can be installed ... WARNING
+Found the following significant warnings:
+ Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:32: All text must be in a section
+ Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:33: All text must be in a section
+See 'C:/Programming/MachineLearningLectures/r/stanford.machine.learning.Rcheck/00install.out' for details.
+* checking installed package size ... OK
+* checking package directory ... OK
+* checking for portable file names ... OK
+* checking DESCRIPTION meta-information ... WARNING
+Non-standard license specification:
+ GPL3
+Standardizable: FALSE
+* checking top-level files ... OK
+* checking index information ... WARNING
+Demos with missing or empty index information:
+[1] "classification"
+See the information on INDEX files and package subdirectories in the
+chapter 'Creating R packages' of the 'Writing R Extensions' manual.
+* checking package subdirectories ... OK
+* checking R files for non-ASCII characters ... OK
+* checking R files for syntax errors ... OK
+* checking whether the package can be loaded ... OK
+* checking whether the package can be loaded with stated dependencies ... OK
+* checking whether the package can be unloaded cleanly ... OK
+* checking for unstated dependencies in R code ... OK
+* checking S3 generic/method consistency ... OK
+* checking replacement functions ... OK
+* checking foreign function calls ... OK
+* checking R code for possible problems ... NOTE
+gradient.path: no visible binding for global variable 'alpha'
+gradient.path: no visible binding for global variable 'm'
+gradient.path: no visible binding for global variable 'y'
+* checking Rd files ... WARNING
+prepare_Rd: stanford.machine.learning-package.Rd:32: All text must be in a section
+prepare_Rd: stanford.machine.learning-package.Rd:33: All text must be in a section
+* checking Rd metadata ... OK
+* checking Rd cross-references ... WARNING
+Unknown package(s) '<pkg>' in Rd xrefs
+* checking for missing documentation entries ... WARNING
+Undocumented code objects:
+ grad gradient.path loss zscore
+All user-level objects in a package should have documentation entries.
+See the chapter 'Writing R documentation files' in manual 'Writing R
+Extensions'.
+* checking for code/documentation mismatches ... OK
+* checking Rd \usage sections ... OK
+* checking Rd contents ... OK
+* checking for unstated dependencies in examples ... NOTE
+Warning: parse error in file 'stanford.ml-Ex.R':
+11: unexpected symbol
+21:
+22: ~~ simple examples
+ ^
+* checking examples ... ERROR
+Running examples in 'stanford.ml-Ex.R' failed
+The error most likely occurred in:
+
+> ### Name: stanford.machine.learning-package
+> ### Title: Provides code to accompany Stanford CS229 lectures on "Machine
+> ### Learning".
+> ### Aliases: stanford.machine.learning-package stanford.machine.learning
+> ### Keywords: package
+>
+> ### ** Examples
+>
+> ~~ simple examples of the most important functions ~~
+Error: unexpected symbol in "~~ simple examples"
+Execution halted
@@ -0,0 +1,12 @@
+* installing *source* package 'stanford.ml' ...
+** R
+** demo
+** preparing package for lazy loading
+** help
+Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:32: All text must be in a section
+Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:33: All text must be in a section
+*** installing help indices
+** building package indices ...
+** testing if installed package can be loaded
+
+* DONE (stanford.ml)
@@ -0,0 +1,35 @@
+# Example-runner script for the 'stanford.ml' package as written into the
+# .Rcheck directory: it loads the package and then executes the Examples
+# section of each help page in turn.
+pkgname <- "stanford.ml"
+# Presumably provides cleanEx(), nameEx() and the 'CheckExEnv' environment
+# used below -- TODO confirm against the R installation.
+source(file.path(R.home("share"), "R", "examples-header.R"))
+options(warn = 1)
+options(pager = "console")
+library('stanford.ml')
+
+assign(".oldSearch", search(), pos = 'CheckExEnv')
+cleanEx()
+nameEx("stanford.machine.learning-package")
+### * stanford.machine.learning-package
+
+flush(stderr()); flush(stdout())
+
+### Name: stanford.machine.learning-package
+### Title: Provides code to accompany Stanford CS229 lectures on "Machine
+### Learning".
+### Aliases: stanford.machine.learning-package stanford.machine.learning
+### Keywords: package
+
+### ** Examples
+
+# The help page still carries the template placeholder below. It is not R
+# code, and left bare it stops the run with "unexpected symbol", so it is kept
+# only as a comment until real examples are written.
+# ~~ simple examples of the most important functions ~~
+
+
+
+### * <FOOTER>
+###
+cat("Time elapsed: ", proc.time() - get("ptime", pos = 'CheckExEnv'),"\n")
+grDevices::dev.off()
+###
+### Local variables: ***
+### mode: outline-minor ***
+### outline-regexp: "\\(> \\)?### [*]+" ***
+### End: ***
+quit('no')
@@ -0,0 +1,44 @@
+
+R version 2.13.0 (2011-04-13)
+Copyright (C) 2011 The R Foundation for Statistical Computing
+ISBN 3-900051-07-0
+Platform: i386-pc-mingw32/i386 (32-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+ Natural language support but running in an English locale
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> pkgname <- "stanford.ml"
+> source(file.path(R.home("share"), "R", "examples-header.R"))
+> options(warn = 1)
+> options(pager = "console")
+> library('stanford.ml')
+>
+> assign(".oldSearch", search(), pos = 'CheckExEnv')
+> cleanEx()
+> nameEx("stanford.machine.learning-package")
+> ### * stanford.machine.learning-package
+>
+> flush(stderr()); flush(stdout())
+>
+> ### Name: stanford.machine.learning-package
+> ### Title: Provides code to accompany Stanford CS229 lectures on "Machine
+> ### Learning".
+> ### Aliases: stanford.machine.learning-package stanford.machine.learning
+> ### Keywords: package
+>
+> ### ** Examples
+>
+> ~~ simple examples of the most important functions ~~
+Error: unexpected symbol in "~~ simple examples"
+Execution halted
Binary file not shown.
@@ -0,0 +1,14 @@
+Package: stanford.ml
+Type: Package
+Title: Provides code to accompany Stanford's CS229: Machine Learning
+Version: 0.1
+Date: 2011-10-02
+Author: Shane Conway
+Maintainer: Shane Conway <shane.conway@gmail.com>
+Description: Provides R code to reproduce the material in CS229a,
+ Stanford's introduction to Machine Learning, which was offered
+ openly to the public on ml-class.org in Fall 2011. Most code
+ is provided in the form of demos.
+License: GPL-3
+LazyLoad: yes
+Built: R 2.13.0; ; 2011-10-27 00:42:46 UTC; windows
@@ -0,0 +1,3 @@
+stanford.machine.learning-package
+ Provides code to accompany Stanford CS229
+ lectures on "Machine Learning".
@@ -0,0 +1,18 @@
+# Load hook for the installed package: removes itself from the attached
+# package environment, lazy-loads the package's object database into that
+# environment, and then hands over to a '.First.lib' supplied by the database
+# if there is one.
+.First.lib <- function(libname, pkgname)
+{
+  # Environment of the attached package, found through its search() position.
+  attached.name <- paste("package", pkgname, sep=":")
+  pkg.env <- as.environment(match(attached.name, search()))
+
+  # Database path is <libname>/<pkgname>/R/<stem>, where <stem> is pkgname
+  # with any "_..." suffix removed.
+  db.stem <- sub("([^-]+)_.*", "\\1", pkgname)
+  db.path <- file.path(libname, pkgname, "R", db.stem)
+
+  # Remove this stub before loading, so a '.First.lib' found afterwards can
+  # only have come from the database.
+  rm(.First.lib, envir = pkg.env)
+  lazyLoad(db.path, pkg.env)
+
+  if (!exists(".First.lib", envir = pkg.env, inherits = FALSE))
+    return(invisible(NULL))
+
+  hook <- get(".First.lib", envir = pkg.env, inherits = FALSE)
+  if (!is.function(hook))
+    stop(gettextf("package '%s' has a non-function '.First.lib'",
+                  pkgname),
+         domain = NA)
+  hook(libname, pkgname)
+}
@@ -0,0 +1,2 @@
+
+# Line plot of the sigmoid 1 / (1 + exp(-z)) evaluated at the integers -10..10.
+qplot(x = -10:10,
+      y = 1/(1 + exp(-(-10:10))),
+      geom = "line",
+      xlab = "z",
+      ylab = "sigmoid function")
@@ -0,0 +1,90 @@
+# UCI Housing dataset; a description of the columns is available at
+# http://archive.ics.uci.edu/ml/datasets/Housing
+
+housing <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")
+colnames(housing) <- c("CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV")
+
+# Keep only the four predictors used in the model, plus the response MEDV
+housing <- housing[c("CRIM", "RM", "PTRATIO", "LSTAT", "MEDV")]
+
+# Pairwise scatterplots of the retained columns
+plot(housing)
+
+# Reference fit: ordinary least squares via lm()
+housing.lm <- lm(MEDV ~ CRIM + RM + PTRATIO + LSTAT, data=housing)
+summary(housing.lm)
+
+readline("Press <ENTER> to Continue.")
+
+# Second dataset (area, bedrooms, price) used for the gradient descent part
+num.iterations <- 1000
+
+data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")
+
+x <- data[c("area", "bedrooms")]
+# Response as a one-column matrix, expressed in $1000's
+y <- matrix(data$price / 1000, ncol=1)
+
+# Standardize input values: subtract the mean and divide by the standard
+# deviation.
+#   x:        numeric vector, matrix or data frame; matrices and data frames
+#             are standardized column by column.
+#   mean.val: value to center on; the default NA means "use mean(x)".
+# Returns an object of standardized values. Inputs that cannot be scaled
+# (fewer than two observations, or no spread at all) are returned unchanged.
+zscore <- function(x, mean.val=NA) {
+  if (is.matrix(x)) return(apply(x, 2, zscore, mean.val=mean.val))
+  if (is.data.frame(x)) return(data.frame(apply(x, 2, zscore, mean.val=mean.val)))
+  # sd() is NA for a single observation, which would make the zero-spread
+  # test below fail with "missing value where TRUE/FALSE needed".
+  if (length(x) < 2) return(x)
+  if (is.na(mean.val)) mean.val <- mean(x)
+  sd.val <- sd(x)
+  if (all(sd.val == 0)) return(x) # all the values are the same
+  (x - mean.val) / sd.val
+}
+
+# Standardize the features: zscore() works column by column on x, centering
+# each column on its mean and dividing by its standard deviation.
+x.scaled <- zscore(x)
+
+# Gradient of the squared-error cost of a linear model, evaluated at theta.
+#   x:     design matrix, one row per observation
+#   y:     one-column matrix of targets; nrow(y) is used as the sample count
+#   theta: parameters as a 1 x ncol(x) row matrix
+# Returns the gradient as a row matrix with one entry per column of x.
+grad <- function(x, y, theta) {
+  residuals <- (x %*% t(theta)) - y
+  column.gradient <- (1 / nrow(y)) * (t(x) %*% residuals)
+  t(column.gradient)
+}
+
+# Batch gradient descent for linear regression, using grad() for the update.
+#   x:              feature vector, matrix or data frame (one row per
+#                   observation); a leading column of ones is added unless the
+#                   first column already consists of ones.
+#   y:              one-column matrix of targets.
+#   alpha:          learning rate.
+#   num.iterations: maximum number of updates.
+#   threshold:      stop early once every parameter has moved by less than
+#                   this amount (see the note in the loop).
+#   output.path:    if TRUE return every iterate (one row each, starting with
+#                   the all-zero initial value); otherwise only the last one.
+# Returns a matrix of iterates when output.path is TRUE, else a vector.
+gradient.descent <- function(x, y, alpha=0.1, num.iterations=500, threshold=1e-5, output.path=FALSE) {
+  # The observation count has to be known before the intercept column is
+  # built; it was previously read before being assigned.
+  m <- if (is.vector(x)) length(x) else nrow(x)
+
+  # Add x_0 = 1 as the first column
+  if (is.vector(x) || (!all(x[,1] == 1))) x <- cbind(rep(1, m), x)
+
+  # Force a numeric matrix. Unlike apply(x, 2, as.numeric), this keeps the
+  # matrix shape when there is only a single row.
+  x <- as.matrix(x)
+  storage.mode(x) <- "double"
+  num.features <- ncol(x)
+
+  # Initialize the parameters
+  theta <- matrix(rep(0, num.features), nrow=1)
+
+  # Preallocate room for the initial value plus one row per iteration, so the
+  # path is not re-copied on every update.
+  theta.path <- matrix(NA_real_, nrow=num.iterations + 1, ncol=num.features)
+  colnames(theta.path) <- colnames(x)
+  theta.path[1, ] <- theta
+  rows.used <- 1
+
+  for (i in seq_len(num.iterations)) {
+    theta <- theta - alpha * grad(x, y, theta)
+    if (all(is.na(theta))) break # diverged: nothing usable to record
+    rows.used <- rows.used + 1
+    theta.path[rows.used, ] <- theta
+    # NOTE(review): row i - 1 holds the iterate from two updates ago (row 1 is
+    # the initial value), so this measures the change over the last two
+    # steps. Kept as it was; confirm whether one step was intended.
+    if (i > 2 && all(abs(theta - theta.path[i - 1, ]) < threshold)) break
+  }
+
+  theta.path <- theta.path[seq_len(rows.used), , drop=FALSE]
+  if (output.path) theta.path else theta.path[nrow(theta.path), ]
+}
+
+# Run gradient descent on the raw and on the standardized features, keeping
+# the whole parameter path of each run.
+unscaled.theta <- gradient.descent(x=x, y=y, num.iterations=num.iterations, output.path=TRUE)
+scaled.theta <- gradient.descent(x=x.scaled, y=y, num.iterations=num.iterations, output.path=TRUE)
+
+# Least-squares fits from lm() for comparison
+summary(lm(y ~ area + bedrooms, data=x))
+summary(lm(y ~ area + bedrooms, data=x.scaled))
+
+# Parameter paths for the standardized run (the first two columns of the path).
+# NOTE(review): qplot() is presumably ggplot2's; nothing in this script
+# attaches that package -- confirm it is loaded before the demo runs.
+qplot(seq_len(nrow(scaled.theta)), scaled.theta[,1], geom=c("line"), xlab="iteration", ylab="theta_1")
+qplot(seq_len(nrow(scaled.theta)), scaled.theta[,2], geom=c("line"), xlab="iteration", ylab="theta_2")
+
+# Look at output for various different alpha values
+alphas <- c(1e-9, 1e-7, 1e-3, 0.1)
+vary.alpha <- lapply(alphas, function(alpha) gradient.descent(x=x, y=y, alpha=alpha, num.iterations=num.iterations, output.path=TRUE))
+
+# One panel per alpha; the previous layout is restored afterwards.
+old.par <- par(mfrow = c(2, 2))
+for (j in seq_along(alphas)) {
+  # Label each panel with its own alpha (every panel used to say 1e-9).
+  plot(vary.alpha[[j]][,2], ylab=paste0("area (alpha=", format(alphas[j]), ")"), xlab="iteration", type="l")
+}
+par(old.par)
+
+readline("Press <ENTER> to Continue.")
+
+# Closed-form solution via the normal equation: theta = (X'X)^-1 X'y
+data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")
+# No 'm' is defined at the top level of this script, so the intercept column
+# is sized from the data itself.
+x <- as.matrix(cbind(intercept=rep(1, nrow(data)), data[, c("area", "bedrooms")]))
+theta <- solve(t(x) %*% x) %*% t(x) %*% y
+
Oops, something went wrong.

0 comments on commit 635dff5

Please sign in to comment.