
Added learning theory code.

commit 635dff5f6b14cbcf04aebc8be8efb2a734474813 1 parent 1fe972b
@smc77 authored
Showing with 683 additions and 0 deletions.
  1. +78 −0 r/stanford.machine.learning.Rcheck/00check.log
  2. +12 −0 r/stanford.machine.learning.Rcheck/00install.out
  3. +35 −0 r/stanford.machine.learning.Rcheck/stanford.ml-Ex.R
  4. +44 −0 r/stanford.machine.learning.Rcheck/stanford.ml-Ex.Rout
  5. BIN  r/stanford.machine.learning.Rcheck/stanford.ml-Ex.ps
  6. +14 −0 r/stanford.machine.learning.Rcheck/stanford.ml/DESCRIPTION
  7. +3 −0  r/stanford.machine.learning.Rcheck/stanford.ml/INDEX
  8. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/Rd.rds
  9. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/demo.rds
  10. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/hsearch.rds
  11. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/links.rds
  12. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/package.rds
  13. +18 −0 r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml
  14. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml.rdb
  15. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml.rdx
  16. +2 −0  r/stanford.machine.learning.Rcheck/stanford.ml/demo/classification.R
  17. 0  r/stanford.machine.learning.Rcheck/stanford.ml/demo/linear.algebra.R
  18. +90 −0 r/stanford.machine.learning.Rcheck/stanford.ml/demo/multivariate.regression.R
  19. +95 −0 r/stanford.machine.learning.Rcheck/stanford.ml/demo/univariate.regression.R
  20. +2 −0  r/stanford.machine.learning.Rcheck/stanford.ml/help/AnIndex
  21. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/aliases.rds
  22. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/paths.rds
  23. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/stanford.ml.rdb
  24. BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/stanford.ml.rdx
  25. +28 −0 r/stanford.machine.learning.Rcheck/stanford.ml/html/00Index.html
  26. +57 −0 r/stanford.machine.learning.Rcheck/stanford.ml/html/R.css
  27. +96 −0 r/stanford.machine.learning/demo/classification.R
  28. +109 −0 r/stanford.machine.learning/demo/regularization.R
  29. BIN  r/stanford.ml_0.1.tar.gz
78 r/stanford.machine.learning.Rcheck/00check.log
@@ -0,0 +1,78 @@
+* using log directory 'C:/Programming/MachineLearningLectures/r/stanford.machine.learning.Rcheck'
+* using R version 2.13.0 (2011-04-13)
+* using platform: i386-pc-mingw32 (32-bit)
+* using session charset: ISO8859-1
+* checking for file 'stanford.machine.learning/DESCRIPTION' ... OK
+* checking extension type ... Package
+* this is package 'stanford.ml' version '0.1'
+* checking package dependencies ... OK
+* checking if this is a source package ... OK
+* checking for executable files ... OK
+* checking whether package 'stanford.ml' can be installed ... WARNING
+Found the following significant warnings:
+ Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:32: All text must be in a section
+ Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:33: All text must be in a section
+See 'C:/Programming/MachineLearningLectures/r/stanford.machine.learning.Rcheck/00install.out' for details.
+* checking installed package size ... OK
+* checking package directory ... OK
+* checking for portable file names ... OK
+* checking DESCRIPTION meta-information ... WARNING
+Non-standard license specification:
+ GPL3
+Standardizable: FALSE
+* checking top-level files ... OK
+* checking index information ... WARNING
+Demos with missing or empty index information:
+[1] "classification"
+See the information on INDEX files and package subdirectories in the
+chapter 'Creating R packages' of the 'Writing R Extensions' manual.
+* checking package subdirectories ... OK
+* checking R files for non-ASCII characters ... OK
+* checking R files for syntax errors ... OK
+* checking whether the package can be loaded ... OK
+* checking whether the package can be loaded with stated dependencies ... OK
+* checking whether the package can be unloaded cleanly ... OK
+* checking for unstated dependencies in R code ... OK
+* checking S3 generic/method consistency ... OK
+* checking replacement functions ... OK
+* checking foreign function calls ... OK
+* checking R code for possible problems ... NOTE
+gradient.path: no visible binding for global variable 'alpha'
+gradient.path: no visible binding for global variable 'm'
+gradient.path: no visible binding for global variable 'y'
+* checking Rd files ... WARNING
+prepare_Rd: stanford.machine.learning-package.Rd:32: All text must be in a section
+prepare_Rd: stanford.machine.learning-package.Rd:33: All text must be in a section
+* checking Rd metadata ... OK
+* checking Rd cross-references ... WARNING
+Unknown package(s) '<pkg>' in Rd xrefs
+* checking for missing documentation entries ... WARNING
+Undocumented code objects:
+ grad gradient.path loss zscore
+All user-level objects in a package should have documentation entries.
+See the chapter 'Writing R documentation files' in manual 'Writing R
+Extensions'.
+* checking for code/documentation mismatches ... OK
+* checking Rd \usage sections ... OK
+* checking Rd contents ... OK
+* checking for unstated dependencies in examples ... NOTE
+Warning: parse error in file 'stanford.ml-Ex.R':
+11: unexpected symbol
+21:
+22: ~~ simple examples
+ ^
+* checking examples ... ERROR
+Running examples in 'stanford.ml-Ex.R' failed
+The error most likely occurred in:
+
+> ### Name: stanford.machine.learning-package
+> ### Title: Provides code to accompany Stanford CS229 lectures on "Machine
+> ### Learning".
+> ### Aliases: stanford.machine.learning-package stanford.machine.learning
+> ### Keywords: package
+>
+> ### ** Examples
+>
+> ~~ simple examples of the most important functions ~~
+Error: unexpected symbol in "~~ simple examples"
+Execution halted
12 r/stanford.machine.learning.Rcheck/00install.out
@@ -0,0 +1,12 @@
+* installing *source* package 'stanford.ml' ...
+** R
+** demo
+** preparing package for lazy loading
+** help
+Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:32: All text must be in a section
+Warning: C:/Programming/MachineLearningLectures/r/stanford.machine.learning/man/stanford.machine.learning-package.Rd:33: All text must be in a section
+*** installing help indices
+** building package indices ...
+** testing if installed package can be loaded
+
+* DONE (stanford.ml)
35 r/stanford.machine.learning.Rcheck/stanford.ml-Ex.R
@@ -0,0 +1,35 @@
+pkgname <- "stanford.ml"
+source(file.path(R.home("share"), "R", "examples-header.R"))
+options(warn = 1)
+options(pager = "console")
+library('stanford.ml')
+
+assign(".oldSearch", search(), pos = 'CheckExEnv')
+cleanEx()
+nameEx("stanford.machine.learning-package")
+### * stanford.machine.learning-package
+
+flush(stderr()); flush(stdout())
+
+### Name: stanford.machine.learning-package
+### Title: Provides code to accompany Stanford CS229 lectures on "Machine
+### Learning".
+### Aliases: stanford.machine.learning-package stanford.machine.learning
+### Keywords: package
+
+### ** Examples
+
+~~ simple examples of the most important functions ~~
+
+
+
+### * <FOOTER>
+###
+cat("Time elapsed: ", proc.time() - get("ptime", pos = 'CheckExEnv'),"\n")
+grDevices::dev.off()
+###
+### Local variables: ***
+### mode: outline-minor ***
+### outline-regexp: "\\(> \\)?### [*]+" ***
+### End: ***
+quit('no')
44 r/stanford.machine.learning.Rcheck/stanford.ml-Ex.Rout
@@ -0,0 +1,44 @@
+
+R version 2.13.0 (2011-04-13)
+Copyright (C) 2011 The R Foundation for Statistical Computing
+ISBN 3-900051-07-0
+Platform: i386-pc-mingw32/i386 (32-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+ Natural language support but running in an English locale
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
+> pkgname <- "stanford.ml"
+> source(file.path(R.home("share"), "R", "examples-header.R"))
+> options(warn = 1)
+> options(pager = "console")
+> library('stanford.ml')
+>
+> assign(".oldSearch", search(), pos = 'CheckExEnv')
+> cleanEx()
+> nameEx("stanford.machine.learning-package")
+> ### * stanford.machine.learning-package
+>
+> flush(stderr()); flush(stdout())
+>
+> ### Name: stanford.machine.learning-package
+> ### Title: Provides code to accompany Stanford CS229 lectures on "Machine
+> ### Learning".
+> ### Aliases: stanford.machine.learning-package stanford.machine.learning
+> ### Keywords: package
+>
+> ### ** Examples
+>
+> ~~ simple examples of the most important functions ~~
+Error: unexpected symbol in "~~ simple examples"
+Execution halted
BIN  r/stanford.machine.learning.Rcheck/stanford.ml-Ex.ps
Binary file not shown
14 r/stanford.machine.learning.Rcheck/stanford.ml/DESCRIPTION
@@ -0,0 +1,14 @@
+Package: stanford.ml
+Type: Package
+Title: Provides code to accompany Stanford's CS229: Machine Learning
+Version: 0.1
+Date: 2011-10-02
+Author: Shane Conway
+Maintainer: Shane Conway <shane.conway@gmail.com>
+Description: Provides R code to reproduce the material in CS229a,
+ Stanford's introduction to Machine Learning, which was offered
+ openly to the public on ml-class.org in Fall 2011. Most code
+ is provided in the form of demos.
+License: GPL3
+LazyLoad: yes
+Built: R 2.13.0; ; 2011-10-27 00:42:46 UTC; windows
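A minimal usage sketch, assuming the built tarball below (r/stanford.ml_0.1.tar.gz) installs cleanly; the demo names follow the files in the demo/ directory:

R CMD INSTALL r/stanford.ml_0.1.tar.gz
# then, from an R session:
library(stanford.ml)
demo(package = "stanford.ml")                         # list the available demos
demo("univariate.regression", package = "stanford.ml")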
3  r/stanford.machine.learning.Rcheck/stanford.ml/INDEX
@@ -0,0 +1,3 @@
+stanford.machine.learning-package
+ Provides code to accompany Stanford CS229
+ lectures on "Machine Learning".
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/Rd.rds
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/demo.rds
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/hsearch.rds
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/links.rds
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/Meta/package.rds
Binary file not shown
18 r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml
@@ -0,0 +1,18 @@
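+# Loader stub generated by R at install time: it lazy-loads the package
+# database and then runs any .First.lib the package itself defines.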
+.First.lib <- function(libname, pkgname)
+{
+ fullName <- paste("package", pkgname, sep=":")
+ myEnv <- as.environment(match(fullName, search()))
+ barepackage <- sub("([^-]+)_.*", "\\1", pkgname)
+ dbbase <- file.path(libname, pkgname, "R", barepackage)
+ rm(.First.lib, envir = myEnv)
+ lazyLoad(dbbase, myEnv)
+ if(exists(".First.lib", envir = myEnv, inherits = FALSE)) {
+ f <- get(".First.lib", envir = myEnv, inherits = FALSE)
+ if(is.function(f))
+ f(libname, pkgname)
+ else
+ stop(gettextf("package '%s' has a non-function '.First.lib'",
+ pkgname),
+ domain = NA)
+ }
+}
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml.rdb
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/R/stanford.ml.rdx
Binary file not shown
2  r/stanford.machine.learning.Rcheck/stanford.ml/demo/classification.R
@@ -0,0 +1,2 @@
+library(ggplot2)  # qplot() is not available without this
+qplot(-10:10, 1/(1 + exp(-(-10:10))), geom="line", xlab="z", ylab="sigmoid function")
0  r/stanford.machine.learning.Rcheck/stanford.ml/demo/linear.algebra.R
No changes.
90 r/stanford.machine.learning.Rcheck/stanford.ml/demo/multivariate.regression.R
@@ -0,0 +1,90 @@
+# details about dataset available http://archive.ics.uci.edu/ml/datasets/Housing
+
+housing <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")
+names(housing) <- c("CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV")
+
+# Subset the data for our model
+housing <- housing[, c("CRIM", "RM", "PTRATIO", "LSTAT", "MEDV")]
+
+plot(housing)
+
+# Look at the linear model
+housing.lm <- lm(MEDV ~ CRIM + RM + PTRATIO + LSTAT, data=housing)
+summary(housing.lm)
+
+readline("Press <ENTER> to Continue.")
+
+# Load data and initialize values
+data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")
+
+num.iterations <- 1000
+
+x <- data[, c("area", "bedrooms")]
+y <- matrix(data$price, ncol=1) / 1000 # Divide by a thousand so that numbers are in $1000's
+
+# Function to standardize input values
+zscore <- function(x, mean.val=NA) {
+ if(is.matrix(x)) return(apply(x, 2, zscore, mean.val=mean.val))
+ if(is.data.frame(x)) return(data.frame(apply(x, 2, zscore, mean.val=mean.val)))
+ if(is.na(mean.val)) mean.val <- mean(x)
+ sd.val <- sd(x)
+ if(all(sd.val == 0)) return(x) # if all the values are the same
+ (x - mean.val) / sd.val
+}
+
+# Standardize the features
+x.scaled <- zscore(x)
+
+# Gradient of the squared-error cost, averaged over the m training examples
+grad <- function(x, y, theta) {
+ gradient <- (1 / nrow(y)) * (t(x) %*% ((x %*% t(theta)) - y))
+ return(t(gradient))
+}
+
+gradient.descent <- function(x, y, alpha=0.1, num.iterations=500, threshold=1e-5, output.path=FALSE) {
+  # Add x_0 = 1 as the first column; m must be known before it is used here
+  m <- if(is.vector(x)) length(x) else nrow(x)
+  if(is.vector(x) || (!all(x[,1] == 1))) x <- cbind(rep(1, m), x)
+  x <- apply(x, 2, as.numeric)
+
+  num.features <- ncol(x)
+
+
+ # Initialize the parameters
+ theta <- matrix(rep(0, num.features), nrow=1)
+
+ # Look at the values over each iteration
+ theta.path <- theta
+ for (i in 1:num.iterations) {
+ theta <- theta - alpha * grad(x, y, theta)
+ if(all(is.na(theta))) break
+ theta.path <- rbind(theta.path, theta)
+ if(i > 2) if(all(abs(theta - theta.path[i-1,]) < threshold)) break
+ }
+
+ if(output.path) return(theta.path) else return(theta.path[nrow(theta.path),])
+}
+
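+# Run gradient descent on the raw and z-scored features; scaling conditions
+# the problem so that larger step sizes converge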
+unscaled.theta <- gradient.descent(x=x, y=y, num.iterations=num.iterations, output.path=TRUE)
+scaled.theta <- gradient.descent(x=x.scaled, y=y, num.iterations=num.iterations, output.path=TRUE)
+
+summary(lm(y ~ area + bedrooms, data=x))
+summary(lm(y ~ area + bedrooms, data=x.scaled))
+
+library(ggplot2)  # for qplot()
+qplot(1:(nrow(scaled.theta)), scaled.theta[,1], geom=c("line"), xlab="iteration", ylab="theta_1")
+qplot(1:(nrow(scaled.theta)), scaled.theta[,2], geom=c("line"), xlab="iteration", ylab="theta_2")
+
+# Compare convergence for several alpha values
+alphas <- c(1e-9, 1e-7, 1e-3, 0.1)
+vary.alpha <- lapply(alphas, function(alpha) gradient.descent(x=x, y=y, alpha=alpha, num.iterations=num.iterations, output.path=TRUE))
+
+par(mfrow = c(2, 2))
+for (j in 1:4) {
+  plot(vary.alpha[[j]][,2], ylab=paste("area (alpha=", alphas[j], ")", sep=""), xlab="iteration", type="l")
+}
+
+readline("Press <ENTER> to Continue.")
+
+# Closed-form solution via the normal equation: theta = (X'X)^-1 X'y
+data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")
+m <- nrow(data)
+x <- as.matrix(cbind(intercept=rep(1, m), data[, c("area", "bedrooms")]))
+theta <- solve(t(x) %*% x) %*% t(x) %*% y
+
95 r/stanford.machine.learning.Rcheck/stanford.ml/demo/univariate.regression.R
@@ -0,0 +1,95 @@
+# First look at a linear model fit to the housing data
+
+# details about dataset available http://archive.ics.uci.edu/ml/datasets/Housing
+housing <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")[, c(6, 14)]
+names(housing) <- c("num.rooms", "median.values")
+
+housing.lm <- lm(median.values ~ num.rooms, data=housing)
+plot(housing)
+abline(housing.lm)
+summary(housing.lm)
+
+readline("Press <ENTER> to Continue.")
+
+# Example of randomly chosen lines
+plot(housing)
+abline(0, 5, col="red")
+abline(-50, 10, col="blue")
+
+x <- housing[, "num.rooms"]
+y <- housing[, "median.values"]
+
+# Squared-error loss: J(intercept, slope) = (1/2) * sum((intercept + slope*x - y)^2)
+loss <- function(intercept, slope, x, y) sum(((intercept + (slope * x)) - y)^2)/2
+
+# Compute the loss for a couple of candidate lines
+loss(0, 5, x, y)
+loss(-30, 10, x, y)
+
+readline("Press <ENTER> to Continue.")
+
+# Evaluate the loss over a grid of intercepts and slopes
+# (distinct names so the housing x and y above are not overwritten)
+intercepts <- -50:50
+slopes <- -10:10
+z <- sapply(intercepts, function(intercept) sapply(slopes, function(slope) loss(intercept, slope, x, y)))
+rownames(z) <- slopes
+colnames(z) <- intercepts
+
+# 3D plot of loss function
+library(lattice)
+
+wireframe(z, shade=TRUE, xlab="theta0", ylab="theta1", zlab="loss function", aspect = c(61/87, 0.4), light.source = c(10,0,10))
+
+readline("Press <ENTER> to Continue.")
+
+# Contour plot
+library(reshape)
+library(ggplot2)
+
+loss.values <- as.data.frame(melt(z))
+names(loss.values) <- c("slope", "intercept", "loss")
+
+v <- ggplot(loss.values, aes(intercept, slope, z = loss))
+v + geom_tile(aes(fill = loss)) + stat_contour()
+
+readline("Press <ENTER> to Continue.")
+
+# Load data and initialize values
+data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")
+
+alpha <- 0.01
+m <- nrow(data)
+x <- matrix(c(rep(1,m), data$area), ncol=2)
+y <- matrix(data$price, ncol=1) / 1000
+
+# Z-Score the feature
+x.scaled <- x
+x.scaled[,2] <- (x[,2] - mean(x[,2]))/sd(x[,2])
+
+# Gradient of the squared-error cost (the 1/m factor is applied in the update loop)
+grad <- function(x, y, theta) {
+ gradient <- (t(x) %*% ((x %*% t(theta)) - y))
+ return(t(gradient))
+}
+
+gradient.path <- function(x, y, alpha, m) {
+  # Initialize the parameters
+  theta <- matrix(c(0, 0), nrow=1)
+
+  # Track the parameter values over each iteration
+  theta.path <- theta
+  for (i in 1:500) {
+    theta <- theta - alpha * 1/m * grad(x, y, theta)
+    if(all(is.na(theta))) break
+    theta.path <- rbind(theta.path, theta)
+  }
+  theta.path
+}
+
+unscaled.theta <- gradient.path(x, y, alpha, m)
+scaled.theta <- gradient.path(x.scaled, y, alpha, m)
+
+summary(lm(y ~ x[, 2]))
+summary(lm(y ~ x.scaled[, 2]))
+
+qplot(1:nrow(scaled.theta), scaled.theta[,2], geom=c("line"), xlab="iteration", ylab="theta_1")
2  r/stanford.machine.learning.Rcheck/stanford.ml/help/AnIndex
@@ -0,0 +1,2 @@
+stanford.machine.learning-package stanford.machine.learning-package
+stanford.machine.learning stanford.machine.learning-package
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/aliases.rds
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/paths.rds
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/stanford.ml.rdb
Binary file not shown
BIN  r/stanford.machine.learning.Rcheck/stanford.ml/help/stanford.ml.rdx
Binary file not shown
28 r/stanford.machine.learning.Rcheck/stanford.ml/html/00Index.html
@@ -0,0 +1,28 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head><title>R: Provides code to accompany Stanford's CS229: Machine Learning</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<link rel="stylesheet" type="text/css" href="R.css">
+</head><body>
+<h1>Provides code to accompany Stanford's CS229: Machine Learning
+<img class="toplogo" src="../../../doc/html/logo.jpg" alt="[R logo]"></h1>
+<hr>
+<div align="center">
+<a href="../../../doc/html/packages.html"><img src="../../../doc/html/left.jpg" alt="[Up]" width="30" height="30" border="0"></a>
+<a href="../../../doc/html/index.html"><img src="../../../doc/html/up.jpg" alt="[Top]" width="30" height="30" border="0"></a>
+</div><h2>Documentation for package &lsquo;stanford.ml&rsquo; version 0.1</h2>
+
+<ul><li><a href="../DESCRIPTION">DESCRIPTION file</a>.</li>
+<li><a href="../demo">Code demos</a>. Use <a href="../../utils/help/demo">demo()</a> to run them.</li>
+</ul>
+
+<h2>Help Pages</h2>
+
+
+<table width="100%">
+<tr><td width="25%"><a href="stanford.machine.learning-package.html">stanford.machine.learning-package</a></td>
+<td>Provides code to accompany Stanford CS229 lectures on "Machine Learning".</td></tr>
+<tr><td width="25%"><a href="stanford.machine.learning-package.html">stanford.machine.learning</a></td>
+<td>Provides code to accompany Stanford CS229 lectures on "Machine Learning".</td></tr>
+</table>
+</body></html>
57 r/stanford.machine.learning.Rcheck/stanford.ml/html/R.css
@@ -0,0 +1,57 @@
+BODY{ background: white;
+ color: black }
+
+A:link{ background: white;
+ color: blue }
+A:visited{ background: white;
+ color: rgb(50%, 0%, 50%) }
+
+H1{ background: white;
+ color: rgb(55%, 55%, 55%);
+ font-family: monospace;
+ font-size: x-large;
+ text-align: center }
+
+H2{ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-size: large;
+ text-align: center }
+
+H3{ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-size: large }
+
+H4{ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-style: italic;
+ font-size: large }
+
+H5{ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace }
+
+H6{ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-style: italic }
+
+IMG.toplogo{ vertical-align: middle }
+
+IMG.arrow{ width: 30px;
+ height: 30px;
+ border: 0 }
+
+span.acronym{font-size: small}
+span.env{font-family: monospace}
+span.file{font-family: monospace}
+span.option{font-family: monospace}
+span.pkg{font-weight: bold}
+span.samp{font-family: monospace}
+
+div.vignettes a:hover {
+ background: rgb(85%, 85%, 85%);
+}
+
96 r/stanford.machine.learning/demo/classification.R
@@ -0,0 +1,96 @@
+
+# Plot the sigmoid function
+library(ggplot2)
+qplot(-10:10, 1/(1 + exp(-(-10:10))), geom="line", xlab="z", ylab="sigmoid function")
+
+# Download South African heart disease data
+sa.heart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data", sep=",", header=TRUE, row.names=1)
+
+# Pretty plot
+pairs(sa.heart[1:9],pch=21,bg=c("red","green")[factor(sa.heart$chd)])
+
+num.iterations <- 1000
+
+x <- sa.heart[,c("age", "ldl")]
+y <- sa.heart$chd
+plot(x, pch=21, bg=c("red","green")[factor(y)])
+
+# Function to standardize input values
+zscore <- function(x, mean.val=NA) {
+ if(is.matrix(x)) return(apply(x, 2, zscore, mean.val=mean.val))
+ if(is.data.frame(x)) return(data.frame(apply(x, 2, zscore, mean.val=mean.val)))
+ if(is.na(mean.val)) mean.val <- mean(x)
+ sd.val <- sd(x)
+ if(all(sd.val == 0)) return(x) # if all the values are the same
+ (x - mean.val) / sd.val
+}
+
+# Standardize the features
+x.scaled <- zscore(x)
+
+# Gradient of the logistic-regression cost: (1/m) * X'(sigmoid(X theta) - y)
+grad <- function(x, y, theta) {
+ gradient <- (1 / nrow(y)) * (t(x) %*% (1/(1 + exp(-x %*% t(theta))) - y))
+ return(t(gradient))
+}
+
+gradient.descent <- function(x, y, alpha=0.1, num.iterations=500, threshold=1e-5, output.path=FALSE) {
+
+ # Add x_0 = 1 as the first column
+ m <- if(is.vector(x)) length(x) else nrow(x)
+ if(is.vector(x) || (!all(x[,1] == 1))) x <- cbind(rep(1, m), x)
+ if(is.vector(y)) y <- matrix(y)
+ x <- apply(x, 2, as.numeric)
+
+ num.features <- ncol(x)
+
+ # Initialize the parameters
+ theta <- matrix(rep(0, num.features), nrow=1)
+
+ # Look at the values over each iteration
+ theta.path <- theta
+ for (i in 1:num.iterations) {
+ theta <- theta - alpha * grad(x, y, theta)
+ if(all(is.na(theta))) break
+ theta.path <- rbind(theta.path, theta)
+ if(i > 2) if(all(abs(theta - theta.path[i-1,]) < threshold)) break
+ }
+
+ if(output.path) return(theta.path) else return(theta.path[nrow(theta.path),])
+}
+
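+# Fit the logistic model by gradient descent, with and without feature scaling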
+unscaled.theta <- gradient.descent(x=x, y=y, num.iterations=num.iterations, output.path=TRUE)
+scaled.theta <- gradient.descent(x=x.scaled, y=y, num.iterations=num.iterations, output.path=TRUE)
+
+summary(glm(chd ~ age + ldl, family = binomial, data=sa.heart))
+
+qplot(1:(nrow(scaled.theta)), scaled.theta[,1], geom=c("line"), xlab="iteration", ylab="theta_1")
+qplot(1:(nrow(scaled.theta)), scaled.theta[,2], geom=c("line"), xlab="iteration", ylab="theta_2")
+
+# Compare convergence for several alpha values
+alphas <- c(1e-12, 1e-9, 1e-7, 1e-3, 0.1, 0.9)
+vary.alpha <- lapply(alphas, function(alpha) gradient.descent(x=x.scaled, y=y, alpha=alpha, num.iterations=num.iterations, output.path=TRUE))
+
+par(mfrow = c(2, 3))
+for (j in 1:6) {
+  plot(vary.alpha[[j]][,2], ylab=paste("age (alpha=", alphas[j], ")", sep=""), xlab="iteration", type="l")
+}
+
+# Use stepwise selection (AIC) on the full logistic model to reduce the dimensions
+# (stepAIC expects a fitted model, not a data frame)
+library(MASS)
+sa.heart.full <- glm(chd ~ ., family=binomial, data=sa.heart)
+sa.heart.step <- stepAIC(sa.heart.full)
+summary(sa.heart.step)
+
+# Apply logistic regression to South African heart data from ESL
+sa.heart.fit <- glm(chd ~ sbp + tobacco + ldl + famhist + obesity + alcohol + age , family = binomial, data=sa.heart)
+summary(sa.heart.fit)
+
+# Logistic regression on iris: note Species has three levels, so glm with
+# family=binomial models the first level against the other two
+iris.glm <- glm(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, family=binomial, data=iris)
+summary(iris.glm)
+
+pairs(iris[1:4], main = "Anderson's Iris Data -- 3 species", pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])
+
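+# Linear discriminant analysis (from MASS) models all three species directly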
+irisLda <- lda(Species ~ ., data = iris)
109 r/stanford.machine.learning/demo/regularization.R
@@ -0,0 +1,109 @@
+# Polynomial curve fitting, overfitting, and generalization
+library(polynom)   # polynomial() to plot fitted polynomials
+library(reshape)   # melt()
+library(ggplot2)
+
+n <- 10
+
+f <- function(x) sin(2 * pi * x)
+
+x <- seq(0, 1, length=n)
+y <- f(x) + rnorm(n, sd=0.2)
+
+plot(data.frame(x, y))
+curve(f, type="l", col="green", add=TRUE)
+
+# Fit explicit polynomial terms; easier with the poly() function
+fit <- lm(y ~ x + I(x^2) + I(x^3))
+fit <- lm(y ~ poly(x, 3, raw=TRUE))
+summary(fit)
+
+# We can predict these values using the coefficients directly
+x.data <- data.frame(rep(1, n), x, x^2, x^3)
+y.pred <- apply(fit[["coefficients"]] * t(x.data), 2, sum)
+
+# Or we can just use the predict function to do the same thing
+y.pred <- predict(fit)
+
+points(data.frame(x, y.pred), col="red")
+
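+# Sum-of-squares error: E = (1/2) * sum((y.pred - y)^2)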
+error.function <- function(y, y.pred) sum((y.pred - y)^2) / 2
+
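+# Root-mean-square error: E_RMS = sqrt(2 * E / N), comparable across dataset sizes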
+e.rms <- function(y, y.pred) sqrt(2 * error.function(y=y, y.pred=y.pred) / length(y))
+
+e.rms(y, y.pred)
+
+
+par(mfrow=c(2, 2))
+
+for (i in c(1, 3, 6, 9)) {
+  plot(data.frame(x, y), xlab=paste("polynomial fit order", i), ylab="f(x)")
+  curve(f, type="l", col="green", add=TRUE)
+  fit <- lm(y ~ poly(x, i, raw=TRUE))
+  p <- polynomial(coef(fit))
+  lines(p, col="red")
+}
+
+fit.values <- matrix(nrow=0, ncol=2)
+for (i in 1:9) {
+  fit.sum <- summary(lm(y ~ poly(x, i, raw=TRUE)))
+  fit.values <- rbind(fit.values, c(i, fit.sum["r.squared"][[1]]))
+}
+
+colnames(fit.values) <- c("Polynomial Order", "R^2")
+plot(fit.values, type="l")
+
+# Adding more data:
+par(mfrow=c(2, 2))
+
+for (i in c(10, 50, 100, 1000)) {
+ x <- seq(0, 1, length=i)
+ d <- f(x) + rnorm(i, sd=0.15)
+
+ plot(data.frame(x, d), xlab=paste("Data size:", i), ylab="f(x)")
+ curve(f, type="l", col="blue", add=TRUE)
+ fit <- lm(d ~ poly(x, 9, raw=TRUE))
+ summary(fit)["r.squared"][[1]]
+  p <- polynomial(coef(fit))
+  lines(p, col="red")
+}
+
+
+#
+# Let's look at how the different models generalize between different datasets
+#
+
+n.training <- 10
+n.test <- 100
+
+build.data <- function(n) {
+ f <- function(x) sin(2 * pi * x)
+ x <- seq(0, 1, length=n)
+ y <- f(x) + rnorm(n, sd=0.2)
+ return(data.frame(y=y, x=x))
+}
+
+training <- build.data(n=n.training)
+test <- build.data(n=n.test)
+
+test.poly.error <- function(training, test, polynomials=1:9) {
+
+ errors.training <- errors.test <- numeric()
+ for(i in polynomials) {
+ fit <- lm(y ~ poly(x, i, raw=TRUE), data=training)
+ y.pred.training <- predict(fit)
+ errors.training[i] <- e.rms(training$y, y.pred.training)
+ y.pred.test <- predict(fit, newdata=test)
+ errors.test[i] <- e.rms(test$y, y.pred.test)
+ }
+ errors <- data.frame(polynomial=polynomials, training.error=errors.training, test.error=errors.test)
+ return(errors)
+}
+
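+# Training error falls as the order grows, while test error eventually rises: overfitting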
+errors <- test.poly.error(training, test)
+errors <- melt(errors, id.vars="polynomial")
+colnames(errors) <- c("polynomial", "dataset", "error")
+
+p <- ggplot(errors, aes(x=polynomial, y=error, group=dataset, colour=dataset)) + geom_line()
+p
BIN  r/stanford.ml_0.1.tar.gz
Binary file not shown