Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
99 lines (76 sloc) 2.45 KB
load_dataset <- function(file_name) {
  # Read a comma-separated file that has no header row.
  #
  # Args:
  #   file_name: path (or connection) handed straight to read.csv().
  #
  # Returns:
  #   A data.frame; because header = FALSE the columns get the default
  #   names V1, V2, ... assigned by read.csv().
  read.csv(file = file_name, header = FALSE)
}
normalize_dataset <- function(dataset) {
  # Prepare a dataset for kNN: min-max scale the three numeric feature
  # columns (1-3), encode the yes/no column (4) as 1/0 and shuffle the rows.
  #
  # Args:
  #   dataset: data.frame with at least four columns; columns 1-3 numeric,
  #            column 4 holding "yes"/"no" labels (character or factor).
  #
  # Returns:
  #   A data.frame with the same columns, transformed as described above and
  #   with its rows in random order (depends on the current RNG state).
  stopifnot(is.data.frame(dataset), ncol(dataset) >= 4L)

  # Nothing to scale or shuffle; also avoids min()/max() on empty input.
  if (nrow(dataset) == 0L) {
    return(dataset)
  }

  normalize_number <- function(column) {
    # Min-max normalization: z_i = (x_i - min(x)) / (max(x) - min(x)).
    stopifnot(is.numeric(column))
    limits <- range(column, na.rm = TRUE)
    span <- limits[2] - limits[1]
    if (!is.finite(span) || span == 0) {
      # Constant (or all-NA) column: dividing by the zero range would give
      # NaN, so map every known value to 0 and keep NA as NA.
      return(column - limits[1] * (span == 0))
    }
    (column - limits[1]) / span
  }

  normalize_yes_no <- function(column) {
    # "yes" -> 1, any other value -> 0, NA stays NA.
    as.numeric(column == "yes")
  }

  shuffle_rows <- function(data) {
    # Random permutation obtained by ordering uniform random draws.
    data[order(runif(nrow(data))), ]
  }

  for (feature in 1:3) {
    dataset[feature] <- normalize_number(dataset[[feature]])
  }
  dataset[4] <- normalize_yes_no(dataset[[4]])

  shuffle_rows(dataset)
}
run_knn <- function(file_name, k = 3, train_rows = 1:85, test_rows = 86:100) {
  # Train and evaluate a k-nearest-neighbour classifier on a CSV dataset.
  #
  # The file is loaded with load_dataset(), prepared with
  # normalize_dataset() (scaled, encoded and shuffled) and then split by row
  # position into a training and a test part. The confusion matrix of the
  # test predictions is printed.
  #
  # Args:
  #   file_name:  path of the header-less CSV file (3 features + yes/no).
  #   k:          number of neighbours used by class::knn().
  #   train_rows: row positions (after shuffling) used for training.
  #   test_rows:  row positions (after shuffling) used for testing.
  #
  # Returns:
  #   The normalized, shuffled dataset.
  if (!requireNamespace("class", quietly = TRUE)) {
    stop("Package 'class' is required to run knn.", call. = FALSE)
  }

  # Fixed seed so the shuffle (and knn tie-breaking) is reproducible.
  set.seed(9850)
  dataset <- load_dataset(file_name)
  dataset <- normalize_dataset(dataset)

  if (max(train_rows, test_rows) > nrow(dataset)) {
    stop(
      "Dataset has ", nrow(dataset), " rows but the train/test split needs ",
      max(train_rows, test_rows), ".",
      call. = FALSE
    )
  }

  feature_cols <- 1:3
  target_col <- 4

  # Only the features go into the distance computation; including the target
  # column would leak the label into the classifier.
  dataset_train <- dataset[train_rows, feature_cols, drop = FALSE]
  dataset_test <- dataset[test_rows, feature_cols, drop = FALSE]
  dataset_train_target <- dataset[train_rows, target_col]
  dataset_test_target <- dataset[test_rows, target_col]

  model1 <- class::knn(
    train = dataset_train,
    test = dataset_test,
    cl = dataset_train_target,
    k = k
  )

  # Confusion matrix: must be printed explicitly, a bare table() call inside
  # a function is evaluated and thrown away.
  print(table(actual = dataset_test_target, predicted = model1))

  dataset
}
plot_dataset <- function(file_name, weights = c(0.33, 0.33, 0.33)) {
  # Plot the class label of every row against a weighted sum of its features.
  #
  # Args:
  #   file_name: path of a header-less CSV file whose columns 1-3 are numeric
  #              features and whose column 4 holds "yes"/"no".
  #   weights:   numeric vector of length 3 with the weight of each feature.
  #              The default treats the three features as equally important
  #              (roughly one third each).
  #
  # Returns:
  #   A numeric matrix with one row per dataset row: column 1 is the label
  #   (1 for "yes", 0 otherwise), column 2 is the weighted feature sum.
  #   The matrix is also drawn with plot() as a side effect.
  stopifnot(is.numeric(weights), length(weights) == 3L)

  compute_sum <- function(dataset) {
    # Vectorized over all rows; no per-row loop needed.
    weighted_sum <- dataset[[1]] * weights[1] +
      dataset[[2]] * weights[2] +
      dataset[[3]] * weights[3]
    label <- as.numeric(dataset[[4]] == "yes")
    matrix(c(label, weighted_sum), ncol = 2)
  }

  dataset <- read.csv(file_name, header = FALSE)
  m <- compute_sum(dataset)
  plot(m)
  return(m)
}