Permalink
Browse files

Version 1.0 Major Commit

I should have committed sooner. This is the first "working" cut. Note the entry point class, LaunchClassifier, and the arguments it takes. Also note that due to the complexity (time) of K-Nearest Neighbor, the test data has been trimmed down a bit more, and the old data that had 200 lines was moved to its own respective subdirectories in test_data and training_data. Skeleton code still exists but will be removed. Run LaunchClassifier with no arguments to get a usage printout.
  • Loading branch information...
1 parent 3bcbf9c commit a943ee77a95e77f09f40318f3ba09f21bf15aee3 @jweaver committed Apr 15, 2012
Showing with 8,008 additions and 4,096 deletions.
  1. +6 −0 runKNN8Test.sh
  2. +129 −0 src/com/weaverworx/usc/csci561/assignment3/LaunchClassifier.java
  3. +0 −58 src/com/weaverworx/usc/csci561/assignment3/Launcher.java
  4. +6 −6 src/com/weaverworx/usc/csci561/assignment3/skeleton/KNearestNeighbor_Skeleton.java
  5. +269 −155 src/com/weaverworx/usc/csci561/assignment3/skeleton/NaiveBayes_Skeleton.java
  6. +32 −0 src/com/weaverworx/usc/csci561/assignment3/util/ClassifierTypes.java
  7. +0 −97 src/com/weaverworx/usc/csci561/assignment3/util/KNNUtil.java
  8. +276 −0 src/com/weaverworx/usc/csci561/assignment3/util/LearningUtil.java
  9. +199 −0 test_data/200_lines/test0.txt
  10. +199 −0 test_data/200_lines/test1.txt
  11. +199 −0 test_data/200_lines/test2.txt
  12. +199 −0 test_data/200_lines/test3.txt
  13. +199 −0 test_data/200_lines/test4.txt
  14. +199 −0 test_data/200_lines/test5.txt
  15. +199 −0 test_data/200_lines/test6.txt
  16. +199 −0 test_data/200_lines/test7.txt
  17. +199 −0 test_data/200_lines/test8.txt
  18. +199 −0 test_data/200_lines/test9.txt
  19. +39 −189 test_data/test0.txt
  20. +47 −189 test_data/test1.txt
  21. +42 −189 test_data/test2.txt
  22. +41 −189 test_data/test3.txt
  23. +40 −189 test_data/test4.txt
  24. +35 −189 test_data/test5.txt
  25. +38 −189 test_data/test6.txt
  26. +42 −189 test_data/test7.txt
  27. +39 −189 test_data/test8.txt
  28. +41 −189 test_data/test9.txt
  29. +199 −0 training_data/200_lines/train0.txt
  30. +199 −0 training_data/200_lines/train1.txt
  31. +199 −0 training_data/200_lines/train2.txt
  32. +199 −0 training_data/200_lines/train3.txt
  33. +199 −0 training_data/200_lines/train4.txt
  34. +199 −0 training_data/200_lines/train5.txt
  35. +199 −0 training_data/200_lines/train6.txt
  36. +199 −0 training_data/200_lines/train7.txt
  37. +199 −0 training_data/200_lines/train8.txt
  38. +199 −0 training_data/200_lines/train9.txt
  39. +287 −189 training_data/train0.txt
  40. +328 −189 training_data/train1.txt
  41. +288 −189 training_data/train2.txt
  42. +297 −189 training_data/train3.txt
  43. +283 −189 training_data/train4.txt
  44. +262 −189 training_data/train5.txt
  45. +286 −189 training_data/train6.txt
  46. +304 −189 training_data/train7.txt
  47. +283 −189 training_data/train8.txt
  48. +288 −189 training_data/train9.txt
View
@@ -0,0 +1,6 @@
+#!/bin/bash
+#
+# Run script to test K-Nearest Neighbor (KNN) with K = 8.
+
+
+java -c bin com.weaverworx.usc.csci561.assignment3.LaunchClassifier knn -k=8
@@ -0,0 +1,129 @@
+/**
+ * File: Launcher.java
+ * Author: Jack Weaver <jhweaver@usc.edu>, <weaver.jack@gmail.com>
+ * Course: CSCI 561, Spring 2012
+ * Assignment: Assignment 3 - Supervised Learning Systems
+ * Target: aludra.usc.edu running Java 1.6.0_23
+ */
+package com.weaverworx.usc.csci561.assignment3;
+
+import com.weaverworx.usc.csci561.assignment3.knn.KNNRecord;
+import com.weaverworx.usc.csci561.assignment3.util.ClassifierTypes;
+import com.weaverworx.usc.csci561.assignment3.util.FileReader;
+import com.weaverworx.usc.csci561.assignment3.util.LearningUtil;
+
+/**
+ * Main class to launch the application.
+ *
+ * @author jw
+ *
+ */
+public class LaunchClassifier {
+ private final static int EX_CLASS_INDEX = 784;
+
+ /**
+ * Entry point.
+ *
+ * @param args
+ */
+ public static void main(String[] args) {
+ ClassifierTypes classifier = LearningUtil.parseClassifierSystem(args);
+
+ if (classifier.name().compareTo(ClassifierTypes.K_NEAREST_NEIGHBOR.name()) == 0) {
+ int k = LearningUtil.parseKArgument(args); // Read k from user input
+ // Get the training data and test data
+ int[][] trainData = FileReader.getTrainingData(LearningUtil.NUMBER_OF_FEATURES,
+ LearningUtil.NUMBER_OF_CLASSES);
+ int[][] testData = FileReader.getTestData(LearningUtil.NUMBER_OF_FEATURES,
+ LearningUtil.NUMBER_OF_CLASSES);
+
+ // Set up the K-Nearest Neighbor Records
+ KNNRecord[] knnRecords = new KNNRecord[trainData.length];
+ for (int i = 0; i < trainData.length; i++) {
+ knnRecords[i] = new KNNRecord();
+ }
+
+ // Set up the arrays for # correct and # incorrect
+ int[] correct = new int[LearningUtil.NUMBER_OF_CLASSES], incorrect =
+ new int[LearningUtil.NUMBER_OF_CLASSES];
+ for (int i = 0; i < testData.length; i++) {
+ for (int j = 0; j < trainData.length; j++) {
+ // Set the distance & the record class
+ knnRecords[j].setExampleClass(trainData[j][EX_CLASS_INDEX]);
+ knnRecords[j].setDistance(LearningUtil
+ .getEuclideanDistance(testData[i], trainData[j]));
+ }
+ int actualClass = testData[i][EX_CLASS_INDEX];
+ int predictedClass = LearningUtil.predictKNN(k, knnRecords);
+ // Set the counters for accuracy, every time we correctly
+ // predict
+ // the numerical class, tally it. Otherwise tally the miss.
+ if (actualClass == predictedClass) {
+ correct[actualClass]++;
+ } else {
+ incorrect[actualClass]++;
+ }
+ }
+ // Display the results to Stdout
+ LearningUtil.outputKNNResultsToStdOut(k, correct, incorrect);
+
+
+ } else if (classifier.name().compareTo(ClassifierTypes.NAIVE_BAYES.name()) == 0) {
+ int threshold = LearningUtil.parseTArgument(args);
+ double[] N_Y = new double[LearningUtil.NUMBER_OF_CLASSES];
+ double[] P_Y = new double[LearningUtil.NUMBER_OF_CLASSES];
+ int[][] trainingData = FileReader.getTrainingData(LearningUtil.NUMBER_OF_FEATURES,
+ LearningUtil.NUMBER_OF_CLASSES);
+ int[][] testData = FileReader.getTestData(LearningUtil.NUMBER_OF_FEATURES,
+ LearningUtil.NUMBER_OF_CLASSES);
+
+ //Setup N_Y: the number of times a given training data entry
+ //appears among ALL the training data, ie- N_Y[1] is # of 1s
+ int totalTrainingSize = 0;
+ for (int i = 0; i < LearningUtil.NUMBER_OF_CLASSES; i++) {
+ for (int j = 0; j < trainingData.length; j++) {
+ //For each "record" in all test data
+ if (trainingData[j][trainingData[j].length -1] == i) {
+ N_Y[i] = N_Y[i] + 1;
+ totalTrainingSize++;
+ }
+ }
+ if (i == LearningUtil.NUMBER_OF_CLASSES - 1) {
+ for (int k = 0; k < P_Y.length; k++) {
+ P_Y[k] = N_Y[k] / totalTrainingSize;
+ }
+ }
+ }
+
+ double[][][] P_X_given_Y =
+ LearningUtil.getBayesConditionalProbabilities(threshold,
+ trainingData, N_Y);
+ int[] correct = new int[LearningUtil.NUMBER_OF_CLASSES];
+ int[] incorrect = new int[LearningUtil.NUMBER_OF_CLASSES];
+ for (int i = 0; i < testData.length; i++) { // for each test example
+ int actual_class = testData[i][EX_CLASS_INDEX];
+ /*
+ * predict by using P_Y and P_X_given_Y parameters threshold used
+ * for converting to binary (1,0) data format.
+ */
+ int predict_class = LearningUtil.predictBayes(testData[i], threshold,
+ P_Y, P_X_given_Y);
+ if (actual_class == predict_class) {
+ /*
+ * if actual_class same as predict_class,
+ * increasing correct of that class
+ * (correct[actual_class])
+ */
+ correct[actual_class]++;
+ } else {
+ incorrect[actual_class]++;
+ }
+ }
+ LearningUtil.outputBayesResultsToStdOut(threshold, correct, incorrect);
+ // End of NAIVE BAYES
+ } else {
+ System.out.println(LearningUtil.getUsage());
+ System.exit(0);
+ }
+ }
+}
@@ -1,58 +0,0 @@
-/**
- * File: Launcher.java
- * Author: Jack Weaver <jhweaver@usc.edu>, <weaver.jack@gmail.com>
- * Course: CSCI 561, Spring 2012
- * Assignment: Assignment 3 - Supervised Learning Systems
- * Target: aludra.usc.edu running Java 1.6.0_23
- */
-package com.weaverworx.usc.csci561.assignment3;
-
-import com.weaverworx.usc.csci561.assignment3.knn.KNNRecord;
-import com.weaverworx.usc.csci561.assignment3.util.FileReader;
-import com.weaverworx.usc.csci561.assignment3.util.KNNUtil;
-
-/**
- * Main class to launch the application.
- *
- * @author jw
- *
- */
-public class Launcher {
- private final static int NUMBER_OF_CLASSES = 10;
- private final static int NUMBER_OF_FEATURES = 784;
- private final static int EX_CLASS_INDEX = 784;
-
- /**
- * Entry point.
- *
- * @param args
- */
- public static void main(String[] args) {
- int k = KNNUtil.parseKArgument(args); //Read k from user input
- //Get the training data and test data
- int[][] trainData = FileReader.getTrainingData(
- NUMBER_OF_FEATURES, NUMBER_OF_CLASSES);
- int[][] testData = FileReader.getTestData(
- NUMBER_OF_FEATURES, NUMBER_OF_CLASSES);
-
- //Set up the K-Nearest Neighbor Records
- KNNRecord[] knnRecords = new KNNRecord[trainData.length];
- for (int i = 0; i < trainData.length; i++) {
- knnRecords[i] = new KNNRecord();
- }
-
- //Set up the arrays for # correct and # incorrect
- int[] correct, incorrect = new int[NUMBER_OF_CLASSES];
- for (int i = 0; i < testData.length; i++) {
- for (int j = 0; j < trainData.length; j++) {
- //Set the distance & the record class
- knnRecords[j].setExampleClass(trainData[j][EX_CLASS_INDEX]);
- knnRecords[j].setDistance(KNNUtil.getEuclideanDistance(
- testData[i], trainData[j]));
- }
-
- }
-
-
- }
-}
@@ -6,7 +6,7 @@
import com.weaverworx.usc.csci561.assignment3.knn.KNNRecord;
import com.weaverworx.usc.csci561.assignment3.util.FileReader;
-import com.weaverworx.usc.csci561.assignment3.util.KNNUtil;
+import com.weaverworx.usc.csci561.assignment3.util.LearningUtil;
/**
*
@@ -20,12 +20,12 @@
public static void main(String args[]) {
- int K = KNNUtil.parseKArgument(args); // read K from user input
+ int K = LearningUtil.parseKArgument(args); // read K from user input
int[][] train = FileReader.getTrainingData(numberOfFeatures, numberOfClasses); // last column 785 is a class of train
// image
int[][] test = FileReader.getTestData(numberOfFeatures, numberOfClasses); // last column 785 is a class of test
- // imag e
+ // image
KNNRecord[] knn_records = new KNNRecord[train.length]; //
for (int j = 0; j < train.length; j++) {
@@ -43,11 +43,11 @@ public static void main(String args[]) {
// knn_records[j].example_class = train[j][example_class_index]; // class is train class
knn_records[j].setExampleClass(train[j][example_class_index]);
// knn_records[j].distance = getEuclideanDistance(test[i], train[j]); // difference between test i and train j
- knn_records[j].setDistance(KNNUtil.getEuclideanDistance(test[i], train[j]));
+ knn_records[j].setDistance(LearningUtil.getEuclideanDistance(test[i], train[j]));
}
int actual_class = test[i][example_class_index];
- int predict_class = KNNUtil.predict(K, knn_records);
+ int predict_class = LearningUtil.predictKNN(K, knn_records);
if (actual_class == predict_class) {
correct[actual_class]++; // if actual_class same as
@@ -61,7 +61,7 @@ public static void main(String args[]) {
}
// display output
- KNNUtil.outputResultsToStdOut(K, correct, incorrect, numberOfClasses);
+ LearningUtil.outputKNNResultsToStdOut(K, correct, incorrect);
}
Oops, something went wrong.

0 comments on commit a943ee7

Please sign in to comment.