Skip to content

Commit

Permalink
Merge pull request #420 from aayushKumarJarvis/master
Browse files Browse the repository at this point in the history
Stop Words - A Text Mining Problem
  • Loading branch information
dalleng committed Oct 27, 2014
2 parents d161826 + c8d6e74 commit 1400937
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 0 deletions.
20 changes: 20 additions & 0 deletions Factorial/Java/aayushKumarJarvis/Factorial.java
@@ -0,0 +1,20 @@
import java.util.Scanner;

/**
 * Command-line program that reads an integer from standard input and prints
 * its factorial.
 */
public class Factorial {

    /**
     * Prompts for a number, computes its factorial and prints the result.
     * If the number is negative or its factorial does not fit in an
     * {@code int}, the reason is printed to standard error instead.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Scanner scanner = new Scanner(System.in);
        System.out.print("Enter the number whose factorial is to be found: ");
        int n = scanner.nextInt();
        try {
            int result = factorial(n);
            System.out.println("The factorial of " + n + " is " + result);
        } catch (IllegalArgumentException e) {
            System.err.println(e.getMessage());
        } catch (ArithmeticException e) {
            System.err.println(e.getMessage());
        }
    }

    /**
     * Computes {@code n!} iteratively.
     *
     * @param n the non-negative number whose factorial is wanted
     * @return the product {@code 1 * 2 * ... * n}; {@code 1} when {@code n} is 0 or 1
     * @throws IllegalArgumentException if {@code n} is negative
     * @throws ArithmeticException if the result does not fit in an {@code int}
     *         (that is, for every {@code n > 12})
     */
    public static int factorial(int n) {
        if (n < 0) {
            throw new IllegalArgumentException(
                    "Factorial is not defined for negative numbers: " + n);
        }
        int result = 1;
        for (int i = 2; i <= n; i++) {
            // Multiply in 64 bits so an overflow of the int range is detected
            // instead of silently wrapping around to a wrong value.
            long product = (long) result * i;
            if (product > Integer.MAX_VALUE) {
                throw new ArithmeticException(
                        "Factorial of " + n + " does not fit in an int");
            }
            result = (int) product;
        }
        return result;
    }
}
4 changes: 4 additions & 0 deletions README.md
Expand Up @@ -2,6 +2,9 @@

(Note: there is a backstory on my motivation for starting this at the bottom of this README)

Website coming soon!

**Update 06/09/14**
I have released a native Android app for viewing the algorithms! It's a wrapper around the web app with some native functionality. Check it out on Google Play here:
[Algorithm Implementations Android App](https://play.google.com/store/apps/details?id=com.kennyledet.algorithms.app.grmeb)
Expand All @@ -13,6 +16,7 @@ You can see a preview here!
[Algorithms Implementations Website](http://dry-sea-7022.herokuapp.com/algorithms)

Please offer up your suggestions in the Issues section of the GitHub repository.
**Update 12/30/13:** I have given the 3 top committers (jcla1, PatrickYevsukov, dalleng) collaborator access. This was very well earned, as they have managed to contribute much more to this repo than I have had the time to.

Expand Down
88 changes: 88 additions & 0 deletions Stop_Words/aayushKumarJarvis/StopWords.java
@@ -0,0 +1,88 @@
import java.util.*;
import java.io.*;

/**
 * Removes stop words from a text file.
 *
 * <p>The stop words are read from one file, the text from another, and the
 * filtered text is written to {@link #OUTPUT_FILE}. Matching is exact and
 * case-sensitive.
 */
public class StopWords {

    /** Path of the file the filtered text is written to; edit before running. */
    private static final String OUTPUT_FILE = "YOUR_FILE_LOCATION";

    /** Splits a line of text on every character that is not an ASCII letter or digit. */
    private static final java.util.regex.Pattern TOKEN_DELIMITER =
            java.util.regex.Pattern.compile("[^a-zA-Z0-9]");

    /**
     * Tells whether {@code word} occurs in {@code textForCheck}.
     *
     * <p>The list does not need to be sorted: a linear {@code contains} is used
     * because a binary search gives undefined results on unsorted input.
     *
     * @param word the word to look for
     * @param textForCheck the list of stop words, in any order
     * @return {@code true} if the list contains {@code word}; {@code false}
     *         otherwise, or if either argument is {@code null}
     */
    public static Boolean searchForStopWord(String word, List<String> textForCheck) {
        if (word == null || textForCheck == null) {
            return false;
        }
        return textForCheck.contains(word);
    }

    /**
     * Reads the stop words stored in a file.
     *
     * <p>Every line may hold one word or several comma-separated words. Each
     * word is trimmed of surrounding whitespace and empty entries are dropped.
     *
     * @param stopWordsFilename path of the stop-words file
     * @return the stop words in the order they appear in the file
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> readStopWords(String stopWordsFilename) throws IOException {
        List<String> stopWords = new ArrayList<String>();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(stopWordsFilename)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String token : line.split(",")) {
                    String word = token.trim();
                    if (word.length() > 0) {
                        stopWords.add(word);
                    }
                }
            }
        } finally {
            reader.close();
        }
        return stopWords;
    }

    /**
     * Copies a text file to {@link #OUTPUT_FILE}, leaving out every stop word.
     *
     * <p>Each input line is split on non-alphanumeric characters; every
     * remaining word is written followed by a single space, and every input
     * line ends with {@code "\n"} in the output. I/O failures are reported on
     * standard error rather than thrown.
     *
     * @param textFilename path of the text file to filter
     * @param stopWords the words to remove; {@code null} is treated as empty
     */
    public static void removeStopWords(String textFilename, List<String> stopWords) {
        // A hash set makes each membership check constant-time.
        Set<String> stopWordSet = (stopWords == null)
                ? new HashSet<String>()
                : new HashSet<String>(stopWords);

        BufferedReader reader = null;
        PrintStream writer = null;
        try {
            // Open the input first so a missing input file does not truncate
            // an existing output file.
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(textFilename)));
            writer = new PrintStream(new FileOutputStream(OUTPUT_FILE));

            String line;
            while ((line = reader.readLine()) != null) {
                for (String token : TOKEN_DELIMITER.split(line)) {
                    // Consecutive delimiters produce empty tokens; skip them.
                    if (token.length() > 0 && !stopWordSet.contains(token)) {
                        writer.print(token + " ");
                    }
                }
                writer.print("\n");
            }

            // PrintStream swallows write errors; surface them explicitly.
            if (writer.checkError()) {
                System.err.println("Failed writing to " + OUTPUT_FILE);
            }
        } catch (IOException e) {
            System.err.println(e.getMessage());
        } finally {
            if (writer != null) {
                writer.close();
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing the input fails.
                }
            }
        }
    }

    /**
     * Asks for the stop-words file and the text file on standard input and
     * writes the filtered text to {@link #OUTPUT_FILE}.
     *
     * @param arg ignored
     * @throws Exception if the stop-words file cannot be read
     */
    public static void main(String[] arg) throws Exception {
        Scanner keyboard = new Scanner(System.in);

        System.out.print("Please type stop words file name: ");
        List<String> stopWords = readStopWords(keyboard.next());

        System.out.print("Please type text file name: ");
        removeStopWords(keyboard.next(), stopWords);
    }
}

43 changes: 43 additions & 0 deletions Stop_Words/aayushKumarJarvis/TestStopWords.java
@@ -0,0 +1,43 @@
import org.junit.Test;
import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.*;

/**
 * JUnit tests for {@code StopWords}.
 *
 * <p>The file-based tests use the placeholder path {@code "FILE_NAME"}; point
 * it at a real file before running them.
 */
public class TestStopWords {

    /** Words present in the list are found; absent words are not. */
    @Test
    public void testSearchForStopWord() {
        List<String> names = Arrays.asList("Aayush", "Kumar", "Srivastava");

        assertTrue(StopWords.searchForStopWord("Aayush", names));
        assertTrue(StopWords.searchForStopWord("Kumar", names));
        assertTrue(StopWords.searchForStopWord("Srivastava", names));
        assertFalse(StopWords.searchForStopWord("Random Text", names));
        assertFalse(StopWords.searchForStopWord("Text", names));
    }

    /** Reading the stop-words file yields a list, which is printed for inspection. */
    @Test
    public void testReadStopWords() throws Exception {
        List<String> tokenizedText = StopWords.readStopWords("FILE_NAME");
        assertNotNull(tokenizedText);
        System.out.println(tokenizedText);
    }

    /** Smoke test: filtering a file with the stop words read from disk completes. */
    @Test
    public void testForRemoveStopWords() throws Exception {
        List<String> tokenizedText = StopWords.readStopWords("FILE_NAME");
        StopWords.removeStopWords("FILE_NAME", tokenizedText);
    }

}

0 comments on commit 1400937

Please sign in to comment.