Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add searching #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions parallelism/src/main/java/ru/hh/school/homework/Launcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

import static java.lang.System.currentTimeMillis;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
Expand All @@ -14,14 +17,38 @@
import static java.util.stream.Collectors.counting;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toMap;
import ru.hh.school.homework.search.PrintUtil;
import ru.hh.school.homework.search.file.ForkJionFilesSearcher;
import ru.hh.school.homework.search.file.FileSearcher;
import ru.hh.school.homework.search.google.CompletableFutureSearcher;
import ru.hh.school.homework.search.google.OnlineSearcher;
import ru.hh.school.homework.search.word.ParallelStreamTopWordsSearcher;
import ru.hh.school.homework.search.word.TopWordsSearcher;

public class Launcher {

public static void main(String[] args) throws IOException {
// Написать код, который, как можно более параллельно:
// - по заданному пути найдет все "*.java" файлы
Path root = Path.of("D:", "projects", "work", "hh-school",
"parallelism", "src", "main", "java", "ru", "school", "parallelism");
String fileName = ".java";
FileSearcher fileSearcher = new ForkJionFilesSearcher();
long startTime = currentTimeMillis();
List<String> files = fileSearcher.parallelSearch(root, fileName);
PrintUtil.printWordsOfFiles(files, startTime, currentTimeMillis());

// - для каждого файла вычислит 10 самых популярных слов (см. #naiveCount())
TopWordsSearcher topWordsSearcher = new ParallelStreamTopWordsSearcher();
startTime = currentTimeMillis();
Map<String, Integer> wordsFromFiles = topWordsSearcher.inFilesTopWordsCounter(files, 10);
PrintUtil.printWordsOfFiles(wordsFromFiles, startTime, currentTimeMillis());

// - соберет top 10 для каждой папки в которой есть хотя-бы один java файл
startTime = currentTimeMillis();
Map<String, Integer> wordsFromFolders = topWordsSearcher.inFoldersTopWordsCounter(files, 10);
PrintUtil.printWordsOfFolders(wordsFromFolders, startTime, currentTimeMillis());

// - для каждого слова сходит в гугл и вернет количество результатов по нему (см. #naiveSearch())
// - распечатает в консоль результаты в виде:
// <папка1> - <слово #1> - <кол-во результатов в гугле>
Expand All @@ -36,6 +63,10 @@ public static void main(String[] args) throws IOException {
//
// Порядок результатов в консоли не обязательный.
// При желании naiveSearch и naiveCount можно оптимизировать.
OnlineSearcher onlineSearcher = new CompletableFutureSearcher();
startTime = currentTimeMillis();
List<String> resultsFromGoogle = onlineSearcher.onlineSearch(wordsFromFolders);
PrintUtil.printStringList(resultsFromGoogle, startTime, currentTimeMillis());

// test our naive methods:
testCount();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package ru.hh.school.homework.search;

import java.util.List;
import java.util.Map;

/**
 * Console reporting helpers for the search pipeline.
 *
 * <p>Every method prints a one-line header containing the elapsed time
 * ({@code end - start}, in milliseconds) followed by one line per result,
 * each prefixed with a single space.
 */
public class PrintUtil {

  /**
   * Prints how many files were found and then each file path on its own line.
   *
   * @param files paths of the files that were found
   * @param start timestamp taken before the search, in milliseconds
   * @param end timestamp taken after the search, in milliseconds
   */
  public static void printWordsOfFiles(List<String> files, long start, long end) {
    long elapsedMs = end - start;
    System.out.println("Found " + files.size() + " file(s) in " + elapsedMs + " ms:");
    files.forEach(PrintUtil::printIndented);
  }

  /**
   * Prints the per-file word counts as {@code <key> [<count>]} lines.
   *
   * @param folders map from a label to its occurrence count
   * @param start timestamp taken before the computation, in milliseconds
   * @param end timestamp taken after the computation, in milliseconds
   */
  public static void printWordsOfFiles(Map<String, Integer> folders, long start, long end) {
    long elapsedMs = end - start;
    System.out.println("Found " + folders.size() + " word(s) of each file in " + elapsedMs + " ms:");
    printCounts(folders);
  }

  /**
   * Prints the per-folder word counts as {@code <key> [<count>]} lines.
   *
   * @param folders map from a label to its occurrence count
   * @param start timestamp taken before the computation, in milliseconds
   * @param end timestamp taken after the computation, in milliseconds
   */
  public static void printWordsOfFolders(Map<String, Integer> folders, long start, long end) {
    long elapsedMs = end - start;
    System.out.println("Found " + folders.size() + " word(s) of folder's file in " + elapsedMs + " ms:");
    printCounts(folders);
  }

  /**
   * Prints the online search results, one per line.
   *
   * @param list already formatted result lines
   * @param start timestamp taken before the search, in milliseconds
   * @param end timestamp taken after the search, in milliseconds
   */
  public static void printStringList(List<String> list, long start, long end) {
    long elapsedMs = end - start;
    System.out.println("Search results found in Google in " + elapsedMs + " ms:");
    list.forEach(PrintUtil::printIndented);
  }

  /** Prints each map entry as an indented {@code <key> [<value>]} line. */
  private static void printCounts(Map<String, Integer> counts) {
    counts.forEach((label, count) -> printIndented(label + " [" + count + "]"));
  }

  /** Prints a single line prefixed with one space. */
  private static void printIndented(String text) {
    System.out.println(" " + text);
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package ru.hh.school.homework.search.file;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;

/**
 * Strategy for locating files below a starting directory.
 */
public interface FileSearcher {

/**
 * Searches for files starting at {@code path}.
 *
 * <p>NOTE(review): the exact matching rule for {@code fileName} is defined by the
 * implementation; the fork/join implementation in this change matches file names
 * that contain {@code fileName}, ignoring case, and returns the matching paths as
 * strings — confirm for other implementations.
 *
 * @param path directory at which the search starts
 * @param fileName file-name fragment to look for
 * @return the files that were found
 * @throws IOException if the search cannot be started or fails
 */
List<String> parallelSearch(Path path, String fileName) throws IOException;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package ru.hh.school.homework.search.file;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Stream;

public class Folder {

private final List<Path> files;

protected Folder(List<Path> files) {
this.files = files;
}

protected List<Path> getFiles() {
return files;
}

protected static Folder fromPath(Path path) throws IOException {
try (Stream<Path> lines = Files.list(path)) {
return new Folder(lines.toList());
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package ru.hh.school.homework.search.file;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.ForkJoinPool;

public class ForkJionFilesSearcher implements FileSearcher {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Jion -> Join

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Да, спасибо.


private final ForkJoinPool forkJoinPool = ForkJoinPool.commonPool();

/**
 * Runs a {@link SearchTask} rooted at {@code path} on this searcher's fork/join pool
 * and blocks until the task has produced its result.
 *
 * @param path directory at which the search starts
 * @param fileName file-name fragment handed to the search task
 * @return the result computed by the root search task
 * @throws IOException if the root search task cannot be created
 */
@Override
public List<String> parallelSearch(Path path, String fileName) throws IOException {
  final SearchTask rootTask = new SearchTask(path, fileName);
  return forkJoinPool.invoke(rootTask);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package ru.hh.school.homework.search.file;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.RecursiveTask;

/**
 * Fork/join task that collects, for one directory and all of its sub-directories,
 * the paths of files whose name contains a given fragment (case-insensitively).
 *
 * <p>The directory is listed when the task is constructed; {@link #compute()} then
 * forks one child task per sub-directory and merges their results.
 */
class SearchTask extends RecursiveTask<List<String>> {

  private final Folder folder;
  private final String fileName;

  /**
   * Creates a task for {@code path} and eagerly lists that directory.
   *
   * @param path directory to search
   * @param fileName fragment that a file name must contain to be reported
   * @throws IOException if {@code path} cannot be listed
   */
  protected SearchTask(Path path, String fileName) throws IOException {
    super();
    this.folder = Folder.fromPath(path);
    this.fileName = fileName;
  }

  /**
   * Scans the directory entries, forking a child task for every sub-directory.
   *
   * @return paths (as strings) of matching files in this directory followed by the
   *     matches reported by the child tasks
   */
  @Override
  protected List<String> compute() {
    // Locale.ROOT keeps the case-insensitive comparison independent of the JVM's
    // default locale (e.g. Turkish maps "i" to a dotted capital "İ").
    String needle = fileName.toUpperCase(Locale.ROOT);
    List<String> foundFiles = new ArrayList<>();
    List<SearchTask> forks = new ArrayList<>();
    for (Path path : folder.getFiles()) {
      if (Files.isDirectory(path)) {
        try {
          SearchTask task = new SearchTask(path, fileName);
          task.fork();
          forks.add(task);
        } catch (IOException e) {
          // Best effort: an unreadable sub-directory is reported and skipped so the
          // rest of the tree is still searched.
          System.err.println("Skipping directory that could not be listed: " + path);
          e.printStackTrace();
        }
      } else if (path.getFileName().toString().toUpperCase(Locale.ROOT).contains(needle)) {
        foundFiles.add(path.toString());
      }
    }
    for (SearchTask task : forks) {
      foundFiles.addAll(task.join());
    }
    return foundFiles;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package ru.hh.school.homework.search.google;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * {@link OnlineSearcher} that queries Google for every entry concurrently using
 * {@link CompletableFuture}s running on a dedicated fixed-size thread pool.
 *
 * <p>The pool is created per call and is always shut down before the call returns,
 * so the instance holds no mutable state and may be used from several threads.
 */
public class CompletableFutureSearcher implements OnlineSearcher {

  private static final String GOOGLE_URL = "https://www.google.com/search?q=";
  private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
      "AppleWebKit/537.36 (KHTML, like Gecko) " +
      "Chrome/80.0.3987.116 Safari/537.36";

  /**
   * Looks up the Google result count for the word of every entry.
   *
   * @param files map whose keys end with the word to search for (the last
   *     space-separated token, e.g. {@code "<folder> - <word>"}) and whose values are
   *     the local occurrence counts
   * @return one formatted line per entry, in the iteration order of {@code files}
   * @throws java.util.concurrent.CompletionException if any individual search fails
   */
  @Override
  public List<String> onlineSearch(Map<String, Integer> files) {
    if (files.isEmpty()) {
      return List.of();
    }
    // try-with-resources guarantees the pool is shut down even when a search fails.
    try (ExecutorService pool =
             Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())) {
      // Submit everything first so that all requests are in flight before any join blocks.
      List<CompletableFuture<String>> pending = files.entrySet().stream()
          .map(entry -> searchGoogleAsync(entry, pool))
          .toList();
      return pending.stream()
          .map(CompletableFuture::join)
          .toList();
    }
  }

  /**
   * Fetches the Google result page for {@code query} and extracts the result count.
   *
   * @param query search term appended to the search URL
   * @return the number of results reported by the page
   * @throws RuntimeException if the page cannot be fetched
   * @throws IllegalStateException if the page contains no parsable result count
   */
  private long searchGoogle(String query) {
    Document document;
    try {
      document = Jsoup
          .connect(GOOGLE_URL + query)
          .userAgent(USER_AGENT)
          .get();
    } catch (IOException e) {
      throw new RuntimeException("Google search failed for '" + query + "'", e);
    }
    Element divResultStats = document.select("div#result-stats").first();
    if (divResultStats == null) {
      // first() returns null when nothing matches, e.g. on a captcha or consent page.
      throw new IllegalStateException("No result statistics in response for '" + query + "'");
    }
    String text = divResultStats.text();
    // The count precedes the "(0.42 seconds)" part; tolerate pages that omit it.
    int parenthesis = text.indexOf('(');
    String resultsPart = parenthesis >= 0 ? text.substring(0, parenthesis) : text;
    String digits = resultsPart.replaceAll("[^0-9]", "");
    if (digits.isEmpty()) {
      throw new IllegalStateException("No result count in '" + text + "' for '" + query + "'");
    }
    return Long.parseLong(digits);
  }

  /**
   * Schedules the search for one entry on {@code pool}.
   *
   * @param file entry whose key ends with the word to search for
   * @param pool executor that runs the blocking HTTP request
   * @return future completing with the formatted result line
   */
  private CompletableFuture<String> searchGoogleAsync(Map.Entry<String, Integer> file,
                                                      ExecutorService pool) {
    String key = file.getKey();
    // The word is the last token of the key; taking the last token (rather than a
    // fixed index) keeps this correct when the path part itself contains spaces.
    String query = key.substring(key.lastIndexOf(' ') + 1);
    return CompletableFuture
        .supplyAsync(() -> getSearchResultInfo(key, file.getValue(), searchGoogle(query)), pool);
  }

  /** Formats one output line from the entry key, its local count and the Google count. */
  private String getSearchResultInfo(String file, Integer wordCount, long searchCount) {
    return file + " [" + wordCount + "] - " + searchCount + " google search results";
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package ru.hh.school.homework.search.google;

import java.util.List;
import java.util.Map;

/**
 * Strategy for looking up words online.
 */
public interface OnlineSearcher {

/**
 * Performs an online search for the given entries.
 *
 * <p>NOTE(review): the key format is implementation-defined; the
 * CompletableFuture-based implementation in this change expects keys of the form
 * {@code "<path> - <word>"} and returns one formatted line per entry — confirm for
 * other implementations.
 *
 * @param files map from an entry label to its local occurrence count
 * @return the search results as text lines
 */
List<String> onlineSearch(Map<String, Integer> files);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package ru.hh.school.homework.search.word;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * {@link TopWordsSearcher} built on parallel streams.
 *
 * <p>A word is a maximal run of ASCII letters that is longer than three characters.
 * Result keys have the form {@code "<file or folder> - <word>"}; values are the
 * number of occurrences. When several words share the same count, the
 * alphabetically smaller word wins, so the result is deterministic.
 */
public class ParallelStreamTopWordsSearcher implements TopWordsSearcher {

  /** Highest count first; ties are broken by the word so the top-N is reproducible. */
  private static final Comparator<Map.Entry<String, Long>> BY_COUNT_DESC_THEN_WORD =
      Map.Entry.<String, Long>comparingByValue(Comparator.reverseOrder())
          .thenComparing(Map.Entry.comparingByKey());

  /**
   * Computes the most frequent words of every file separately.
   *
   * @param files paths of the files to analyse
   * @param limit maximum number of words reported per file
   * @return map from {@code "<file> - <word>"} to the word's count in that file
   * @throws RuntimeException if a file cannot be read
   */
  @Override
  public Map<String, Integer> inFilesTopWordsCounter(List<String> files, int limit) {
    return files.parallelStream()
        .flatMap(file -> label(file, topWords(fileReadLines(file).parallelStream(), limit)))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }

  /**
   * Computes the most frequent words of every folder, counting across all of the
   * given files that live directly in that folder.
   *
   * @param folders paths of the files to analyse; they are grouped by parent directory
   * @param limit maximum number of words reported per folder
   * @return map from {@code "<folder> - <word>"} to the word's count in that folder
   * @throws RuntimeException if a file cannot be read
   */
  @Override
  public Map<String, Integer> inFoldersTopWordsCounter(List<String> folders, int limit) {
    Map<Path, List<String>> filesByFolder = folders.parallelStream()
        .collect(Collectors.groupingByConcurrent(ParallelStreamTopWordsSearcher::folderOf));
    return filesByFolder.entrySet().parallelStream()
        .flatMap(folder -> label(
            folder.getKey().toString(),
            topWords(
                folder.getValue().parallelStream().flatMap(file -> fileReadLines(file).stream()),
                limit)))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }

  /** Counts the words in {@code lines} and returns the {@code limit} most frequent ones. */
  private static List<Map.Entry<String, Long>> topWords(Stream<String> lines, int limit) {
    Map<String, Long> counts = lines
        .flatMap(line -> Arrays.stream(line.split("[^a-zA-Z]")))
        .filter(word -> word.length() > 3)
        .collect(Collectors.groupingByConcurrent(Function.identity(), Collectors.counting()));
    return counts.entrySet().parallelStream()
        .sorted(BY_COUNT_DESC_THEN_WORD)
        .limit(limit)
        .toList();
  }

  /** Turns word counts into result entries keyed by {@code "<owner> - <word>"}. */
  private static Stream<Map.Entry<String, Integer>> label(String owner,
                                                          List<Map.Entry<String, Long>> top) {
    return top.stream()
        .map(entry -> Map.entry(getName(owner, entry.getKey()),
            Integer.valueOf(entry.getValue().intValue())));
  }

  /** Returns the parent directory of {@code file}, or the empty path for a bare file name. */
  private static Path folderOf(String file) {
    Path parent = Path.of(file).getParent();
    // groupingByConcurrent rejects null keys, and a bare file name has no parent.
    return parent != null ? parent : Path.of("");
  }

  /** Reads all lines of the file, closing it before returning. */
  private List<String> fileReadLines(String path) {
    try (Stream<String> stringStream = Files.lines(Path.of(path))) {
      return stringStream.toList();
    } catch (IOException e) {
      throw new RuntimeException("Failed to read " + path, e);
    }
  }

  /** Builds the result key for a word found in the given file or folder. */
  private static String getName(String filePath, String word) {
    return filePath + " - " + word;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package ru.hh.school.homework.search.word;

import java.util.List;
import java.util.Map;

/**
 * Strategy for finding the most frequent words in a set of files.
 */
public interface TopWordsSearcher {

/**
 * Computes the most frequent words per file.
 *
 * <p>NOTE(review): key format and word definition are implementation-defined; the
 * parallel-stream implementation in this change keys results as
 * {@code "<file> - <word>"} — confirm for other implementations.
 *
 * @param files paths of the files to analyse
 * @param limit maximum number of words reported per file
 * @return map from a result label to the word's occurrence count
 */
Map<String, Integer> inFilesTopWordsCounter(List<String> files, int limit);

/**
 * Computes the most frequent words per folder.
 *
 * <p>NOTE(review): the parallel-stream implementation in this change groups the
 * given files by parent directory and keys results as {@code "<folder> - <word>"} —
 * confirm for other implementations.
 *
 * @param files paths of the files to analyse
 * @param limit maximum number of words reported per folder
 * @return map from a result label to the word's occurrence count
 */
Map<String, Integer> inFoldersTopWordsCounter(List<String> files, int limit);

}