-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
119 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
"""Command line interface for pre-processing models""" | ||
|
||
import argparse | ||
import os | ||
|
||
from hal.streams.logger import log_message | ||
|
||
from peeper.preprocessing.models import Merger | ||
|
||
|
||
def create_args():
    """Build the command line parser for this script.

    :return: ArgumentParser
        Parser that handles cmd arguments: a single required option,
        ``-d`` (long form ``--dir``), stored under the ``dir`` attribute.
    """

    parser = argparse.ArgumentParser(
        description='Command line interface for pre-processing models',
        # %(prog)s keeps the program name in the usage line; a custom usage
        # string replaces the auto-generated one entirely.
        usage='%(prog)s -d <directory to parse> (-h for full usage)',
    )
    parser.add_argument(
        '-d', '--dir',
        dest='dir',
        metavar='DIRECTORY',
        help='directory to use',
        required=True,
    )
    return parser
|
||
|
||
def parse_args(parser):
    """Parse the command line and return the validated input directory.

    :param parser: ArgumentParser
        Object that holds cmd arguments; must define a ``dir`` destination.
    :return: str
        Directory given on the command line.
    :raises SystemExit:
        Raised by ``parser.error`` (exit status 2) when the given path is
        not an existing directory, and by ``parser.parse_args`` itself on
        malformed arguments.
    """

    args = parser.parse_args()
    directory = str(args.dir)

    # Validate explicitly instead of with ``assert``: assertions are removed
    # when Python runs with -O, which would silently skip this check.
    # ``isdir`` (rather than ``exists``) also rejects a path to a regular file.
    if not os.path.isdir(directory):
        parser.error("'{}' is not an existing directory".format(directory))

    return directory
|
||
|
||
def main():
    """Script entry point.

    Reads the folder from the command line, then asks a ``Merger`` built on
    that folder to merge into ``Merged.csv`` inside the same folder, logging
    the folder before and the output path after.
    """

    directory = parse_args(create_args())
    log_message("Using folder", directory)

    # The result is written inside the folder it was read from.
    merged_path = os.path.join(directory, "Merged.csv")
    Merger(directory).merge_into(merged_path)

    log_message("Merged into", merged_path)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
"""Module containing pre-processing models""" | ||
|
||
import pandas as pd | ||
|
||
from hal.files.models.files import Document | ||
from hal.files.models.system import ls_dir | ||
|
||
|
||
class Merger:
    """Merges multiple .csv data files into a big one"""

    def __init__(self, folder):
        """Load every .csv file found in the folder.

        :param folder: folder where there are the input files
        """

        self.path = folder
        # dictionary file name -> file data (as pandas data frame)
        self.data = {
            path: pd.read_csv(path)
            for path in self._find_files()
        }

    def _find_files(self):
        """Finds files in folder

        :return: list of input files in folder (only those whose extension
            is ".csv")
        """

        return [
            path
            for path in ls_dir(self.path)
            if Document(path).extension == ".csv"  # just csv files
        ]

    def _merge(self, exclude=None):
        """Merges data frames into one big

        The frames are stacked row-wise in the order they were loaded and
        the result gets a fresh 0..n-1 index. Columns missing from some
        inputs are filled with NaN by pandas.

        :param exclude: optional path of a loaded file to leave out of the
            merge (used to skip a previous output file)
        :return: one big data frame with data from all input files; an empty
            data frame when there is nothing to merge
        """

        import os  # local import: this module does not import os at the top

        excluded = None if exclude is None else os.path.abspath(exclude)
        frames = [
            frame
            for path, frame in self.data.items()
            if excluded is None or os.path.abspath(path) != excluded
        ]

        # pd.concat raises ValueError on an empty list of frames
        if not frames:
            return pd.DataFrame()

        return pd.concat(frames, ignore_index=True, sort=False)

    def merge_into(self, output_file):
        """Merges all inputs files into one

        A loaded file that is the output file itself (e.g. the result of a
        previous run stored in the same folder) is not merged again.

        :param output_file: output file (where to write data)
        """

        merged = self._merge(exclude=output_file)
        # Written with pandas' defaults, so the index is the first column.
        merged.to_csv(output_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,2 @@ | ||
urllib3>=1.23 | ||
scipy>=0.19.1 | ||
stem>=1.6.0 | ||
colorama>=0.3.9 | ||
send2trash>=1.5.0 | ||
numpy>=1.13.3 | ||
mutagen>=1.41.1 | ||
psutil>=5.4.7 | ||
pymongo>=3.7.1 | ||
httplib2>=0.11.3 | ||
pyparsing>=2.2.2 | ||
requests>=2.18.4 | ||
setuptools>=39.0.1 | ||
unidiff>=0.5.5 | ||
matplotlib>=2.1.1 | ||
beautifulsoup4>=4.6.3 | ||
GitPython>=2.1.11 | ||
google_api_python_client>=1.7.4 | ||
scikit_learn>=0.20.0 | ||
pyhal>=10.2.6 | ||
pandas>=0.23.4 |