Skip to content

Commit

Permalink
add pre-processing module
Browse files Browse the repository at this point in the history
  • Loading branch information
sirfoga committed Oct 14, 2018
1 parent c8eeeb2 commit 0555b25
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 27 deletions.
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@ fast-install:
@echo "\033[95m\nInstalled to /usr/local/lib/python3.6/dist-packages/peeper033[0m"
$(MAKE) show-installed-version

test:
rm -rf htmlcov/
python3 -m pytest --cov=./ --cov-report=html
@echo "\033[95m\nTest report htmlcov/index.html\033[0m"
#test:
# rm -rf htmlcov/
# python3 -m pytest --cov=./ --cov-report=html
# @echo "\033[95m\nTest report htmlcov/index.html\033[0m"

# Lint the peeper package with flake8, executed through pipenv.
# The codes listed after --ignore are excluded from the report.
flake8:
pipenv run flake8 --ignore=E501,F401,E128,E402,E731,F821,E722 peeper

# Run pylint over the package with 8 parallel jobs; when pylint exits
# non-zero its status is handed to pylint-exit.
# `$$?` is required here: a bare `$?` is expanded by make itself (it is the
# automatic variable "prerequisites newer than the target", empty for this
# rule), so the shell would never see pylint's exit status.
pylint:
	pylint3 -j 8 peeper/* || pylint-exit $$?

coverage:
pipenv run py.test --cov-config .coveragerc --verbose --cov-report term --cov-report xml --cov=requests tests
#coverage:
# pipenv run py.test --cov-config .coveragerc --verbose --cov-report term --cov-report xml --cov=requests tests

publish:
$(MAKE) clean
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* [Examples](#examples)
* [Install](#install)
* [Usage and documentation](#usage-and-documentation)
* [Questions and issues](#questions-and-issues)
* [Contributing and feedback](#contributing-and-feedback)
* [License](#license)

</details>
Expand Down Expand Up @@ -49,7 +49,7 @@ Different ways, all equals
## Usage and documentation
<a href="https://peeper.readthedocs.io/en/latest/"><img src="https://readthedocs.org/projects/peeper/badge/?version=latest"></a>

Browse the online documentation here](https://peeper.readthedocs.io/en/latest/)
Browse the online documentation [here](https://peeper.readthedocs.io/en/latest/)
or make your own by `make docs`


Expand Down
Empty file added peeper/ml/__init__.py
Empty file.
Empty file.
55 changes: 55 additions & 0 deletions peeper/preprocessing/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-

"""Command line interface for pre-processing models"""

import argparse
import os

from hal.streams.logger import log_message

from peeper.preprocessing.models import Merger


def create_args():
    """Builds the command line parser of the pre-processing tool

    :return: ArgumentParser
        Parser with a single, required ``-d`` option (stored as ``dir``).
    """

    usage_text = '-d <directory to parse> -h for full usage'
    cli = argparse.ArgumentParser(usage=usage_text)
    cli.add_argument(
        '-d',
        dest='dir',
        required=True,
        help='directory to use',
    )
    return cli


def parse_args(parser):
    """Parses the command line and validates the input directory

    :param parser: ArgumentParser
        Object that holds cmd arguments.
    :return: str
        Path of the directory given with ``-d``.
    :raises SystemExit: raised by ``parser.error`` (exit status 2, usage
        message on stderr) when the path is not an existing directory.
    """

    args = parser.parse_args()
    directory = str(args.dir)

    # Explicit check instead of `assert`: assertions are removed when Python
    # runs with -O, and a regular file must be rejected as well because the
    # value is later used as a folder to list.
    if not os.path.isdir(directory):
        parser.error('not an existing directory: {}'.format(directory))

    return directory


def main():
    """Command line entry point

    Reads the folder from the command line, then asks a Merger built on that
    folder to write its result to ``Merged.csv`` inside the same folder.
    """

    folder = parse_args(create_args())
    log_message("Using folder", folder)

    output_file = os.path.join(folder, "Merged.csv")

    Merger(folder).merge_into(output_file)
    log_message("Merged into", output_file)


# run the tool only when executed as a script, not when imported
if __name__ == '__main__':
    main()
54 changes: 54 additions & 0 deletions peeper/preprocessing/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-

"""Module containing pre-processing models"""

import pandas as pd

from hal.files.models.files import Document
from hal.files.models.system import ls_dir


class Merger:
    """Merges multiple .csv data files into a big one

    NOTE(review): the command line tool writes its output (Merged.csv) into
    the input folder, so a later run would pick the previous output up as an
    input - confirm whether it should be excluded.
    """

    def __init__(self, folder):
        """
        :param folder: folder where there are the input files
        """

        self.path = folder
        # NOTE(review): assumes ls_dir yields paths that pandas can open
        # as-is (i.e. not bare names relative to `folder`) - confirm
        self.data = {
            file: pd.read_csv(file)
            for file in self._find_files()
        }  # dictionary file name -> file data (as pandas data frame)

    def _find_files(self):
        """Finds files in folder
        :return: list of input files in folder
        """

        return [
            file
            for file in ls_dir(self.path)
            if Document(file).extension == ".csv"  # just csv files
        ]

    def _merge(self):
        """Merges data frames into one big
        :return: one big data frame with data from all input files; rows are
        stacked file after file (files taken in sorted name order) and
        columns missing from a file are filled with NaN. Empty data frame
        when there are no input files.
        """

        if not self.data:
            # pd.concat refuses an empty sequence
            return pd.DataFrame()

        # sorted keys -> same output whatever order the folder was listed in
        frames = [self.data[file] for file in sorted(self.data)]
        return pd.concat(frames, ignore_index=True, sort=False)

    def merge_into(self, output_file):
        """Merges all inputs files into one
        :param output_file: output file (where to write data)
        """

        data = self._merge()
        # the index is a synthetic row counter (ignore_index above), writing
        # it would only add a meaningless unnamed column to the output
        data.to_csv(output_file, index=False)
21 changes: 2 additions & 19 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,2 @@
urllib3>=1.23
scipy>=0.19.1
stem>=1.6.0
colorama>=0.3.9
send2trash>=1.5.0
numpy>=1.13.3
mutagen>=1.41.1
psutil>=5.4.7
pymongo>=3.7.1
httplib2>=0.11.3
pyparsing>=2.2.2
requests>=2.18.4
setuptools>=39.0.1
unidiff>=0.5.5
matplotlib>=2.1.1
beautifulsoup4>=4.6.3
GitPython>=2.1.11
google_api_python_client>=1.7.4
scikit_learn>=0.20.0
pyhal>=10.2.6
pandas>=0.23.4

0 comments on commit 0555b25

Please sign in to comment.