Skip to content

Commit

Permalink
Merge pull request #30 from machine-data-hub/final
Browse files Browse the repository at this point in the history
Final
  • Loading branch information
cbarnes7 committed May 27, 2021
2 parents 4ad4753 + bd51992 commit f0f84fb
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 21 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
[![Documentation Status](https://readthedocs.org/projects/machine-data-hub/badge/?version=latest)](https://machine-data-hub.readthedocs.io/en/latest/?badge=latest)

# Machine Data Hub
Python Client to use the [Machine Data Hub](https://machinedatahub.ai).
Python Client to use the [Machine Data Hub](https://machinedatahub.ai), an open-source project
to make state-of-the-art machine learning and artificial intelligence techniques more accessible to engineering domains
by providing suitable benchmark datasets for advancing prognostics.
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
name = "machine-data-hub"
version = "0.1.0"
description = "Python-based Command Line Tool to use the Machine Data Hub"
homepage = "https://machinedatahub.ai/"
repository = "https://github.com/machine-data-hub"
documentation = "https://machine-data-hub.readthedocs.io/en/latest/?badge=latest"
readme = "README.md"
authors = ["ceciliabarnes <cbarnes7@uw.edu>"]
license = "MIT License"

Expand Down
9 changes: 2 additions & 7 deletions src/machine_data_hub/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def dataset_names(datasets):
+ " ("
+ str(len(row["Datasets"]))
+ " files, "
+ row["File Size"]
+ row["FileSize"]
+ ")"
for row in datasets
]
Expand Down Expand Up @@ -176,14 +176,9 @@ def metadata(id: int):
),
]
table.append(info)
elif key == "Summary":
sep = "\n"
row[key] = sep.join(textwrap.wrap(row["Summary"], width=90))
info = [key, row[key]]
table.append(info)
elif (
key == "img_link"
or key == "One Line"
or key == "Summary"
or key == "URL"
or key == "Rank"
):
Expand Down
29 changes: 16 additions & 13 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,35 @@

MOCK_DATASETS = [
{
"id": 1,
"id": "1",
"Rank": 1,
"Name": "Combined Cycle Power Plant Data Set",
"Owner": "UC Irvine",
"URL": "https://archive.ics.uci.edu/ml/machine-learning-databases/00294/CCPP.zip",
"Short Summary": "The dataset contains 9568 data points collected from a Combined Cycle Power Plant over 6 years (2006-2011), when the power plant was set to work with full load. Features consist of hourly average ambient variables Temperature (T), Ambient Pressure (AP), Relative Humidity (RH) and Exhaust Vacuum (V) to predict the net hourly electrical energy output (EP) of the plant. A combined cycle power plant (CCPP) is composed of gas turbines (GT), steam turbines (ST) and heat recovery steam generators. In a CCPP, the electricity is generated by gas and steam turbines, which are combined in one cycle, and is transferred from one turbine to another. While the Vacuum is collected from and has effect on the Steam Turbine, he other three of the ambient variables effect the GT performance. For comparability with our baseline studies, and to allow 5x2 fold statistical tests be carried out, we provide the data shuffled five times. For each shuffling 2-fold CV is carried out and the resulting 10 measurements are used for statistical testing.",
"Name": "Combined Cycle Power Plant",
"Owner": "University of California Irvine",
"DateDonated": "January 20, 2021",
"URL": "https://archive.ics.uci.edu/ml/datasets/Combined+Cycle+Power+Plant",
"ShortSummary": "The dataset contains 9568 data points collected from a Combined Cycle Power Plant over 6 years (2006-2011), when the power plant was set to work with full load. Features consist of hourly average ambient variables Temperature (T), Ambient Pressure (AP), Relative Humidity (RH) and Exhaust Vacuum (V) to predict the net hourly electrical energy output (EP) of the plant. A combined cycle power plant (CCPP) is composed of gas turbines (GT), steam turbines (ST) and heat recovery steam generators. In a CCPP, the electricity is generated by gas and steam turbines, which are combined in one cycle, and is transferred from one turbine to another. While the Vacuum is collected from and has effect on the Steam Turbine, he other three of the ambient variables effect the GT performance. For comparability with our baseline studies, and to allow 5x2 fold statistical tests be carried out, we provide the data shuffled five times. For each shuffling 2-fold CV is carried out and the resulting 10 measurements are used for statistical testing.",
"OneLine": "Data collected from a Combined Power Plant working full load over 6 years.",
"FileType": "xlsx",
"Sector": "Power",
"ML Type": "Regression",
"MLType": ["Regression"],
"Labeled": "Yes",
"Time Series": "No",
"Simulation (Yes/No)": "N/A",
"TimeSeries": "No",
"Simulation": "",
"Attributes": 4,
"Instances": 9568,
"Downloads": 191037,
"Likes": 0,
"File Size": "3.7 MB",
"img_link": "https://www.miga.org/sites/default/files/2018-06/power-plant-bright-blue-sky.jpg",
"FileSize": "3.7 MB",
"ImgLink": "/images/power-plant-bright-blue-sky.jpg",
"Datasets": [
{
"Name": "Dataset 1",
"Name": "File 1",
"URL": "https://archive.ics.uci.edu/ml/machine-learning-databases/00294/CCPP.zip",
"Likes": 0,
"Downloads": 191037,
"File Size": "3.7 MB",
"FileSize": "3.7 MB"
}
],
]
}
]

Expand Down

0 comments on commit f0f84fb

Please sign in to comment.