Skip to content
Browse files

Add meta-data to 'wine'

  • Loading branch information...
1 parent adfe350 commit 468d6cb77070305840872a1502940e3e25422ded Luís Pedro Coelho committed May 22, 2009
Showing with 58 additions and 4 deletions.
  1. +39 −0 milksets/utils.py
  2. +19 −4 milksets/wine/wine.py
View
39 milksets/utils.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2008-2009, Luís Pedro Coelho <lpc@cmu.edu>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import division
+
# Names of the metadata attributes exported by dataset modules; a dataset
# module such as milksets.wine adds this list to its own ``__all__``.
# Each name appears exactly once ('missing_values' used to be listed twice).
standard_properties = [
    'name',
    'long_name',
    'short_name',
    'reference',
    'url',
    'missing_values',
    'data_source',
    'label_names',
]
+
def standard_classification_loader(name):
    '''
    decorator = standard_classification_loader(name)

    Build a decorator that installs the standard docstring on a
    classification dataset's loader function.

    Parameters
    ----------
    name : dataset name, interpolated into the generated docstring

    Returns
    -------
    decorator : callable that assigns ``f.__doc__`` and returns ``f`` itself
                (the function is not wrapped). Any docstring ``f`` already
                had is replaced.
    '''
    def adddoc(f):
        # Format with an explicit 1-tuple: with a bare ``% name`` a
        # tuple-valued name would be unpacked as several format arguments
        # instead of being rendered as a single value.
        f.__doc__ = '''\
        features,labels = load()

        Load features and labels for dataset %s

        features will be a numpy array.
        labels will be a numpy array of integer type
        ''' % (name,)
        return f
    return adddoc
+
+# vim: set ts=4 sts=4 sw=4 expandtab smartindent:
View
23 milksets/wine/wine.py
@@ -22,19 +22,34 @@
from __future__ import division
import numpy as np
from os.path import dirname
+from ..utils import standard_properties, standard_classification_loader
-def standard_loader(f):
- return f
# Public names: the loader plus every standard metadata attribute.
__all__ = ['load'] + standard_properties

# --- Dataset identification -------------------------------------------
name = 'Wine'
long_name = 'UCI Wine'
short_name = 'Wine'

# --- Provenance -------------------------------------------------------
data_source = 'UCI'
url = 'http://archive.ics.uci.edu/ml/datasets/Wine'
reference = '''\
Forina, M. et al, PARVUS -
An Extendible Package for Data Exploration, Classification and Correlation.
Institute of Pharmaceutical and Food Analysis and Technologies, Via Brigata Salerno,
16147 Genoa, Italy.
'''

# --- Label / value description ----------------------------------------
# Class identifiers as used by the raw data file (1..3).
label_names = [1, 2, 3]
# NOTE(review): presumably means the dataset has no missing entries - confirm.
missing_values = False

# Raw data file, located in the data/ directory next to this module.
_winedatafile = dirname(__file__) + '/data/wine.data'
-@standard_loader
# The docstring of `load` is installed by the decorator (it assigns
# `__doc__`), so the behaviour is documented in comments here:
#
#   features, labels = load(force_contiguous=True)
#
# Parses the comma-separated wine data file, whose first column is the
# class (1..3) and whose remaining columns are the features.
#   features : float array, one row per line of the data file
#   labels   : integer array of zero-based classes (raw class minus 1)
# With `force_contiguous` set, `features` is copied so that it owns its
# memory instead of being a column slice of the parsed table.
@standard_classification_loader(name)
def load(force_contiguous=True):
    # The `with` block closes the file deterministically, and the explicit
    # list comprehension (rather than `map`) yields real lists on Python 3
    # as well as Python 2. Blank lines are skipped.
    with open(_winedatafile) as datafile:
        rows = [[float(value) for value in line.split(',')]
                for line in datafile if line.strip()]
    data = np.array(rows)
    # Wine dataset is 1..3: shift to start at 0 and convert to the integer
    # dtype promised by the loader docstring. The subtraction and `astype`
    # both produce fresh arrays, so labels never aliases `data`.
    labels = (data[:, 0] - 1).astype(int)
    features = data[:, 1:]
    if force_contiguous:
        features = features.copy()
    return features, labels
+

0 comments on commit 468d6cb

Please sign in to comment.
Something went wrong with that request. Please try again.