one-hot encoding

rasbt · Feb 12, 2016 · 32626c5 · 32626c5
1 parent 5a6bf6b
commit 32626c5
Show file tree

Hide file tree

Showing 28 changed files with 764 additions and 35 deletions.
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -81,6 +81,8 @@ pages:
     - user_guide/general_concepts/gradient-optimization.md
     - user_guide/general_concepts/linear-gradient-derivative.md
     - user_guide/general_concepts/regularization-linear.md
+  - Upcoming Features / 0.3.1dev:
+    - user_guide/preprocessing/one-hot_encoding.md
 - API:
   - api_subpackages/mlxtend.classifier.md
   - api_subpackages/mlxtend.data.md

diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -4,7 +4,7 @@
 
 ### Version 0.3.1dev
 
-- -
+- Function for one-hot encoding of class labels ([`preprocessing.one_hot`](./user_guide/preprocessing/one-hot_encoding.md))
 
 ### Version 0.3.0 (2016-01-31)
 

diff --git a/docs/sources/api_modules/mlxtend.preprocessing/one_hot.md b/docs/sources/api_modules/mlxtend.preprocessing/one_hot.md
@@ -0,0 +1,28 @@
+## one_hot
+
+*one_hot(y, num_labels='auto', dtype='float')*
+
+One-hot encoding of class labels
+
+**Parameters**
+
+- `y` : array-like, shape = [n_classlabels]
+
+    Python list or numpy array consisting of class labels.
+
+- `num_labels` : int or 'auto'
+
+    Number of unique labels in the class label array. Infers the number
+    of unique labels from the input array if set to 'auto'.
+
+- `dtype` : str
+
+    NumPy array type (float, float32, float64) of the output array.
+
+**Returns**
+
+- `onehot` : numpy.ndarray, shape = [n_classlabels, num_labels]
+
+    One-hot encoded array, where each sample is represented as
+    a row vector in the returned array.
+
diff --git a/docs/sources/api_subpackages/mlxtend.classifier.md b/docs/sources/api_subpackages/mlxtend.classifier.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## Adaline
 
 *Adaline(eta=0.01, epochs=50, solver='sgd', random_seed=None, shuffle=False, zero_init_weight=False)*

diff --git a/docs/sources/api_subpackages/mlxtend.data.md b/docs/sources/api_subpackages/mlxtend.data.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## autompg_data
 
 *autompg_data()*

diff --git a/docs/sources/api_subpackages/mlxtend.evaluate.md b/docs/sources/api_subpackages/mlxtend.evaluate.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## confusion_matrix
 
 *confusion_matrix(y_target, y_predicted, binary=False, positive_label=1)*

diff --git a/docs/sources/api_subpackages/mlxtend.feature_selection.md b/docs/sources/api_subpackages/mlxtend.feature_selection.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## ColumnSelector
 
 *ColumnSelector(cols)*

diff --git a/docs/sources/api_subpackages/mlxtend.file_io.md b/docs/sources/api_subpackages/mlxtend.file_io.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## find_filegroups
 
 *find_filegroups(paths, substring='', extensions=None, validity_check=True, ignore_invisible=True, rstrip='', ignore_substring=None)*

diff --git a/docs/sources/api_subpackages/mlxtend.general_plotting.md b/docs/sources/api_subpackages/mlxtend.general_plotting.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## category_scatter
 
 *category_scatter(x, y, label_col, data, markers='sxo^v', colors=('blue', 'green', 'red', 'purple', 'gray', 'cyan'), alpha=0.7, markersize=20.0, legend_loc='best')*

diff --git a/docs/sources/api_subpackages/mlxtend.math.md b/docs/sources/api_subpackages/mlxtend.math.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## factorial
 
 *factorial(n)*

diff --git a/docs/sources/api_subpackages/mlxtend.preprocessing.md b/docs/sources/api_subpackages/mlxtend.preprocessing.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## DenseTransformer
 
 *DenseTransformer(some_param=True)*
@@ -131,6 +131,34 @@ Min max scaling of pandas' DataFrames.
 
     Copy of the array or DataFrame with rescaled columns.
 
+## one_hot
+
+*one_hot(y, num_labels='auto', dtype='float')*
+
+One-hot encoding of class labels
+
+**Parameters**
+
+- `y` : array-like, shape = [n_classlabels]
+
+    Python list or numpy array consisting of class labels.
+
+- `num_labels` : int or 'auto'
+
+    Number of unique labels in the class label array. Infers the number
+    of unique labels from the input array if set to 'auto'.
+
+- `dtype` : str
+
+    NumPy array type (float, float32, float64) of the output array.
+
+**Returns**
+
+- `onehot` : numpy.ndarray, shape = [n_classlabels, num_labels]
+
+    One-hot encoded array, where each sample is represented as
+    a row vector in the returned array.
+
 ## shuffle_arrays_unison
 
 *shuffle_arrays_unison(arrays, random_seed=None)*

diff --git a/docs/sources/api_subpackages/mlxtend.regression_utils.md b/docs/sources/api_subpackages/mlxtend.regression_utils.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## plot_linear_regression
 
 *plot_linear_regression(X, y, model=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False), corr_func='pearsonr', scattercolor='blue', fit_style='k--', legend=True, xlim='auto')*

diff --git a/docs/sources/api_subpackages/mlxtend.regressor.md b/docs/sources/api_subpackages/mlxtend.regressor.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## LinearRegression
 
 *LinearRegression(solver='normal equation', eta=0.01, epochs=50, random_seed=None, shuffle=False, zero_init_weight=False)*

diff --git a/docs/sources/api_subpackages/mlxtend.text.md b/docs/sources/api_subpackages/mlxtend.text.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## generalize_names
 
 *generalize_names(name, output_sep=' ', firstname_output_letters=1)*

diff --git a/docs/sources/api_subpackages/mlxtend.utils.md b/docs/sources/api_subpackages/mlxtend.utils.md
@@ -1,4 +1,4 @@
-mlxtend version: 0.3.0
+mlxtend version: 0.3.1dev
 ## Counter
 
 *Counter(stderr=False, start_newline=True)*

diff --git a/docs/sources/user_guide/evaluate/plot_decision_regions.ipynb b/docs/sources/user_guide/evaluate/plot_decision_regions.ipynb
diff --git a/docs/sources/user_guide/evaluate/plot_decision_regions.md b/docs/sources/user_guide/evaluate/plot_decision_regions.md
@@ -124,6 +124,43 @@ plt.show()
 ![png](plot_decision_regions_files/plot_decision_regions_14_0.png)
 
 
+## Example 4 - Highlighting Test Data Points
+
+
+```python
+from mlxtend.evaluate import plot_decision_regions
+from mlxtend.preprocessing import shuffle_arrays_unison
+import matplotlib.pyplot as plt
+from sklearn import datasets
+from sklearn.svm import SVC
+
+
+# Loading some example data
+iris = datasets.load_iris()
+X, y = iris.data[:, [0,2]], iris.target
+X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)
+
+X_train, y_train = X[:100], y[:100]
+X_test, y_test = X[100:], y[100:]
+
+# Training a classifier
+svm = SVC(C=0.5, kernel='linear')
+svm.fit(X_train, y_train)
+
+# Plotting decision regions
+plot_decision_regions(X, y, clf=svm, res=0.02, legend=2, X_highlight=X_test)
+
+# Adding axes annotations
+plt.xlabel('sepal length [cm]')
+plt.ylabel('petal length [cm]')
+plt.title('SVM on Iris')
+plt.show()
+```
+
+
+![png](plot_decision_regions_files/plot_decision_regions_16_0.png)
+
+
 # API
 
 

diff --git a/.../user_guide/evaluate/plot_decision_regions_files/plot_decision_regions_16_0.png b/.../user_guide/evaluate/plot_decision_regions_files/plot_decision_regions_16_0.png