In [1]:
import numpy as np

# Load Data From UCI Machine Learning Repository

Data files from the UCI Machine Learning Repository are usually formatted as CSV (comma-separated values):

    e.g. Wine data:
    1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
    1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050

    e.g. Iris data:
    5.1,3.5,1.4,0.2,Iris-setosa
    4.9,3.0,1.4,0.2,Iris-setosa

## If all the elements in the data file are numbers:

In [2]:
# Wine data:
# Dataset page: https://archive.ics.uci.edu/ml/datasets/Wine
# Data file:    https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data

In [3]:
# `delimiter` names the character that separates the values on each line
# of the file (a comma for CSV data).
a = np.loadtxt('./wine.data', delimiter=',')

# Show the array's dimensions first, then its contents, one per line.
print(a.shape, a, sep='\n')

(178, 14)
[[  1.00000000e+00   1.42300000e+01   1.71000000e+00 ...,   1.04000000e+00
    3.92000000e+00   1.06500000e+03]
 [  1.00000000e+00   1.32000000e+01   1.78000000e+00 ...,   1.05000000e+00
    3.40000000e+00   1.05000000e+03]
 [  1.00000000e+00   1.31600000e+01   2.36000000e+00 ...,   1.03000000e+00
    3.17000000e+00   1.18500000e+03]
 ..., 
 [  3.00000000e+00   1.32700000e+01   4.28000000e+00 ...,   5.90000000e-01
    1.56000000e+00   8.35000000e+02]
 [  3.00000000e+00   1.31700000e+01   2.59000000e+00 ...,   6.00000000e-01
    1.62000000e+00   8.40000000e+02]
 [  3.00000000e+00   1.41300000e+01   4.10000000e+00 ...,   6.10000000e-01
    1.60000000e+00   5.60000000e+02]]


## If some of the elements in the data file are strings:

In this case, we need to convert the strings into numbers. To do so, we (1) first define a converter function and then (2) pass the column number and the converter function to `np.loadtxt()` through its `converters` argument.

In [4]:
# Iris data:
# Dataset page: https://archive.ics.uci.edu/ml/datasets/Iris
# Data file:    https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data

In [5]:
# This converter function maps an Iris species name to its numeric class label.
def converter1(x):
    """Convert an Iris species name into an integer class label.

    Args:
        x: The raw field passed in by ``np.loadtxt``. It may be ``bytes``
            (e.g. ``b'Iris-setosa'``) or ``str`` (e.g. ``'Iris-setosa'``).

    Returns:
        0 for 'Iris-setosa', 1 for 'Iris-versicolor', and 2 for any other
        value.
    """
    # Whether np.loadtxt hands the converter bytes or str depends on the
    # NumPy version and on the `encoding` argument, so accept both forms.
    # About the b'' prefix for bytes literals, see:
    #     https://stackoverflow.com/questions/6269765/what-does-the-b-character-do-in-front-of-a-string-literal
    if isinstance(x, (bytes, bytearray)):
        x = x.decode('latin1')

    # Ignore stray whitespace around the name; leave non-string inputs as-is
    # so they fall through to the default label below.
    label = x.strip() if isinstance(x, str) else x

    if label == 'Iris-setosa':
        return 0
    elif label == 'Iris-versicolor':
        return 1
    else:
        # Any remaining value is treated as the third class.
        return 2

In [10]:
# Zero-based index of the column that holds the species name.
column1 = 4

# `converters` maps a column index to the function that turns that
# column's text into a number.
a = np.loadtxt('./iris.data',
               converters={column1: converter1},
               delimiter=',')

# Several columns can be converted at once by adding more entries:
# a = np.loadtxt('./???.data', delimiter=',',
#                converters={column1: converter1, column2: converter2})

# Show the array's dimensions first, then its contents, one per line.
print(a.shape, a, sep='\n')

(150, 5)
[[ 5.1  3.5  1.4  0.2  0. ]
 [ 4.9  3.   1.4  0.2  0. ]
 [ 4.7  3.2  1.3  0.2  0. ]
 [ 4.6  3.1  1.5  0.2  0. ]
 [ 5.   3.6  1.4  0.2  0. ]
 [ 5.4  3.9  1.7  0.4  0. ]
 [ 4.6  3.4  1.4  0.3  0. ]
 [ 5.   3.4  1.5  0.2  0. ]
 [ 4.4  2.9  1.4  0.2  0. ]
 [ 4.9  3.1  1.5  0.1  0. ]
 [ 5.4  3.7  1.5  0.2  0. ]
 [ 4.8  3.4  1.6  0.2  0. ]
 [ 4.8  3.   1.4  0.1  0. ]
 [ 4.3  3.   1.1  0.1  0. ]
 [ 5.8  4.   1.2  0.2  0. ]
 [ 5.7  4.4  1.5  0.4  0. ]
 [ 5.4  3.9  1.3  0.4  0. ]
 [ 5.1  3.5  1.4  0.3  0. ]
 [ 5.7  3.8  1.7  0.3  0. ]
 [ 5.1  3.8  1.5  0.3  0. ]
 [ 5.4  3.4  1.7  0.2  0. ]
 [ 5.1  3.7  1.5  0.4  0. ]
 [ 4.6  3.6  1.   0.2  0. ]
 [ 5.1  3.3  1.7  0.5  0. ]
 [ 4.8  3.4  1.9  0.2  0. ]
 [ 5.   3.   1.6  0.2  0. ]
 [ 5.   3.4  1.6  0.4  0. ]
 [ 5.2  3.5  1.5  0.2  0. ]
 [ 5.2  3.4  1.4  0.2  0. ]
 [ 4.7  3.2  1.6  0.2  0. ]
 [ 4.8  3.1  1.6  0.2  0. ]
 [ 5.4  3.4  1.5  0.4  0. ]
 [ 5.2  4.1  1.5  0.1  0. ]
 [ 5.5  4.2  1.4  0.2  0. ]
 [ 4.9  3.1  1.5  0.1  0. ]
 [ 5.   3.2

## Notes:

You can also try some other packages like `csv` or `pandas` to load the data.