In [2]:
import csv

# Read the semicolon-delimited CSV into a list of rows. Each row is a list of
# strings and row 0 is the header. newline="" leaves newline handling to the
# csv module, as its documentation recommends for file objects.
with open("winequality-red.csv", "r", newline="") as f:
    wines = list(csv.reader(f, delimiter=";"))

# Show the header row plus the first two data rows.
print(wines[:3])

[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality'], ['7.4', '0.7', '0', '1.9', '0.076', '11', '34', '0.9978', '3.51', '0.56', '9.4', '5'], ['7.8', '0.88', '0', '2.6', '0.098', '25', '67', '0.9968', '3.2', '0.68', '9.8', '5']]


In [3]:
# Preview the header row and the first two data rows.
print(wines[0:3])

[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality'], ['7.4', '0.7', '0', '1.9', '0.076', '11', '34', '0.9978', '3.51', '0.56', '9.4', '5'], ['7.8', '0.88', '0', '2.6', '0.098', '25', '67', '0.9968', '3.2', '0.68', '9.8', '5']]


In [4]:
# Take the last field ("quality") of every data row, skipping the header row
# at index 0, and convert it from string to float.
qualities = list(map(lambda row: float(row[-1]), wines[1:]))

In [6]:
#qualities

In [7]:
# Arithmetic mean of the quality values: total divided by count.
sum(qualities) / len(qualities)

5.6360225140712945

Although we were able to do the calculation we wanted, the code is fairly 
complex, and it won't be fun to have to do something similar every time we 
want to compute a quantity. Luckily, we can use NumPy to make it easier 
to work with our data.

In [4]:
import numpy as np

In [5]:
# Drop the header row and convert the remaining string rows to a 2-D float array.
# The builtin `float` is used as the dtype because the `np.float` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
# NOTE: this rebinds `wines`, so run the cell only once per kernel session;
# a second run would slice off the first data row instead of the header.
wines = np.array(wines[1:], dtype=float)
wines

array([[  7.4  ,   0.7  ,   0.   , ...,   0.56 ,   9.4  ,   5.   ],
       [  7.8  ,   0.88 ,   0.   , ...,   0.68 ,   9.8  ,   5.   ],
       [  7.8  ,   0.76 ,   0.04 , ...,   0.65 ,   9.8  ,   5.   ],
       ..., 
       [  6.3  ,   0.51 ,   0.13 , ...,   0.75 ,  11.   ,   6.   ],
       [  5.9  ,   0.645,   0.12 , ...,   0.71 ,  10.2  ,   5.   ],
       [  6.   ,   0.31 ,   0.47 , ...,   0.66 ,  11.   ,   6.   ]])

We can check the number of rows and columns in our data 
using the shape property of NumPy arrays:

In [6]:
# (number of rows, number of columns) of the 2-D array.
wines.shape

(1599, 12)

In [9]:
# Load the file directly into a NumPy array; skip_header=1 skips the header
# line so that only the numeric rows are parsed.
wines = np.genfromtxt(
    "winequality-red.csv",
    delimiter=";",
    skip_header=1,
)

In [10]:
# Display the array loaded by genfromtxt.
wines

array([[  7.4  ,   0.7  ,   0.   , ...,   0.56 ,   9.4  ,   5.   ],
       [  7.8  ,   0.88 ,   0.   , ...,   0.68 ,   9.8  ,   5.   ],
       [  7.8  ,   0.76 ,   0.04 , ...,   0.65 ,   9.8  ,   5.   ],
       ..., 
       [  6.3  ,   0.51 ,   0.13 , ...,   0.75 ,  11.   ,   6.   ],
       [  5.9  ,   0.645,   0.12 , ...,   0.71 ,  10.2  ,   5.   ],
       [  6.   ,   0.31 ,   0.47 , ...,   0.66 ,  11.   ,   6.   ]])

In [11]:
# Single element: row index 2, column index 3 (both 0-based).
wines[2, 3]

2.2999999999999998

In [12]:
# Rows 0, 1 and 2 of column 3 (the slice end index is exclusive).
wines[0:3, 3]

array([ 1.9,  2.6,  2.3])

In [16]:
# Omitting the start index is the same as starting at 0, so this again selects
# rows 0, 1 and 2 of column 3.
wines[:3, 3]

array([ 1.9,  2.6,  2.3])

In [17]:
# A bare ":" selects every row: the whole of column 3.
wines[:, 3]

array([ 1.9,  2.6,  2.3, ...,  2.3,  2. ,  3.6])

In [18]:
# A bare ":" in the column position selects every column: the whole of row 3.
wines[3, :]

array([ 11.2  ,   0.28 ,   0.56 ,   1.9  ,   0.075,  17.   ,  60.   ,
         0.998,   3.16 ,   0.58 ,   9.8  ,   6.   ])

In [13]:
# Overwrite a single element (row 1, column 5); this mutates `wines` in place.
wines[1, 5] = 10

In [14]:
# Assign the scalar 50 to every row of column 10; this mutates `wines` in place.
wines[:, 10] = 50

In [15]:
# Select all columns of row index 3. Indexing is 0-based, so despite the
# variable name this is the fourth row of the array.
third_wine = wines[3, :]

In [16]:
# Display the selected row (a 1-D array with one value per column).
third_wine

array([ 11.2  ,   0.28 ,   0.56 ,   1.9  ,   0.075,  17.   ,  60.   ,
         0.998,   3.16 ,   0.58 ,  50.   ,   6.   ])

In [17]:
# Draw 3 random floats. No seed is set in the visible cells, so the values
# differ on every run.
np.random.rand(3)

array([ 0.02230071,  0.12566288,  0.55813984])

In [18]:
# Element type shared by every value in the array.
wines.dtype

dtype('float64')

In [19]:
# Convert to integers; the result is not assigned back, so `wines` is not
# rebound here. Fractional parts are dropped (e.g. 7.8 becomes 7).
wines.astype(int)

array([[ 7,  0,  0, ...,  0, 50,  5],
       [ 7,  0,  0, ...,  0, 50,  5],
       [ 7,  0,  0, ...,  0, 50,  5],
       ..., 
       [ 6,  0,  0, ...,  0, 50,  6],
       [ 5,  0,  0, ...,  0, 50,  5],
       [ 6,  0,  0, ...,  0, 50,  6]])

In [20]:
# Keep the integer-converted array and inspect the name of its dtype.
# NOTE(review): `int` presumably maps to NumPy's platform-default integer, so
# the name may be 'int32' or 'int64' depending on platform / NumPy version.
int_wines = wines.astype(int)
int_wines.dtype.name

'int32'

In [27]:
# Add 10 to every value of column 11; the result is a new array and `wines`
# itself is left unchanged.
wines[:, 11] + 10

array([ 15.,  15.,  15., ...,  16.,  15.,  16.])

In [23]:
# In-place add: shorthand for wines[:, 11] = wines[:, 11] + 10.
# NOTE: this mutates `wines`, so every re-run of this cell adds another 10.
wines[:, 11] += 10
wines[:, 11]

array([ 25.,  25.,  25., ...,  26.,  25.,  26.])

In [24]:
# Multiply every value of column 11 by 2 (result is a new array).
wines[:, 11] * 2

array([ 50.,  50.,  50., ...,  52.,  50.,  52.])

In [25]:
# Element-wise addition of two equal-length 1-D arrays (column 11 with itself).
wines[:, 11] + wines[:, 11]

array([ 50.,  50.,  50., ...,  52.,  50.,  52.])

Note that this is equivalent to wines[:,11] * 2 -- this is because NumPy adds each pair of elements. The first element in the first array is added to the first element in the second array, the second to the second, and so on.

In [26]:
# Element-wise product of column 10 and column 11, paired row by row.
wines[:, 10] * wines[:, 11]

array([ 1250.,  1250.,  1250., ...,  1300.,  1250.,  1300.])