### Numpy

In [2]:
import numpy as np

In [3]:
# 1-D integer array holding 1 through 9 (the stop value, 10, is excluded).
myList = np.arange(1, 10, step=1)
myList

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

#### Applying operations to elements

In [4]:
# Arrange the nine values as a 3x3 grid, filled row by row.
matrix = np.reshape(myList, (3, 3))
matrix

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [5]:
# Length of each axis, total number of elements, and number of axes.
matrix.shape, matrix.size, matrix.ndim

((3, 3), 9, 2)

In [13]:
# Anonymous function that shifts a single value up by 100.
add_100 = lambda i: i + 100
add_100

<function __main__.<lambda>(i)>

In [14]:
# np.vectorize wraps the scalar lambda so it can be called on whole arrays.
# NOTE(review): the NumPy docs describe vectorize as a convenience that is
# essentially a Python-level loop, not a performance tool; for this simple
# case `matrix + 100` gives the same result through broadcasting.
vectorize_add_100 = np.vectorize(add_100)
vectorize_add_100

<numpy.vectorize at 0x1059e0e50>

In [15]:
# Apply the wrapped function to every element of the 3x3 matrix.
vectorize_add_100(matrix)

array([[101, 102, 103],
       [104, 105, 106],
       [107, 108, 109]])

##### Without lambda

In [16]:
def myFunc(i):
    """Return ``i`` increased by 100."""
    shifted = i + 100
    return shifted


# Wrap the named scalar function so it can be applied element-wise to arrays.
vfunc = np.vectorize(myFunc)
vfunc

<numpy.vectorize at 0x1059e05e0>

In [17]:
# Same element-wise +100 as the lambda version, this time via the named function.
vfunc(matrix)

array([[101, 102, 103],
       [104, 105, 106],
       [107, 108, 109]])

In [19]:
def myFunc_even(i):
    """Return ``i + 100`` when ``i`` is even; otherwise return ``i`` unchanged."""
    if i % 2 == 0:
        return i + 100
    return i


# NOTE: this rebinds ``vfunc``; from here on it wraps myFunc_even, not myFunc,
# so re-running an earlier ``vfunc(matrix)`` cell would give the even-only result.
vfunc = np.vectorize(myFunc_even)
vfunc

<numpy.vectorize at 0x10a8f0fd0>

In [20]:
# Only the even entries are shifted by 100; odd entries pass through unchanged.
vfunc(matrix)

array([[  1, 102,   3],
       [104,   5, 106],
       [  7, 108,   9]])

In [21]:
# Lambda form of the even-only rule: odd values pass through, even values gain 100.
add_100_even = lambda i: i if i % 2 else i + 100

# Wrap it so it can be applied element-wise to an array.
vfunc_even = np.vectorize(add_100_even)

# Apply the even-only +100 rule across the 3x3 matrix.
vfunc_even(matrix)

array([[  1, 102,   3],
       [104,   5, 106],
       [  7, 108,   9]])

### Loading data from files

In [22]:
# Parse the comma-separated file into a 2-D float array.
# The path is relative, so the CSV must sit in the notebook's working directory.
dataset = np.genfromtxt('normal_distribution.csv', delimiter=',')
dataset

array([[ 99.14931546, 104.03852715, 107.43534677,  97.85230675,
         98.74986914,  98.80833412,  96.81964892,  98.56783189],
       [ 92.02628776,  97.10439252,  99.32066924,  97.24584816,
         92.9267508 ,  92.65657752, 105.7197853 , 101.23162942],
       [ 95.66253664,  95.17750125,  90.93318132, 110.18889465,
         98.80084371, 105.95297652,  98.37481387, 106.54654286],
       [ 91.37294597, 100.96781394, 100.40118279, 113.42090475,
        105.48508838,  91.6604946 , 106.1472841 ,  95.08715803],
       [101.20862522, 103.5730309 , 100.28690912, 105.85269352,
         93.37126331, 108.57980357, 100.79478953,  94.20019732],
       [102.80387079,  98.29687616,  93.24376389,  97.24130034,
         89.03452725,  96.2832753 , 104.60344836, 101.13442416],
       [106.71751618, 102.97585605,  98.45723272, 100.72418901,
        106.39798503,  95.46493436,  94.35373179, 106.83273763],
       [ 96.02548256, 102.82360856, 106.47551845, 101.34745901,
        102.45651798,  98.7476749

In [23]:
# Element type shared by every value in the array.
dataset.dtype

dtype('float64')

In [24]:
# Python type of the container itself (as opposed to its element dtype).
type(dataset)

numpy.ndarray

In [25]:
# (rows, columns).
dataset.shape

(24, 8)

In [26]:
# Total number of elements (rows * columns).
dataset.size

192

In [27]:
# No axis given: mean over every element, returned as a single scalar.
np.mean(dataset)

100.16536917390624

In [31]:
np.mean(dataset, axis=0) # column-wise mean: collapses the rows, one value per column

array([ 99.7674351 ,  99.61229127, 101.14584656, 101.8449316 ,
        99.04871791,  99.67838931,  99.7848489 , 100.44049274])

In [32]:
np.mean(dataset, axis=1) # row-wise mean: collapses the columns, one value per row

array([100.17764752,  97.27899259, 100.20466135, 100.56785907,
       100.98341406,  97.83018578, 101.49052285,  99.75332252,
       101.89845125,  99.77973914, 101.013081  , 100.54961696,
        98.48256886,  98.49816126, 101.85956927,  97.05201872,
       102.62147483, 101.21177037,  99.58777968,  98.96533534,
       103.85792812, 101.89050288,  99.07192574,  99.34233101])

In [41]:
# Median of all elements taken together.
np.median(dataset)

100.18206203

In [33]:
# Median of each column (axis 0 is collapsed).
np.median(dataset, axis=0)

array([ 99.47902326, 100.10811926, 101.66384622, 100.96596128,
       100.19629221,  99.08416697,  98.79890832, 100.60581955])

In [34]:
# Median of each row (axis 1 is collapsed).
np.median(dataset, axis=1)

array([ 98.77910163,  97.17512034,  98.58782879, 100.68449836,
       101.00170737,  97.76908825, 101.85002253, 100.04756697,
       102.24292555,  99.59514997, 100.4955753 ,  99.8860714 ,
        99.00647994,  98.67276177, 102.44376222,  96.61933565,
       104.0968893 , 100.72023043,  98.70877396,  99.75008654,
       104.89344428, 101.00634942,  98.30543801,  99.18748092])

In [40]:
# Variance over all elements. np.var defaults to ddof=0 (population variance);
# pass ddof=1 if the sample variance is wanted.
np.var(dataset)

23.408155574137027

In [35]:
# Variance of each column (axis 0 is collapsed).
np.var(dataset, axis=0)

array([23.64757465, 29.78886109, 20.50542011, 26.03204443, 28.38853175,
       19.09960817, 17.67291174, 16.17923204])

In [36]:
# Variance of each row (axis 1 is collapsed).
np.var(dataset, axis=1)

array([11.47831221, 19.87124806, 38.76626578, 51.82791356, 24.08117728,
       22.86112657, 22.39746721, 16.95556067, 14.99854574, 28.70862927,
       10.31592621, 15.62432312, 51.65273836, 21.93340236, 17.11660913,
       22.57008154, 11.02824211,  5.01674729, 17.35648886, 11.40121072,
       10.98051411, 19.32123956, 12.93285875, 16.47774976])

In [37]:
# Standard deviation of each row; these are the square roots of the row variances.
np.std(dataset, axis=1)

array([3.38796579, 4.45771781, 6.22625616, 7.19916062, 4.90725761,
       4.78133105, 4.73259624, 4.11771304, 3.8727956 , 5.35804342,
       3.21184156, 3.95276145, 7.18698395, 4.68331105, 4.13722239,
       4.75079799, 3.32087972, 2.23980965, 4.16611196, 3.37656789,
       3.31368588, 4.3955932 , 3.59622841, 4.05927946])

In [38]:
# Standard deviation of each column; the square roots of the column variances.
np.std(dataset, axis=0)

array([4.8628772 , 5.45791728, 4.52829108, 5.10216076, 5.32808894,
       4.37030985, 4.20391624, 4.02234161])

In [39]:
# Standard deviation over all elements (square root of the overall variance).
np.std(dataset)

4.838197554269257

In [42]:
# Sum of every element in the array.
np.sum(dataset)

19231.75088139

In [43]:
# Sum of each column (axis 0 is collapsed).
np.sum(dataset, axis=0)

array([2394.4184425 , 2390.6949905 , 2427.50031738, 2444.27835834,
       2377.16922983, 2392.28134355, 2394.83637359, 2410.5718257 ])

### Indexing and slicing

In [44]:
# Indexing follows matrix[row][col]; a single index selects a whole row (here the first).
dataset[0]

array([ 99.14931546, 104.03852715, 107.43534677,  97.85230675,
        98.74986914,  98.80833412,  96.81964892,  98.56783189])

In [45]:
# A (row, column) index pair returns one scalar element.
dataset[0, 0]

99.14931546

In [46]:
# Chained indexing: take row 0 first, then element 0 of that row.
# Yields the same value as dataset[0, 0].
dataset[0][0]

99.14931546

In [47]:
# Slices on both axes keep both dimensions, so the result is a 1x1 2-D array.
dataset[0:1, 0:1]

array([[99.14931546]])

In [48]:
# Row 0, columns 0 through 7 (the stop index 8 is exclusive) - here the entire row.
dataset[0, :8]

array([ 99.14931546, 104.03852715, 107.43534677,  97.85230675,
        98.74986914,  98.80833412,  96.81964892,  98.56783189])

In [49]:
# Row 0, all columns.
dataset[0, :]

array([ 99.14931546, 104.03852715, 107.43534677,  97.85230675,
        98.74986914,  98.80833412,  96.81964892,  98.56783189])

In [50]:
# Row 0, every column except the last.
dataset[0, :-1]

array([ 99.14931546, 104.03852715, 107.43534677,  97.85230675,
        98.74986914,  98.80833412,  96.81964892])

In [51]:
# Last column of the first row; the row *slice* keeps one dimension,
# so the result is a 1-element array rather than a scalar.
dataset[0:1, -1]

array([98.56783189])

In [53]:
# X: every column except the last (2-D); y: the last column as a 1-D array.
X, y = dataset[:, :-1], dataset[:, -1]


In [55]:
# All rows, first two columns.
dataset[:, 0:2]

array([[ 99.14931546, 104.03852715],
       [ 92.02628776,  97.10439252],
       [ 95.66253664,  95.17750125],
       [ 91.37294597, 100.96781394],
       [101.20862522, 103.5730309 ],
       [102.80387079,  98.29687616],
       [106.71751618, 102.97585605],
       [ 96.02548256, 102.82360856],
       [105.30350449,  92.87730812],
       [110.44484313,  93.87155456],
       [101.3514185 , 100.37372248],
       [ 97.21315663, 107.02874163],
       [ 95.65982034, 107.22482426],
       [100.39303522,  92.0108226 ],
       [103.1521596 , 109.40523174],
       [106.11454989,  88.80221141],
       [ 96.78266211,  99.84251605],
       [101.86186193, 103.61720152],
       [ 97.49594839,  96.59385486],
       [ 96.76814836,  91.6779221 ],
       [106.89005002, 106.57364584],
       [ 99.80873105, 101.63973121],
       [ 96.10020311,  94.57421727],
       [ 94.11176915,  99.62387832]])

In [56]:
# Last row, all columns (negative indices count from the end).
dataset[-1, :]

array([ 94.11176915,  99.62387832, 104.51786419,  97.62787811,
        93.97853495,  98.75108352, 106.05042487, 100.07721494])

In [57]:
# Top-left 2x2 block.
dataset[0:2, 0:2]

array([[ 99.14931546, 104.03852715],
       [ 92.02628776,  97.10439252]])

In [58]:
# Bottom-right element: last row, last column.
dataset[-1, -1]

100.07721494

In [59]:
# Mean of the last column only.
np.mean(dataset[:, -1])

100.4404927375

### Splitting & Iterating

In [61]:
# NOTE: rebinds ``dataset`` - the CSV data loaded earlier is replaced by a
# 6x8 grid of the integers 0..47, which is easier to follow when splitting.
dataset = np.reshape(np.arange(48), (6, 8))
dataset

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29, 30, 31],
       [32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47]])

In [65]:
# Horizontal splitting: cut along the column axis into 2 equal-width pieces.
np.hsplit(dataset, 2)

[array([[ 0,  1,  2,  3],
        [ 8,  9, 10, 11],
        [16, 17, 18, 19],
        [24, 25, 26, 27],
        [32, 33, 34, 35],
        [40, 41, 42, 43]]),
 array([[ 4,  5,  6,  7],
        [12, 13, 14, 15],
        [20, 21, 22, 23],
        [28, 29, 30, 31],
        [36, 37, 38, 39],
        [44, 45, 46, 47]])]

In [67]:
# Vertical split: cut along the row axis into 3 equal-height pieces.
np.vsplit(dataset, 3)

[array([[ 0,  1,  2,  3,  4,  5,  6,  7],
        [ 8,  9, 10, 11, 12, 13, 14, 15]]),
 array([[16, 17, 18, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 29, 30, 31]]),
 array([[32, 33, 34, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 45, 46, 47]])]

In [69]:
# Splitting along axis=1 produces the same pieces as np.hsplit(dataset, 2).
np.split(dataset, 2, axis=1)

[array([[ 0,  1,  2,  3],
        [ 8,  9, 10, 11],
        [16, 17, 18, 19],
        [24, 25, 26, 27],
        [32, 33, 34, 35],
        [40, 41, 42, 43]]),
 array([[ 4,  5,  6,  7],
        [12, 13, 14, 15],
        [20, 21, 22, 23],
        [28, 29, 30, 31],
        [36, 37, 38, 39],
        [44, 45, 46, 47]])]

In [70]:
# Splitting along axis=0 produces the same pieces as np.vsplit(dataset, 3).
np.split(dataset, 3, axis=0)

[array([[ 0,  1,  2,  3,  4,  5,  6,  7],
        [ 8,  9, 10, 11, 12, 13, 14, 15]]),
 array([[16, 17, 18, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 29, 30, 31]]),
 array([[32, 33, 34, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 45, 46, 47]])]

In [71]:
# Iterating: display the array that the loops below walk over.
dataset

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29, 30, 31],
       [32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47]])

In [72]:
# Iterating over a 2-D array yields one row (a 1-D array) per step.
for current_row in dataset:
    print(current_row)

[0 1 2 3 4 5 6 7]
[ 8  9 10 11 12 13 14 15]
[16 17 18 19 20 21 22 23]
[24 25 26 27 28 29 30 31]
[32 33 34 35 36 37 38 39]
[40 41 42 43 44 45 46 47]


In [74]:
# Nested loops: the outer loop walks the rows, the inner loop walks the
# values of each row, so every element is printed in row-major order.
for current_row in dataset:
    for element in current_row:
        print(element, end=', ')

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 

In [76]:
# np.nditer visits every element in a single flat loop, so no nesting is needed.
for element in np.nditer(dataset):
    print(element, end=', ')

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 