## Numpy 

In [1]:
import numpy as np

### indexing

In [2]:
array_a = np.array([[2,3,5],[1,4,2]])
array_a

array([[2, 3, 5],
       [1, 4, 2]])

In [3]:
type(array_a)

numpy.ndarray

In [4]:
array_a[0]

array([2, 3, 5])

In [5]:
array_a[0][2]

5

In [6]:
array_a[1][2]

2

In [7]:
array_a[:,1:]

array([[3, 5],
       [4, 2]])

In [8]:
array_a[-1]

array([1, 4, 2])

In [9]:
array_a[0,1]= 4

In [10]:
array_a

array([[2, 4, 5],
       [1, 4, 2]])

In [11]:
array_a[0]=4

In [12]:
array_a

array([[4, 4, 4],
       [1, 4, 2]])

In [13]:
array_a[:,0 ]=10

In [14]:
array_a

array([[10,  4,  4],
       [10,  4,  2]])

## Supported data types

In [15]:
import numpy as np

In [16]:
array_b = np.array([[1,2,4],[4,5,7]], dtype='float')
array_b

array([[1., 2., 4.],
       [4., 5., 7.]])

In [17]:
array_b = np.array([[1,2,4],[4,5,7]], dtype=np.float16)
array_b

array([[1., 2., 4.],
       [4., 5., 7.]], dtype=float16)

In [18]:
array_b = np.array([[1,2,4],[4,5,7]], dtype=np.complex64)
array_b

array([[1.+0.j, 2.+0.j, 4.+0.j],
       [4.+0.j, 5.+0.j, 7.+0.j]], dtype=complex64)

In [19]:
array_b = np.array([[1,2,4],[4,5,7]], dtype=np.bool)
array_b

array([[ True,  True,  True],
       [ True,  True,  True]])

In [20]:
array_b = np.array([[1,2,4],[4,5,7]], dtype=np.str)
array_b

array([['1', '2', '4'],
       ['4', '5', '7']], dtype='<U1')

## Broadcasting

Broadcasting rules > 
The arrays have the same shape. 
The arrays have the same number of dimensions , and the length of each dimension is either common or 1.
The arrays that have too few dimension can have their shapes altered with a dimension 1, to satisfy the second rule.

In [21]:
array_a = np.array([1,2,3])
array_a

array([1, 2, 3])

In [22]:
array_b = np.array([[1],[2]])
array_b

array([[1],
       [2]])

In [23]:
matrix_c = np.array([[1,2,3],[4,5,6]])
matrix_c

array([[1, 2, 3],
       [4, 5, 6]])

In [24]:
np.add(array_a,matrix_c)

array([[2, 4, 6],
       [5, 7, 9]])

In [25]:
np.add(array_b,matrix_c)

array([[2, 3, 4],
       [6, 7, 8]])

### Type casting

In [26]:
np.add(array_b, matrix_c , dtype=np.float64)

array([[2., 3., 4.],
       [6., 7., 8.]])

In [27]:
np.add(array_b, matrix_c , dtype=np.str)

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U'), dtype('<U')) -> dtype('<U')

### Running over an Axis

In [28]:
np.mean(matrix_c,axis=0)  # axis = 0 , finding the mean for each column of the array

array([2.5, 3.5, 4.5])

In [29]:
matrix_c

array([[1, 2, 3],
       [4, 5, 6]])

In [30]:
np.mean(matrix_c,axis=1)  # axis = 1 , finding the mean for each row of the array

array([2., 5.])

### ndarrays

In [31]:
#  ndarray = N-Dimensional array
# N > Natural Numbers 

array_a = np.array([1,2,3])

In [32]:
array_a

array([1, 2, 3])

In [33]:
type(array_a)

numpy.ndarray

In [34]:
print(array_a)

[1 2 3]


In [35]:
array_a.shape

(3,)

In [36]:
array_b = np.array([[7,8,9],[10,11,12]])
array_b

array([[ 7,  8,  9],
       [10, 11, 12]])

In [37]:
array_b.shape

(2, 3)

In [38]:
type(array_b)

numpy.ndarray

In [39]:
print(array_b)

[[ 7  8  9]
 [10 11 12]]


In [40]:
array_c = np.array(13)

In [41]:
type(array_c)

numpy.ndarray

In [42]:
print(array_c)

13


In [43]:
array_c.shape

()

In [44]:
array_d = np.array([13])


In [45]:
type(array_d)

numpy.ndarray

In [46]:
array_d.shape

(1,)

In [47]:
print(array_d)

[13]


### List vs Array

In [48]:
list_a = [1,2,4,5,6,7]

In [49]:
len(list_a)

6

In [50]:
list_a = [[1,2,4],[5,6,7]]


In [51]:
len(list_a)

2

In [52]:
type(list_a)

list

In [53]:
array_a = np.array(list_a)

In [54]:
array_a

array([[1, 2, 4],
       [5, 6, 7]])

In [55]:
type(array_a)

numpy.ndarray

In [56]:
print(list_a)

[[1, 2, 4], [5, 6, 7]]


In [57]:
print(array_a)

[[1 2 4]
 [5 6 7]]


In [58]:
# List have len() ## Array have shape
array_a.shape

(2, 3)

In [59]:
list_a.shape

AttributeError: 'list' object has no attribute 'shape'

In [60]:
len(list_a)

2

In [61]:
list_b = list_a[0] + list_a[1]
array_b = array_a[0]+array_a[1]

In [62]:
print(list_b)

[1, 2, 4, 5, 6, 7]


In [63]:
print(array_b)

[ 6  8 11]


In [64]:
np.sqrt(array_a)

array([[1.        , 1.41421356, 2.        ],
       [2.23606798, 2.44948974, 2.64575131]])

### String vs Object vs Numbers

In [65]:
lending_co_lt = np.genfromtxt('data/lending-co-LT.csv', delimiter=',',dtype=np.str) # np > NumPy specific datatype # int32 > Integers up to 32 bits

In [66]:
lending_co_lt

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

In [67]:
print(lending_co_lt)

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


## Slicing

### Conditional Slicing

In [68]:
matrix_c = np.array([[1,1,0,3,5],[2,2,5,3,1],[2,1,0,8,6]])
matrix_c

array([[1, 1, 0, 3, 5],
       [2, 2, 5, 3, 1],
       [2, 1, 0, 8, 6]])

In [69]:
matrix_c[:,0]

array([1, 2, 2])

In [70]:
matrix_c[:,0]>1

array([False,  True,  True])

In [71]:
matrix_c[matrix_c[:,0]>1]

array([[2, 2, 5, 3, 1],
       [2, 1, 0, 8, 6]])

In [72]:
matrix_c[matrix_c[:,:]%2 == 0]

array([0, 2, 2, 2, 0, 8, 6])

In [73]:
matrix_c[(matrix_c[:,:]%2 ==0) & (matrix_c[:,:]<=4)]

array([0, 2, 2, 2, 0])

In [74]:
matrix_c[(matrix_c[:,:]%2 ==0) | (matrix_c[:,:]<=4)]

array([1, 1, 0, 3, 2, 2, 3, 1, 2, 1, 0, 8, 6])

### Dimensions and the Squeeze Function

In [75]:
matrix_D = np.array([[3,4,6,1,2],[7,5,8,9,0],[4,6,2,1,0]])
matrix_D

array([[3, 4, 6, 1, 2],
       [7, 5, 8, 9, 0],
       [4, 6, 2, 1, 0]])

In [76]:
type(matrix_D)

numpy.ndarray

In [77]:
type(matrix_D[0,0])

numpy.int32

In [78]:
matrix_D[0,0:1]

array([3])

In [79]:
type(matrix_D[0,0:1])

numpy.ndarray

In [80]:
matrix_D[0:1,0:1]

array([[3]])

In [81]:
matrix_D[0:1,0:1].shape

(1, 1)

In [82]:
print(matrix_D[0,0].shape) #scalar
print(matrix_D[0,0:1].shape) #vector
print(matrix_D[0:1,0:1].shape) # Matrix

()
(1,)
(1, 1)


In [83]:
matrix_D[0:1,0:1].squeeze()

array(3)

In [84]:
print(matrix_D[0:1,0:1].squeeze())

3


## Generating Data with numpy

In [85]:
import numpy as np

### np.empty(), np.zeros(), np.ones(), np.full()

In [86]:
array_empty = np.empty(shape = (2,3))

In [87]:
array_empty

array([[1.        , 1.41421356, 2.        ],
       [2.23606798, 2.44948974, 2.64575131]])

In [88]:
array_0s = np.zeros(shape=(2,3))
array_0s

array([[0., 0., 0.],
       [0., 0., 0.]])

In [89]:
array_0s = np.zeros(shape=(2,3), dtype = np.int8)
array_0s

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [90]:
array_1s = np.ones(shape=(2,3), dtype=int)
array_1s

array([[1, 1, 1],
       [1, 1, 1]])

In [91]:
array_full = np.full(shape = (2,3),fill_value=2)
array_full

array([[2, 2, 2],
       [2, 2, 2]])

### "_like" functions

In [92]:
matrix_A = np.array([[3,2,1,6,7],[3,6,8,2,1],[7,9,0,9,0]])
matrix_A

array([[3, 2, 1, 6, 7],
       [3, 6, 8, 2, 1],
       [7, 9, 0, 9, 0]])

In [93]:
arr_empty_like = np.empty_like(matrix_A)
arr_empty_like

array([[   0,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0],
       [1508,    0,    0,    0,    0]])

In [94]:
arr_0s_like = np.zeros_like(matrix_A) # same way ones_like, full_like
arr_0s_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

### np.arrange()

In [95]:
list(range(10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [96]:
array_rng = np.arange(30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [97]:
array_rng = np.arange(start=0,stop=30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [98]:
array_rng = np.arange(start = 0, stop=30, step = 2.5, dtype = np.int32)
array_rng

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22])

## Random Generators 

### Defining Random Generators

In [99]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg

In [100]:
array_RG = gen(pcg())
array_RG.normal(size=4)

array([ 0.87932378, -0.4786603 ,  0.73011772,  0.85910844])

In [101]:
array_RG = gen(pcg())
array_RG.normal(size=(3,4))

array([[ 1.82575432, -0.05153985, -0.30817317, -1.38229384],
       [-0.86513756,  0.04680135, -0.0374176 ,  0.02147147],
       [-1.01529372, -0.00587239,  1.2429951 , -1.20424217]])

In [102]:
array_RG = gen(pcg(seed = 365))
array_RG.normal(size=(3,4))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641],
       [-0.6866818 , -0.50922173, -0.7999526 ,  0.73041825],
       [ 0.08825439, -2.1177576 ,  0.65526774, -0.48095012]])

 #### Generating integers  , Probabilities and Random Choice 

In [103]:
array_RG = gen(pcg(seed = 365))
array_RG.integers(low = 10 , high = 100 , size=(3,4))

array([[18, 78, 64, 78],
       [84, 66, 67, 28],
       [10, 69, 45, 15]], dtype=int64)

In [104]:
array_RG = gen(pcg(seed = 365))
array_RG.random(size=(3,4))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599],
       [0.66501486, 0.06559111, 0.71326309, 0.10812106],
       [0.87969046, 0.49405844, 0.82472673, 0.45652944]])

In [105]:
array_RG = gen(pcg(seed = 365))
array_RG.choice([1,5,3,2,4],p = [0.1,0.1,0.1,0.1,0.2] ,size=(3,4))

ValueError: probabilities do not sum to 1

### Statistics with Numpy

In [106]:
import numpy as np

### np.mean()

In [107]:
matrix_a = np.array([[1,0,4,3,6],[3,5,4,7,1],[4,3,1,2,7]])
matrix_a

array([[1, 0, 4, 3, 6],
       [3, 5, 4, 7, 1],
       [4, 3, 1, 2, 7]])

In [108]:
np.mean(matrix_a)

3.4

In [109]:
np.mean(matrix_a[0])

2.8

In [110]:
np.mean(matrix_a[:,0])

2.6666666666666665

In [111]:
np.mean(matrix_a,axis = 0) # row

array([2.66666667, 2.66666667, 3.        , 4.        , 4.66666667])

In [112]:
np.mean(matrix_a, axis = 1) # column

array([2.8, 4. , 3.4])

In [113]:
np.mean(matrix_a,axis = 0, dtype = np.int64)

array([2, 2, 3, 4, 4], dtype=int64)

### Min and Max

In [114]:
matrix_a = np.array([[1,0,4,3,6],[3,5,4,7,1],[4,3,1,2,7]])
matrix_a

array([[1, 0, 4, 3, 6],
       [3, 5, 4, 7, 1],
       [4, 3, 1, 2, 7]])

In [115]:
np.min(matrix_a)

0

In [116]:
np.amin(matrix_a)

0

In [117]:
np.minimum(matrix_a[0], matrix_a[2])

array([1, 0, 1, 2, 6])

In [118]:
np.minimum.reduce(matrix_a)

array([1, 0, 1, 2, 1])

In [119]:
np.min(matrix_a,axis = 0) # same np.minimum.reduce(matrix_a)

array([1, 0, 1, 2, 1])

In [120]:
np.max(matrix_a)

7

### Statistical Order Function

In [121]:
import numpy as np

In [122]:
matrix_a = np.array([[1,0,4,3,6],[3,5,4,7,1],[4,3,1,2,7]])
matrix_a

array([[1, 0, 4, 3, 6],
       [3, 5, 4, 7, 1],
       [4, 3, 1, 2, 7]])

In [123]:
np.ptp(matrix_a)  # ptp = peak to peak , Returns the difference between the highest and lowest values within an array

7

In [124]:
np.ptp(matrix_a, axis = 0)

array([3, 5, 3, 5, 6])

In [125]:
np.ptp(matrix_a, axis = 1)

array([6, 6, 6])

In [126]:
np.sort(matrix_a, axis= None)

array([0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 6, 7, 7])

In [127]:
np.percentile(matrix_a, 70) # 70%

4.0

In [128]:
np.percentile(matrix_a, 70, interpolation='higher') # Lower, midpoint, nearest

4

In [129]:
np.quantile(matrix_a, 0.70, interpolation='nearest')

4

## Preprocessing with NumPy

In [1]:
import numpy as np

### Checking for Missing Values

In [2]:
lending_co_data_numeric = np.loadtxt('data/Lending-company-Numeric.csv', delimiter = ',')

In [3]:
np.isnan(lending_co_data_numeric)

array([[False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       ...,
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False]])

In [4]:
np.isnan(lending_co_data_numeric).sum()

0

In [5]:
lending_co_data_numeric_NAN = np.loadtxt('data/Lending-company-Numeric-NAN.csv', delimiter = ';') # Crash , beacause of NAN ,loadtxt not working, so we genarate on NAN to txt

ValueError: could not convert string to float: ''

In [6]:
lending_co_data_numeric_NAN

NameError: name 'lending_co_data_numeric_NAN' is not defined

In [7]:
np.isnan()

ValueError: invalid number of arguments

In [8]:
lending_co_data_numeric_NAN = np.genfromtxt('data/Lending-company-Numeric-NAN.csv', delimiter = ';')

In [9]:
lending_co_data_numeric_NAN

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [10]:
np.isnan(lending_co_data_numeric_NAN)

array([[False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       ...,
       [ True, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False]])

In [11]:
np.isnan(lending_co_data_numeric_NAN).sum()

260

In [12]:
# fill with 0
lending_co_data_numeric_NAN_F = np.genfromtxt('data/Lending-company-Numeric-NAN.csv', delimiter = ';', filling_values=0)

In [13]:
np.isnan(lending_co_data_numeric_NAN_F).sum()

0

In [14]:
temporary_fill = np.nanmax(lending_co_data_numeric_NAN).round(2)+1

In [15]:
temporary_fill

64002.0

In [16]:
lending_co_data_numeric_NAN_N = np.genfromtxt('data/Lending-company-Numeric-NAN.csv', 
                                              delimiter = ';', 
                                              filling_values=temporary_fill)

In [17]:
np.isnan(lending_co_data_numeric_NAN_N).sum()

0

### Substituting Missing Values

In [18]:
lending_co_data_numeric_NAN = np.genfromtxt('data/Lending-company-Numeric-NAN.csv', 
                                            delimiter = ';')
lending_co_data_numeric_NAN

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [19]:
temporary_mean = np.nanmean(lending_co_data_numeric_NAN, axis = 0).round(2)

In [20]:
temporary_mean[0]

2250.25

In [21]:
temporary_mean[2]

365.0

In [22]:
temporary_fill = np.nanmax(lending_co_data_numeric_NAN).round(2)+1

lending_co_data_numeric_NAN = np.genfromtxt('data/Lending-company-Numeric-NAN.csv', 
                                              delimiter = ';', 
                                              filling_values = temporary_fill)

In [23]:
temporary_fill

64002.0

In [24]:
np.mean(lending_co_data_numeric_NAN[:,0]).round(2)

4263.25

In [25]:
lending_co_data_numeric_NAN

array([[2.0000e+03, 4.0000e+01, 3.6500e+02, 3.1210e+03, 4.2410e+03,
        1.3621e+04],
       [2.0000e+03, 4.0000e+01, 3.6500e+02, 3.0610e+03, 4.1710e+03,
        1.5041e+04],
       [1.0000e+03, 4.0000e+01, 3.6500e+02, 2.1600e+03, 3.2800e+03,
        1.5340e+04],
       ...,
       [6.4002e+04, 4.0000e+01, 3.6500e+02, 4.2010e+03, 5.0010e+03,
        1.6600e+04],
       [1.0000e+03, 4.0000e+01, 3.6500e+02, 2.0800e+03, 3.3200e+03,
        1.5600e+04],
       [2.0000e+03, 4.0000e+01, 3.6500e+02, 4.6010e+03, 4.6010e+03,
        1.6600e+04]])

In [26]:
temporary_mean[0]

2250.25

In [27]:
lending_co_data_numeric_NAN[:,0]=np.where(lending_co_data_numeric_NAN[:,0]== temporary_fill,
                                         temporary_mean[0],
                                         lending_co_data_numeric_NAN[:,0])

In [28]:
lending_co_data_numeric_NAN

array([[ 2000.  ,    40.  ,   365.  ,  3121.  ,  4241.  , 13621.  ],
       [ 2000.  ,    40.  ,   365.  ,  3061.  ,  4171.  , 15041.  ],
       [ 1000.  ,    40.  ,   365.  ,  2160.  ,  3280.  , 15340.  ],
       ...,
       [ 2250.25,    40.  ,   365.  ,  4201.  ,  5001.  , 16600.  ],
       [ 1000.  ,    40.  ,   365.  ,  2080.  ,  3320.  , 15600.  ],
       [ 2000.  ,    40.  ,   365.  ,  4601.  ,  4601.  , 16600.  ]])

In [29]:
for i in range(lending_co_data_numeric_NAN.shape[1]):
    lending_co_data_numeric_NAN[:,i] = np.where(lending_co_data_numeric_NAN[:,i] == temporary_fill,
                                               temporary_mean[i],
                                               lending_co_data_numeric_NAN[:,i])

In [30]:
# for negative value
for i in range(lending_co_data_numeric_NAN.shape[1]):
    lending_co_data_numeric_NAN[:,i] = np.where(lending_co_data_numeric_NAN[:,i] <0,
                                               0,
                                               lending_co_data_numeric_NAN[:,i])

In [31]:
lending_co_data_numeric_NAN

array([[ 2000.  ,    40.  ,   365.  ,  3121.  ,  4241.  , 13621.  ],
       [ 2000.  ,    40.  ,   365.  ,  3061.  ,  4171.  , 15041.  ],
       [ 1000.  ,    40.  ,   365.  ,  2160.  ,  3280.  , 15340.  ],
       ...,
       [ 2250.25,    40.  ,   365.  ,  4201.  ,  5001.  , 16600.  ],
       [ 1000.  ,    40.  ,   365.  ,  2080.  ,  3320.  , 15600.  ],
       [ 2000.  ,    40.  ,   365.  ,  4601.  ,  4601.  , 16600.  ]])

In [33]:
np.isnan(lending_co_data_numeric_NAN).sum()

0

### Reshaping

In [35]:
import numpy as np

In [36]:
lending_co_data_numeric = np.loadtxt('data/Lending-company-Numeric.csv',delimiter=',')

In [37]:
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [38]:
lending_co_data_numeric.shape

(1043, 6)

In [39]:
np.reshape(lending_co_data_numeric,(6,1043))

array([[ 2000.,    40.,   365., ...,   365.,  1581.,  3041.],
       [12277.,  2000.,    40., ...,    50.,   365.,  5350.],
       [ 6850., 15150.,  1000., ...,  2000.,    40.,   365.],
       [ 3101.,  4351., 16600., ..., 16600.,  2000.,    40.],
       [  365.,  3441.,  4661., ...,  8450., 22250.,  2000.],
       [   40.,   365.,  3701., ...,  4601.,  4601., 16600.]])

In [40]:
np.transpose(lending_co_data_numeric)

array([[ 2000.,  2000.,  1000., ...,  2000.,  1000.,  2000.],
       [   40.,    40.,    40., ...,    40.,    40.,    40.],
       [  365.,   365.,   365., ...,   365.,   365.,   365.],
       [ 3121.,  3061.,  2160., ...,  4201.,  2080.,  4601.],
       [ 4241.,  4171.,  3280., ...,  5001.,  3320.,  4601.],
       [13621., 15041., 15340., ..., 16600., 15600., 16600.]])

In [41]:
np.reshape(lending_co_data_numeric,(3,500))

ValueError: cannot reshape array of size 6258 into shape (3,500)

In [42]:
np.reshape(lending_co_data_numeric,(3,2086))

array([[ 2000.,    40.,   365., ...,    50.,   365.,  5350.],
       [ 6850., 15150.,  1000., ..., 16600.,  2000.,    40.],
       [  365.,  3441.,  4661., ...,  4601.,  4601., 16600.]])

In [44]:
np.reshape(lending_co_data_numeric,(2,3,1043))

array([[[ 2000.,    40.,   365., ...,   365.,  1581.,  3041.],
        [12277.,  2000.,    40., ...,    50.,   365.,  5350.],
        [ 6850., 15150.,  1000., ...,  2000.,    40.,   365.]],

       [[ 3101.,  4351., 16600., ..., 16600.,  2000.,    40.],
        [  365.,  3441.,  4661., ...,  8450., 22250.,  2000.],
        [   40.,   365.,  3701., ...,  4601.,  4601., 16600.]]])

### Removing Values 

In [45]:
import numpy as np

In [46]:
lending_co_data_numeric = np.loadtxt('data/Lending-company-Numeric.csv',delimiter=',')

In [47]:
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [53]:
np.delete(lending_co_data_numeric,1) ## np.delete (array, index ,axis=0/1 row/column)  

array([[ 2000.,   365.,  3121.,  4241., 13621.],
       [ 2000.,   365.,  3061.,  4171., 15041.],
       [ 1000.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,   365.,  4201.,  5001., 16600.],
       [ 1000.,   365.,  2080.,  3320., 15600.],
       [ 2000.,   365.,  4601.,  4601., 16600.]])

In [54]:
np.delete(lending_co_data_numeric,0,axis=0)

array([[ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       [ 2000.,    40.,   365.,  3041.,  4241., 15321.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [55]:
np.delete(lending_co_data_numeric,(0,3),axis=1) # delete 1st, 4th column

array([[   40.,   365.,  4241., 13621.],
       [   40.,   365.,  4171., 15041.],
       [   40.,   365.,  3280., 15340.],
       ...,
       [   40.,   365.,  5001., 16600.],
       [   40.,   365.,  3320., 15600.],
       [   40.,   365.,  4601., 16600.]])

In [57]:
np.delete(np.delete(lending_co_data_numeric,(0,3),axis=1),[0,2,-1],axis=0) 
# delete 1st, 4th column ## delete 1st ,3rd and last rows

array([[   40.,   365.,  4171., 15041.],
       [   40.,   365.,  4241., 15321.],
       [   50.,   365.,  4820., 13720.],
       ...,
       [   40.,   365.,  5440., 16600.],
       [   40.,   365.,  5001., 16600.],
       [   40.,   365.,  3320., 15600.]])

### Sorting 

In [59]:
import numpy as np
lending_co_data_numeric = np.loadtxt('data/Lending-company-Numeric.csv',delimiter=',')
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [63]:
np.sort(lending_co_data_numeric)

array([[   40.,   365.,  2000.,  3121.,  4241., 13621.],
       [   40.,   365.,  2000.,  3061.,  4171., 15041.],
       [   40.,   365.,  1000.,  2160.,  3280., 15340.],
       ...,
       [   40.,   365.,  2000.,  4201.,  5001., 16600.],
       [   40.,   365.,  1000.,  2080.,  3320., 15600.],
       [   40.,   365.,  2000.,  4601.,  4601., 16600.]])

In [66]:
np.sort(lending_co_data_numeric,axis=0) ## np.set_printoptions(suppress=True) solve scientific notation

array([[ 1000.,    35.,   365., -2870., -2870.,  -350.],
       [ 1000.,    35.,   365., -2550., -2100.,   150.],
       [ 1000.,    35.,   365., -2450., -2000.,  1100.],
       ...,
       [ 9000.,   125.,   365., 16751., 18751., 54625.],
       [ 9000.,   165.,   365., 17650., 20001., 54625.],
       [ 9000.,   165.,   365., 19001., 22001., 64001.]])

In [65]:
np.set_printoptions(suppress=True)

In [67]:
np.sort(lending_co_data_numeric,axis=None)

array([-2870., -2870., -2550., ..., 54625., 54625., 64001.])

In [68]:
np.sort(lending_co_data_numeric,axis=1)

array([[   40.,   365.,  2000.,  3121.,  4241., 13621.],
       [   40.,   365.,  2000.,  3061.,  4171., 15041.],
       [   40.,   365.,  1000.,  2160.,  3280., 15340.],
       ...,
       [   40.,   365.,  2000.,  4201.,  5001., 16600.],
       [   40.,   365.,  1000.,  2080.,  3320., 15600.],
       [   40.,   365.,  2000.,  4601.,  4601., 16600.]])

In [73]:
np.sort(-lending_co_data_numeric)

array([[-13621.,  -4241.,  -3121.,  -2000.,   -365.,    -40.],
       [-15041.,  -4171.,  -3061.,  -2000.,   -365.,    -40.],
       [-15340.,  -3280.,  -2160.,  -1000.,   -365.,    -40.],
       ...,
       [-16600.,  -5001.,  -4201.,  -2000.,   -365.,    -40.],
       [-15600.,  -3320.,  -2080.,  -1000.,   -365.,    -40.],
       [-16600.,  -4601.,  -4601.,  -2000.,   -365.,    -40.]])

In [74]:
-np.sort(-lending_co_data_numeric)

array([[13621.,  4241.,  3121.,  2000.,   365.,    40.],
       [15041.,  4171.,  3061.,  2000.,   365.,    40.],
       [15340.,  3280.,  2160.,  1000.,   365.,    40.],
       ...,
       [16600.,  5001.,  4201.,  2000.,   365.,    40.],
       [15600.,  3320.,  2080.,  1000.,   365.,    40.],
       [16600.,  4601.,  4601.,  2000.,   365.,    40.]])

In [82]:
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [83]:
np.sort(lending_co_data_numeric[:,3]) # np.sort() not overwrite
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [85]:
lending_co_data_numeric[:,3].sort()   #ndarray.sort() is overwrite
lending_co_data_numeric

array([[ 2000.,    40.,   365., -2870.,  4241., 13621.],
       [ 2000.,    40.,   365., -2550.,  4171., 15041.],
       [ 1000.,    40.,   365., -2450.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365., 16751.,  5001., 16600.],
       [ 1000.,    40.,   365., 17650.,  3320., 15600.],
       [ 2000.,    40.,   365., 19001.,  4601., 16600.]])

### Argument Function

#### np.argsort()

In [86]:
import numpy as np
lending_co_data_numeric = np.loadtxt('data/Lending-company-Numeric.csv',delimiter=',')
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [87]:
np.argsort(lending_co_data_numeric)

array([[1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5],
       ...,
       [1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5]], dtype=int64)

#### np.argwhere()

In [1]:
import numpy as np
lending_co_data_numeric = np.loadtxt('data/Lending-company-Numeric.csv',delimiter=',')
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [2]:
np.argwhere(lending_co_data_numeric)

array([[   0,    0],
       [   0,    1],
       [   0,    2],
       ...,
       [1042,    3],
       [1042,    4],
       [1042,    5]], dtype=int64)