## Checking for the missing values in Ndarrays

In [1]:
import numpy as np

In [2]:
# np.loadtxt() cannot parse missing values: it raises an error if any field is empty, so a successful load already tells us nothing is missing

In [3]:
lending_co_data_numeric = np.loadtxt("Lending-Company-Numeric-Data.csv", delimiter= ',')

In [4]:
# np.isnan() - Returns a boolean array with the same shape as the input: True where the value is NaN, False elsewhere.

In [5]:
np.isnan(lending_co_data_numeric).sum() # Summing the boolean matrix counts its True entries, i.e. the number of NaNs
# 0 means NO missing values

0

In [6]:
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter= ';')

In [7]:
np.isnan(lending_co_data_numeric_NAN).sum() 

260

In [8]:
# Cross-check: convert the NumPy array to a pandas DataFrame and count the NaNs there
import pandas as pd
df = pd.DataFrame(lending_co_data_numeric_NAN)
# isna().sum() gives the NaN count per column; summing those gives the total for the whole dataset
df_1 = df.isna().sum()
df_1.sum()

260

In [9]:
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter= ';', filling_values= 0)

In [10]:
np.isnan(lending_co_data_numeric_NAN).sum() 

0

In [11]:
# Use a number greater than the highest value of the dataset using np.nanmax()

In [12]:
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter= ';')

In [13]:
temporary_fill = np.nanmax(lending_co_data_numeric_NAN).round(2) + 1

In [14]:
temporary_fill

64002.0

In [15]:
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter= ';', 
                                            filling_values= temporary_fill)
np.isnan(lending_co_data_numeric_NAN).sum()
# 0 mean we filled out all the missing values

0

## Substituting Missing Values in NumPy

In [16]:
# Fill all missing values with the mean values
# This won't change the overall interpretation of the dataset
# All missing values would be considered average
# NOTE : This is NOT always Valid

In [17]:
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter= ';')
lending_co_data_numeric_NAN

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [18]:
temp_mean = np.nanmean(lending_co_data_numeric_NAN, axis = 0).round(2)
temp_mean

array([ 2250.25,    46.11,   365.  ,  3895.99,  5160.75, 16571.44])

In [19]:
temp_mean[0]

2250.25

In [20]:
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter= ';', 
                                            filling_values= temporary_fill)
temporary_fill

64002.0

In [21]:
np.mean(lending_co_data_numeric_NAN[:,0]).round(2)

4263.25

In [22]:
# Use np.where to replace the temporary filler in the first column with that column's mean,
# leaving every other value untouched
lending_co_data_numeric_NAN[:,0] = np.where(lending_co_data_numeric_NAN[:,0] == temporary_fill,
                                           temp_mean[0],
                                           lending_co_data_numeric_NAN[:,0])

In [23]:
np.mean(lending_co_data_numeric_NAN[:,0]).round(2)

2250.25

In [24]:
# Whenever we add the mean of the set to itself, the mean of the new set stays the same.

In [25]:
# Generalize the single-column fix from earlier: visit every column together with its own
# mean and overwrite the temporary filler wherever it still appears.
for column_index, column_mean in enumerate(temp_mean):
    column = lending_co_data_numeric_NAN[:, column_index]   # a view, so the write lands in the array
    column[column == temporary_fill] = column_mean

# temp_mean holds one mean per column, so the loop runs shape[1] times (shape[1] = number of columns).

In [26]:
# Validated
np.isnan(lending_co_data_numeric_NAN).sum()

0

## Reshaping Ndarrays

In [27]:
lending_co_data_numeric = np.loadtxt('Lending-company-Numeric.csv', delimiter= ',')
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [28]:
# Why is reshaping useful?
# Certain conditions about shapes and sizes need to be met
# It is not always possible to store the outputs of a function as part of an existing array (or series)

In [29]:
lending_co_data_numeric.shape

(1043, 6)

In [30]:
np.reshape(lending_co_data_numeric,(6,1043))
# Reshaping (1043,6) to (6,1043) is not the same as transposing.

array([[ 2000.,    40.,   365., ...,   365.,  1581.,  3041.],
       [12277.,  2000.,    40., ...,    50.,   365.,  5350.],
       [ 6850., 15150.,  1000., ...,  2000.,    40.,   365.],
       [ 3101.,  4351., 16600., ..., 16600.,  2000.,    40.],
       [  365.,  3441.,  4661., ...,  8450., 22250.,  2000.],
       [   40.,   365.,  3701., ...,  4601.,  4601., 16600.]])

In [31]:
# First row : the first 1043 values of the flattened array
# Second row : the next 1043 values of the flattened array ... and so on

In [32]:
np.transpose(lending_co_data_numeric)

array([[ 2000.,  2000.,  1000., ...,  2000.,  1000.,  2000.],
       [   40.,    40.,    40., ...,    40.,    40.,    40.],
       [  365.,   365.,   365., ...,   365.,   365.,   365.],
       [ 3121.,  3061.,  2160., ...,  4201.,  2080.,  4601.],
       [ 4241.,  4171.,  3280., ...,  5001.,  3320.,  4601.],
       [13621., 15041., 15340., ..., 16600., 15600., 16600.]])

In [33]:
np.reshape(lending_co_data_numeric,(3,500))

ValueError: cannot reshape array of size 6258 into shape (3,500)

In [34]:
np.reshape(lending_co_data_numeric,(2,3,1043))

array([[[ 2000.,    40.,   365., ...,   365.,  1581.,  3041.],
        [12277.,  2000.,    40., ...,    50.,   365.,  5350.],
        [ 6850., 15150.,  1000., ...,  2000.,    40.,   365.]],

       [[ 3101.,  4351., 16600., ..., 16600.,  2000.,    40.],
        [  365.,  3441.,  4661., ...,  8450., 22250.,  2000.],
        [   40.,   365.,  3701., ...,  4601.,  4601., 16600.]]])

In [35]:
# Reshaping returns a new array object; the original variable keeps its shape unless we assign the result back

In [36]:
lending_co_data_numeric.reshape(6,1043)

# Equivalent method. 

array([[ 2000.,    40.,   365., ...,   365.,  1581.,  3041.],
       [12277.,  2000.,    40., ...,    50.,   365.,  5350.],
       [ 6850., 15150.,  1000., ...,  2000.,    40.,   365.],
       [ 3101.,  4351., 16600., ..., 16600.,  2000.,    40.],
       [  365.,  3441.,  4661., ...,  8450., 22250.,  2000.],
       [   40.,   365.,  3701., ...,  4601.,  4601., 16600.]])

## Removing values from Ndarrays

In [37]:
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [38]:
np.delete(lending_co_data_numeric,0)

array([   40.,   365.,  3121., ...,  4601.,  4601., 16600.])

In [39]:
np.delete(lending_co_data_numeric,0).shape  # Without an axis, np.delete works on the flattened 1-D array

(6257,)

In [40]:
# What if we need to get rid of entire rows or columns?
# Then we pass a value for the axis argument

In [41]:
np.delete(lending_co_data_numeric,0, axis = 1)  # for columns

array([[   40.,   365.,  3121.,  4241., 13621.],
       [   40.,   365.,  3061.,  4171., 15041.],
       [   40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   40.,   365.,  4201.,  5001., 16600.],
       [   40.,   365.,  2080.,  3320., 15600.],
       [   40.,   365.,  4601.,  4601., 16600.]])

In [42]:
np.delete(lending_co_data_numeric,[0,2,4], axis = 1)  # for columns

array([[   40.,  3121., 13621.],
       [   40.,  3061., 15041.],
       [   40.,  2160., 15340.],
       ...,
       [   40.,  4201., 16600.],
       [   40.,  2080., 15600.],
       [   40.,  4601., 16600.]])

In [43]:
# What if we need to delete both simultaneously?
np.delete(np.delete(lending_co_data_numeric,[0,2,4], axis = 1),[0,2,-1], axis = 0)

array([[   40.,  3061., 15041.],
       [   40.,  3041., 15321.],
       [   50.,  3470., 13720.],
       ...,
       [   40.,  4240., 16600.],
       [   40.,  4201., 16600.],
       [   40.,  2080., 15600.]])

## Sorting Ndarrays

In [44]:
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [45]:
#np.sort() - takes an array and returns a sorted version(in ascending order)

In [46]:
np.sort(lending_co_data_numeric) 

# Default axis = -1 , that means the "last" axis
# The "second" axis = axis with index 1 is also the "last" axis the column axis
# We are rearranging the different columns in every row

array([[   40.,   365.,  2000.,  3121.,  4241., 13621.],
       [   40.,   365.,  2000.,  3061.,  4171., 15041.],
       [   40.,   365.,  1000.,  2160.,  3280., 15340.],
       ...,
       [   40.,   365.,  2000.,  4201.,  5001., 16600.],
       [   40.,   365.,  1000.,  2080.,  3320., 15600.],
       [   40.,   365.,  2000.,  4601.,  4601., 16600.]])

In [47]:
np.sort(lending_co_data_numeric).shape

(1043, 6)

In [48]:
np.sort(lending_co_data_numeric, axis = 0) 

array([[ 1.0000e+03,  3.5000e+01,  3.6500e+02, -2.8700e+03, -2.8700e+03,
        -3.5000e+02],
       [ 1.0000e+03,  3.5000e+01,  3.6500e+02, -2.5500e+03, -2.1000e+03,
         1.5000e+02],
       [ 1.0000e+03,  3.5000e+01,  3.6500e+02, -2.4500e+03, -2.0000e+03,
         1.1000e+03],
       ...,
       [ 9.0000e+03,  1.2500e+02,  3.6500e+02,  1.6751e+04,  1.8751e+04,
         5.4625e+04],
       [ 9.0000e+03,  1.6500e+02,  3.6500e+02,  1.7650e+04,  2.0001e+04,
         5.4625e+04],
       [ 9.0000e+03,  1.6500e+02,  3.6500e+02,  1.9001e+04,  2.2001e+04,
         6.4001e+04]])

In [49]:
np.set_printoptions(suppress = True)
# Be careful when running this code: the print option stays in effect for the entire session.

In [50]:
np.sort(lending_co_data_numeric, axis = None) 
# flattened 1-D array

array([-2870., -2870., -2550., ..., 54625., 54625., 64001.])

In [51]:
# NumPy sort function doesn't have a parameter that automatically changes
# the order from increasing to decreasing while sorting the values.

In [52]:
np.sort(lending_co_data_numeric)

array([[   40.,   365.,  2000.,  3121.,  4241., 13621.],
       [   40.,   365.,  2000.,  3061.,  4171., 15041.],
       [   40.,   365.,  1000.,  2160.,  3280., 15340.],
       ...,
       [   40.,   365.,  2000.,  4201.,  5001., 16600.],
       [   40.,   365.,  1000.,  2080.,  3320., 15600.],
       [   40.,   365.,  2000.,  4601.,  4601., 16600.]])

In [53]:
# Change the ascending sort to descending 
# Step 1 : change the value +ve to -ve and vice versa
np.sort(-lending_co_data_numeric)   

array([[-13621.,  -4241.,  -3121.,  -2000.,   -365.,    -40.],
       [-15041.,  -4171.,  -3061.,  -2000.,   -365.,    -40.],
       [-15340.,  -3280.,  -2160.,  -1000.,   -365.,    -40.],
       ...,
       [-16600.,  -5001.,  -4201.,  -2000.,   -365.,    -40.],
       [-15600.,  -3320.,  -2080.,  -1000.,   -365.,    -40.],
       [-16600.,  -4601.,  -4601.,  -2000.,   -365.,    -40.]])

In [54]:
# Step 2 : Add one '-' sign before np.sort
-np.sort(-lending_co_data_numeric)

array([[13621.,  4241.,  3121.,  2000.,   365.,    40.],
       [15041.,  4171.,  3061.,  2000.,   365.,    40.],
       [15340.,  3280.,  2160.,  1000.,   365.,    40.],
       ...,
       [16600.,  5001.,  4201.,  2000.,   365.,    40.],
       [15600.,  3320.,  2080.,  1000.,   365.,    40.],
       [16600.,  4601.,  4601.,  2000.,   365.,    40.]])

In [55]:
np.sort(lending_co_data_numeric[:,3])

array([-2870., -2550., -2450., ..., 16751., 17650., 19001.])

In [56]:
# ndarray.sort()
# 1. Takes the array variable
# 2. Sorts the array variable
# 3. Stores the sorted version over the original

# np.sort() only returns a sorted copy; the original array is left unchanged

In [57]:
# In-place sort of column 3 only: the other columns keep their original order, so after this
# call the values in a row no longer belong to the same record.
lending_co_data_numeric[:,3].sort()
lending_co_data_numeric

array([[ 2000.,    40.,   365., -2870.,  4241., 13621.],
       [ 2000.,    40.,   365., -2550.,  4171., 15041.],
       [ 1000.,    40.,   365., -2450.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365., 16751.,  5001., 16600.],
       [ 1000.,    40.,   365., 17650.,  3320., 15600.],
       [ 2000.,    40.,   365., 19001.,  4601., 16600.]])

## Argument Sort in NumPy

In [58]:
lending_co_data_numeric = np.loadtxt('Lending-company-Numeric.csv', delimiter= ',')
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [59]:
np.argsort(lending_co_data_numeric)

array([[1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5],
       ...,
       [1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5],
       [1, 2, 0, 3, 4, 5]], dtype=int64)

In [60]:
np.sort(lending_co_data_numeric, axis = 0)

array([[ 1000.,    35.,   365., -2870., -2870.,  -350.],
       [ 1000.,    35.,   365., -2550., -2100.,   150.],
       [ 1000.,    35.,   365., -2450., -2000.,  1100.],
       ...,
       [ 9000.,   125.,   365., 16751., 18751., 54625.],
       [ 9000.,   165.,   365., 17650., 20001., 54625.],
       [ 9000.,   165.,   365., 19001., 22001., 64001.]])

In [61]:
np.argsort(lending_co_data_numeric, axis = 0)

array([[ 537,  443,    0,   32,   32,  482],
       [ 639,  327,  687,  166,  166,  493],
       [ 849,  432,  688,   85,   85,  166],
       ...,
       [  27,  326,  355,  568, 1019,  568],
       [ 277,   27,  357,  718, 1033,  534],
       [ 420,  408, 1042,  912,  912,   27]], dtype=int64)

In [62]:
lending_co_data_numeric[482,5]

-350.0

In [63]:
lending_co_data_numeric = lending_co_data_numeric[np.argsort(lending_co_data_numeric[:,0])]

In [64]:
lending_co_data_numeric

array([[ 1000.,    40.,   365.,  2200.,  3400., 15600.],
       [ 1000.,    40.,   365.,  2200.,  3800., 15600.],
       [ 1000.,    40.,   365.,  2000.,  3950., 15600.],
       ...,
       [ 9000.,   165.,   365., 14501., 16846., 64001.],
       [ 9000.,   125.,   365., 12001., 15751., 38626.],
       [ 9000.,   125.,   365., 12251., 14251., 25626.]])

In [65]:
lending_co_data_numeric.argsort(axis = 0)

array([[   0,   22,    0,  199,  199,  172],
       [ 155,   62,  687,   53,   53,  160],
       [ 156,   38,  688,  169,  169,   53],
       ...,
       [1022, 1042,  355, 1024, 1037, 1023],
       [1031, 1039,  357,  941, 1029, 1024],
       [1042, 1040, 1042, 1027, 1027, 1040]], dtype=int64)

In [66]:
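# NOTE : np.argsort(array) and array.argsort() are equivalent - both return the sorting indices.
# Whereas np.sort(array) returns a sorted copy, array.sort() sorts the array in place.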

## Argument where in NumPy

In [67]:
# Functions that return 'co-ordinates' Or 'Indices' within an array are called "Argument" functions

In [68]:
lending_co_data_numeric = np.loadtxt('Lending-company-Numeric.csv', delimiter= ',')
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [69]:
np.argwhere(lending_co_data_numeric)

# np.argwhere() - Goes over the entire ndarray and returns the indices of every element that is non-zero (True).
# With no condition, as here, the output is the coordinates of all non-zero values;
# passing a boolean condition instead returns the indices of the elements where that condition is met.

array([[   0,    0],
       [   0,    1],
       [   0,    2],
       ...,
       [1042,    3],
       [1042,    4],
       [1042,    5]], dtype=int64)

In [70]:
# To find the zeros in the dataset, compare against 0 explicitly.
# (The values are floats; `== False` only worked because False compares equal to 0.)
np.argwhere(lending_co_data_numeric == 0)

array([[116,   4],
       [430,   3]], dtype=int64)

In [71]:
lending_co_data_numeric[116]

array([ 1000.,    50.,   365., -1450.,     0., 13850.])

In [72]:
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [73]:
np.argwhere(lending_co_data_numeric %2 == 0)

array([[   0,    0],
       [   0,    1],
       [   1,    0],
       ...,
       [1042,    0],
       [1042,    1],
       [1042,    5]], dtype=int64)

In [74]:
# Very similar to the "filtering" related to conditional slicing
# Slicing gives us the actual values
# np.argwhere() returns their co-ordinates within an array
# We can use this function to separate only the elements that interest us and examine just them

In [75]:
lending_co_data_numeric.argwhere()

AttributeError: 'numpy.ndarray' object has no attribute 'argwhere'

In [76]:
np.isnan(lending_co_data_numeric).sum()

0

In [77]:
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter= ';')
lending_co_data_numeric_NAN

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [78]:
np.argwhere(np.isnan(lending_co_data_numeric_NAN[:100]))  # ':100' for checking only first 100 rows 

array([[11,  3],
       [15,  3],
       [27,  3],
       [58,  3],
       [60,  4],
       [85,  4]], dtype=int64)

In [79]:
lending_co_data_numeric_NAN[27]

array([ 9000.,   165.,   365.,    nan, 16846., 64001.])

In [80]:
# Collect the (row, column) coordinates of every NaN, then zero them all in one indexed assignment
nan_coordinates = np.argwhere(np.isnan(lending_co_data_numeric_NAN))
lending_co_data_numeric_NAN[nan_coordinates[:, 0], nan_coordinates[:, 1]] = 0

In [81]:
lending_co_data_numeric_NAN[27]

array([ 9000.,   165.,   365.,     0., 16846., 64001.])

In [82]:
# validation check
np.isnan(lending_co_data_numeric_NAN).sum()

0

## Shuffling Ndarrays

In [83]:
# Shuffling - Rearranging the parts of a dataset
# We do so without a fixed pattern
# The end goal is that a random sample would be representative of the entire dataset
# The content remains the same; only the order changes

In [84]:
lending_co_data_numeric = np.loadtxt('Lending-company-Numeric.csv', delimiter= ',')[:8]
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       [ 2000.,    40.,   365.,  3041.,  4241., 15321.],
       [ 2000.,    50.,   365.,  3470.,  4820., 13720.],
       [ 2000.,    40.,   365.,  3201.,  4141., 14141.],
       [ 2000.,    50.,   365.,  1851.,  3251., 17701.],
       [ 2000.,    40.,   365.,  3971.,  4131., 15351.]])

In [85]:
np.random.shuffle(lending_co_data_numeric)
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3971.,  4131., 15351.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 2000.,    40.,   365.,  3041.,  4241., 15321.],
       [ 2000.,    50.,   365.,  3470.,  4820., 13720.],
       [ 2000.,    40.,   365.,  3201.,  4141., 14141.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       [ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    50.,   365.,  1851.,  3251., 17701.]])

In [86]:
lending_co_data_numeric = np.loadtxt('Lending-company-Numeric.csv', delimiter= ',')
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [87]:
from numpy.random import shuffle

In [88]:
shuffle(lending_co_data_numeric)
lending_co_data_numeric

# We write shuffle() instead of numpy.random.shuffle() since we imported the function earlier. 

array([[ 2000.,    40.,   365.,  3040.,  4200., 16600.],
       [ 2000.,    40.,   365.,  5521.,  5921., 16600.],
       [ 2000.,    40.,   365.,  3400.,  4600., 16600.],
       ...,
       [ 2000.,    40.,   365.,  3701.,  5751., 12551.],
       [ 2000.,    40.,   365.,  4600.,  5800., 16600.],
       [ 2000.,    40.,   365.,  3401.,  4601., 16600.]])

In [89]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg

# Random generators can be used for shuffling. 

In [90]:
array_RG = gen(pcg())
array_RG.shuffle(lending_co_data_numeric)
lending_co_data_numeric

# NOTE: the generator is created without a seed here, so every run produces a different order.
# Passing a seed to pcg() fixes the permutation, but shuffle() works in place, so re-running the
# cell on the already shuffled array still changes what is displayed.

array([[ 2000.,    40.,   365.,  3200.,  4620., 16600.],
       [ 2000.,    50.,   365., 12751., 15751., 20250.],
       [ 4000.,    50.,   365.,  5350.,  6800., 17400.],
       ...,
       [ 2000.,    40.,   365.,  3201.,  4401., 14001.],
       [ 4000.,    50.,   365.,  5350.,  6850., 22250.],
       [ 2000.,    50.,   365.,  7251.,  7251., 20250.]])

## Casting Ndarrays

In [91]:
# Type casting - Taking an object with values of a certain datatype and creating an identical object that contains
# values of a different datatype.
# Creating a new array that stores the values of the original array under a different type

In [92]:
lending_co_data_numeric = np.loadtxt('Lending-company-Numeric.csv', delimiter= ',')[:8]
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       [ 2000.,    40.,   365.,  3041.,  4241., 15321.],
       [ 2000.,    50.,   365.,  3470.,  4820., 13720.],
       [ 2000.,    40.,   365.,  3201.,  4141., 14141.],
       [ 2000.,    50.,   365.,  1851.,  3251., 17701.],
       [ 2000.,    40.,   365.,  3971.,  4131., 15351.]])

In [93]:
lending_co_data_numeric.astype(dtype = np.int32)

array([[ 2000,    40,   365,  3121,  4241, 13621],
       [ 2000,    40,   365,  3061,  4171, 15041],
       [ 1000,    40,   365,  2160,  3280, 15340],
       [ 2000,    40,   365,  3041,  4241, 15321],
       [ 2000,    50,   365,  3470,  4820, 13720],
       [ 2000,    40,   365,  3201,  4141, 14141],
       [ 2000,    50,   365,  1851,  3251, 17701],
       [ 2000,    40,   365,  3971,  4131, 15351]])

In [94]:
lending_co_data_numeric = lending_co_data_numeric.astype(dtype = "str")

In [95]:
lending_co_data_numeric

array([['2000.0', '40.0', '365.0', '3121.0', '4241.0', '13621.0'],
       ['2000.0', '40.0', '365.0', '3061.0', '4171.0', '15041.0'],
       ['1000.0', '40.0', '365.0', '2160.0', '3280.0', '15340.0'],
       ['2000.0', '40.0', '365.0', '3041.0', '4241.0', '15321.0'],
       ['2000.0', '50.0', '365.0', '3470.0', '4820.0', '13720.0'],
       ['2000.0', '40.0', '365.0', '3201.0', '4141.0', '14141.0'],
       ['2000.0', '50.0', '365.0', '1851.0', '3251.0', '17701.0'],
       ['2000.0', '40.0', '365.0', '3971.0', '4131.0', '15351.0']],
      dtype='<U32')

In [96]:
type (lending_co_data_numeric)

numpy.ndarray

In [97]:
lending_co_data_numeric.astype(dtype = np.int32)

ValueError: invalid literal for int() with base 10: '2000.0'

In [98]:
lending_co_data_numeric = lending_co_data_numeric.astype(dtype = np.float32)

In [99]:
lending_co_data_numeric.astype(dtype = np.int32)

array([[ 2000,    40,   365,  3121,  4241, 13621],
       [ 2000,    40,   365,  3061,  4171, 15041],
       [ 1000,    40,   365,  2160,  3280, 15340],
       [ 2000,    40,   365,  3041,  4241, 15321],
       [ 2000,    50,   365,  3470,  4820, 13720],
       [ 2000,    40,   365,  3201,  4141, 14141],
       [ 2000,    50,   365,  1851,  3251, 17701],
       [ 2000,    40,   365,  3971,  4131, 15351]])

In [100]:
# NOTE : we can't cast strings such as '2000.0' to int directly, so we follow this chain:
# str --> float --> int

In [101]:
lending_co_data_numeric = np.loadtxt("Lending-company-Numeric.csv", delimiter = ',')
# Use the builtin `str`: the `np.str` alias was deprecated in NumPy 1.20 and removed in 1.24.
lending_co_data_numeric = lending_co_data_numeric.astype(dtype = str)
lending_co_data_numeric

# To showcase the other way to go from strings to integers, we need to get the strings version of the array once again. 

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  lending_co_data_numeric = lending_co_data_numeric.astype(dtype = np.str)


array([['2000.0', '40.0', '365.0', '3121.0', '4241.0', '13621.0'],
       ['2000.0', '40.0', '365.0', '3061.0', '4171.0', '15041.0'],
       ['1000.0', '40.0', '365.0', '2160.0', '3280.0', '15340.0'],
       ...,
       ['2000.0', '40.0', '365.0', '4201.0', '5001.0', '16600.0'],
       ['1000.0', '40.0', '365.0', '2080.0', '3320.0', '15600.0'],
       ['2000.0', '40.0', '365.0', '4601.0', '4601.0', '16600.0']],
      dtype='<U32')

In [102]:
# astype() returns a new array each time, so the calls can be chained: str --> float32 --> int32.
# The chained result is not assigned and is not the last expression of the cell, so it is discarded;
# lending_co_data_numeric itself still holds strings, which is what the output shows.
lending_co_data_numeric.astype(dtype = np.float32).astype(dtype = np.int32)
lending_co_data_numeric

# We can chain methods in NumPy.

array([['2000.0', '40.0', '365.0', '3121.0', '4241.0', '13621.0'],
       ['2000.0', '40.0', '365.0', '3061.0', '4171.0', '15041.0'],
       ['1000.0', '40.0', '365.0', '2160.0', '3280.0', '15340.0'],
       ...,
       ['2000.0', '40.0', '365.0', '4201.0', '5001.0', '16600.0'],
       ['1000.0', '40.0', '365.0', '2080.0', '3320.0', '15600.0'],
       ['2000.0', '40.0', '365.0', '4601.0', '4601.0', '16600.0']],
      dtype='<U32')

## Stripping values in Ndarrays

In [103]:
# Load only three text columns (indices 1, 2 and 4), skipping the header row.
# dtype uses the builtin `str`: the `np.str` alias was deprecated in NumPy 1.20 and removed in 1.24.
lending_co_total_price = np.genfromtxt("Lending-Company-Total-Price.csv",
                                       delimiter = ',',
                                       dtype = str,
                                       skip_header = 1, 
                                       usecols = [1,2,4])
lending_co_total_price

# We don't need the entire array. We only want a few columns to showcase how stripping data works.

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype = np.str,


array([['id_1', 'Product B', 'Location 2'],
       ['id_2', 'Product B', 'Location 3'],
       ['id_3', 'Product C', 'Location 5'],
       ...,
       ['id_413', 'Product B', 'Location 135'],
       ['id_414', 'Product C', 'Location 200'],
       ['id_415', 'Product A', 'Location 8']], dtype='<U12')

In [104]:
# Stripping - Removing specific parts of strings 
# Not Straight up deleting the values stored in specific positions
# It allows us to get rid of excess data
# np.chararray.strip()

In [105]:
# strip() treats "id_" as a set of characters to trim from both ends of each string, not as a literal prefix
np.chararray.strip(lending_co_total_price[:,0], "id_")

chararray(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
           '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
           '23', '24', '25', '26', '27', '28', '29', '30', '31', '32',
           '33', '34', '35', '36', '37', '38', '39', '40', '41', '42',
           '43', '44', '45', '46', '47', '48', '49', '50', '51', '52',
           '53', '54', '55', '56', '57', '58', '59', '60', '61', '62',
           '63', '64', '65', '66', '67', '68', '69', '70', '71', '72',
           '73', '74', '75', '76', '77', '78', '79', '80', '81', '82',
           '83', '84', '85', '86', '87', '88', '89', '90', '91', '92',
           '93', '94', '95', '96', '97', '98', '99', '100', '101', '102',
           '103', '104', '105', '106', '107', '108', '109', '110', '111',
           '112', '113', '114', '115', '116', '117', '118', '119', '120',
           '121', '122', '123', '124', '125', '126', '127', '128', '129',
           '130', '131', '132', '133', '134', '135', '136', '1

In [106]:
# np.char.strip is the function form of the same element-wise strip; it avoids calling a method
# of the legacy np.chararray class on a plain ndarray.
# Caution: the second argument is a SET of characters trimmed from both ends, not a literal prefix.
# It works here because the remaining ids, product letters and location numbers contain none of them.
lending_co_total_price[:,0] = np.char.strip(lending_co_total_price[:,0], "id_")
lending_co_total_price[:,1] = np.char.strip(lending_co_total_price[:,1], "Product ")
lending_co_total_price[:,2] = np.char.strip(lending_co_total_price[:,2], "Location ")
lending_co_total_price

# Remove "id_" from the 1st column, as well as "Product " from the second and "Location " from the third one. 

array([['1', 'B', '2'],
       ['2', 'B', '3'],
       ['3', 'C', '5'],
       ...,
       ['413', 'B', '135'],
       ['414', 'C', '200'],
       ['415', 'A', '8']], dtype='<U12')

In [107]:
# Map each product letter to its numeric code with one loop instead of six copy-pasted lines.
# The codes are written as strings because the column has a string dtype; this keeps np.where
# from having to mix integers and text.
product_codes = {'A': '1', 'B': '2', 'C': '3', 'D': '4', 'E': '5', 'F': '6'}
for product_letter, product_code in product_codes.items():
    lending_co_total_price[:,1] = np.where(lending_co_total_price[:,1] == product_letter,
                                           product_code,
                                           lending_co_total_price[:,1])

lending_co_total_price

# We can combine stripping with substituting to transform all the letters into numbers. 

array([['1', '2', '2'],
       ['2', '2', '3'],
       ['3', '3', '5'],
       ...,
       ['413', '2', '135'],
       ['414', '3', '200'],
       ['415', '1', '8']], dtype='<U12')

In [108]:
lending_co_total_price.astype(dtype = np.int32)

# Even though the values look like numbers, they're actually just text, so we need to cast them once again. 

array([[  1,   2,   2],
       [  2,   2,   3],
       [  3,   3,   5],
       ...,
       [413,   2, 135],
       [414,   3, 200],
       [415,   1,   8]])

## Stacking Ndarrays

In [109]:
# Stacking - Placing multiple objects on top of one another to create a bigger(larger) object
# We can just stack arrays of matching shapes to create a larger array

In [110]:
lending_co_data_numeric = np.loadtxt("Lending-company-Numeric.csv", delimiter = ',') 
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [111]:
# Recall

lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter = ';')

# Filler = largest real value + 1; the column means are taken while the NaNs are still present.
temporary_fill = np.nanmax(lending_co_data_numeric_NAN).round(2) + 1
temporary_mean = np.nanmean(lending_co_data_numeric_NAN, axis = 0).round(2)

# Re-import, letting genfromtxt put the filler in place of every missing value.
lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", 
                                            delimiter = ';', 
                                            filling_values = temporary_fill)

# temporary_mean has one entry per column, so it broadcasts across the rows:
# every filler is swapped for the mean of the column it sits in.
lending_co_data_numeric_NAN = np.where(lending_co_data_numeric_NAN == temporary_fill,
                                       temporary_mean,
                                       lending_co_data_numeric_NAN)
lending_co_data_numeric_NAN


## We create a filler, reimport and fill all the nan-s, then substitute all the temporary fillers with more appropriate values

array([[ 2000.  ,    40.  ,   365.  ,  3121.  ,  4241.  , 13621.  ],
       [ 2000.  ,    40.  ,   365.  ,  3061.  ,  4171.  , 15041.  ],
       [ 1000.  ,    40.  ,   365.  ,  2160.  ,  3280.  , 15340.  ],
       ...,
       [ 2250.25,    40.  ,   365.  ,  4201.  ,  5001.  , 16600.  ],
       [ 1000.  ,    40.  ,   365.  ,  2080.  ,  3320.  , 15600.  ],
       [ 2000.  ,    40.  ,   365.  ,  4601.  ,  4601.  , 16600.  ]])

In [112]:
# Stack the first two columns; with axis=0 (np.stack's default) each column becomes a row of the result.
# The columns can be passed in any order we like. In this order the result is the same as
# transposing the first two columns of the original array.
np.stack((lending_co_data_numeric[:, 0], lending_co_data_numeric[:, 1]), axis=0)

array([[2000., 2000., 1000., ..., 2000., 1000., 2000.],
       [  40.,   40.,   40., ...,   40.,   40.,   40.]])

In [113]:
# .T is shorthand for np.transpose: the two leading columns become two rows
lending_co_data_numeric[:, :2].T

array([[2000., 2000., 1000., ..., 2000., 1000., 2000.],
       [  40.,   40.,   40., ...,   40.,   40.,   40.]])

In [114]:
# With axis=1 every 1-D input becomes a column of the result,
# which rebuilds the first three columns of the original array.
np.stack([lending_co_data_numeric[:, col] for col in range(3)], axis=1)

array([[2000.,   40.,  365.],
       [2000.,   40.,  365.],
       [1000.,   40.,  365.],
       ...,
       [2000.,   40.,  365.],
       [1000.,   40.,  365.],
       [2000.,   40.,  365.]])

In [115]:
# NOTE : The arrays must be the SAME shape
# Stack functions : 1. np.stack 2. np.vstack 3. np.hstack 4. np.dstack

In [116]:
# (rows, columns) of the array whose missing values were replaced by the column means
lending_co_data_numeric_NAN.shape

(1043, 6)

In [117]:
# vstack stands for "vertical stack"
# The function stacks 2-D arrays vertically:
# it places the first array on top of the second one,
# which results in a "longer" array (more rows, same number of columns)

In [118]:
# The rows of the first array come first, followed by the rows of the second
np.vstack([lending_co_data_numeric, lending_co_data_numeric_NAN])

array([[ 2000.  ,    40.  ,   365.  ,  3121.  ,  4241.  , 13621.  ],
       [ 2000.  ,    40.  ,   365.  ,  3061.  ,  4171.  , 15041.  ],
       [ 1000.  ,    40.  ,   365.  ,  2160.  ,  3280.  , 15340.  ],
       ...,
       [ 2250.25,    40.  ,   365.  ,  4201.  ,  5001.  , 16600.  ],
       [ 1000.  ,    40.  ,   365.  ,  2080.  ,  3320.  , 15600.  ],
       [ 2000.  ,    40.  ,   365.  ,  4601.  ,  4601.  , 16600.  ]])

In [119]:
# The row counts add up while the number of columns stays the same
np.vstack([lending_co_data_numeric, lending_co_data_numeric_NAN]).shape

(2086, 6)

In [120]:
# np.hstack() - horizontal stack: the arrays are placed side by side,
# so every row of the result holds the columns of both inputs
np.hstack([lending_co_data_numeric, lending_co_data_numeric_NAN])

array([[ 2000.,    40.,   365., ...,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365., ...,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365., ...,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365., ...,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365., ...,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365., ...,  4601.,  4601., 16600.]])

In [121]:
# The column counts add up while the number of rows stays the same
np.hstack([lending_co_data_numeric, lending_co_data_numeric_NAN]).shape

(1043, 12)

In [122]:
# np.dstack() - depth stack
# We stack the arrays along the third dimension
# For 1-D and 2-D inputs this returns an array with more dimensions (3-D) than the inputs

In [123]:
# Corresponding entries of the two 2-D arrays are paired up along a new third axis
np.dstack([lending_co_data_numeric, lending_co_data_numeric_NAN])

array([[[ 2000.  ,  2000.  ],
        [   40.  ,    40.  ],
        [  365.  ,   365.  ],
        [ 3121.  ,  3121.  ],
        [ 4241.  ,  4241.  ],
        [13621.  , 13621.  ]],

       [[ 2000.  ,  2000.  ],
        [   40.  ,    40.  ],
        [  365.  ,   365.  ],
        [ 3061.  ,  3061.  ],
        [ 4171.  ,  4171.  ],
        [15041.  , 15041.  ]],

       [[ 1000.  ,  1000.  ],
        [   40.  ,    40.  ],
        [  365.  ,   365.  ],
        [ 2160.  ,  2160.  ],
        [ 3280.  ,  3280.  ],
        [15340.  , 15340.  ]],

       ...,

       [[ 2000.  ,  2250.25],
        [   40.  ,    40.  ],
        [  365.  ,   365.  ],
        [ 4201.  ,  4201.  ],
        [ 5001.  ,  5001.  ],
        [16600.  , 16600.  ]],

       [[ 1000.  ,  1000.  ],
        [   40.  ,    40.  ],
        [  365.  ,   365.  ],
        [ 2080.  ,  2080.  ],
        [ 3320.  ,  3320.  ],
        [15600.  , 15600.  ]],

       [[ 2000.  ,  2000.  ],
        [   40.  ,    40.  ],
        [  365.  

In [124]:
# Shape reads as (rows, columns, number of stacked arrays)
np.dstack([lending_co_data_numeric, lending_co_data_numeric_NAN]).shape

(1043, 6, 2)

In [125]:
# Two small 3-D arrays (shape 2 x 3 x 4) to showcase how dstack works for higher dimensions.
array_example_1 = np.array([
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
    [[21, 22, 23, 24], [25, 26, 27, 28], [29, 30, 31, 32]],
])
# The second example is the first one with every element doubled.
array_example_2 = 2 * array_example_1

In [126]:
# For 3-D inputs dstack joins along the existing third axis: (2, 3, 4) and (2, 3, 4) -> (2, 3, 8)
np.dstack([array_example_1, array_example_2]).shape

(2, 3, 8)

In [127]:
# np.stack always inserts a brand-new axis (here at position 2), whereas np.dstack joined these
# 3-D inputs along their existing third axis. For 3-D arrays we can therefore no longer
# replicate the output of dstack by simply specifying an axis.
np.stack([array_example_1, array_example_2], axis=2).shape

(2, 3, 2, 4)

## Concatenating in Ndarrays

In [128]:
#Concatenation - Linking together objects in a chain.
# Creating a new array by merging existing smaller arrays along a given axis
# The inputs and the outputs of the np.concatenate() function always have the same number of dimensions

In [129]:
# Reload the comma-separated, fully numeric dataset for the concatenation examples
lending_co_data_numeric = np.loadtxt(fname="Lending-company-Numeric.csv", delimiter=",")
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [130]:
# Joining the first two rows (each 1-D) end to end gives one longer 1-D array
np.concatenate([lending_co_data_numeric[0], lending_co_data_numeric[1]])

array([ 2000.,    40.,   365.,  3121.,  4241., 13621.,  2000.,    40.,
         365.,  3061.,  4171., 15041.])

In [131]:
# Recall: rebuild the dataset with every missing value replaced by its column mean.
# A temporary filler larger than any real value marks the missing entries on re-import,
# and each filler is then substituted with the mean of its column.

lending_co_data_numeric_NAN = np.genfromtxt("Lending-company-Numeric-NAN.csv", delimiter=";")

temporary_fill = np.nanmax(lending_co_data_numeric_NAN).round(2) + 1
temporary_mean = np.nanmean(lending_co_data_numeric_NAN, axis=0).round(2)

lending_co_data_numeric_NAN = np.genfromtxt(
    "Lending-company-Numeric-NAN.csv",
    delimiter=";",
    filling_values=temporary_fill,
)

# temporary_mean has one entry per column and broadcasts across the rows,
# so every filler is replaced by the mean of the column it belongs to.
lending_co_data_numeric_NAN = np.where(
    lending_co_data_numeric_NAN == temporary_fill,
    temporary_mean,
    lending_co_data_numeric_NAN,
)

lending_co_data_numeric_NAN

array([[ 2000.  ,    40.  ,   365.  ,  3121.  ,  4241.  , 13621.  ],
       [ 2000.  ,    40.  ,   365.  ,  3061.  ,  4171.  , 15041.  ],
       [ 1000.  ,    40.  ,   365.  ,  2160.  ,  3280.  , 15340.  ],
       ...,
       [ 2250.25,    40.  ,   365.  ,  4201.  ,  5001.  , 16600.  ],
       [ 1000.  ,    40.  ,   365.  ,  2080.  ,  3320.  , 15600.  ],
       [ 2000.  ,    40.  ,   365.  ,  4601.  ,  4601.  , 16600.  ]])

In [132]:
# The default axis=0 appends the rows of the second array below the first - same result as np.vstack
np.concatenate([lending_co_data_numeric, lending_co_data_numeric_NAN]).shape

(2086, 6)

In [133]:
# axis=1 joins the arrays side by side - same result as np.hstack
np.concatenate([lending_co_data_numeric, lending_co_data_numeric_NAN], axis=1).shape

(1043, 12)

In [134]:
# np.concatenate never adds a dimension, so asking two 2-D arrays to be joined along axis 2 fails.
# The error is the point of this cell: catch and display it so that a
# "Restart & Run All" can continue with the cells below instead of stopping here.
try:
    np.concatenate((lending_co_data_numeric, lending_co_data_numeric_NAN), axis = 2).shape
except ValueError as axis_error:  # NumPy's AxisError is a subclass of ValueError (and IndexError)
    print(f"{type(axis_error).__name__}: {axis_error}")

AxisError: axis 2 is out of bounds for array of dimension 2

In [135]:
# Along axis 0 the two 2 x 3 x 4 examples are chained into a single 4 x 3 x 4 array
np.concatenate([array_example_1, array_example_2], axis=0)

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[21, 22, 23, 24],
        [25, 26, 27, 28],
        [29, 30, 31, 32]],

       [[ 2,  4,  6,  8],
        [10, 12, 14, 16],
        [18, 20, 22, 24]],

       [[42, 44, 46, 48],
        [50, 52, 54, 56],
        [58, 60, 62, 64]]])

In [136]:
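# For 1-D & 2-D arrays:
# np.vstack() --> np.concatenate((), axis = 0)   (1-D inputs are first turned into single rows)
# np.hstack() --> np.concatenate((), axis = 1)   (for 1-D inputs it is axis = 0 instead)
# np.dstack() --> np.concatenate((), axis = 2)   (after the inputs have been promoted to 3-D)
# NOTE : concatenating 1-D arrays does not require the inputs to have the same length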

In [137]:
# A row and a column of the dataset are both 1-D, so they can be joined even though their lengths differ
np.concatenate([lending_co_data_numeric[0], lending_co_data_numeric[:, 0]])

array([2000.,   40.,  365., ..., 2000., 1000., 2000.])

In [138]:
# Arrays with the same number of dimensions but different shapes can be concatenated, but only if
# their sizes match on every axis except the one we are concatenating along.
# Here both inputs have the same number of rows (axis 0), so a 1-column slice
# can be appended to the full array along axis 1.
np.concatenate([lending_co_data_numeric, lending_co_data_numeric[:, :1]], axis=1)

array([[ 2000.,    40.,   365., ...,  4241., 13621.,  2000.],
       [ 2000.,    40.,   365., ...,  4171., 15041.,  2000.],
       [ 1000.,    40.,   365., ...,  3280., 15340.,  1000.],
       ...,
       [ 2000.,    40.,   365., ...,  5001., 16600.,  2000.],
       [ 1000.,    40.,   365., ...,  3320., 15600.,  1000.],
       [ 2000.,    40.,   365., ...,  4601., 16600.,  2000.]])

## Finding unique values in Ndarrays

In [139]:
# Reload the comma-separated, fully numeric dataset for the unique-value examples
lending_co_data_numeric = np.loadtxt(fname="Lending-company-Numeric.csv", delimiter=",")
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [140]:
# np.unique() - Takes an array as an input and creates another array that contains all the distinct values
# Any value can feature only once in the np.unique() output

In [141]:
# Distinct values of the second column, returned in increasing order
np.unique(lending_co_data_numeric[:, 1])

array([ 35.,  40.,  50., 125., 165.])

In [142]:
# np.unique      -> returns the unique values within the array in increasing order
# return_counts  -> additionally returns how many times each unique value appears in the array
# return_index   -> (not requested here) would additionally return the index of the first
#                   encounter with each unique value; it can be combined with return_counts
np.unique(lending_co_data_numeric[:, 1], return_counts=True)

(array([ 35.,  40.,  50., 125., 165.]),
 array([  4, 567, 451,  19,   2], dtype=int64))

In [143]:
# np.unique also works on text: the strings are sorted "alphabetically" by their character codes,
# so every uppercase label comes before every lowercase one.
array_example = np.array([
    "a1", "a3", "A1", "A3", "A3", "AA1",
    "B1", "A2", "B1", "A2", "B2", "B2",
    "B3", "a2", "a3", "B3", "B3", "a3",
])
np.unique(array_example)

array(['A1', 'A2', 'A3', 'AA1', 'B1', 'B2', 'B3', 'a1', 'a2', 'a3'],
      dtype='<U3')

# ************* END ****************