## Arrays of 0s and 1s

In [1]:
# Generating data using NumPy arrays:
# both random and non-random data

In [2]:
import numpy as np

In [3]:
# np.empty() - creates an "empty" N-D array, i.e. returns an array without initializing its entries
# It is the fastest way to create an N-D array, because no values are written into the allocated memory

In [4]:
# np.empty() only reserves memory (RAM) for the array and never writes to it,
# so the values displayed are whatever was already stored there. They may look
# random or, as in this run, happen to be all zeros - never rely on them.
array_empty = np.empty((2, 3))
array_empty

array([[0., 0., 0.],
       [0., 0., 0.]])

In [5]:
# np.zeros() - creates an array full of 0s
# np.zeros() has a consistent output, whereas np.empty() does not

In [6]:
# Two rows, three columns, every entry 0.0 (floats unless a dtype is given).
array_0s = np.zeros((2, 3))
array_0s

array([[0., 0., 0.],
       [0., 0., 0.]])

In [7]:
# A 2x3 array of zeros stored as 8-bit integers instead of floats.
array_0s = np.zeros((2, 3), dtype=np.int8)
array_0s

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [8]:
# np.ones() - the counterpart of np.zeros() for the value 1:
# generates an array filled entirely with 1s.
array_1s = np.ones((2, 3))
array_1s

array([[1., 1., 1.],
       [1., 1., 1.]])

In [9]:
# np.full() - generates an array filled entirely with a specified value
# It takes one additional mandatory argument: fill_value
# fill_value takes a scalar value

In [10]:
# fill_value is the one additional mandatory argument of np.full();
# here it is the scalar 2.
array_full = np.full((2, 3), fill_value=2)
array_full

array([[2, 2, 2],
       [2, 2, 2]])

In [11]:
# The fill value does not have to be a number: a string works as well.
array_full = np.full((2, 3), fill_value="Three-Six-Five")
array_full

array([['Three-Six-Five', 'Three-Six-Five', 'Three-Six-Five'],
       ['Three-Six-Five', 'Three-Six-Five', 'Three-Six-Five']],
      dtype='<U14')

## "_like" functions in NumPy

In [12]:
# The "_like" functions are the counterparts of empty, full, zeros and ones
# We don't need to specify a shape or a type
# Instead, we provide another array (whose shape and type are reused)

In [13]:
# Prototype array (3 rows x 5 columns of integers) for the "_like" functions.
matrix_A = np.array(
    [
        [1, 0, 9, 2, 2],
        [3, 23, 4, 5, 1],
        [0, 2, 3, 4, 1],
    ]
)
matrix_A

array([[ 1,  0,  9,  2,  2],
       [ 3, 23,  4,  5,  1],
       [ 0,  2,  3,  4,  1]])

In [14]:
# empty_like() takes the shape and type of the prototype (matrix_A) but leaves
# the entries uninitialised. Both can be overridden by passing dtype and shape,
# although there is little reason to use empty_like() in that case.
array_empty_like = np.empty_like(matrix_A)
array_empty_like

array([[      0,       0,       0,       0,       0],
       [      0,       0,       0,       0,       0],
       [   1436,       0,       0, 7602273,       0]])

In [15]:
# zeros_like() - an array of 0s with the shape and type of matrix_A.
# Corresponding functions exist for 1s and full as well.
array_0s_like = np.zeros_like(matrix_A)
array_0s_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

## A Non-Random Sequence of Numbers

In [16]:
# np.arange() = array range
# NumPy's equivalent of Python's range() function
# Creates a sequence of consecutive integer values within a given range
# range()     --> returns a range object
# np.arange() --> returns an array

In [17]:
# Python's built-in range() returns a range object rather than the values themselves.
range(20)

range(0, 20)

In [18]:
# Wrapping the range object in list() produces the 20 values it describes.
list(range(20))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [19]:
# np.arange() creates an ndarray with the values in this range (0 to 19).
array_rng = np.arange(20)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [20]:
# When only one bound is supplied, np.arange() treats it as the "stop" value
# and starts from the origin (0); the stop value itself is excluded.
# NOTE(review): the commented-out call below passes that single bound under the
# name "start" instead. Recent NumPy releases appear to reject this with a
# TypeError - confirm on the installed version before re-enabling it.
#array_rng = np.arange(start =  30)
array_rng = np.arange(stop=30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [21]:
# "step" doesn't have to be of the same type as the bounds: integer start/stop
# combined with a float step gives an array of floats.
array_rng = np.arange(0, 30, 2.5)
array_rng

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. ,
       27.5])

In [22]:
# Careful: the result is NOT the float sequence 0, 2.5, 5, 7.5, ... truncated
# to integers afterwards. As the output shows, the values advance in steps of
# 2: the fractional part of the step is lost when the integer dtype is applied,
# while the number of elements (12) is still the one of the float sequence.
# Combining a fractional step with an integer dtype is therefore best avoided.
array_rng = np.arange(0, 30, 2.5, dtype=np.int32)
array_rng

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22])

## Random Generators and Seeds

In [23]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg

# We import two classes from the numpy.random module: Generator and PCG64.

In [24]:
# Generator takes a bit generator as an input and creates generator objects
# (it is the numpy.random.Generator class)

In [25]:
# PCG64 - Permuted Congruential Generator: a bit generator whose underlying functions can produce values of up to 64 bits in size.

In [26]:
# SEED - a set of starting parameters for the algorithm.
# No seed is passed here, so the starting parameters are chosen for us and
# every run of this cell produces different random numbers.
# The shape of the resulting array equals the tuple passed as size: (5,5).
array_RG = gen(pcg())
array_RG.normal(size=(5, 5))

array([[ 1.11982585,  1.0437866 ,  0.31669616, -0.94164487, -1.56117628],
       [-0.52425048, -1.87745063, -1.48818482,  1.10658996,  0.56824664],
       [-0.29746017,  0.47117125,  0.38156541, -0.81276303, -0.10842344],
       [ 0.19194959,  0.72593131, -2.26133607,  0.71930234, -0.10339198],
       [-0.69594266,  0.72959745,  0.31202597,  0.90306843,  0.57225307]])

In [27]:
# With an explicit seed the starting values are fixed, so re-running this cell
# always provides the same "random" output.
array_RG = gen(pcg(seed=365))
array_RG.normal(size=(5, 5))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641, -0.6866818 ],
       [-0.50922173, -0.7999526 ,  0.73041825,  0.08825439, -2.1177576 ],
       [ 0.65526774, -0.48095012, -0.5519114 , -0.58578662, -0.98257896],
       [ 1.12378166, -1.30984316, -0.04703774,  0.955272  ,  0.26071745],
       [-0.20023668, -1.50172484, -1.4929163 ,  0.96535084,  1.18694633]])

In [28]:
# The generator is not re-seeded between calls: this draw continues from the
# state the previous draw left behind, so its values differ from the ones
# obtained right after seeding. To reproduce a result, re-create the generator
# with the same seed before drawing.
array_RG.normal(size=(5, 5))

array([[-0.76065577,  1.48158358,  0.01200258, -0.06846959,  0.25301664],
       [-0.52640788,  0.79613109,  0.28203421,  1.80238008,  0.93932117],
       [-0.53693283, -0.26317689, -1.77723035,  1.14900013, -2.20733915],
       [ 1.54116775, -0.5124932 , -2.14564563,  1.98878673,  0.32208907],
       [-1.2651495 ,  3.2714633 ,  1.78650493, -0.20233675,  0.20427467]])

## Generating Integers, Probabilities and Random Choices
## Basic random functionality in NumPy

In [29]:
# integers() - generates whole numbers (integers)
# Requires defining a fixed range of values to choose from
# If we provide only a single value, it is treated as the upper bound and the integers are drawn from 0 up to (but not including) it

In [30]:
# Re-seed, then draw a 5x5 array of integers between low = 10 and high = 100.
array_RG = gen(pcg(seed=365))
array_RG.integers(10, 100, size=(5, 5))

array([[18, 78, 64, 78, 84],
       [66, 67, 28, 10, 69],
       [45, 15, 37, 74, 96],
       [19, 21, 89, 73, 54],
       [53, 84, 66, 51, 92]], dtype=int64)

In [31]:
# random() - a 5x5 array of random floats (all between 0 and 1 in the output).
array_RG = gen(pcg(seed=365))
array_RG.random(size=(5, 5))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599, 0.66501486],
       [0.06559111, 0.71326309, 0.10812106, 0.87969046, 0.49405844],
       [0.82472673, 0.45652944, 0.07367232, 0.69628564, 0.36690736],
       [0.29787156, 0.4996155 , 0.4865245 , 0.62740703, 0.54952637],
       [0.64894629, 0.04411757, 0.7206516 , 0.84594003, 0.17159792]])

In [32]:
# choice() - simulates the idea of making an arbitrary choice out of a given sequence.

In [33]:
# No probabilities are supplied, so all the outcomes are equally likely.
array_RG = gen(pcg(seed=365))
array_RG.choice([1, 2, 3, 4, 5], size=(5, 5))

array([[1, 4, 4, 4, 5],
       [4, 4, 2, 1, 4],
       [2, 1, 2, 4, 5],
       [1, 1, 5, 4, 3],
       [3, 5, 4, 3, 5]])

In [34]:
# "p" assigns a probability to each outcome, in order: the value 5 gets 0.6,
# every other value gets 0.1.
array_RG = gen(pcg(seed=365))
array_RG.choice([1, 2, 3, 4, 5], size=(5, 5), p=[0.1, 0.1, 0.1, 0.1, 0.6])


array([[5, 5, 5, 3, 5],
       [1, 5, 2, 5, 5],
       [5, 5, 1, 5, 4],
       [3, 5, 5, 5, 5],
       [5, 1, 5, 5, 2]])

## Probability Distributions in NumPy

In [35]:
# The default Poisson distribution: no "lam" is passed, so lambda = 1.
array_RG = gen(pcg(seed=365))
array_RG.poisson(size=(5, 5))

array([[2, 0, 1, 1, 2],
       [1, 1, 0, 1, 1],
       [1, 2, 1, 1, 0],
       [0, 1, 0, 2, 1],
       [0, 1, 0, 0, 2]], dtype=int64)

In [36]:
# A Poisson distribution with lambda = 10.
array_RG = gen(pcg(seed=365))
array_RG.poisson(10, size=(5, 5))

array([[11, 12, 12, 14, 13],
       [ 9, 10, 11, 11,  8],
       [11,  8, 10,  9, 14],
       [ 7,  8,  9, 15, 15],
       [13,  8,  8,  7,  9]], dtype=int64)

In [37]:
# A binomial distribution:
#   n = number of trials (100)
#   p = probability of getting our desired outcome (0.4)
array_RG = gen(pcg(seed=365))
array_RG.binomial(100, 0.4, size=(5, 5))

array([[42, 44, 30, 36, 45],
       [36, 41, 38, 42, 41],
       [35, 31, 35, 46, 29],
       [41, 41, 46, 34, 48],
       [45, 45, 45, 40, 43]], dtype=int64)

In [38]:
# A logistic distribution with location = 9 and scale = 1.2.
array_RG = gen(pcg(seed=365))
array_RG.logistic(9, 1.2, size=(5, 5))

array([[10.37767822, 10.42451863,  9.63404367,  7.36153427,  9.82286787],
       [ 5.81223125, 10.09354231,  6.46790532, 11.38740256,  8.97147918],
       [10.85844698,  8.79081317,  5.962079  ,  9.99560681,  8.34539118],
       [ 7.97105522,  8.9981544 ,  8.93530194,  9.6253307 ,  9.23850869],
       [ 9.73729284,  5.3090678 , 10.13723528, 11.04372782,  7.11078651]])

## Application of Random Data with NumPy

In [39]:
# Test creation - when real data isn't available, we sometimes need pseudo-random data
# to test how well a program performs


In [40]:
# Create the individual columns of the dataset we're building: 1000 values
# each, all drawn from a single seeded generator so that the data can be
# reproduced.
array_RG = gen(pcg(seed=365))

array_column_1 = array_RG.normal(loc=2, scale=3, size=1000)
array_column_2 = array_RG.normal(loc=7, scale=2, size=1000)
array_column_3 = array_RG.logistic(loc=11, scale=3, size=1000)
array_column_4 = array_RG.exponential(scale=4, size=1000)
array_column_5 = array_RG.geometric(p=0.7, size=1000)

In [41]:
# np.array() builds a new array in which each of the 5 arrays created earlier
# is one row; transposing it turns them into columns, so the dataset isn't
# flipped (one row per observation, one column per variable).
random_test_data = np.array(
    [
        array_column_1,
        array_column_2,
        array_column_3,
        array_column_4,
        array_column_5,
    ]
).T
random_test_data

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [42]:
# Dimensions of the dataset as a (rows, columns) tuple.
random_test_data.shape

(1000, 5)

In [43]:
# Save the array to an external file that np.savetxt creates for us:
#   "Random-Test-from-NumPy.csv" -> name of the file
#   random_test_data             -> data we're exporting
#   fmt = "%s"                   -> format: values are written as strings
#   delimiter = ","              -> separator placed between the values
# We'll talk more about these in just a bit.
np.savetxt(
    "Random-Test-from-NumPy.csv",
    random_test_data,
    fmt="%s",
    delimiter=",",
)

In [44]:
# Import the data back from the file we just created.
np.genfromtxt("Random-Test-from-NumPy.csv", delimiter=",")

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [45]:
# Load the file once more, keep the result in a variable and print it.
rand_test_data = np.genfromtxt("Random-Test-from-NumPy.csv", delimiter=",")
print(rand_test_data)

[[ 1.59077303  6.42174295 10.14698427  6.91500737  1.        ]
 [ 2.28243293  8.57902322 15.93309953  6.243605    1.        ]
 [ 1.81098674  5.17270135 -0.46878789  2.44997251  1.        ]
 ...
 [ 0.1973629   4.3465854   2.66485989  0.80935387  1.        ]
 [-2.21015722  8.2176402  12.69328115  0.50644607  2.        ]
 [ 2.91161235  7.90337695 11.79840961  4.86816939  1.        ]]


# ***********END***********