# Generating Data with NumPy

In [1]:
import numpy as np

### np.empty(), np.zeros(), np.ones(), np.full()

In [3]:
# Allocate a 2x3 array without initialising it: np.empty only reserves memory,
# so the displayed values are whatever was already there (zeros are not guaranteed).
array_1 = np.empty((2, 3))
array_1

array([[0., 0., 0.],
       [0., 0., 0.]])

In [6]:
# 2x3 array of zeros stored as 32-bit integers.
array_2 = np.zeros((2, 3), np.int32)
array_2

array([[0, 0, 0],
       [0, 0, 0]])

In [8]:
# 2x3 array of ones stored as 32-bit integers.
array_3 = np.ones((2, 3), np.int32)
array_3

array([[1, 1, 1],
       [1, 1, 1]])

In [9]:
# 3x3 array with every element set to 9.
# NOTE: this rebinds array_3, replacing the 2x3 array of ones from the previous cell.
array_3 = np.full((3, 3), 9)
array_3

array([[9, 9, 9],
       [9, 9, 9],
       [9, 9, 9]])

### "_like" functions

In [10]:
# New array with the same shape and dtype as array_3 but uninitialised contents;
# the output below shows arbitrary leftover values, not meaningful data.
array_empty_like = np.empty_like(array_3)
array_empty_like

array([[         0,          1,          0],
       [       646,        844,          0],
       [       768,        646, 1522680640]])

In [11]:
# Zero-filled and one-filled arrays that take their shape and dtype from array_3.
array_0_like, array_1_like = np.zeros_like(array_3), np.ones_like(array_3)

In [13]:
 array_1_like

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

### np.arange()

In [16]:
# Built-in range for comparison with np.arange: unpack it into a list to see the values.
[*range(5)]

[0, 1, 2, 3, 4]

In [18]:
# A single argument is the (exclusive) stop; spelling out the default start of 0
# gives the same array.
array_arange1 = np.arange(0, 5)
array_arange1

array([0, 1, 2, 3, 4])

In [20]:
# Integers from 1 up to, but not including, 4.
array_arange2 = np.arange(1, 4)
array_arange2

array([1, 2, 3])

In [21]:
# Every second integer starting at 1 and stopping before 5.
array_arange3 = np.arange(1, 5, 2)
array_arange3

array([1, 3])

In [22]:
# A fractional step produces a float array: 1.0, 1.5, ... up to (not including) 5.
array_arange4 = np.arange(1, 5, 0.5)
array_arange4

array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

In [27]:
# Pitfall demo: a fractional step combined with an integer dtype does NOT give
# 1, 1, 2, 2, ... -- the output below is eighteen 1s. Per the NumPy arange docs
# the step actually applied is dtype(start + step) - dtype(start), which here is
# int(1.5) - int(1) = 0. Use an integer step, or build the float range first and
# cast it afterwards, when integer output is wanted.
array_arange5 = np.arange(start=1,stop=10,step=0.5,dtype=np.int64)
array_arange5

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

## Random Generators

### Defining Random Generators

In [2]:
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg

In [29]:
# Wrap an unseeded PCG64 bit generator in a Generator. With no seed the state is
# initialised from fresh entropy, so the sample differs on every run.
array_rg = gen(pcg(seed=None))
array_rg.normal(loc=0.0, scale=1.0)

-0.603335611968042

In [30]:
# An integer size returns a 1-D array of that many draws instead of a single float.
array_rg.normal(size=5)

array([-0.46710978, -0.27516158,  0.69839721,  0.31898861,  1.13321113])

In [31]:
# A tuple size returns an array of that shape -- here 2 rows by 2 columns.
array_rg.normal(size=(2,2))

array([[ 1.32051185,  0.45402649],
       [-1.56999938,  0.6772714 ]])

In [37]:
# Seeding the PCG64 bit generator fixes the random stream: every time this cell
# is re-run it produces the same numbers.
array_rg1 = gen(pcg(365))
array_rg1.normal(loc=0.0, scale=1.0, size=(2, 2))

array([[-0.13640899,  0.09414431],
       [-0.06300442,  1.05391641]])

In [42]:
# Drawing again WITHOUT re-seeding continues the generator's stream, so these
# values differ from the seeded cell's output; the seed only fixes where the
# stream starts.
array_rg1.normal(size=(2,2))

array([[-1.30984316, -0.04703774],
       [ 0.955272  ,  0.26071745]])

### Generating Integers, Probabilities and Random Choices

In [3]:
# Re-seed so the integer draws are reproducible. A single bound means the values
# are drawn from 0 up to (not including) 10.
array_rg = gen(pcg(365))
array_rg.integers(low=0, high=10, size=(5, 5))

array([[0, 7, 6, 7, 8],
       [6, 6, 2, 0, 6],
       [3, 0, 3, 7, 9],
       [1, 1, 8, 7, 4],
       [4, 8, 6, 4, 9]], dtype=int64)

In [4]:
# Explicit bounds: a 5x5 array of integers from 20 up to 100; by NumPy's default
# (endpoint=False) the upper bound itself is excluded.
array_rg.integers(low =20,high=100,size=(5,5))

array([[25, 82, 75, 28, 49],
       [32, 43, 52, 59, 77],
       [58, 38, 70, 49, 63],
       [24, 71, 36, 23, 63],
       [77, 77, 87, 44, 33]], dtype=int64)

In [6]:
# 5x5 array of floats from random(); as the output below shows, every value lies
# between 0 and 1, which makes them usable as probabilities.
array_rg.random(size=(5,5))

array([[0.90762694, 0.5903071 , 0.02853661, 0.84445833, 0.91917457],
       [0.82386954, 0.7588933 , 0.06393185, 0.34507138, 0.12637865],
       [0.33319814, 0.13927789, 0.21461953, 0.5337677 , 0.36904177],
       [0.53629827, 0.81589813, 0.07632435, 0.9389407 , 0.48395624],
       [0.9295958 , 0.23011817, 0.64945578, 0.02243555, 0.97401848]])

In [8]:
# Fill a 5x5 array by picking from the given list; with no p argument NumPy
# treats every option as equally likely.
array_rg.choice([2,3,4,6,7],size=(5,5))

array([[6, 3, 6, 6, 3],
       [6, 6, 2, 6, 7],
       [2, 2, 7, 3, 6],
       [6, 2, 3, 3, 7],
       [2, 3, 7, 3, 3]])

In [9]:
# p gives one probability per option, in order (2: 0.2, 3: 0.1, 4: 0.4, 6: 0.1,
# 7: 0.2); the weights sum to 1.
array_rg.choice([2,3,4,6,7],p=[0.2,0.1,0.4,0.1,0.2],size=(5,5))

array([[2, 2, 7, 2, 4],
       [6, 2, 2, 4, 4],
       [2, 2, 2, 2, 7],
       [7, 7, 3, 4, 4],
       [2, 2, 7, 3, 4]])

### Generating Arrays From Known Distributions

In [10]:
# Re-seed, then draw a 5x5 array of Poisson counts using the default rate (lam = 1).
array_rg = gen(pcg(365))
array_rg.poisson(lam=1.0, size=(5, 5))

array([[2, 0, 1, 1, 2],
       [1, 1, 0, 1, 1],
       [1, 2, 1, 1, 0],
       [0, 1, 0, 2, 1],
       [0, 1, 0, 0, 2]], dtype=int64)

In [11]:
# lam sets the Poisson rate (the expected count); with lam=10 the values in the
# output below cluster around 10.
array_rg.poisson(lam=10,size=(5,5))

array([[11, 11, 13,  4,  6],
       [ 6, 10, 10,  7,  8],
       [10, 15, 15, 16, 12],
       [ 8, 13,  3,  9, 12],
       [10,  7,  8, 12,  7]], dtype=int64)

In [15]:
# Re-seed, then draw a 5x5 array of binomial counts: successes out of 100 trials,
# each trial succeeding with probability 0.4.
array_rg = gen(pcg(365))
array_rg.binomial(100, 0.4, (5, 5))

array([[42, 44, 30, 36, 45],
       [36, 41, 38, 42, 41],
       [35, 31, 35, 46, 29],
       [41, 41, 46, 34, 48],
       [45, 45, 45, 40, 43]], dtype=int64)

In [16]:
# Re-seed, then draw a 5x5 array from a logistic distribution with location 9
# and scale 1.2.
array_rg = gen(pcg(365))
array_rg.logistic(9, 1.2, (5, 5))

array([[10.37767822, 10.42451863,  9.63404367,  7.36153427,  9.82286787],
       [ 5.81223125, 10.09354231,  6.46790532, 11.38740256,  8.97147918],
       [10.85844698,  8.79081317,  5.962079  ,  9.99560681,  8.34539118],
       [ 7.97105522,  8.9981544 ,  8.93530194,  9.6253307 ,  9.23850869],
       [ 9.73729284,  5.3090678 , 10.13723528, 11.04372782,  7.11078651]])

Full list of the distributions available on a Generator: https://numpy.org/doc/stable/reference/random/generator.html

### Applications of Random Generators

In [18]:
# Build three 500-sample columns from a single seeded generator so the dataset
# is reproducible. The draw order matters: each call advances the shared stream.
array_rg = gen(pcg(365))
array_column1 = array_rg.normal(3, 2, 500)        # normal, mean 3, std 2
array_column2 = array_rg.normal(7, 3, 500)        # normal, mean 7, std 3
array_column3 = array_rg.logistic(9, 1.2, 500)    # logistic, loc 9, scale 1.2

In [19]:
# Stack the three samples as rows, then transpose so that each sample becomes a
# column of the resulting matrix.
random_matrix = np.vstack((array_column1, array_column2, array_column3)).T

In [20]:
# Display the assembled matrix; NumPy abbreviates the middle rows with "...".
random_matrix

array([[ 2.72718202,  8.2766977 ,  8.67931423],
       [ 3.18828862, 10.63426055,  9.93205046],
       [ 2.87399116,  2.1224141 ,  6.03877999],
       ...,
       [ 2.67325603,  5.1973629 ,  9.98776705],
       [ 0.41719471,  2.78984278,  9.61742411],
       [ 1.89841075,  7.91161235, 12.08262038]])

In [21]:
# Confirm the layout: 500 rows (samples) by 3 columns (one per generated column).
random_matrix.shape

(500, 3)

In [23]:
# Write the matrix to a comma-separated text file in the working directory;
# fmt='%s' stores each value using its plain string form.
np.savetxt(fname='random_dataset.csv', X=random_matrix, delimiter=',', fmt='%s')

In [25]:
# Read the file back to check the round trip: the output below matches the
# matrix that was saved.
np.genfromtxt(fname='random_dataset.csv', delimiter=',')

array([[ 2.72718202,  8.2766977 ,  8.67931423],
       [ 3.18828862, 10.63426055,  9.93205046],
       [ 2.87399116,  2.1224141 ,  6.03877999],
       ...,
       [ 2.67325603,  5.1973629 ,  9.98776705],
       [ 0.41719471,  2.78984278,  9.61742411],
       [ 1.89841075,  7.91161235, 12.08262038]])