In [19]:
import numpy as np

Let us create a NumPy array using NumPy's `arange` function. The 1-D array starts at 0 and ends at 8.

In [35]:
# arange(9) produces the integers 0 through 8; the stop value 9 itself is excluded
array = np.arange(9)
array

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

### Boolean Operators

We can compare two NumPy arrays element-wise.

In [20]:
my_house = np.array([18.0, 20.0, 10.75, 9.50])
your_house = np.array([14.0, 24.0, 14.25, 9.0])

# Element-wise check: which entries of my_house are at least 18?
is_at_least_18 = my_house >= 18
print(is_at_least_18)

# Element-wise check: which entries of my_house are smaller than the
# entry at the same position in your_house?
is_smaller_than_yours = my_house < your_house
print(is_smaller_than_yours)

[ True  True False False]
[False  True  True False]


In [27]:
# Floating-point step: values from 10.0 up to (but not including) 13.0 in increments of 0.5
array = np.arange(10,13,0.5)
array

array([10. , 10.5, 11. , 11.5, 12. , 12.5])

In [28]:
# Each comparison yields a boolean array with one True/False entry per element of `array`
print(array<=12)
print(array>=11)

[ True  True  True  True  True False]
[False False  True  True  True  True]


In [29]:
# Element-wise AND of the two masks: True only where 11 <= value <= 12
np.logical_and(array<=12, array>=11)

array([False, False,  True,  True,  True, False])

In [30]:
# Index with the combined boolean mask to keep only the values between 11 and 12 (inclusive)
array[np.logical_and(array<=12, array>=11)]

array([11. , 11.5, 12. ])

### From list to Numpy array 

In [21]:
height = [70, 73, 66, 71]
weight = [159, 185, 194, 157]

# Calculate the BMI: bmi
# Scale the raw lists by 0.0254 and 0.453592 (the variable names indicate
# metres and kilograms), then apply BMI = weight / height ** 2 element-wise.
np_height_m = np.array(height) * 0.0254
np_weight_kg = np.array(weight) * 0.453592
bmi = np_weight_kg / np_height_m ** 2

print(type(np_height_m))
print(type(np_weight_kg))

# Create the light array: a boolean mask that is True where the BMI is below
# the threshold. The comparison already returns a NumPy boolean array, so no
# extra np.array() wrapper is needed.
light_threshold = 23
light = bmi < light_threshold

# Print out light
print(light)

# Print out BMIs of all baseball players whose BMI is below the threshold (23)
print(bmi[light])

### 2d-numpy-arrays arithmetics

In [44]:
# Multiples of 5 from 5 up to 100 (the stop value 105 is excluded): 20 values in total
array = np.arange(5,105,5)
array

array([  5,  10,  15,  20,  25,  30,  35,  40,  45,  50,  55,  60,  65,
        70,  75,  80,  85,  90,  95, 100])

We can use NumPy’s `reshape()` function to convert the 1d-array to a 2d-array of dimension 5×4, i.e. 5 rows and 4 columns. NumPy’s reshape function takes a tuple as input.

In [45]:
# The 20 elements are laid out as 5 rows of 4 columns, filled row by row
array2D = array.reshape((5,4))
array2D

array([[  5,  10,  15,  20],
       [ 25,  30,  35,  40],
       [ 45,  50,  55,  60],
       [ 65,  70,  75,  80],
       [ 85,  90,  95, 100]])

In [102]:
# Raw measurements as plain Python lists
height = [70, 73, 66, 71]
weight = [159, 185, 194, 157]

# Convert each list to an array and scale it (the names indicate metres and kilograms)
np_height_m = 0.0254 * np.array(height)
np_weight_kg = 0.453592 * np.array(weight)

# Place the two 1-D arrays side by side: column 0 = height, column 1 = weight
np_2d = np.stack((np_height_m, np_weight_kg), axis=1)
np_2d

array([[ 1.778   , 72.121128],
       [ 1.8542  , 83.91452 ],
       [ 1.6764  , 87.996848],
       [ 1.8034  , 71.213944]])

In [99]:
# Multiplying by a scalar doubles every element of the 2-D array
np_2d * 2

array([[  3.556   , 144.242256],
       [  3.7084  , 167.82904 ],
       [  3.3528  , 175.993696],
       [  3.6068  , 142.427888]])

In [101]:
# The 2-element array is applied to every row: column 0 is divided by 0.0254
# and column 1 by 0.453592
np_2d / np.array([0.0254, 0.453592])

array([[ 70., 159.],
       [ 73., 185.],
       [ 66., 194.],
       [ 71., 157.]])

In [103]:
# The 2-element array is applied to every row: adds 1 to column 0 and 100 to column 1
np_2d + np.array([1, 100])

array([[  2.778   , 172.121128],
       [  2.8542  , 183.91452 ],
       [  2.6764  , 187.996848],
       [  2.8034  , 171.213944]])

In [104]:
# Element-wise sum of two arrays of the same shape (gives the same values as np_2d * 2)
np_2d + np_2d

array([[  3.556   , 144.242256],
       [  3.7084  , 167.82904 ],
       [  3.3528  , 175.993696],
       [  3.6068  , 142.427888]])

#### Generate Data

`np.round(np.random.normal(distribution mean, distribution std., number of samples), number of decimals)`

In [123]:
# Number of samples drawn for each measurement
n_samples = 10

# Draw normally distributed values and round them to two decimals.
# No seed is set in this cell, so the values differ between runs unless one was set earlier.
height = np.round(np.random.normal(loc=1.70, scale=0.20, size=n_samples), decimals=2)
weight = np.round(np.random.normal(loc=75, scale=10, size=n_samples), decimals=2)

# Combine into a (10, 2) array: column 0 = height, column 1 = weight
np_group = np.column_stack((height, weight))
np_group

array([[ 2.21, 77.77],
       [ 1.94, 65.69],
       [ 1.58, 89.65],
       [ 2.23, 82.4 ],
       [ 1.88, 57.5 ],
       [ 1.91, 73.82],
       [ 1.64, 74.7 ],
       [ 1.3 , 96.37],
       [ 1.72, 88.34],
       [ 1.71, 84.57]])

In [127]:
# Pull out the two columns of np_group once so each statistic below reads clearly
height_col = np_group[:, 0]
weight_col = np_group[:, 1]

# Mean of the first column (height)
avg = height_col.mean()
print(f"Average height: {avg!s}")

# Median of the first column
med = np.median(height_col)
print(f"Median height: {med!s}")

# Standard deviation of the first column
stddev = height_col.std()
print(f"Standard Deviation of height: {stddev!s}")

# Correlation between the first and second column; corrcoef returns the full
# 2x2 correlation matrix, which is printed as-is
corr = np.corrcoef(height_col, weight_col)
print(f"Correlation: {corr!s}")

Average height: 1.812
Median height: 1.7999999999999998
Standard Deviation of height: 0.26984439960836687
Correlation: [[ 1.         -0.49409564]
 [-0.49409564  1.        ]]


#### Practise

We have two lists. positions = ['GK', 'M', 'A', 'D', ...] heights = [191, 184, 185, 180, ...]

Each element in the lists corresponds to a player. The first list, positions, contains
strings representing each player's position. The possible positions are: 'GK' (goalkeeper),
'M' (midfield), 'A' (attack) and 'D' (defense). The second list, heights, contains integers
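representing the height of the player in cm. The first player in the lists is a goalkeeper
and is pretty tall (191 cm). In the cells below, the two lists are simulated with NumPy's
random functions instead, and the simulated heights are on a metre scale (mean 1.70).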

In [130]:
positions = ['GK', 'M', 'A', 'D']

# Sampling probability for each entry of `positions`, in the same order
# (1/11 for 'GK', 4/11 for 'M', 3/11 each for 'A' and 'D')
position_probs = [1/11, 4/11, 3/11, 3/11]

# Draw 100 positions at random according to those probabilities
np_positions = np.random.choice(positions, size=100, p=position_probs)
np_positions

array(['A', 'M', 'M', 'M', 'D', 'D', 'D', 'M', 'M', 'D', 'D', 'A', 'D',
       'M', 'GK', 'GK', 'A', 'A', 'A', 'A', 'M', 'M', 'A', 'GK', 'A', 'A',
       'M', 'M', 'D', 'M', 'D', 'D', 'D', 'M', 'A', 'A', 'D', 'A', 'M',
       'M', 'M', 'A', 'M', 'D', 'GK', 'GK', 'M', 'GK', 'M', 'GK', 'A',
       'M', 'D', 'A', 'M', 'D', 'D', 'GK', 'A', 'M', 'M', 'GK', 'M', 'D',
       'A', 'M', 'GK', 'M', 'A', 'M', 'M', 'A', 'A', 'A', 'A', 'A', 'D',
       'M', 'D', 'A', 'D', 'M', 'M', 'D', 'A', 'D', 'GK', 'M', 'M', 'M',
       'GK', 'M', 'M', 'M', 'M', 'M', 'M', 'D', 'A', 'M'], dtype='<U2')

In [140]:
# 100 normally distributed heights (mean 1.70, std 0.20), rounded to two decimals
raw_heights = np.random.normal(loc=1.70, scale=0.20, size=100)
np_heights = np.round(raw_heights, decimals=2)
np_heights

array([1.31, 1.86, 1.85, 1.8 , 1.77, 1.49, 1.66, 1.57, 1.74, 1.28, 2.15,
       2.05, 1.51, 1.73, 1.88, 1.94, 1.66, 1.42, 1.91, 1.93, 1.35, 2.13,
       1.9 , 1.94, 1.9 , 1.77, 1.44, 1.44, 1.79, 1.64, 1.65, 1.82, 1.75,
       2.03, 1.85, 1.75, 1.79, 1.33, 1.89, 1.78, 1.36, 2.13, 1.75, 1.77,
       1.73, 1.76, 1.88, 1.69, 1.49, 2.05, 1.92, 1.44, 1.42, 1.79, 1.77,
       1.89, 1.67, 1.74, 1.24, 1.62, 1.72, 1.81, 1.85, 1.75, 2.02, 1.81,
       1.93, 1.81, 1.51, 1.75, 1.57, 1.99, 1.78, 1.34, 1.58, 1.61, 1.82,
       2.04, 1.52, 1.6 , 1.79, 1.58, 1.66, 1.52, 1.97, 2.08, 1.58, 1.98,
       1.58, 1.64, 1.61, 1.63, 1.68, 1.64, 1.59, 1.54, 1.4 , 1.65, 1.81,
       1.75])

In [141]:
# Boolean mask that is True at every index whose position is 'GK'
is_goalkeeper = np_positions == 'GK'

# Heights of the goalkeepers: gk_heights
gk_heights = np_heights[is_goalkeeper]
print(gk_heights)

# Heights of the other players: other_heights (everything the mask does not select)
other_heights = np_heights[~is_goalkeeper]

# Print out the median height of goalkeepers.
gk_median = np.median(gk_heights)
print(f"Median height of goalkeepers: {gk_median!s}")

# Print out the median height of other players.
other_median = np.median(other_heights)
print(f"Median height of other players: {other_median!s}")

[1.88 1.94 1.94 1.73 1.76 1.69 2.05 1.74 1.81 1.93 1.58 1.61]
Median height of goalkeepers: 1.7850000000000001
Median height of other players: 1.75


### NumPy iteration

In [16]:
# NOTE(review): numpy is already imported in the notebook's first cell, so this
# repeated import is redundant (but harmless)
import numpy as np
# The six values 5, 10, ..., 30 arranged as 3 rows by 2 columns
array = np.arange(5,35,5).reshape(3,2)
array

array([[ 5, 10],
       [15, 20],
       [25, 30]])

In [17]:
# np.nditer visits every element of the 2-D array one at a time,
# so a single loop is enough (no nested loop over rows and columns)
for element in np.nditer(array):
    print(element)

5
10
15
20
25
30


`seed()`: sets the random seed, so that your results are reproducible between simulations.
As an argument, it takes an integer of your choosing. If you call the function,
no output will be generated.

In [24]:
# Set the seed
np.random.seed(123)

# Generate and print three random floats, one rand() call per iteration
for _ in range(3):
    print(np.random.rand())

0.6964691855978616
0.28613933495037946
0.2268514535642031


In [39]:
# Fix the seed so the two throws below come out the same on every run
np.random.seed(123)

# Use randint() to simulate a dice: integers from 1 to 6 (the upper bound 7 is exclusive)
first_throw = np.random.randint(1, 7)
print(first_throw)

# Use randint() again for a second throw
second_throw = np.random.randint(1, 7)
print(second_throw)

6
3


In [None]:
# Source: DataCamp - intro-to-python-for-data-science