In [2]:
import numpy as np

# Array Creation

In [19]:
a = np.array([1, 2, 3])
print(a)
print(a.ndim) # prints the dimension of the array

[1 2 3]
1


In [23]:
b = np.array([[1, 2, 3], [4, 5, 6]]) # multi-dimensional array (matrix)
print(b)

[[1 2 3]
 [4 5 6]]


In [24]:
print(b.shape) # prints the length of each dimension of the array
print(b.ndim)# 2 arrays of 3 elements each

(2, 3)
2


In [25]:
print(a.dtype) # prints the type of the items in the array
print(b.dtype)

int32
int32


In [9]:
c = np.array([2.2, 6, 7.7])
print(c.dtype.name)

float64


In [10]:
print(c) # numPy automatically converts 6 (int) to a float since it always tries to maintain the same type for all elements of an array when possible
# Upcasting

[2.2 6.  7.7]


In [11]:
d = np.zeros((2, 3)) # initialize an array of shape (2, 3) with zeros
print(d)

e = np.ones((2, 3)) # initialize an array of shape (2, 3) with ones
print(e)

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]


In [28]:
print(np.random.rand(2, 3)) # initialize an array of shape (2, 3) with random values

[[0.18985871 0.14336611 0.89347304]
 [0.40337142 0.17874153 0.30166053]]


In [13]:
f = np.arange(10, 50, 2) # creates a sequence of numbers in an array
# 1st argument: the starting bound (inclusive)
# 2nd argument: the ending bound (exclusive)
# 3rd argument: the difference between each number
print(f)

[10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48]


In [15]:
g = np.linspace(0, 2, 15) # creates a sequences of floats equally separated
# 1st argument: the starting bound (inclusive)
# 2nd argument: the ending bound (inclusive)
# 3rd argument: the total of numbers to be generated
print(g)

[0.         0.14285714 0.28571429 0.42857143 0.57142857 0.71428571
 0.85714286 1.         1.14285714 1.28571429 1.42857143 1.57142857
 1.71428571 1.85714286 2.        ]


# Array Operations

In [29]:
a = np.array([10, 20, 30, 40])
b = np.array([1, 2, 3, 4])

c = a - b
print(c)

d = a * b
print(d)

[ 9 18 27 36]
[ 10  40  90 160]


In [30]:
farenheit = np.array([0, -10, -5, -15, 0])
celcius = (farenheit - 31) * (5/9)
print(celcius)

[-17.22222222 -22.77777778 -20.         -25.55555556 -17.22222222]


In [31]:
print(celcius > -20)

[ True False False False  True]


In [32]:
print(celcius % 2 == 0)

[False False  True False False]


In [35]:
matrix1 = np.array([[1, 2], [0, 1]])
matrix2 = np.array([[2, 0], [3, 4]])
print(matrix1 * matrix2) # multiply each element separatedly
print(matrix1 @ matrix2) # matrix multiplication

[[2 0]
 [0 4]]
[[8 8]
 [3 4]]


In [36]:
array = np.array([3, 5.8, 71, 0.53])
print(array.sum())
print(array.min())
print(array.max())
print(array.mean()) # Normal average

80.33
0.53
71.0
20.0825


In [37]:
b = np.arange(1, 16, 1).reshape(3, 5)
print(b)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]]


In [41]:
c = np.full(b.shape, 12) # all 12
c = c.astype(np.int32) # we are sure that the type must be int32
print(c)

[[12 12 12 12 12]
 [12 12 12 12 12]
 [12 12 12 12 12]]


In [42]:
d = np.reshape(c, (5, 3))
print(d)

[[12 12 12]
 [12 12 12]
 [12 12 12]
 [12 12 12]
 [12 12 12]]


# Indexing

In [43]:
a = np.array([1, 3, 5, 7])
print(a[2])

5


In [45]:
a = np.array([[1, 2], [3, 4], [5, 6]])
print(a[1,1]) # or print(a[1][1])

4
4


In [47]:
b = np.array([a[0,0], a[1,1], a[2,0]])
print(b)

[1 4 5]


In [48]:
print(a[[0, 1, 2], [0, 1, 1]]) 
# 0th subarray -> 0th element
# 1st subarray -> 1st element
# 2nd subarray -> 1st element

[1 4 6]


# Boolean Indexing

In [49]:
print(a > 5)

[[False False]
 [False False]
 [False  True]]


In [50]:
print(a[a>5]) # only prints the elements from a greater than 5

[6]


# Slicing

In [52]:
a = np.array([0, 1, 2, 3, 4, 5])
print(a[:3])
print(a[2:4])

[0 1 2]
[2 3]


In [58]:
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(a[:2]) # prints the 0th and the 1st row
print(a[:2, 1:3]) # 0th and 1st row, but 1st and 2nd elements only (rows, columns)

[[1 2 3 4]
 [5 6 7 8]]
[[2 3]
 [6 7]]


In [59]:
subarray = a[:2, 1:3]
subarray[0][0] = 50
print(subarray)
print(a)

[[50  3]
 [ 6  7]]
[[ 1 50  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


# NumPy with Datasets

In [78]:
# "fixed acidity";"volatile acidity";"citric acid";"residual sugar";"chlorides";"free sulfur dioxide";"total sulfur dioxide";"density";"pH";"sulphates";"alcohol";"quality"
# genfromtxt() is used to load a dataset in numpy

wines = np.genfromtxt("winequality-red.csv", delimiter=";", skip_header=1)
print(wines)
print(wines.shape)

[[ 7.4    0.7    0.    ...  0.56   9.4    5.   ]
 [ 7.8    0.88   0.    ...  0.68   9.8    5.   ]
 [ 7.8    0.76   0.04  ...  0.65   9.8    5.   ]
 ...
 [ 6.3    0.51   0.13  ...  0.75  11.     6.   ]
 [ 5.9    0.645  0.12  ...  0.71  10.2    5.   ]
 [ 6.     0.31   0.47  ...  0.66  11.     6.   ]]
(1599, 12)


In [61]:
print(f"Fixed acidity: {wines[:, 0]}") # : because it must print all rows

Fixed acidity: [7.4 7.8 7.8 ... 6.3 5.9 6. ]


In [62]:
print("Fixed acitidy, citric acid, chlorides")
print(wines[:, [0, 2, 4]]) # all rows, columns 0, 2 and 4

Fixed acitidy, citric acid, chlorides
[[7.4   0.    0.076]
 [7.8   0.    0.098]
 [7.8   0.04  0.092]
 ...
 [6.3   0.13  0.076]
 [5.9   0.12  0.075]
 [6.    0.47  0.067]]


In [63]:
print(f"Average quality: {wines[:, -1].mean()}")

Average quality: 5.6360225140712945


In [79]:
# Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR ,CGPA,Research,Chance of Admit 
admission = np.genfromtxt("Admission_Predict.csv", delimiter=",", skip_header=1, names=("Serial No.","GRE Score","TOEFL Score","University Rating", "SOP", "LOR", "CGPA", "Research", "Chance of Admit"))
print(admission)
print(admission.shape)

[(  1., 337., 118., 4., 4.5, 4.5, 9.65, 1., 0.92)
 (  2., 324., 107., 4., 4. , 4.5, 8.87, 1., 0.76)
 (  3., 316., 104., 3., 3. , 3.5, 8.  , 1., 0.72)
 (  4., 322., 110., 3., 3.5, 2.5, 8.67, 1., 0.8 )
 (  5., 314., 103., 2., 2. , 3. , 8.21, 0., 0.65)
 (  6., 330., 115., 5., 4.5, 3. , 9.34, 1., 0.9 )
 (  7., 321., 109., 3., 3. , 4. , 8.2 , 1., 0.75)
 (  8., 308., 101., 2., 3. , 4. , 7.9 , 0., 0.68)
 (  9., 302., 102., 1., 2. , 1.5, 8.  , 0., 0.5 )
 ( 10., 323., 108., 3., 3.5, 3. , 8.6 , 0., 0.45)
 ( 11., 325., 106., 3., 3.5, 4. , 8.4 , 1., 0.52)
 ( 12., 327., 111., 4., 4. , 4.5, 9.  , 1., 0.84)
 ( 13., 328., 112., 4., 4. , 4.5, 9.1 , 1., 0.78)
 ( 14., 307., 109., 3., 4. , 3. , 8.  , 1., 0.62)
 ( 15., 311., 104., 3., 3.5, 2. , 8.2 , 1., 0.61)
 ( 16., 314., 105., 3., 3.5, 2.5, 8.3 , 0., 0.54)
 ( 17., 317., 107., 3., 4. , 3. , 8.7 , 0., 0.66)
 ( 18., 319., 106., 3., 4. , 3. , 8.  , 1., 0.65)
 ( 19., 318., 110., 3., 4. , 3. , 8.8 , 0., 0.63)
 ( 20., 303., 102., 3., 3.5, 3. , 8.5 , 0., 0.62)


In [65]:
print(admission["CGPA"][0:5])

[9.65 8.87 8.   8.67 8.21]


In [66]:
admission["CGPA"] = admission["CGPA"] * 100
print(admission["CGPA"][0:20])

[965. 887. 800. 867. 821. 934. 820. 790. 800. 860. 840. 900. 910. 800.
 820. 830. 870. 800. 880. 850.]


In [67]:
print(admission)

[(  1., 337., 118., 4., 4.5, 4.5, 965., 1., 0.92)
 (  2., 324., 107., 4., 4. , 4.5, 887., 1., 0.76)
 (  3., 316., 104., 3., 3. , 3.5, 800., 1., 0.72)
 (  4., 322., 110., 3., 3.5, 2.5, 867., 1., 0.8 )
 (  5., 314., 103., 2., 2. , 3. , 821., 0., 0.65)
 (  6., 330., 115., 5., 4.5, 3. , 934., 1., 0.9 )
 (  7., 321., 109., 3., 3. , 4. , 820., 1., 0.75)
 (  8., 308., 101., 2., 3. , 4. , 790., 0., 0.68)
 (  9., 302., 102., 1., 2. , 1.5, 800., 0., 0.5 )
 ( 10., 323., 108., 3., 3.5, 3. , 860., 0., 0.45)
 ( 11., 325., 106., 3., 3.5, 4. , 840., 1., 0.52)
 ( 12., 327., 111., 4., 4. , 4.5, 900., 1., 0.84)
 ( 13., 328., 112., 4., 4. , 4.5, 910., 1., 0.78)
 ( 14., 307., 109., 3., 4. , 3. , 800., 1., 0.62)
 ( 15., 311., 104., 3., 3.5, 2. , 820., 1., 0.61)
 ( 16., 314., 105., 3., 3.5, 2.5, 830., 0., 0.54)
 ( 17., 317., 107., 3., 4. , 3. , 870., 0., 0.66)
 ( 18., 319., 106., 3., 4. , 3. , 800., 1., 0.65)
 ( 19., 318., 110., 3., 4. , 3. , 880., 0., 0.63)
 ( 20., 303., 102., 3., 3.5, 3. , 850., 0., 0.62)


In [69]:
print(len(admission["Research"][admission["Research"] == 1]))

219


In [75]:
print(admission[admission["Chance_of_Admit"] > 0.8]["GRE_Score"].mean())
print(admission[admission["Chance_of_Admit"] < 0.4]["GRE_Score"].mean())

328.7350427350427
302.2857142857143


In [77]:
print(admission[admission["Chance_of_Admit"] > 0.8])
# tuples which are being used above

[(  1., 337., 118., 4., 4.5, 4.5, 965., 1., 0.92)
 (  6., 330., 115., 5., 4.5, 3. , 934., 1., 0.9 )
 ( 12., 327., 111., 4., 4. , 4.5, 900., 1., 0.84)
 ( 23., 328., 116., 5., 5. , 5. , 950., 1., 0.94)
 ( 24., 334., 119., 5., 5. , 4.5, 970., 1., 0.95)
 ( 25., 336., 119., 5., 4. , 3.5, 980., 1., 0.97)
 ( 26., 340., 120., 5., 4.5, 4.5, 960., 1., 0.94)
 ( 33., 338., 118., 4., 3. , 4.5, 940., 1., 0.91)
 ( 34., 340., 114., 5., 4. , 4. , 960., 1., 0.9 )
 ( 35., 331., 112., 5., 4. , 5. , 980., 1., 0.94)
 ( 36., 320., 110., 5., 5. , 5. , 920., 1., 0.88)
 ( 44., 332., 117., 4., 4.5, 4. , 910., 0., 0.87)
 ( 45., 326., 113., 5., 4.5, 4. , 940., 1., 0.91)
 ( 46., 322., 110., 5., 5. , 4. , 910., 1., 0.88)
 ( 47., 329., 114., 5., 4. , 5. , 930., 1., 0.86)
 ( 48., 339., 119., 5., 4.5, 4. , 970., 0., 0.89)
 ( 49., 321., 110., 3., 3.5, 5. , 885., 1., 0.82)
 ( 71., 332., 118., 5., 5. , 5. , 964., 1., 0.94)
 ( 72., 336., 112., 5., 5. , 5. , 976., 1., 0.96)
 ( 73., 321., 111., 5., 5. , 5. , 945., 1., 0.93)
