In [1]:
import pandas as pd
import numpy as np

### The Basics

In [2]:
a = np.array([1,2,3])
print(a)

[1 2 3]


In [3]:
b = np.array([[9.0,6.0,8.0,7.0],[6.0,2.0,1.0,3.0]])
print(b)

[[9. 6. 8. 7.]
 [6. 2. 1. 3.]]


#### Get dimensions of arrays

In [4]:
a.ndim

1

In [5]:
b.ndim

2

#### Get shape of arrays

In [6]:
#this array is a vector, so it will only tell us the number of items since it is one dimension
a.shape

(3,)

In [7]:
#this shows us both number of rows and columns
b.shape

(2, 4)

#### How much memory is it using?

In [8]:
print("""REMEMBER: 
-For integers, each byte is 8 units, so int32 is 4 bytes, int16 is 2 bytes, etc.
-Floats are bigger than integers""")

REMEMBER: 
-For integers, each byte is 8 units, so int32 is 4 bytes, int16 is 2 bytes, etc.
-Floats are bigger than integers


In [9]:
a.dtype

dtype('int32')

In [10]:
b.dtype

dtype('float64')

In [11]:
c = np.array([1,2,3], dtype = 'int16') # we can specify which type to store data as, i.e. if we had only small values and used int16, it would use less space
c.dtype

dtype('int16')

In [12]:
#Get size
a.itemsize

4

In [13]:
#Get total size
a.size

3

### Accessing/Changing specific elements, rows, columns, etc.

In [8]:
d = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
print(d)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [9]:
d.shape

(2, 7)

In [16]:
#getting a specific element - still using python indexing principles, starting at 0 [row,column]
d[1,5]

13

In [17]:
#Get a specific row
d[0,:]

array([1, 2, 3, 4, 5, 6, 7])

In [18]:
#Get a specific column - all rows, 3 index item (2 = (0,1,2 which is the third column))
d[:,2]

array([ 3, 10])

In [19]:
#Getting a little more fancy [startindex:endindex:stepsize]
d[0,1:6:2]

array([2, 4, 6])

In [20]:
#Changing value
d[1,5] = 18
print(d)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 18 14]]


In [21]:
#Change specific value for all rows, in a specified column
d[:,2] = 5
print(d)

# d[:,2] = [5,2] #Do this to change each respective value to the ones in the list
# print(d)

[[ 1  2  5  4  5  6  7]
 [ 8  9  5 11 12 18 14]]


#### 3D Example

In [22]:
e = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(e)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [23]:
#Getting specified element from 3D array, work outside in - trying to get 4 in example. 

#Syntax = e[groupnumber:rownumber:columnnumber]
e[0,1,1]

#The above works since we are telling Python to enter the first list of lists (1,2,3,4), and then go to the second row 
#as well as the second element. Results in 0,1,1 because of indexing

4

In [24]:
#replacing a number with example above. Would need to create a subsequence which is the same dimension
e[:,1,:] = [[9,9],[8,8]]
print(e)

[[[1 2]
  [9 9]]

 [[5 6]
  [8 8]]]


### Initializing Different Types of Arrays

In [25]:
#ALL 0s matrix - must specify a desired shape in parenthesis
np.zeros(3)

array([0., 0., 0.])

In [26]:
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [27]:
#ALL 1s matrix
np.ones((4,2,2))

array([[[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]]])

In [28]:
np.ones((4,2,2), dtype = 'int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]])

In [29]:
#Any other number - takes in two parameters with specified or desired values. 

#((shapeOfMatrix), desired values, datatype)
np.full((2,2), 99)

array([[99, 99],
       [99, 99]])

In [30]:
#Any other number (full_like) - allows us to take a shape that is already built and pass in new values
np.full_like(e.shape, 4)

array([4, 4, 4])

In [31]:
#random decimal numbers
np.random.rand(4,2)

array([[0.18649224, 0.87516731],
       [0.45783184, 0.58610286],
       [0.70661954, 0.29256282],
       [0.69444211, 0.75821068]])

In [32]:
np.random.rand(4,2,3)

array([[[0.90516593, 0.37466511, 0.68909884],
        [0.13212226, 0.47625861, 0.52040019]],

       [[0.17973722, 0.59696726, 0.55299933],
        [0.49821672, 0.99662337, 0.64570369]],

       [[0.55770263, 0.05682656, 0.79352197],
        [0.10341658, 0.65510653, 0.80495198]],

       [[0.67152473, 0.02132003, 0.18024451],
        [0.29301746, 0.58849345, 0.54367572]]])

In [33]:
#random int values - specify the range of possible numbers with beginning int, and then shape. 7 is exclusive, put 8 
#to include it

np.random.randint(7, size=(3,3))

array([[4, 0, 4],
       [0, 5, 0],
       [2, 4, 1]])

In [34]:
#can also specify start and stop points
np.random.randint(2,7, size=(3,3))

array([[3, 3, 3],
       [6, 5, 6],
       [2, 5, 2]])

In [35]:
#identity matrix by it's nature is going to be a square matrix.
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [36]:
#repeating an array 3 times
arr = np.array([1,2,3])
r1  = np.repeat(arr,3)
print(r1)

[1 1 1 2 2 2 3 3 3]


In [37]:
#Making the above array two dimensional - use the axis argument to accomplish this, axis = 0 is two-dimensional
arr2 = np.array([[1,2,3]])
r2  = np.repeat(arr2,3, axis=0)
print(r2)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


#### Copying an array

In [38]:
#Making a copy of an array
f = np.array((1,2,3))
print(f)

[1 2 3]


In [39]:
g = f.copy()
g[0] = 100
print(g)

[100   2   3]


In [40]:
#verifying the original is still in tact
print(f)

[1 2 3]


### Mathematics

In [41]:
a = np.array((1,2,3,4))
print(a)

[1 2 3 4]


In [42]:
a + 2

array([3, 4, 5, 6])

In [43]:
a - 2

array([-1,  0,  1,  2])

In [44]:
a * 2

array([2, 4, 6, 8])

In [45]:
a / 2

array([0.5, 1. , 1.5, 2. ])

In [46]:
b = np.array((5,6,7,8))
print(b)

[5 6 7 8]


In [47]:
a + b

array([ 6,  8, 10, 12])

In [48]:
a ** 2

array([ 1,  4,  9, 16])

In [49]:
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [50]:
np.cos(a)

array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

#### Linear Algebra

In [55]:
#Number of columns of first matrix equal to rows of the second one
i = np.ones((2,3))
print(i)

print('- ' * 10)

j = np.full((3,2),2)
print(j)

#Can't do i*j, will result in error. Use matmul() function
np.matmul(i,j)

[[1. 1. 1.]
 [1. 1. 1.]]
- - - - - - - - - - 
[[2 2]
 [2 2]
 [2 2]]


array([[6., 6.],
       [6., 6.]])

In [57]:
#Find the determinant
l = np.identity(3)
np.linalg.det(l)

1.0

#### Statistics

In [58]:
stats = np.array([[1,2,3],[4,5,6]])
stats

array([[1, 2, 3],
       [4, 5, 6]])

##### Min/Max

In [59]:
np.min(stats)

1

In [64]:
np.min(stats, axis = 0)

array([1, 2, 3])

In [65]:
np.min(stats, axis = 1)

array([1, 4])

In [60]:
np.max(stats)

6

In [63]:
np.max(stats, axis = 0)

array([4, 5, 6])

In [62]:
np.max(stats, axis = 1)

array([3, 6])

##### Sum

In [66]:
#all values in matrix
np.sum(stats)

21

In [67]:
np.sum(stats, axis=0) #Adding all values going downwards

array([5, 7, 9])

### Reorganizing Arrays

In [68]:
before = np.array([[1,2,3,4],[5,6,7,8]])
print(before)

[[1 2 3 4]
 [5 6 7 8]]


In [71]:
#RESHAPING REQUIRES SAME AMOUNT OF VALUES
after = before.reshape((4,2))
after

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

##### Vertical Stacking Matrices

In [72]:
v1 = np.array((1,2,3,4))
v2 = np.array((5,6,7,8))

np.vstack((v1,v2))

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

##### Horizontal Stacking Matrices

In [73]:
h1 = np.ones((2,4))
h2 = np.zeros((2,2))

np.hstack((h1,h2))

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

### Miscellaneous

##### Loading file from data into array

In [97]:
data = np.genfromtxt('data.txt', delimiter = ',')
data2 = data.astype('int32') #USE .astype() to specify data types
print(data2)

[[   1    2    3    4    5    6    7   87    8    8    6   54    4   43
    45   55   34   54   25 2435]
 [   3    3  342   32   32   44    1    5    7    8    8   56    4   56
     6    4    4    5    6    7]
 [  54   85    9    5    4    7   54   75   86   21  256  859  142 5856
   968   58    7    7   41    5]]


###### Boolean Masking & Advanced Indexing

In [90]:
data2 > 50

array([[False, False, False, False, False, False, False,  True, False,
        False, False,  True, False, False, False,  True, False,  True,
        False,  True],
       [False, False,  True, False, False, False, False, False, False,
        False, False,  True, False,  True, False, False, False, False,
        False, False],
       [ True,  True, False, False, False, False,  True,  True,  True,
        False,  True,  True,  True,  True,  True,  True, False, False,
        False, False]])

In [91]:
#You can index with a list in numpy
data2[data2 > 50]

array([  87,   54,   55,   54, 2435,  342,   56,   56,   54,   85,   54,
         75,   86,  256,  859,  142, 5856,  968,   58])

In [92]:
#Indexing using a list, grabbing 1,2,8 elements, showing how the above code works in a different way
m = np.array((1,2,3,4,5,6,7,8,9))
m[[1,2,8]]

array([2, 3, 9])

In [93]:
#Going through and return True/False if ANY values in columns in the matrix > 50
np.any(data2 > 50, axis = 0)

array([ True,  True,  True, False, False, False,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True, False,  True,
       False,  True])

In [94]:
#Going through and return True/False if ALL values in columns in the matrix > 50
np.all(data2 > 50, axis = 0)

array([False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False])

In [98]:
((data2 > 50) & (data2 < 100)) # Can use ~ at the beginning of code to display "Not equal to": (~((data2 > 50) & (data2 < 100)))

array([[False, False, False, False, False, False, False,  True, False,
        False, False,  True, False, False, False,  True, False,  True,
        False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False,  True, False,  True, False, False, False, False,
        False, False],
       [ True,  True, False, False, False, False,  True,  True,  True,
        False, False, False, False, False, False,  True, False, False,
        False, False]])