# Intro to Numpy

## Arrays have the following properties
- homogenos meaning all elements are the same type
- fixed shape
- mutable

## How to import numpy

```import numpy as np```

In [2]:
import numpy as np

### How to create an Array

In [3]:
an_array = np.array([1,2,3,4,5])
an_array

array([1, 2, 3, 4, 5])

In [4]:
ls = [1,2,3,4]
an_array_2 = np.array(ls)
an_array_2

array([1, 2, 3, 4])

In [5]:
slice = an_array_2[:3]  # slicing
slice

array([1, 2, 3])

### Craeting a 2d array
- must be an NxM matrix

In [6]:
arr = np.array([[1,2],[3,4],[5,6]])
arr

array([[1, 2],
       [3, 4],
       [5, 6]])

## Working with Array

In [7]:
def changed(an_array):
    an_array[-1] = 1000

changed(an_array)
an_array

array([   1,    2,    3,    4, 1000])

In [8]:
# statistical funtions

new_arr = np.array([1,2,3,4,5,1,2])
arr_average = np.mean(new_arr)
print(f'Mean is {arr_average}')

st_deviation = np.std(new_arr)
print(f'ST dev is: {st_deviation}')

hist = np.histogram(new_arr, bins=5)
print(hist)

Mean is 2.5714285714285716
ST dev is: 1.3997084244475304
(array([2, 2, 1, 1, 1]), array([1. , 1.8, 2.6, 3.4, 4.2, 5. ]))


## Boolean Indexing
create conditions and apply thenm to the array

In [9]:
an_array = np.array([1,2,3,4,5,1,2])
condition = an_array > 2
filtered_array = an_array[condition]
print(filtered_array)

indexes = [0,1,6]
new_arr = an_array[indexes]
print(new_arr)

[3 4 5]
[1 2 2]


## Problem 
- write a program to create an array of high tempuratures for each day of the month
- extract the temp from the first week and calculate its mean and std 
- display all the temps that are above the mean of the first week 
- display temp that are one std above and below the mean of the first week

In [10]:
temperatures = np.array([25,28,24,26,30,29,27,23,25,28,31,29,26,24,27,29,32,30,28,25,27,29,30,28,26])
temperatures

array([25, 28, 24, 26, 30, 29, 27, 23, 25, 28, 31, 29, 26, 24, 27, 29, 32,
       30, 28, 25, 27, 29, 30, 28, 26])

In [11]:
first_week = temperatures[:7]
first_week

array([25, 28, 24, 26, 30, 29, 27])

In [12]:
avg = np.average(first_week)
print(avg)
st_dev = np.std(first_week)
print(st_dev)

27.0
2.0


In [13]:
# display temp > avg

condition1 = temperatures > avg

greater_than_avg = temperatures[condition1]
print(f'temps greater than the avg of first week: {greater_than_avg}')

# display mean-std<temp<mean+std

condition2 = (temperatures > (avg - st_dev)) & (temperatures < (avg + st_dev))  # should the | ask why
both = temperatures[condition2]
print(both)

temps greater than the avg of first week: [28 30 29 28 31 29 29 32 30 28 29 30 28]
[28 26 27 28 26 27 28 27 28 26]


## indexing using common for 2D arrays

In [14]:
an_array = np.array([
    [1,2,3,4],
    [5,6,7,8],
    [9,10,11,12],
    [12,14,15,16],
    [17,18,19,20]
])
print(an_array[0,3])   # positive indexing
print(an_array[-1,-1]) # negative indexing
print(an_array[0:2,0:2]) # mix of both slice and indexing
print(an_array[1:4,1:3]) # get the box in the middle

4
20
[[1 2]
 [5 6]]
[[ 6  7]
 [10 11]
 [14 15]]


## Basix Numpy prperties
Dim(ndim), Shape(shape), Size(size), Type(dtype)

In [17]:
a = np.array([1.1,3.3])


print(f'dimensions: {a.ndim}')
print(f'Shape: {a.shape}')
print(f'length: {len(a)}')
print(f'Size: {a.size}')
print(f'Type: {a.dtype}')

dimensions: 1
Shape: (2,)
length: 2
Size: 2
Type: float64


In [19]:
b = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])


print(f'dimensions: {b.ndim}')
print(f'Shape: {b.shape}')
print(f'length: {len(b)}')
print(f'Size: {b.size}')
print(f'Type: {b.dtype}')

dimensions: 2
Shape: (3, 4)
length: 3
Size: 12
Type: int64


In [22]:
c= np.array([1,2,5.0])


print(f'dimensions: {c.ndim}')
print(f'Shape: {c.shape}')
print(f'length: {len(c)}')
print(f'Size: {c.size}')
print(f'Type: {c.dtype}')
# This will convert all the ints to a float and fixes our mistake

dimensions: 1
Shape: (3,)
length: 3
Size: 3
Type: float64


In [None]:
a = np.array([1,2,3.0,'abc'])


print(f'dimensions: {a.ndim}')
print(f'Shape: {a.shape}')
print(f'length: {len(a)}')
print(f'Size: {a.size}')
print(f'Type: {a.dtype}')
'coverts all to string'

dimensions: 1
Shape: (4,)
length: 4
Size: 4
Type: <U32


## Array creation

In [35]:
zeros = np.zeros(6)
print(zeros)

zeros_2d = np.zeros((6,3))
print(zeros_2d)

full = np.full((5,5),100)
print(full)

ones = np.ones(3)
print(ones)

identity = np.eye(2,2)
print(identity)

empty = np.empty(3)
print(empty)

arrange = np.arange(2,10,2)
print(arrange)

lin = np.linspace(2,10,3)
print(lin)

[0. 0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[100 100 100 100 100]
 [100 100 100 100 100]
 [100 100 100 100 100]
 [100 100 100 100 100]
 [100 100 100 100 100]]
[1. 1. 1.]
[[1. 0.]
 [0. 1.]]
[1. 1. 1.]
[2 4 6 8]
[ 2.  6. 10.]


## Array manipulation

In [None]:
arr = np.array([1,2,3,4,5,6])
print(arr)

col_array = arr[:, np.newaxis] # coverts the row to a column
print(col_array)

row_array = col_array[np.newaxis,:]
print(row_array)

a_2d_array = arr.reshape(2,3)
print(a_2d_array)


[1 2 3 4 5 6]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
[[[1]
  [2]
  [3]
  [4]
  [5]
  [6]]]
[[1 2 3]
 [4 5 6]]


In [45]:
zeros = np.zeros(5,dtype = np.int64)
print(zeros)

[0 0 0 0 0]


## Cancatation

In [64]:
a= np.zeros((3,3))
print(a)
b= np.ones((1,3))
print(b)
concat = np.concatenate((a,b), axis=0)
print(concat)
c = np.array([[40], [80], [120]])
concat_2 = np.concatenate((c,a), axis=1)
print(concat_2)




[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [1. 1. 1.]]
[[ 40.   0.   0.   0.]
 [ 80.   0.   0.   0.]
 [120.   0.   0.   0.]]


In [59]:

'''
Write a program that generates an 1D array of 20 numbers corresponding to seats in a class room
Reshape the array to 5 rows and 4 columns
Generate a reservation chart showing 0 (unreserved) for all seats except the 1st seat and the last seat in the first row. the
reserved seats are marked -1.
Sample Output:
Seat numbers:
 [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]

Seating chart:
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]]

Shape of seating chart: (5, 4)

Reservation chart:
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

Reservation chart:
 [[-1.  0.  0. -1.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
'''


seating = np.arange(1,21)
print(seating)

map_seating = seating.reshape(5,4)
print(map_seating)

reserve = np.zeros((5,4))
print(reserve)

reserve[0,[0,-1]] = -1
print(reserve)





[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[-1.  0.  0. -1.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]


In [65]:
'''
Write code to take number of athletes participating in an event and the number of rounds they play. For example, 
There may be 3 participants in shot put event and each participant has 4 rounds. 
Generate a matrix with rows corresponding to participants and columns corresponding to rounds.
Write a function to enter values into the array. 
Write function to find max for each round. 

def main ():
    athletes = int(input("Enter number of participants: "))
    rounds = int(input("Enter number of rounds: "))

    data = np.zeros((athletes, rounds))
    entervalues(data, athletes, rounds)
    print(data)
    analyze_each_round(data, rounds)
main()

Sample Output:
Enter number of participants:  3
Enter number of rounds:  2
Enter data for athlete 1, round 1  10
Enter data for athlete 2, round 1  9
Enter data for athlete 3, round 1  5
Enter data for athlete 1, round 2  5
Enter data for athlete 2, round 2  6
Enter data for athlete 3, round 2  8

[[10.  5.]
 [ 9.  6.]
 [ 5.  8.]]
Round 1 highest is: 10.0
Round 2 highest is: 8.0

'''

'\nWrite code to take number of athletes participating in an event and the number of rounds they play. For example, \nThere may be 3 participants in shot put event and each participant has 4 rounds. \nGenerate a matrix with rows corresponding to participants and columns corresponding to rounds.\nWrite a function to enter values into the array. \nWrite function to find max for each round. \n\ndef main ():\n    athletes = int(input("Enter number of participants: "))\n    rounds = int(input("Enter number of rounds: "))\n\n    data = np.zeros((athletes, rounds))\n    entervalues(data, athletes, rounds)\n    print(data)\n    analyze_each_round(data, rounds)\nmain()\n\nSample Output:\nEnter number of participants:  3\nEnter number of rounds:  2\nEnter data for athlete 1, round 1  10\nEnter data for athlete 2, round 1  9\nEnter data for athlete 3, round 1  5\nEnter data for athlete 1, round 2  5\nEnter data for athlete 2, round 2  6\nEnter data for athlete 3, round 2  8\n\n[[10.  5.]\n [ 9.  

In [None]:
def entervalues(data, rows, cols):
    for c in range(cols):
        for r in range(rows):
            data[r,c] = int(input(f'Enter data for athlete {r+1}, round {c+1} '))

def analyze_each_round(data, rounds):
    for r in range(rounds):
        column_data = data[:, r]
        print(f'Round {r+1} highest is: {max(column_data)}')



def main():
    athletes = int(input("enter number od participants: "))
    rounds = int(input("enter number of rounds: "))

    data = np.zeros((athletes,rounds))
    entervalues(data, athletes, rounds)
    print(data)
    analyze_each_round(data, rounds)
main()

ValueError: invalid literal for int() with base 10: ''

## Sorting

In [69]:
arr = np.array([100,2,30,4,500,6,70,8,9,100,110,12])
sorted_array = np.sort(arr)
print(sorted_array)

arr = arr.reshape((4,3))
print(arr)

sorted_array = np.sort(arr)
print(sorted_array)


[  2   4   6   8   9  12  30  70 100 100 110 500]
[[100   2  30]
 [  4 500   6]
 [ 70   8   9]
 [100 110  12]]
[[  2  30 100]
 [  4   6 500]
 [  8   9  70]
 [ 12 100 110]]


In [None]:
arr = np.array([100,2,30,4,500,6,70,8,9,100,110,12])
print(arr)
print()
c_style = np.reshape(arr, (4,3), order = 'C')  # first 3 is the row
print(c_style)
f_style = np.reshape(arr, (4,3), order='F') # the first 4 are column
print()
print(f_style)

[100   2  30   4 500   6  70   8   9 100 110  12]

[[100   2  30]
 [  4 500   6]
 [ 70   8   9]
 [100 110  12]]

[[100 500   9]
 [  2   6 100]
 [ 30  70 110]
 [  4   8  12]]


## Filtering 2D arrays

In [80]:
a = np.array([[1 , 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

condition = (a>7) & (a<12)
print(a[condition])

condition_2 = a%2==0
print(a[condition_2])

condition_3 = (a%2==1) & (a>8)
print(a[condition_3])

condition_4 = a[a[:,0] >4]  #Rows with first value greater than 4
print(condition_4)

condition_5 = a[:, a[0,:] > 2]
print(condition_5)

[ 8  9 10 11]
[ 2  4  6  8 10 12]
[ 9 11]
[[ 5  6  7  8]
 [ 9 10 11 12]]
[[ 3  4]
 [ 7  8]
 [11 12]]


## Array Stacking

In [89]:
a1 = np.array([[1,1],[2,2]])

a2 = np.array([[3, 3],
               [4, 4]])

a3 = np.array([[3, 3],
               [4, 4],
               [0, 0]])

a4 = np.array([[3, 3, 6],
               [4, 4, 6]])

stacked = np.vstack((a1,a2,a3))
print(stacked)

stacked = np.hstack((a1,a2,a4))
print(stacked)

[[1 1]
 [2 2]
 [3 3]
 [4 4]
 [3 3]
 [4 4]
 [0 0]]
[[1 1 3 3 3 3 6]
 [2 2 4 4 4 4 6]]


In [104]:
'''
Given the result of 7 quizzes for three sections of a course. Combine the arrays into a 2d
array and find mean of all the quizzes and mean of each class and stack them to the data. 

a = np.mean(arr, axis=x)
arr = np.round(arr, 2)
arr = np.column_stack((arr, arr_1d))
'''


class_a = np.array([78, 85, 92, 65, 88, 72, 95])
class_b = np.array([80, 88, 76, 90, 82, 70, 93])
class_c = np.array([50, 60, 76, 80, 82, 70, 93])

grades = np.vstack((class_a,class_b,class_c))
print(grades)

means = np.mean(grades,axis=1)
print(means)
means = np.round(means,2)
print(means)
grades = np.column_stack((grades,means))
print(grades)

means = np.mean(grades, axis =0)
print(means)
means = np.round(means,2)
print(means)
grades = np.vstack((grades,means))
print(grades)

[[78 85 92 65 88 72 95]
 [80 88 76 90 82 70 93]
 [50 60 76 80 82 70 93]]
[82.14285714 82.71428571 73.        ]
[82.14 82.71 73.  ]
[[78.   85.   92.   65.   88.   72.   95.   82.14]
 [80.   88.   76.   90.   82.   70.   93.   82.71]
 [50.   60.   76.   80.   82.   70.   93.   73.  ]]
[69.33333333 77.66666667 81.33333333 78.33333333 84.         70.66666667
 93.66666667 79.28333333]
[69.33 77.67 81.33 78.33 84.   70.67 93.67 79.28]
[[78.   85.   92.   65.   88.   72.   95.   82.14]
 [80.   88.   76.   90.   82.   70.   93.   82.71]
 [50.   60.   76.   80.   82.   70.   93.   73.  ]
 [69.33 77.67 81.33 78.33 84.   70.67 93.67 79.28]]


In [107]:
arr= np.arange(0,27).reshape(3,9)
print(arr)

a,b,c = np.hsplit(arr,3)
print(a)
print(b)
print(c)

[[ 0  1  2  3  4  5  6  7  8]
 [ 9 10 11 12 13 14 15 16 17]
 [18 19 20 21 22 23 24 25 26]]
[[ 0  1  2]
 [ 9 10 11]
 [18 19 20]]
[[ 3  4  5]
 [12 13 14]
 [21 22 23]]
[[ 6  7  8]
 [15 16 17]
 [24 25 26]]


In [108]:
arr =np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
flattened = arr.ravel()
print(arr)
print(flattened)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[ 1  2  3  4  5  6  7  8  9 10 11 12]


## Arithmatic Operations and Summary stats for 2D arrays


In [120]:
a1 = np.array([[1, 1],
               [2, 2]])

a2 = np.array([[3, 3],
               [4, 4]])
print(a1)
print(a2)

res1 = a1+a2
print(res1)

res2 = a1-a2
print(res2)

res3 = a1*a2
print(res3)

res4 = a1/a2
print(res4)

print(a1.sum())
print(a1[1,:].sum())
print(a1[:,0].sum())
print(a1.sum(axis=0))
print(a1.sum(axis=1))
print(a1.min())

print(a1 *2)


[[1 1]
 [2 2]]
[[3 3]
 [4 4]]
[[4 4]
 [6 6]]
[[-2 -2]
 [-2 -2]]
[[3 3]
 [8 8]]
[[0.33333333 0.33333333]
 [0.5        0.5       ]]
6
4
3
[3 3]
[2 4]
1
[[2 2]
 [4 4]]


## Broadcasting

In [122]:
a = np.array([[1, 2, 3],
              [4, 5, 6]])

b = np.array([10, 20, 30])

c = a + b
print(c)
c = a*b
print(c)
c= a/b
print(c)
c= b%a
print(c)

[[11 22 33]
 [14 25 36]]
[[ 10  40  90]
 [ 40 100 180]]
[[0.1  0.1  0.1 ]
 [0.4  0.25 0.2 ]]
[[0 0 0]
 [2 0 0]]


## Matrix Operations:


In [125]:
matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
matrix2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])

print(np.add(matrix1,matrix2))
print(np.dot(matrix1,matrix2))
print(np.transpose(matrix1))

[[10 10 10]
 [10 10 10]
 [10 10 10]]
[[ 30  24  18]
 [ 84  69  54]
 [138 114  90]]
[[1 4 7]
 [2 5 8]
 [3 6 9]]


In [132]:
'''
You're analyzing the sales data for a clothing store. You have the prices for each item 
(shirts, pants, shoes) in a 2D array, and you want to apply different discount rates to each item category.
Column 1 has shirt prices, column 2 has pant prices and column 3 has shoe prices.
You need to apply a discount of 10%, 12%, and 15% for shirts, pants and shoes respectively. 

We want to apply discounts but only to items that are greater than 100 in price

prices = np.array([[150, 200, 100],
                    [120, 180, 80],
                    [200, 250, 120]])
'''
prices = np.array([[150, 200, 100],
                    [120, 180, 80],
                    [200, 250, 120]])

discounts= np.array([.1,.12,.15])
condition = prices >100
discount_map = np.where(condition, prices*discounts, 0)
print(discount_map)

prices = prices - discount_map
print(prices)

[[15.  24.   0. ]
 [12.  21.6  0. ]
 [20.  30.  18. ]]
[[135.  176.  100. ]
 [108.  158.4  80. ]
 [180.  220.  102. ]]
