# Creating array

In [None]:
from sys import getsizeof
import numpy as np
import matplotlib.pyplot as plt

# using list or tuple
print([1, 2, 3], np.array([1, 2, 3]))
print((1, 2, 3), np.array((1, 2, 3)))

In [None]:
# does not work with multiple numeric arguments
a = np.array(1,2,3,4)    # WRONG

In [None]:
# multi-level (nested) list -> multi-dimensional array
print([[1, 2, 3], [4, 5, 6]])
print(np.array([[1, 2, 3], [4, 5, 6]]))

In [None]:
# convert back to list
a = np.array([[1, 2, 3], [4, 5, 6]])
print(a.tolist())
print(list(a))

In [None]:
# using arange
print(np.arange(10))
print(np.arange(2, 10))
print(np.arange(2, 10, 2))

In [None]:
# using linspace() to generate evenly spaced numbers
print(np.linspace(0, 10, 6))
print(np.linspace(0, 100, 11))

In [None]:
np.sin(np.linspace( 0, 2*np.pi, 51 ) )

In [None]:
# create uniform array
print(np.ones([3, 4]), '\n')
print(np.zeros([3, 4]), '\n')
print(np.empty([3, 4]), '\n')
print(np.full([3, 4], 'hello'), '\n')

In [None]:
# create uniform array based on the shape of another array
print(np.ones_like(np.zeros([3, 4])))

# Data types

In [None]:
print(np.dtype(int))
print(np.dtype(float)) # takes up large storage when saving data in default types

In [None]:
# you can specify the data type you want when creating a new array
print(np.ones([3, 4], dtype=int), '\n')
print(np.ones([3, 4], dtype=str), '\n')
print(np.zeros([3, 4], dtype=int), '\n')
print(np.zeros([3, 4], dtype=str), '\n')
print(np.empty([3, 4], dtype=int), '\n')
print(np.empty([3, 4], dtype=str), '\n')

In [None]:
# be careful about the range of a data type
print(np.power(100, 8, dtype=np.int64))
print(np.power(100, 8, dtype=np.int32))
print(np.power(100, 8, dtype=np.int16))

In [None]:
# check the range
print(np.iinfo(np.int64))
print(np.iinfo(np.int32))
print(np.iinfo(np.int16))

In [None]:
# A structured array is very powerful for table-like data (but we will learn
# about a more powerful tool very soon).
# - 'datetime64[s]' carries an explicit time unit; the bare 'M' code is a
#   generic (unit-less) datetime64.
#   NOTE(review): second resolution is an assumption - pick the unit that
#   matches the actual records.
# - 'f4' replaces 'f2' because float16 overflows to inf above 65504 and keeps
#   only about three significant digits.
dtype = np.dtype([
    ('siteno', 'U10'),
    ('datetime', 'datetime64[s]'),
    ('streamflow', 'f4'),
])
print(dtype)

# Indexing

In [None]:
# first of all, an array is mutable; an index refers to the location of the data (by reference), so assigning through it changes the array in place
a = np.ones_like(np.zeros([3, 4]))
print('before', a[1, 1])
a[1, 1] = 2
print('after', a[1, 1])

In [None]:
# 1. indexing like lists
#a1 = np.random.randint(1, 100, 10)
a1 = np.arange(10)
print(a1)
print(a1[:5])
print(a1[7:])
print(a1[-5:])

In [None]:
# specify the step
print(a1[:8:2])

In [None]:
# 2. numpy feature indexing by list/array of specific indices
print(a1[[2, 3, 6, 9]])

In [None]:
# 2.2 what is returned when index arrays are used is an array with the same shape as the index array
print(a1)
print(a1[np.array([[2, 3], [-2, -1]])])

In [None]:
# numpy feature indexing by boolean
temperatures = [-5.4, 1.0, -1.3, -4.8, 3.9, 0.1, -4.4, 4.0, -2.2, -3.9, 4.4,
                -2.5, -4.6, 5.1, 2.1, -2.4, 1.9, -3.3, -4.8, 1.0, -0.8, -2.8,
                -0.1, -4.7, -5.6, 2.6, -2.7, -4.6, 3.4, -0.4, -0.9, 3.1, 2.4,
                1.6, 4.2, 3.5, 2.6, 3.1, 2.2, 1.8, 3.3, 1.6, 1.5, 4.7, 4.0,
                3.6, 4.9, 4.8, 5.3, 5.6, 4.1, 3.7, 7.6, 6.9, 5.1, 6.4, 3.8,
                4.0, 8.6, 4.1, 1.4, 8.9, 3.0, 1.6, 8.5, 4.7, 6.6, 8.1, 4.5,
                4.8, 11.3, 4.7, 5.2, 11.5, 6.2, 2.9, 4.3, 2.8, 2.8, 6.3, 2.6,
                -0.0, 7.3, 3.4, 4.7, 9.3, 6.4, 5.4, 7.6, 5.2]
temp = np.array(temperatures)

temp1 = temp[temp < -2]
temp2 = temp[(temp >= -2) & (temp < 2)]
temp3 = temp[(temp >= 2)  & (temp < 15)]

print(temp1.mean())
print(temp2.mean())
print(temp3.mean())

In [None]:
# NumPy broadcasting: compare the 1-D temperatures against one (lower, upper)
# pair per bin. The (3, 1) bounds broadcast against the (n,) data to a (3, n)
# mask, so no explicit np.tile copy of the data is needed.
temp = np.asarray(temperatures)
lower = np.array([-np.inf, -2, 2]).reshape([3, 1])
upper = np.array([-2, 2, 15]).reshape([3, 1])

# Half-open bins [lower, upper), i.e. the same boundaries as the boolean masks
# temp < -2, -2 <= temp < 2 and 2 <= temp < 15; values outside a bin become NaN.
temp = np.where((temp >= lower) & (temp < upper), temp, np.nan)
temp

In [None]:
np.nanmean(temp, axis=1)

In [None]:
def method1(temperatures):
    """Return the mean of three temperature bins selected by boolean masks.

    The bins are half-open: below -2, [-2, 2) and [2, 15).

    Parameters
    ----------
    temperatures : array_like
        Temperature values; converted with ``np.array``.

    Returns
    -------
    tuple
        ``(cold_mean, mild_mean, warm_mean)``, one mean per bin in the
        order listed above.
    """
    values = np.array(temperatures)

    # One boolean mask per bin; each mask selects a 1-D subset of `values`.
    cold_mask = values < -2
    mild_mask = (values >= -2) & (values < 2)
    warm_mask = (values >= 2) & (values < 15)

    return tuple(values[mask].mean() for mask in (cold_mask, mild_mask, warm_mask))

def method2(temperatures):
    """Return the mean of three temperature bins using broadcasting.

    The bins are half-open, matching ``method1``: below -2, [-2, 2) and
    [2, 15). Values on a boundary therefore land in the same bin in both
    functions, which keeps the timing comparison like-for-like.

    Parameters
    ----------
    temperatures : array_like
        One-dimensional sequence of temperature values.

    Returns
    -------
    numpy.ndarray
        Array of shape (3,) with the mean of each bin; a bin with no
        values yields NaN.
    """
    temp = np.asarray(temperatures)
    lower = np.array([-np.inf, -2, 2]).reshape([3, 1])
    upper = np.array([-2, 2, 15]).reshape([3, 1])

    # The (3, 1) bounds broadcast against the (n,) data to a (3, n) mask, so
    # the data does not have to be copied three times with np.tile.
    in_bin = (temp >= lower) & (temp < upper)
    binned = np.where(in_bin, temp, np.nan)
    return np.nanmean(binned, axis=1)

%timeit method1(temperatures)
%timeit method2(temperatures)

In [None]:
# boolean indexing returns a 1-D array containing all the elements in the indexed array 
# corresponding to all the true elements in the boolean array
a1 = np.arange(6).reshape([2, 3])
b  = np.array(
        [[True, False, False],
         [False, True, True]])
print(a1[b])

In [None]:
# unless the array has more dimensions than the index array (the boolean mask then selects along the first axis)
a2 = a1.reshape([3, 2])
b = np.array([True, False, True])
a2[b]

In [None]:
# combining the indexing method
a1 = np.arange(16).reshape([4,4])
print(a1)
print(a1[:2, [1, 3]])


In [None]:
# assignment
a = np.arange(10)
print(a)

In [None]:
a[:] = 1
print(a)

In [None]:
# change to another object
a = 1
print(a)

# Change shape/dimension

In [None]:
a = np.floor(10*np.random.random((3,4)))
print(a)
print('shape is', a.shape)

In [None]:
print(a.flatten())
print(a.reshape(4, 3))
print(a.reshape(6, 2))

In [None]:
# transposed
a.T

In [None]:
# stacking
a = [1, 2, 3]
b = [3, 5, 6]

print(np.vstack([a, b]))
print(np.hstack([a, b]))
print(np.concatenate([a, b]))

In [None]:
a = np.array(a).reshape(1, 3)
b = np.array(b).reshape(1, 3)
print(np.concatenate([a, b], axis=0))


# Arithmetic operations

In [None]:
a = np.array([2, 2, 2, 2])
b = np.array([1, 2, 4, 8])


print('a:', a)
print('b:', b)

print('+:', b + a)
print('-:', b - a)
print('*:', b * a)
print('/:', b / a)
print('%:', b % a)
print('@:', b @ a)      # Unlike in many matrix languages, the product operator * operates elementwise in NumPy arrays. 
print('dot:', b.dot(a)) # The matrix product can be performed using the @ operator (in python >=3.5) or the dot function or method:

In [None]:
# element-wise powers: square, then square root (label each result by its exponent)
print('b2:', b ** 2)
print('b0.5:', b ** 0.5)

In [None]:
# += and *=, act in place to modify an existing array
print(a)
a += 1
print(a)

In [None]:
# unary operations
print(b.sum())
print(b.min())
print(b.max())

In [None]:
# for multidimensional
b = np.arange(12).reshape(3,4)
print(b.sum())
print(b.sum(axis=0))
print(b.sum(axis=1))

# Common Numpy functions

```
all, any, apply_along_axis, 
argmax, argmin, argsort, average, bincount, ceil, 
clip, conj, corrcoef, cov, cross, cumprod, cumsum, diff, dot, floor, inner, inv, lexsort, 
max, maximum, mean, median, min, minimum, nonzero, outer, prod, re, 
round, sort, std, sum, trace, transpose, var, vdot, vectorize, where
```

In [None]:
# clip values
a = np.array([3, 8, 7, 5, 4])
np.clip(a, 4, 6)

In [None]:
# cumsum
a.cumsum()

In [None]:
# difference
a = a.cumsum()
print(np.diff(a))
print(np.diff(a, 2))

In [None]:
# where to replace
a = np.array([8, 3, 5, 7, 9])
np.where(a > 5, a, np.nan)

In [None]:
b = np.where(a > 5, a, np.nan)
b.sum()

In [None]:
# exclude np.nan
np.nansum(b)

In [None]:
# repeat and tile
a = np.arange(3)
print(np.repeat(a, 4))
print(np.tile(a, 4))

In [None]:
# argmax, argmin, argsort and nonzero return indices rather than values,
# which is very helpful when you need to use one array's values to locate elements in another array

# find the precip on different land cover types
# landcover: 5x5 grid of integer class codes 1-4 (randint's upper bound is exclusive)
# precipitation: 5x5 grid of uniform random values in [0, 1)
landcover     = np.random.randint(1, 5, [5, 5])
precipitation = np.random.random([5, 5])

# label each grid with its own name so the two printouts cannot be confused
print('landuse\n', landcover)
print('precipitation\n', precipitation)

# plot


# find location of irrigated land


# identify the precipitation on these cells and calculate the mean



# identify the wettest and driest land cover and their mean precipitation

In [None]:
np.clip


In [None]:
# How to get the common items between two python numpy arrays?
a = [1, 3, 5, 7, 9]
b = [2, 3, 4, 5, 6]
np.intersect1d(a, b)

In [None]:
# Other useful functions

# calculate histogram/freq
mu, sigma = 2, 0.5
v = np.random.normal(mu,sigma,10000)
(n, bins) = np.histogram(v, bins=50, density=True)
plt.plot(.5*(bins[1:]+bins[:-1]), n)