In [None]:
# 1/8 - What is Data Analysis? Why Python? What other options are there? What's the cycle of a Data Analysis project? What's the difference between Data Analysis and Data Science?
# https://docs.google.com/presentation/d/1fDpjlyMiOMJyuc7_jMekcYLPP2XlSl1eWw9F7yE7byk/edit#slide=id.g6fe1465eda_0_215
# 
# 2/8 - A demonstration of a real life data analysis project using Python, Pandas, SQL and Seaborn.
# https://github.com/ine-rmotr-curriculum/FreeCodeCamp-Pandas-Real-Life-Example
#
# 3/8 - A step by step tutorial to learn how to use Jupyter Notebooks
# https://github.com/ine-rmotr-curriculum/ds-content-interactive-jupyterlab-tutorial
#
# 5/8 - Pandas data processing, Data Frame vs spreadsheets, Big Data
# https://github.com/ine-rmotr-curriculum/freecodecamp-intro-to-pandas
#
# 6/8 - Data cleaning (null values, invalid values, statistical outliers, etc, and how to clean them)
# https://github.com/ine-rmotr-curriculum/data-cleaning-rmotr-freecodecamp
#
# 7/8 - Reading data from other sources 
# https://github.com/ine-rmotr-curriculum/RDP-Reading-Data-with-Python-and-Pandas
#
# 8/8 - If your Python or coding skills are rusty, check out this section for a quick recap of Python's main features and control flow structures.
# https://github.com/ine-rmotr-curriculum/ds-content-python-under-10-minutes

## 4/8 - NumPy data processing, low level details of computations and memory storage, limitations of Excel

In [None]:
# https://github.com/ine-rmotr-curriculum/freecodecamp-intro-to-numpy

import sys

import numpy as np

# Fancy indexing: index an array with a list of positions to get back an
# array holding the elements found at those positions (negative positions
# count from the end).
a = np.array([1, 2, 3, 4])
positions = [0, 2, -1]
b = a[positions]
print(b)

# Arrays can also hold strings and arbitrary Python objects, although NumPy
# is not designed for that kind of data.
c = np.array(['a', 'b', 'c'])
print(c.dtype)  # a fixed-width Unicode string dtype
# `sys` is only used here as a convenient non-numeric object; it has to be
# imported at the top of the notebook, otherwise this line raises NameError.
d = np.array([{'a': 1}, sys])
print(d.dtype)  # object

# If the nested lists do not have a consistent shape ("ragged" input), NumPy
# cannot build a regular n-dimensional array. Older releases silently fell
# back to an array of Python objects; NumPy >= 1.24 raises ValueError unless
# dtype=object is requested explicitly, so ask for it here.
e = np.array([[[12, 11, 10], [9, 8, 7]], [[6, 5, 4]]], dtype=object)
# The result is a 1-D object array whose elements are plain Python lists.
print(e.dtype, e.shape, e.size, type(e[0]))

# Matrix elements are selected with comma notation: [row, column].
# Each position also accepts a slice.
f = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

selections = (
    f[1, 0],     # single element: second row, first column
    f[0:2],      # first two rows
    f[:, :2],    # every row, first two columns
    f[:2, 2:],   # first two rows, last column only
)
for selection in selections:
    print(selection)

# Two ways to overwrite part of a matrix:
# 1) assign a sequence whose shape matches the selected row
f[1, :] = [10, 10, 10]
print(f)
# 2) assign a scalar, which is copied into every selected position
f[2, :] = 99
print(f)

# Summary statistics are available for n-dimensional arrays, see
# https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.ndarray.html#array-methods
print(np.sum(a), np.mean(a), np.std(a), np.var(a))
# Without `axis` the whole matrix is reduced to one number. axis=0 reduces
# over the rows (one result per column), axis=1 reduces over the columns
# (one result per row); valid axes run from 0 to ndim-1.
print(np.sum(f), np.mean(f), np.sum(f, axis=0), np.mean(f, axis=1))

# Vectorised operations: arithmetic between an array and a scalar, or between
# two arrays, is applied element by element and returns a NEW array.
g = np.arange(4)
shifted, scaled = g + 10, g * 10
print(shifted, scaled)
# array-with-array arithmetic needs both operands to have the same shape
elementwise_sum, elementwise_product = a + g, a * g
print(elementwise_sum, elementwise_product)
# the equivalent plain-Python list comprehension, for comparison
print([value * 10 for value in g])

# Augmented assignment (+=, *=, ...) does not create a new array: it updates
# the existing array in place.
g += 10
print(g)

# Boolean masks: indexing with a boolean array (or with a condition, which
# evaluates to one) keeps only the positions that are True.
# Start again from 0..3: the in-place `g += 10` earlier in this cell leaves g
# at 10..13, for which the conditions below select either everything or
# nothing and so demonstrate no filtering at all.
g = np.arange(4)
print(g[[True, False, False, True]])       # explicit mask
print(g[g >= 2])                            # mask built from a comparison
print(g[g > g.mean()], g[~(g > g.mean())])  # ~ negates a mask
print(g[(g == 0) | (g == 1)])               # | is element-wise OR
print(g[(g <= 2) & (g % 2 == 0)])           # & is element-wise AND

# Boolean masks work on matrices too. A seeded, local random generator is
# used so that Restart & Run All always produces the same matrix.
rng = np.random.default_rng(0)
h = rng.integers(100, size=(3, 3))  # integers drawn from 0..99
print(h)  # show the matrix so the selections below can be checked against it

# explicit mask: keeps the four corners and the centre, returned as a 1-D array
corners_and_centre = np.array([
    [True, False, True],
    [False, True, False],
    [True, False, True]
])
print(h[corners_and_centre])
# mask built from a condition: every element greater than 30
print(h[h > 30])

# Linear algebra: matrix products and transposes.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])  # 3x3
B = np.array([[6, 5], [4, 3], [2, 1]])           # 3x2

product = A.dot(B)  # matrix product via the method form
print(product)
B_transposed = B.T  # transpose: 3x2 -> 2x3
print(B_transposed)
print(A @ B)  # @ is the matrix-multiplication operator
print(B_transposed @ A)

# for more useful functions (random, arange, reshape, linspace, zeros, ones, empty, identity, eye), see 2. NumPy.ipynb in GitHub link above