In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import itertools as it
import operator as op

## NumPy Examples

### Array Creation

In [None]:
# Create a 2x3 array from a nested Python list (one inner list per row).
nested_rows = [[1, 2, 3], [4, 5, 6]]
A = np.array(nested_rows)
print("This is array A", A)

In [None]:
# Pre-allocate arrays of a given shape, filled with zeros or ones.
A = np.zeros(shape=(2, 3))
B = np.ones(shape=5)
C = np.zeros((100, 100), dtype=np.float64)
D = np.ones(shape=(2, 2), dtype=np.int16)
# np.empty only reserves the memory; its contents are not initialized.
E = np.empty(shape=(2, 2))
print("This is array A", A, "and array B", B)
print("This is array C", C)
print("This is array D", D)
print("This is array E", E)

In [None]:
# Create arrays from numeric ranges.
# arange: start at 2 and step by 2, stopping before 10 (the end point is excluded) -> [2 4 6 8]
A = np.arange(start=2, stop=10, step=2)
# linspace: 10 evenly spaced points from 0 to 4.5 inclusive, i.e. a spacing of 0.5
B = np.linspace(start=0, stop=4.5, num=10)
print("This is array A", A, "and B", B)

In [None]:
# Draw arrays from random distributions.
A = np.random.rand(1, 2)  # uniform samples, shape (1, 2)
standard_normal = np.random.randn(1, 2)
B = 10 + 2 * standard_normal  # rescale standard normal draws to mu = 10, sigma = 2
print(A, B)

In [None]:
# Arrays expose attributes that describe their layout:
# ndim (number of axes), shape, size (total number of elements), dtype (element type),
# itemsize (bytes per element) and data (the buffer object holding the elements).
# A is whatever array an earlier cell last bound to that name.
print(A.ndim, A.shape, A.size, A.dtype, A.itemsize, A.data)

### Manipulation of arrays

In [None]:
# Reshape: lay out the 12 values 0..11 as 4 rows by 3 columns.
flat_values = np.arange(12)
A = flat_values.reshape((4, 3))
print(A)

In [None]:
# A holds the 10 integers 0..9. The slice A[2:5] selects the elements at
# indices 2, 3 and 4 (the stop index 5 is excluded).
A = np.arange(10)
A[2:5]  

In [None]:
# A holds the 9 integers 0..8 arranged in 3 rows and 3 columns.
A = np.arange(9).reshape(3,3)
# The displayed tuple contains A (the whole 3x3 matrix), A[1] (the row at index 1)
# and A[:,1] (the column at index 1).
A, A[1], A[:,1]

In [None]:
# Iterating over a 2-D array yields one row at a time; each row is squared element-wise.
for row in A:
    print(row ** 2)

In [None]:
# List comprehension over the rows of A: produces a list with one squared row per entry.
[i**2 for i in A]

In [None]:
# A.flat is a 1-D iterator over every element of A (it does not copy the array),
# so this squares the individual elements instead of whole rows.
[i**2 for i in A.flat]

In [None]:
# Reshaping a random 3x4 array of whole numbers between 0 and 9.
A = np.floor(10 * np.random.random((3, 4)))
print(A, A.shape)
print(A.ravel())        # all elements as one 1-D array
print(A.reshape(6, 2))  # the same elements arranged as 6 rows x 2 columns
transposed = A.T        # rows and columns swapped
print(transposed, transposed.shape)

In [None]:
# resize() changes the shape of A in place and returns None.
# B = A binds a second name to the same array object (no copy), so B shows the new shape too.
B = A 
A.resize(2,6)
print(A)
# reshape() does not modify B; it returns the reshaped array, so that return value
# is what must be printed. -1 lets NumPy work out the number of columns.
print(B.reshape(3,-1))

In [None]:
# Stacking two random 3x4 arrays.
A = np.floor(10 * np.random.random((3, 4)))
B = np.floor(10 * np.random.random((3, 4)))
print(A)
print(B)
pair = (A, B)
C = np.vstack(pair)  # B placed below A -> 6 rows x 4 columns
print(C)
D = np.hstack(pair)  # B placed to the right of A -> 3 rows x 8 columns
print(D)

In [None]:
# Splitting a random 2x12 array column-wise.
A = np.floor(10 * np.random.random((2, 12)))
print("A", A)

# An integer asks for that many equally wide pieces (3 pieces of 4 columns each).
equal_parts = np.hsplit(A, 3)
B, C, D = equal_parts
print("B", B)
print("C", C)
print("D", D)

# A tuple gives the column indices to cut at: columns [0:3], [3:4] and [4:].
E, F, G = np.hsplit(A, (3, 4))
print("E", E)
print("F", F)
print("G", G)

In [None]:
# Two equally long sequences to pair up.
a = list(range(1, 5))
b = list('abcd')
# zip is lazy: it yields (a[i], b[i]) tuples on demand and can be consumed only once.
c = zip(a, b)
# Materialize the pairs so the cell displays them.
list(c)

### Logic check

In [None]:
# logic check: element-wise comparison that produces a boolean array with the shape of A,
# True where the element is greater than 2 and False otherwise.
# A is the array left in the namespace by the cells above.
A > 2

### Sort

In [None]:
# Sorting a random 2x6 array.
a = np.floor(10 * np.random.random((2, 6)))
print(a)
# Row-wise sort: axis=-1 (the default) orders the values within each row.
b = np.sort(a, axis=-1)
print(b)

In [None]:
# sort - col wise: axis=0 orders the values within each column of a
c = np.sort(a, axis=0)
print(c)

In [None]:
# sort flatten: axis=None flattens a first and returns one sorted 1-D array
d = np.sort(a, axis=None)
print(d)

### Operations

In [None]:
# element wise add
# B = A binds a second name to the same array (no copy), so C is A added to itself.
B = A
C = A + B
print(C)

In [None]:
# element wise mult: every element of A is multiplied by the scalar 4
C = A * 4
print(C)

In [None]:
# element-wise polynomial
def f(x):
    """Evaluate 3*x**2 - 2*x + 7; for a NumPy array the arithmetic is applied element-wise."""
    quadratic_term = 3 * x**2
    linear_term = 2 * x
    return quadratic_term - linear_term + 7
# Evaluate f on the array A left by an earlier cell; the result is displayed as the cell output.
f(A)

In [None]:
# Apply a NumPy function to evenly spaced samples.
# 10 points covering one full period [0, 2*pi]; np.pi replaces the rough 3.14.
x = np.linspace(0, 2*np.pi, 10)
# Stored under its own name: rebinding `f` here would shadow the function f(x)
# defined in the previous cell and break that cell when it is re-run.
sin_x = np.sin(x)
sin_x

In [None]:
# inplace: *= writes the doubled values back into A itself,
# so any other name bound to the same array sees the change as well
A *= 2
print(A)

In [None]:
# sums along axis
print(A)
print(A.sum(axis=0)) # collapse the rows: one total per column
print(A.sum(axis=1)) # collapse the columns: one total per row
print(A.cumsum(axis=1)) # running total along each row

In [None]:
# Broadcasting: a length-5 row vector is added to each of the 5 rows of A.
A = np.ones((5, 5))
row_vector = [1, 2, 3, 4, 5]
B = A + row_vector
B

In [None]:
# universal functions (ufuncs) are applied element-wise to the whole array
print(np.exp(A))
print(np.sqrt(A))

### Basic Statistics

In [None]:
# Whole-array reductions on the 3x3 matrix of the integers 0..8.
A = np.arange(9).reshape((3, 3))
total, smallest, largest = A.sum(), A.min(), A.max()
print(total, smallest, largest)

In [None]:
# Time example: 5 time stamps and a 5x4 table of values (one row per time stamp).
time = np.linspace(20, 145, 5)
sample_points = np.arange(20)
data = np.sin(sample_points).reshape((5, 4))
print(time)
print(data)

In [None]:
# For every column of data, find where its maximum occurs and look up the matching time.
ind = data.argmax(axis=0) # row index of the maximum in each column (one column per series)
time_max = time[ind] # index time with those row positions to get the time of each maximum
print(ind)
print(time_max)

### Basic Linear Algebra

In [None]:
# matrix product
# A is rebound to B (the array left by an earlier cell), so this computes B @ B.
A = B
A @ B

In [None]:
# matrix product alt: the dot method computes the same product as the @ operator for 2-D arrays
A.dot(B)

In [None]:
# matrix
# np.matrix objects are always 2-D and use * for matrix multiplication.
# NOTE: the NumPy documentation no longer recommends np.matrix; regular arrays with @ are preferred.
m = np.matrix([[1,-2,3],[0,4,5],[7,8,-9]])  # 3x3 matrix
v = np.matrix([[2],[3],[4]])  # 3x1 column vector

In [None]:
# transpose: rows and columns of m swapped
m.T

In [None]:
# inverse: the .I attribute of np.matrix gives the multiplicative inverse of m
m.I

In [None]:
# multiply: for np.matrix operands * is the matrix product (3x3 times 3x1 gives 3x1)
m * v

In [None]:
# determinant of m, returned as a float
np.linalg.det(m)

In [None]:
# eigenvalues of m (values only, no eigenvectors)
np.linalg.eigvals(m)

In [None]:
# Solve the linear system m x = v for x
np.linalg.solve(m, v)

## Pandas Examples

### Create

In [None]:
# DataFrame from a dict of Series: the keys become column names and the Series are
# aligned on the union of their indexes ('one' has no value for row 'd', so it is NaN there).
ex1 = {
    'one': pd.Series([1., 2., 3.], index=list('abc')),
    'two': pd.Series([1., 2., 3., 4.], index=list('abcd')),
}
df1 = pd.DataFrame(ex1)
df1

In [None]:
# Second frame with the column lengths swapped: here 'two' has no value for row 'd'.
ex2 = {
    'one': pd.Series([1., 2., 3., 4.], index=list('abcd')),
    'two': pd.Series([1., 2., 3.], index=list('abc')),
}
df2 = pd.DataFrame(ex2)
df2

In [None]:
# Two 10x4 frames of standard-normal random numbers with columns A-D.
column_names = list('ABCD')
dfr1 = pd.DataFrame(np.random.randn(10, 4), columns=column_names)
dfr2 = pd.DataFrame(np.random.randn(10, 4), columns=column_names)
# astype returns a converted copy; .dtypes lists the resulting type of every column.
dfr1.astype('int32').dtypes

In [None]:
# multi-index frame (dict of tuples)
# The outer dict keys are 2-tuples and become two-level column labels;
# the inner dict keys are 2-tuples and become two-level row labels.
# Row/column combinations missing from an inner dict show up as NaN.
pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
    ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
    ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
    ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
    ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})

### Combining

In [None]:
# Build a small sample by selecting columns, shuffling, filtering and concatenating.
# Feature columns to keep and the target column.
variables = ['A','B']
target   = ['D']

# Restrict dfr1 to the chosen columns (this leaves out column C).
dfsum = dfr1[variables+target]

# Shuffle the rows, then renumber the index from 0.
dfsum = dfsum.sample(frac=1).reset_index(drop=True)

# Take the first 2 rows with a positive A and the first 2 rows with a positive B.
# dfr1 holds continuous random floats (np.random.randn), so an exact `== 0` test
# matches no rows and the sample would always be empty; a sign test does select rows.
df_frame = [dfsum[dfsum.A > 0].head(2),
            dfsum[dfsum.B > 0].head(2)]
# Stack both selections into one frame; a row that passes both filters appears twice.
df_sample = pd.concat(df_frame)
df_sample

In [None]:
# addition with 2 different DFs: values are matched on row and column labels,
# and a NaN in either operand gives NaN in the result
dfSum2 = df1 + df2
dfSum2

In [None]:
# operations automatically align the data based on label:
# df1[1:] drops the first row and df1[:-1] drops the last row, so the first and last
# labels each exist in only one operand and come out as NaN in the sum
df1[1:] + df1[:-1]

In [None]:
# create new column from existing columns
# Work on a copy: a plain `df3 = df1` makes both names refer to the same DataFrame,
# so every column added to df3 would silently be added to df1 as well.
df3 = df1.copy()
df3['three'] = df3['one'] * df3['two']
df3

In [None]:
# append boolean column based on conditional: True where column 'one' is greater than 2
df3['flag'] = df3['one'] > 2
df3

In [None]:
# append scalar: the single value 6.0 is repeated in every row of the new column
# NOTE(review): the column is named 'five' but holds 6.0 — confirm this is intended
df3['five'] = 6.
df3

In [None]:
# append random series: 4 standard-normal values labelled a-d, matched to df3 rows by index label
df3['random'] = pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd'])
df3

In [None]:
# append using assign with a precomputed value: assign returns a new DataFrame that has
# the extra 'ratio' column; df3 itself is not modified (not persistent)
df3.assign(ratio = (df3.one / df3.random))

In [None]:
# assign with lambda: the callable receives the DataFrame and returns the new column;
# the result is a new DataFrame and df3 itself is not modified (not persistent)
df3.assign(ratio_two = lambda x: (x.two / x.random))

In [None]:
# subtract column 'one' from every column: axis=0 matches the Series to the row index,
# so the same per-row value is subtracted across all columns
df3.sub(df3['one'], axis=0)

### Resizing and Reshaping

In [None]:
# pop removes column 'three' from df3 in place and returns it as a Series.
# The cell can only run once per kernel: a second run raises KeyError because the
# column is already gone.
# The result is named after the column it holds; calling it `flag` was misleading
# because df3 also has a real 'flag' column.
three = df3.pop('three')
three

In [None]:
# Take the first 5 rows of dfr1 and transpose them, so the column names A-D become
# the row labels and the 5 selected rows become the columns.
first_rows = dfr1.iloc[:5]
first_rows.T

In [None]:
# transform into numpy array: the frame's values as a 2-D ndarray (index and column labels are dropped)
np.asarray(dfr1)

### Filtering

In [None]:
# boolean compare elementwise: True where a value of dfr1 is greater than 1
dfr1.gt(1)

In [None]:
# boolean compare two DFs: True where the value in dfr1 is greater than the value
# at the same row/column label in dfr2
dfr1.gt(dfr2)

In [None]:
# boolean column reductions: per column, True only if every value is greater than 1
(dfr1 > 1).all()

In [None]:
# per column, True if at least one value is greater than 1
(dfr1 > 1).any()

In [None]:
# standardize each row: subtract the row mean, then divide by the row standard deviation.
# axis=1 in mean()/std() reduces across the columns and yields one value per row;
# axis=0 in sub()/div() matches those per-row values to the row index of dfr1.
dfr1.sub(dfr1.mean(axis=1), axis=0).div(dfr1.std(axis=1), axis=0)

In [None]:
# use apply to get mean on rows: with axis=1 each row is passed to np.mean, giving one mean per row
dfr1.apply(np.mean, axis=1)

In [None]:
# get diff min and max on columns: apply passes each column as x (default axis), giving max - min per column
dfr1.apply(lambda x: x.max() - x.min())

In [None]:
# get row with max value on columns: for each column, the index label of the row holding its maximum
dfr1.apply(lambda x: x.idxmax())

In [None]:
# apply custom function
def subtract_and_divide(x, sub, divide=1):
    """Return (x - sub) / divide.

    x      -- value or array-like to transform
    sub    -- amount subtracted from x first
    divide -- divisor applied to the difference (default 1, i.e. no scaling)
    """
    shifted = x - sub
    return shifted / divide
# apply hands each column of dfr1 to the function as x; args supplies sub=5 positionally
# and divide=3 is forwarded as a keyword argument
dfr1.apply(subtract_and_divide, args=(5,), divide=3)

In [None]:
# aggregate every column with a sum. The string name selects the built-in pandas sum;
# passing the NumPy callable np.sum instead emits a FutureWarning on pandas >= 2.1.
dfr1.agg('sum')

In [None]:
# aggregate - general form of sum(): here a dict maps each column to its own function
def mymean(x):
    """Return the mean of x by calling its mean() method."""
    return x.mean()
# column A is reduced with the built-in 'sum', column B with the custom mymean
dfr1.agg({'A': 'sum', 'B': mymean})

In [None]:
# Transform - convert to absolute: applies np.abs and returns a frame with the same shape as dfr1
dfr1.transform(np.abs)

In [None]:
# and for series: the same transform applied to the single column A
dfr1.A.transform(np.abs)

In [None]:
# map()
def f(x):
    """Return x increased by 100."""
    return x + 100
# Series.map applies f to every element of column A. A notebook cell only displays
# its last expression, so this intermediate result is printed explicitly.
print(dfr1.A.map(f))
# Element-wise application over the whole frame. DataFrame.applymap was renamed to
# DataFrame.map in pandas 2.1 (applymap is deprecated there), so use the new name
# when this pandas version provides it and fall back to applymap otherwise.
dfr1.map(f) if hasattr(pd.DataFrame, "map") else dfr1.applymap(f)

## Matplotlib Examples

In [None]:
# Histogram of 10000 draws from the standard normal distribution,
# drawn through the explicit figure/axes interface.
gaussian_numbers = np.random.randn(10000)
fig, ax = plt.subplots()
ax.hist(gaussian_numbers)
ax.set_title("Gaussian Histogram")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# line plot: sin(x) sampled every 0.2 from 0 up to (but not including) 10
x = np.arange(0, 10, 0.2)
y = np.sin(x)
# explicit figure/axes interface: create one figure with one axes and draw on that axes
fig, ax = plt.subplots()
ax.plot(x, y)
plt.show()

In [None]:
# Several curves on one set of axes: x, x**2 and x**3 over [0, 2].
x = np.linspace(0, 2, 100)

# The exponent (1, 2, 3) is paired with the legend label of its curve.
for exponent, curve_name in enumerate(('linear', 'quadratic', 'cubic'), start=1):
    plt.plot(x, x**exponent, label=curve_name)

plt.xlabel('x label')
plt.ylabel('y label')
plt.title("Title of plot")

# legend() picks up the label= given to each plot call
plt.legend()
plt.show()

In [None]:
# Three side-by-side panels in one 9x3 inch figure.
x = np.linspace(0, 2, 100)

plt.figure(1, figsize=(9, 3))

# One entry per panel: (subplot position, y values, positional style args, keyword style args).
# Position 13k means: a grid of 1 row and 3 columns, panel number k.
panels = [
    (131, x,    (),       {'label': 'linear', 'linewidth': 3}),
    (132, x**2, ('r--',), {'label': 'quadratic'}),
    (133, x**3, (),       {'label': 'cubic'}),
]
for position, y_values, format_args, style in panels:
    plt.subplot(position)
    plt.xlabel('x label')
    plt.ylabel('y label')
    plt.plot(x, y_values, *format_args, **style)

plt.show()

In [None]:
# evenly sampled time at 200ms intervals: 0.0, 0.2, ... up to (but not including) 5.0
t = np.arange(0., 5., 0.2)

# One plot call draws three curves, each given as (x, y, format string):
# 'r--' red dashes for t, 'bs' blue squares for t**2 and 'g^' green triangles for t**3
plt.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')
plt.show()

## Itertools Examples

In [None]:
# All unordered pairs of distinct fruits (order inside a pair follows the list order).
fruit = ['apple', 'banana', 'pear', 'grapefruit', 'orange']
fruit_pairs = it.combinations(fruit, 2)
list(fruit_pairs)

In [None]:
# Pair up two equally long lists element by element.
x = [1, 2, 3]
y = list('abc')
paired = zip(x, y)
list(paired)

In [None]:
# zip_longest keeps going until the longer input is exhausted,
# filling the gaps of the shorter one with None.
x = list(range(1, 6))
y = list('abc')
padded_pairs = it.zip_longest(x, y)
list(padded_pairs)

In [None]:
# Multiply two lists position by position using operator.mul.
a = [1, 2, 3]
b = [4, 5, 6]
[op.mul(left, right) for left, right in zip(a, b)]

In [None]:
# cycle repeats its input endlessly; islice takes just the first 5 items from it.
cycle_ = it.cycle(["Hi", "low"])
list(it.islice(cycle_, 5))

In [None]:
# Cartesian product: every element of a paired with every element of b.
a = [5, 6, 7]
b = [8, 9, 10]
all_pairs = it.product(a, b)
list(all_pairs)

In [None]:
# Ordered pairs (length-2 permutations) drawn from a string and from a range.
letters = 'abcde'
letter_pairs = list(it.permutations(letters, 2))
print(letter_pairs)
print('')
numbers = range(5)
number_pairs = list(it.permutations(numbers, 2))
print(number_pairs)

In [None]:
# accumulate yields the running totals of x: 0, 0+1, 0+1+2, ...
x = [*range(6)]
print(x)
running_totals = it.accumulate(x)
list(running_totals)