### Author: Emmanuel Rodriguez

Date: 4 Aug 2022

### Intro Numpy and Pandas

# Intro Numpy

In [1]:
# Import libraries
import numpy as np 

In [2]:
an_array = np.array([3, 33, 333]) # rank 1 array

print(an_array)
print(type(an_array))

[  3  33 333]
<class 'numpy.ndarray'>


### Create a Rank 2 numpy array:

A rank 2 ndarray is an array with two dimensions.

In [3]:
arrayTwo = np.array([[11,12,13],[21,22,23]]) # Creates rank 2 array

print(arrayTwo)

print("The shape is 2 rows, by 3 columns:", arrayTwo.shape) # rows x columns

print("Accessing elements [0,0],[0,1], and [1,0] of the ndarray: ", arrayTwo[0,0], arrayTwo[0,1], arrayTwo[1,0])

[[11 12 13]
 [21 22 23]]
The shape is 2 rows, by 3 columns: (2, 3)
Accessing elements [0,0],[0,1], and [1,0] of the ndarray:  11 12 21


### Use both integer indexing & slice indexing

In [4]:
# Create a 2D array of shape (3,3)
anArray = np.array([[11,12,13],[21,22,23],[31,32,33]])
print(anArray)

[[11 12 13]
 [21 22 23]
 [31 32 33]]


In [5]:
# Using both integer indexing & slicing generates an array of lower rank (1D array)
row_rank1 = anArray[1,:]

print(row_rank1, row_rank1.shape) # Notice only a single []

[21 22 23] (3,)


In [6]:
# Slicing alone, generates an array of the same rank as the anArray
row_rank2 = anArray[1:2,:] # Slicing row 1 (inclusive) through row 2 (exclusive), e.g. only row 1

print(row_rank2, row_rank2.shape) # Notice the  [[]] indicating a 2D array, and shown by the shape of 1x3 

[[21 22 23]] (1, 3)


### Statistical, Sorting, and Set Operations

Basic statistical operations:

In [7]:
# Random 2 x 5 matrix
mat = 10 * np.random.randn(2,5)
print(mat)

[[ 19.54041063   5.51243567   5.33715326  -2.09953829 -12.88927037]
 [ -7.31276845  -3.46734795  -7.6130577   -8.05052423  -3.68003109]]


In [8]:
# Compute the mean for all elements
print(mat.mean())

-1.4722538516850228


In [9]:
# Compute the mean by row
print(mat.mean(axis = 1))

[ 3.08023818 -6.02474588]


In [10]:
# Compute the mean by column
print(mat.mean(axis = 0))

[ 6.11382109  1.02254386 -1.13795222 -5.07503126 -8.28465073]


In [11]:
# Sum all elements
print(mat.sum())

-14.722538516850227


In [12]:
# Compute the medians for each row
print(np.median(mat, axis = 1))

[ 5.33715326 -7.31276845]


# For the Numpy package documentation, visit:

https://numpy.org/

# Intro Pandas

In [13]:
# Import libraries
import pandas as pd

In [15]:
#pandas Series

ser = pd.Series(data = [100,200,300,400,500], index = ['Demetri','Gerald','Ramon','David','Emmanuel'])

In [16]:
ser

Demetri     100
Gerald      200
Ramon       300
David       400
Emmanuel    500
dtype: int64

In [17]:
# Alternatively
ser = pd.Series([100,200,300,400,500], ['Demetri','Gerald','Ramon','David','Emmanuel'])

In [18]:
ser

Demetri     100
Gerald      200
Ramon       300
David       400
Emmanuel    500
dtype: int64

In [19]:
ser.index

Index(['Demetri', 'Gerald', 'Ramon', 'David', 'Emmanuel'], dtype='object')

In [20]:
# Index using the index variables
print(ser['Gerald'])

200


In [21]:
# Use the locaation function, loc
print(ser.loc['David'])

400


In [22]:
# pandas DataFrame

# Dictionary - a dictionary maps a key to a value, a dataframe maps a column name to a Series of column data.
d = {'one' : pd.Series([100., 200., 300.], index=['apple','ball','clock']),
    'two' : pd.Series([111., 222., 333., 4444.], index = ['apple','ball','cerill','dancy'])}

# Label 'one' has a series of some values with indices 'apple'...
# Similarily for Label 'two', but with four labels 

In [23]:
# Load dictionary into a dataframe
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
apple,100.0,111.0
ball,200.0,222.0
cerill,,333.0
clock,300.0,
dancy,,4444.0


# For the Pandas documentation visit:

https://pandas.pydata.org/