# Pandas 101: Explore pandas and NumPy

In [1]:
# import packages
import numpy as np
import pandas as pd

In [2]:
# create a Series from a scalar: the result holds that single value,
# and because no index is passed it gets the default integer label 0
pd.Series(6.3)

0    6.3
dtype: float64

In [3]:
# create a Series from a list: each element becomes one entry,
# labelled by the default integer index (0 through 5 here)
pd.Series([0,1,2,3,4,5])

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64

In [4]:
# create a Series from a NumPy ndarray; np.arange(10) supplies the integers 0 through 9
pd.Series(np.arange(10))

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [5]:
# build a Series from a dictionary: the keys become the index labels
# and the values become the data
s = pd.Series(
    data=dict(
        name="PyDataPDX",
        city="Portland",
        state="Oregon",
    )
)
s

name     PyDataPDX
city      Portland
state       Oregon
dtype: object

In [6]:
# look up a single value of `s` by its index label ("name" was one of the dictionary keys)
s["name"]

'PyDataPDX'

In [7]:
# build a DataFrame from a list of named Series:
# each Series becomes one ROW (labelled by the Series name), and the
# index labels shared by the Series ("A", "B", "C") become the columns
a = pd.Series(dict(A=1, B=2, C=3), name="Order")
b = pd.Series(dict(A=True, B=False, C=False), name="?Vowel")
c = pd.Series(
    dict(A="Apple", B="Banana", C="Cantaloupe"),
    name="Example of Fruit",
)
df = pd.DataFrame(data=[a, b, c])
df

Unnamed: 0,A,B,C
Order,1,2,3
?Vowel,True,False,False
Example of Fruit,Apple,Banana,Cantaloupe


In [8]:
# transpose the DataFrame with .T: row labels become column names and vice versa
df.T

Unnamed: 0,Order,?Vowel,Example of Fruit
A,1,True,Apple
B,2,False,Banana
C,3,False,Cantaloupe


In [9]:
# build a DataFrame from a dictionary of dictionaries:
# the outer keys become the column names, the inner keys become the row labels
a = dict(A=1, B=2, C=3)
b = dict(A=True, B=False, C=False)
c = dict(A="Apple", B="Banana", C="Cantaloupe")
df = pd.DataFrame(
    data={
        "Order": a,
        "?Vowel": b,
        "Example of fruit": c,
    }
)
df

Unnamed: 0,Order,?Vowel,Example of fruit
A,1,True,Apple
B,2,False,Banana
C,3,False,Cantaloupe


In [10]:
# select a single column by putting its name in square brackets; the result is a Series
df["Order"]

A    1
B    2
C    3
Name: Order, dtype: int64

In [11]:
# square brackets on a DataFrame look up COLUMN labels, so rows cannot be
# accessed the same way: "A" is a row label of df, not a column, and pandas
# raises KeyError. The error is caught and printed so that the notebook
# still runs from top to bottom without stopping at this cell.
try:
    df["A"]
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: 'A'

In [12]:
# select a row by its label with the .loc indexer (square brackets: .loc is indexed, not called)
df.loc["A"]

Order                   1
?Vowel               True
Example of fruit    Apple
Name: A, dtype: object

In [13]:
# select a row by its integer position with the .iloc indexer; position 0 is the first row
df.iloc[0]

Order                   1
?Vowel               True
Example of fruit    Apple
Name: A, dtype: object

In [14]:
# select every row (the bare `:` slice) for a list of columns
df.loc[:, ["Order", "?Vowel"]]

Unnamed: 0,Order,?Vowel
A,1,True
B,2,False
C,3,False


In [15]:
# select a slice of rows and a list of columns; the label slice "B":"C"
# includes its end point, so both rows B and C are returned
df.loc["B":"C", ["Order", "Example of fruit"]]

Unnamed: 0,Order,Example of fruit
B,2,Banana
C,3,Cantaloupe


In [16]:
# filter rows by passing a list of booleans to []: one flag per row,
# and only the rows flagged True are kept
df[[True, False, False]]

Unnamed: 0,Order,?Vowel,Example of fruit
A,1,True,Apple


In [17]:
# the "?Vowel" column is itself boolean, so it can be used directly as a row mask:
# keep the rows where ?Vowel is True
df[df["?Vowel"]]

Unnamed: 0,Order,?Vowel,Example of fruit
A,1,True,Apple


In [18]:
# invert the boolean mask with the ~ operator to keep the rows where ?Vowel is False
df[~df["?Vowel"]]

Unnamed: 0,Order,?Vowel,Example of fruit
B,2,False,Banana
C,3,False,Cantaloupe


In [19]:
# keep the rows where ?Vowel is False, this time inverting the mask
# with np.logical_not instead of the ~ operator
df[np.logical_not(df["?Vowel"])]

Unnamed: 0,Order,?Vowel,Example of fruit
B,2,False,Banana
C,3,False,Cantaloupe


In [20]:
# create a 2-D ndarray from a nested list: each inner list becomes one row
a = [[1,2,3],[4,5,6]]
arr = np.array(a)
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [21]:
# what type of object is arr? (np.array returns a numpy.ndarray)
type(arr)

numpy.ndarray

In [22]:
# what is the shape of arr? It is reported as (rows, columns): 2 rows and 3 columns
arr.shape

(2, 3)

In [23]:
# maximum over all elements: with no axis given, the array is treated as flattened
arr.max()

6

In [24]:
# maximum along axis=0: the rows are collapsed, giving one maximum per column
arr.max(axis=0)

array([4, 5, 6])

In [25]:
# maximum along axis=1: the columns are collapsed, giving one maximum per row
arr.max(axis=1)

array([3, 6])

In [26]:
# minimum over all elements: with no axis given, the array is treated as flattened
arr.min()

1

In [27]:
# minimum along axis=0: the rows are collapsed, giving one minimum per column
arr.min(axis=0)

array([1, 2, 3])

In [28]:
# minimum along axis=1: the columns are collapsed, giving one minimum per row
arr.min(axis=1)

array([1, 4])

In [29]:
# mean over all elements: with no axis given, the array is treated as flattened
arr.mean()

3.5

In [30]:
# mean along axis=0: the rows are collapsed, giving one mean per column
arr.mean(axis=0)

array([2.5, 3.5, 4.5])

In [31]:
# mean along axis=1: the columns are collapsed, giving one mean per row
arr.mean(axis=1)

array([2., 5.])

In [32]:
# multiply an array by a scalar: the scalar is broadcast across the array,
# so every element is multiplied by b
arr = np.array([1,2,3])
b = 2
arr * b

array([2, 4, 6])

![image.png](attachment:6db314b0-dcfb-43a7-91bf-65ecfcab700e.png)

Image source: [Array Broadcasting in Numpy](https://numpy.org/devdocs/user/theory.broadcasting.html)

In [33]:
# exponentiation between array and scalar: every element of the 2 x 3 array
# is raised to the power of 2.
# The nested list is written out here rather than read from the variable `a`,
# because `a` is rebound to several different objects in earlier cells;
# this cell therefore gives the same result regardless of which of them ran last.
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr**2

array([[ 1,  4,  9],
       [16, 25, 36]])

In [34]:
# create a 2 x 2 array (two rows of two values) and confirm its shape
arr2 = np.array([[1,2], [1,2]])
arr2.shape

(2, 2)

In [35]:
# view the shape of arr as (rows, columns), to compare it with the shape of arr2
arr.shape

(2, 3)

In [36]:
# try to add two arrays with incompatible trailing dimensions (3 vs 2):
# the shapes cannot be broadcast together and NumPy raises ValueError.
# The error is caught and printed so that the notebook still runs from
# top to bottom without stopping at this cell.
try:
    arr + arr2
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,2) 

In [37]:
# create a 1 x 3 array (note the double brackets: one row holding three values)
# and confirm its shape
arr2 = np.array([[1, 2, 3]])
arr2.shape

(1, 3)

In [38]:
# add two arrays with compatible trailing dimensions, shapes (2, 3) and (1, 3):
# the single row of arr2 is broadcast, i.e. added to every row of arr
arr + arr2

array([[2, 4, 6],
       [5, 7, 9]])

In [39]:
# add two arrays with identical shapes (2, 3): the addition is purely element-wise,
# and because both rows of arr2 are [1, 2, 3] the result equals the broadcast
# sum of arr with the single row [1, 2, 3]
arr2 = np.array([[1,2,3], [1,2,3]])
arr + arr2

array([[2, 4, 6],
       [5, 7, 9]])

In [40]:
# create a 4x4 ndarray of zeros; with no dtype given the values are floats (0.)
np.zeros((4,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [41]:
# create a 4x4 ndarray of zeros with dtype=bool, where the zero value is False
np.zeros((4,4), dtype=bool)

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [42]:
# create a 4x4 ndarray of ones; with no dtype given the values are floats (1.)
np.ones((4,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [43]:
# create a 4x4 ndarray of ones with dtype=bool, where the one value is True
np.ones((4,4), dtype=bool)

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])