In [434]:
# import pandas and numpy
import pandas as pd
import numpy as np

# Continue The Walrus DataFrame

In [435]:
# Sample walrus records carried over from lecture 1 and reused in lecture 2.
# Each key becomes a column; the three lists line up row by row.
walrus_data = dict(
    name=["Antje", "ET", "Jocko"],
    gender=["Female", "Male", "Male"],
    weight=[2000, 3400, 3000],
)

# Turn the dict of columns into a DataFrame with one row per walrus.
walruses = pd.DataFrame(data=walrus_data)

In [436]:
# Comparing the gender column with "Female" yields a boolean Series, one
# True/False entry per row. This line is not the last expression of the cell,
# so its value is computed and discarded rather than displayed.
walruses["gender"].eq("Female")

# Keep the same boolean Series under a name so it can be reused as a mask.
walrus_is_female = walruses["gender"].eq("Female")

# Boolean indexing: show only the rows whose mask entry is True.
walruses.loc[walrus_is_female]

Unnamed: 0,gender,name,weight
0,Female,Antje,2000


In [437]:
# Boolean mask that is True only for rows that are both heavier than 2500 and
# Male. The two element-wise conditions are combined with `&`, which works
# row by row on Series (unlike the plain `and` keyword).
male_walrus_is_greater_2500 = (
    walruses["weight"].gt(2500) & walruses["gender"].eq("Male")
)

# Show the rows selected by the combined mask.
walruses.loc[male_walrus_is_greater_2500]

Unnamed: 0,gender,name,weight
1,Male,ET,3400
2,Male,Jocko,3000


# Ghosts DataFrame

In [438]:
# Build the ghosts table: one row per ghost, two count columns, filled with
# random integers from numpy. randint excludes the upper bound, so every value
# lies in 1..7. No seed is set in this cell, so the numbers change on each run.
ghosts = pd.DataFrame(
    data=np.random.randint(low=1, high=8, size=(4, 2)),
    index=["Inky", "Blinky", "Pinky", "Sue"],
    columns=["dots_eaten", "pacmen_eaten"],
)

# Display the freshly built DataFrame.
ghosts

Unnamed: 0,dots_eaten,pacmen_eaten
Inky,4,2
Blinky,4,7
Pinky,7,4
Sue,7,5


In [439]:
# Overwrite pacmen_eaten with its element-wise square root.
# Note: this replaces the column in place, so running the cell a second time
# takes the square root of the already-rooted values.
ghosts["pacmen_eaten"] = np.sqrt(ghosts["pacmen_eaten"])

# Alternative using the standard-library math module with Series.apply:
#     import math
#     ghosts["pacmen_eaten"] = ghosts["pacmen_eaten"].apply(math.sqrt)

# Display the updated pacmen_eaten column.
ghosts.loc[:, "pacmen_eaten"]

Inky      1.414214
Blinky    2.645751
Pinky     2.000000
Sue       2.236068
Name: pacmen_eaten, dtype: float64

# String Methods and CSV Files

In [440]:
"""
Read the tab-separated file walrus_vocalizations.tsv (fields in a .tsv file are
separated by tabs) and name its two columns "name" and "sound".
"""
# Load the tab-separated vocalizations file. Column labels are supplied via
# `names`, so they are not taken from the file itself.
walrus_sounds = pd.read_csv(
    "walrus_vocalizations.tsv",
    sep="\t",
    names=["name", "sound"],
)

In [441]:
# Normalise the sound column to lowercase. The .str accessor applies the
# string method to every entry of the column at once.
walrus_sounds["sound"] = walrus_sounds.loc[:, "sound"].str.lower()

# Display the lowercased sound column.
walrus_sounds.loc[:, "sound"]

0         lound grunt
1             whistle
2          soft grunt
3             chortle
4             chortle
5    piercing whistle
6        medium_grunt
7              bellow
Name: sound, dtype: object

In [442]:
# Boolean mask: True for every row whose sound text includes "grunt".
# - regex=False treats "grunt" as literal text instead of a regular expression.
# - na=False counts a missing sound as "no match"; without it a missing value
#   would put NaN in the mask and the row selection below would raise.
grunts = walrus_sounds["sound"].str.contains("grunt", regex=False, na=False)

# Show all the walruses that make some kind of grunt sound.
walrus_sounds[grunts]

Unnamed: 0,name,sound
0,Antje,lound grunt
2,Jocko,soft grunt
6,ET,medium_grunt
