In [46]:
# pandas is a library written for data manipulation and analysis
import pandas as pd

# Walrus DataFrame

In [47]:
# sample records: one list per column, aligned by position
walrus_data = {
    "name": ["Antje", "ET", "Jocko"],
    "gender": ["Female", "Male", "Male"],
    "weight": [2000, 3400, 3000],
}

In [48]:
# build a DataFrame from walrus_data: each dict key becomes a column
# and each column is a pandas Series
walruses = pd.DataFrame(data=walrus_data)

In [49]:
# Jupyter-only convenience: a bare expression on the last line of a cell
# is displayed, and a DataFrame is shown as a formatted table
walruses

Unnamed: 0,gender,name,weight
0,Female,Antje,2000
1,Male,ET,3400
2,Male,Jocko,3000


In [50]:
# show only the first two rows of the data frame
walruses.head(n=2)

Unnamed: 0,gender,name,weight
0,Female,Antje,2000
1,Male,ET,3400


In [51]:
# shape is a (rows, columns) tuple; print it for the whole frame and then
# for the two-row preview, one tuple per line
print(walruses.shape, walruses.head(2).shape, sep="\n")

(3, 3)
(2, 3)


In [52]:
# print the data type of each column
print(walruses.dtypes)

gender    object
name      object
weight     int64
dtype: object


In [53]:
# print the weights of all the walruses
# (a single column label selects one column, returned as a Series)
print(walruses["weight"])

# print the weight and name of all the walruses
# (a list of column labels selects several columns, returned as a DataFrame)
print(walruses[["weight", "name"]])

0    2000
1    3400
2    3000
Name: weight, dtype: int64
   weight   name
0    2000  Antje
1    3400     ET
2    3000  Jocko


In [54]:
# .loc[row_label, column_label] -> a single value:
# print the name of the walrus in row 0
print(walruses.loc[0, "name"])

# .loc[row_label, [column_labels]] -> a Series for that one row:
# print the name and gender of the walrus in row 0
print(walruses.loc[0, ["name", "gender"]])

# .loc[[row_labels], [column_labels]] -> a smaller DataFrame:
# print the name and gender of the walruses in rows 0 and 2
print(walruses.loc[[0, 2], ["name", "gender"]])

Antje
name       Antje
gender    Female
Name: 0, dtype: object
    name  gender
0  Antje  Female
2  Jocko    Male


# Indexing the Walrus DataFrame

In [55]:
# make the "name" column the index of walruses
# (set_index moves "name" out of the columns, so re-running this cell
# without first re-creating walruses raises a KeyError)
walruses = walruses.set_index("name")

# to go back to the default row-number index, use:
#     walruses = walruses.reset_index()

In [56]:
# look up the Antje row by its index label
# (this is not the last expression in the cell, so its value is not displayed)
walruses.loc["Antje"]

# look up the same row by integer position 0 (the Antje row);
# as the last expression, this is what the cell displays
walruses.iloc[0]

gender    Female
weight      2000
Name: Antje, dtype: object

In [57]:
# overwrite the gender of every walrus with "Female"
# (assigning a scalar to a column fills every row with that value)
walruses["gender"] = "Female"

# increase every walrus's weight by one
# (not idempotent: each re-run of this cell adds another 1)
walruses["weight"] += 1

# Series and The Walrus DataFrame

In [58]:
# create a new pandas Series keyed by walrus name
# (there is no entry for Jocko)
walruses_food_consumption = pd.Series({"Antje": 60, "ET": 150})

In [59]:
# display the Series: index labels on the left, values on the right
walruses_food_consumption

Antje     60
ET       150
dtype: int64

In [60]:
# add each walrus's food consumption to its weight; the two Series are
# aligned on their index labels, so a walrus with no entry in
# walruses_food_consumption (Jocko) comes out as NaN, which is then
# replaced with 5000
walrus_weight_today = (walruses["weight"] + walruses_food_consumption).fillna(5000)

# store the result in a new "weight_today" column of walruses
walruses["weight_today"] = walrus_weight_today

# display the walruses' tabular data
walruses

Unnamed: 0_level_0,gender,weight,weight_today
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Antje,Female,2001,2061.0
ET,Female,3401,3551.0
Jocko,Female,3001,5000.0
