# Chapter 1: pandas Foundations

## Importing pandas

In [3]:
import pandas as pd
import numpy as np

In [4]:
import pyarrow as pa

## Series

### How to do it

In [7]:
# build a Series from a plain Python list
pd.Series(data=[0, 1, 2])

0    0
1    1
2    2
dtype: int64

In [8]:
# a tuple works as the data source too
pd.Series(data=(12.34, 56.78, 91.01))

0    12.34
1    56.78
2    91.01
dtype: float64

In [9]:
# a range() object can be passed directly as the data
pd.Series(data=range(0, 7, 2))

0    0
1    2
2    4
3    6
dtype: int64

In [10]:
# request an explicit dtype rather than relying on the inferred one
pd.Series(data=range(3), dtype="int8")

0    0
1    1
2    2
dtype: int8

In [11]:
# give the Series a name (it is None unless one is supplied)
pd.Series(data=["apple", "banana", "orange"], name="fruit")

0     apple
1    banana
2    orange
Name: fruit, dtype: object

## DataFrame

### How to do it

In [14]:
# construct a DataFrame from a list of lists: each inner list is one row
pd.DataFrame(data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]])

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8


In [15]:
# label the columns through the `columns` argument
pd.DataFrame(columns=["col_a", "col_b"], data=[[1, 2], [4, 8]])

Unnamed: 0,col_a,col_b
0,1,2
1,4,8


In [16]:
# construct a DataFrame from a dictionary: the keys become the column labels
pd.DataFrame(data={"first_name": ["Jane", "John"], "last_name": ["Doe", "Smith"]})

Unnamed: 0,first_name,last_name
0,Jane,Doe
1,John,Smith


In [17]:
# the values of a DataFrame can be any sequence, including a pd.Series;
# here each column is keyed by the name of the Series that supplies it
ser1 = pd.Series(data=range(3), name="int8_col", dtype="int8")
ser2 = pd.Series(data=range(3), name="int16_col", dtype="int16")
pd.DataFrame({col.name: col for col in (ser1, ser2)})

Unnamed: 0,int8_col,int16_col
0,0,0
1,1,1
2,2,2


## Index

### How to do it

In [20]:
# replace the default integer row labels with custom ones
pd.Series(index=["dog", "cat", "human"], data=[4, 4, 2])

dog      4
cat      4
human    2
dtype: int64

In [21]:
# construct a pd.Index object (with its own name) and use it as the row index
index = pd.Index(data=["dog", "cat", "human"], name="animal")
pd.Series(data=[4, 4, 2], index=index, name="num_legs")

animal
dog      4
cat      4
human    2
Name: num_legs, dtype: int64

In [22]:
# specify both the row index and the column labels for a DataFrame
pd.DataFrame(
    data=[[24, 180], [42, 166]],
    index=["Jack", "Jill"],
    columns=["age", "height_cm"],
)

Unnamed: 0,age,height_cm
Jack,24,180
Jill,42,166


## Series attributes

### How to do it

In [25]:
# example Series with a named index; the attribute cells below inspect it
index = pd.Index(data=["dog", "cat", "human"], name="animal")
ser = pd.Series(data=[4, 4, 2], index=index, name="num_legs")
ser

animal
dog      4
cat      4
human    2
Name: num_legs, dtype: int64

In [26]:
# data type of the values held by the Series
ser.dtype

dtype('int64')

In [27]:
# the name given to the Series when it was constructed
ser.name

'num_legs'

In [28]:
# the row labels, returned as a pd.Index
ser.index

Index(['dog', 'cat', 'human'], dtype='object', name='animal')

In [29]:
# the index carries its own name, separate from the name of the Series
ser.index.name

'animal'

In [30]:
# shape is a one-element tuple for a Series
ser.shape

(3,)

In [31]:
# total number of elements in the Series
ser.size

3

In [32]:
# the built-in len() also returns the number of elements
len(ser)

3

## DataFrame attributes

### How to do it

In [35]:
# example DataFrame with a named row index; the attribute cells below inspect it
index = pd.Index(data=["Jack", "Jill"], name="person")
df = pd.DataFrame(
    data=[[24, 180, "red"], [42, 166, "blue"]],
    index=index,
    columns=["age", "height_cm", "favorite_color"],
)

df

Unnamed: 0_level_0,age,height_cm,favorite_color
person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jack,24,180,red
Jill,42,166,blue


In [36]:
# one dtype per column, returned as a Series indexed by column label
df.dtypes

age                int64
height_cm          int64
favorite_color    object
dtype: object

In [37]:
# the row labels, returned as a pd.Index
df.index

Index(['Jack', 'Jill'], dtype='object', name='person')

In [38]:
# the column labels are stored as a pd.Index as well
df.columns

Index(['age', 'height_cm', 'favorite_color'], dtype='object')

In [39]:
# (number of rows, number of columns)
df.shape

(2, 3)

In [40]:
# total number of elements (rows * columns)
df.size

6

In [41]:
# len() of a DataFrame is its number of rows
len(df)

2