In [1]:
import pandas as pd
print(pd.__version__)

1.4.4


In [2]:
import numpy as np

# Pandas Series

In [42]:
x = pd.Series({"a" : 0.1,
               "b" : 0.4,
               "c" : 0.2})

print(x)

y = pd.Series([1, 2, 3])
print(y)

a    0.1
b    0.4
c    0.2
dtype: float64
0    1
1    2
2    3
dtype: int64


In [44]:
# Explicit index slicing 
# The final index is included
print(x['a':'b']) 

# Implicit index slicing
# The final index is not included
print(x[0:1])

a    0.1
b    0.4
dtype: float64
a    0.1
dtype: float64


In [47]:
z = pd.Series([1, 2, 3], index=[1, 2, 3])
print(z)

# Explicit index when indexing 
print(z[1])

# Implicit index when slicing
print(z[1:3])

# loc attribute allows indexing and slicing that always references the explicit index
print(z.loc[1]) # 1
print(z.loc[1:3]) # 1 2 3

1    1
2    2
3    3
dtype: int64
1
2    2
3    3
dtype: int64
1
1    1
2    2
3    3
dtype: int64


In [49]:
# iloc attribute allows indexing and slicing that always references the implicit index
print(z.iloc[1]) # 2
print(z.iloc[1:3]) # 2 3

2
2    2
3    3
dtype: int64


In [5]:
type(x[1:4])

pandas.core.series.Series

In [8]:
# Constructing Series from array
print(pd.Series([1, 2, 3]))
print(pd.Series([1, 2, 3], index=[10, 11, 12]))

0    1
1    2
2    3
dtype: int64
10    1
11    2
12    3
dtype: int64


In [9]:
# Construct series from scalar to fill index
print(pd.Series(5, index=[1,2,3]))

1    5
2    5
3    5
dtype: int64


In [12]:
# Construct series from dictionary
print(pd.Series({1: 10, 2: 20, 3:30}))
print(pd.Series({1: 10, 2: 20, 3:30}, index=[3,2]))

1    10
2    20
3    30
dtype: int64
3    30
2    20
dtype: int64


## DataFrame objects

In [19]:
# Construct from a single Series object
series = pd.Series({"cat": 1, "dog": 2})
pd.DataFrame(series, columns=["id"])

Unnamed: 0,id
cat,1
dog,2


In [22]:
# Construct from a list of dicts
lod = [{"a": 1, "b": 2}, {"a": 2, "b": 5, "c":10}]
pd.DataFrame(lod, index=["r1", "r2"])

Unnamed: 0,a,b,c
r1,1,2,
r2,2,5,10.0


In [23]:
# Construct from a dictionary of series objects
s1 = pd.Series({"cat": 1, "dog": 2})
s2 = pd.Series({"cat": 100, "dog": 200})
pd.DataFrame({"i1": s1, "i2": s2})

Unnamed: 0,i1,i2
cat,1,100
dog,2,200


In [25]:
# Construct from a two-dimensional NumPy array
a2d = np.random.randint(1, 100, size=(3,2))
pd.DataFrame(a2d, index=[1,2,3], columns=["c1", "c2"])

Unnamed: 0,c1,c2
1,70,72
2,67,24
3,69,3


In [27]:
# Construct from a NumPy structured array
sa = np.zeros(3, dtype=[("A", "i8"), ("B", "f8")])
pd.DataFrame(sa)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


## Index

In [30]:
# Index as immutable array
index = pd.Index([1, 5, 10])
print(index[::2])

Int64Index([1, 10], dtype='int64')


In [32]:
index[0] = -1

TypeError: Index does not support mutable operations

In [33]:
# Index as ordered set 
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])

In [37]:
# union
indA | indB

  indA | indB


Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [38]:
# intersection
indA & indB

  indA & indB


Int64Index([3, 5, 7], dtype='int64')

In [39]:
# difference
indA ^ indB

  indA ^ indB


Int64Index([1, 2, 9, 11], dtype='int64')