In [1]:
import pandas as pd
import numpy as np
# Seed NumPy's global generator once, up front, so that every later
# np.random.randn call produces the same values on each Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Eight consecutive periods starting 2000-01-01; used as the row index of df.
index = pd.date_range("1/1/2000", periods=8)

# Five random values labelled "a" through "e".
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])

# 8 x 3 frame of random values: one row per date in `index`, columns A/B/C.
df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"])

In [2]:
# head(): preview the start of a long Series. Called without an argument it
# shows the first five rows.
long_series = pd.Series(np.random.randn(1000))
long_series.head()

0    0.401926
1    1.638128
2    0.415337
3    0.588477
4   -1.063058
dtype: float64

In [3]:
# tail(n): show the last n rows of the Series, here the final three.
long_series.tail(3)

997    0.718283
998   -0.847769
999    2.248140
dtype: float64

In [4]:
# .array exposes the Series' values as a pandas extension array
# (a NumpyExtensionArray for this float64 Series).
s.array

<NumpyExtensionArray>
[  np.float64(0.7227492522135575), np.float64(-0.14673906838054823),
   np.float64(0.5094811442379116),   np.float64(0.5321976532750041),
  np.float64(-0.8814972820196645)]
Length: 5, dtype: float64

In [5]:
# The same accessor is available on the index; here it wraps the
# object-dtype labels "a".."e".
s.index.array

<NumpyExtensionArray>
['a', 'b', 'c', 'd', 'e']
Length: 5, dtype: object

In [6]:
# to_numpy() returns the values as a plain NumPy ndarray.
s.to_numpy()

array([ 0.72274925, -0.14673907,  0.50948114,  0.53219765, -0.88149728])

In [7]:
# np.asarray accepts a Series directly and yields the same ndarray as
# s.to_numpy().
np.asarray(s)

array([ 0.72274925, -0.14673907,  0.50948114,  0.53219765, -0.88149728])

In [8]:
# Build a frame from three random Series whose row labels only partly overlap.
# The constructor aligns them on the union of labels ("a".."d"), leaving NaN
# wherever a column has no value for a label.
df = pd.DataFrame(
    {
        name: pd.Series(np.random.randn(len(labels)), index=labels)
        for name, labels in [
            ("one", ["a", "b", "c"]),
            ("two", ["a", "b", "c", "d"]),
            ("three", ["b", "c", "d"]),
        ]
    }
)
df

Unnamed: 0,one,two,three
a,1.259465,-0.160643,
b,-0.525446,-0.247493,-0.157688
c,0.117082,-3.063924,0.034587
d,,0.107755,-0.351685


In [9]:
# Take the second row (label "b") and the "two" column as Series operands.
# `column` is reused by the MultiIndex example in the following cell.
row = df.iloc[1]
column = df["two"]
# Subtract `row` from every row of df, matching the Series' labels against
# df's column labels; row "b" therefore becomes all zeros.
df.sub(row, axis="columns")

Unnamed: 0,one,two,three
a,1.784911,0.08685,
b,0.0,0.0,0.0
c,0.642528,-2.816431,0.192275
d,,0.355248,-0.193997


In [10]:
# Align one level of a MultiIndexed DataFrame with a Series.
# Two-level row index: level "first" holds 1/1/1/2, level "second" holds a/b/c/a.
row_labels = pd.MultiIndex.from_arrays(
    [[1, 1, 1, 2], ["a", "b", "c", "a"]], names=["first", "second"]
)
# Work on a copy so the original df keeps its flat index.
dfmi = df.copy()
dfmi.index = row_labels
# Subtract `column` down the rows, matching its labels against the "second"
# level only.
dfmi.sub(column, axis=0, level="second")

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,1.420108,0.0,
1,b,-0.277953,0.0,0.089805
1,c,3.181007,0.0,3.098511
2,a,,0.268399,-0.191042


In [11]:
# Element-wise test for positive values, then reduce each column with all():
# a column is True only if every one of its entries passed the test.
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [12]:
# Element-wise equality of two frames. Positions that hold NaN compare as
# False (a/three and d/one here), even though both sides were computed from
# the same data.
df + df == df * 2

Unnamed: 0,one,two,three
a,True,True,False
b,True,True,True
c,True,True,True
d,False,True,True


In [13]:
# Reducing that comparison with all() reports False for every column that
# contains a NaN.
(df + df == df * 2).all()

one      False
two       True
three    False
dtype: bool

In [14]:
# The root cause: NaN does not compare equal to itself.
np.nan == np.nan

False

In [15]:
# equals() compares whole objects and treats NaNs in the same location as
# equal, so the two frames are reported as identical.
(df + df).equals(df * 2)

True

In [16]:
# Both frames hold the same three values. df2 stores them in reverse order
# under reversed labels, so each label maps to the same value in both frames
# while the row order differs.
values = ["foo", 0, np.nan]
df1 = pd.DataFrame({"col": values})
df2 = pd.DataFrame({"col": values[::-1]}, index=[2, 1, 0])
df1.equals(df2)

False

In [17]:
# equals() requires the Series or DataFrame index to be in the same order;
# sorting df2's index lines it up with df1, and the comparison becomes True.
df1.equals(df2.sort_index())

True

In [18]:
# Comparing array-like objects: a scalar is compared against every element,
# giving a boolean Series.
pd.Series(["foo", "bar", "baz"]) == "foo"

0     True
1    False
2    False
dtype: bool

In [19]:
# The same comparison on an Index returns a plain boolean ndarray rather than
# a Series.
pd.Index(["foo", "bar", "baz"]) == "foo"

array([ True, False, False])

In [20]:
# Two array-likes of the same length are compared position by position.
pd.Series(["foo", "bar", "baz"]) == pd.Index(["foo", "bar", "qux"])

0     True
1     True
2    False
dtype: bool

In [21]:
# A NumPy array of the same length works as the right-hand operand too.
pd.Series(["foo", "bar", "baz"]) == np.array(["foo", "bar", "qux"])

0     True
1     True
2    False
dtype: bool

In [22]:
# Combining overlapping data sets
# Two frames with the same columns but different lengths (5 vs 6 rows), each
# with missing values in different positions.
df1 = pd.DataFrame(
    dict(
        A=[1.0, np.nan, 3.0, 5.0, np.nan],
        B=[np.nan, 2.0, 3.0, np.nan, 6.0],
    )
)
df2 = pd.DataFrame(
    dict(
        A=[5.0, 2.0, 4.0, np.nan, 3.0, 7.0],
        B=[np.nan, np.nan, 3.0, 4.0, 6.0, 8.0],
    )
)
df1

Unnamed: 0,A,B
0,1.0,
1,,2.0
2,3.0,3.0
3,5.0,
4,,6.0


In [23]:
# Display the second frame for comparison with df1.
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [24]:
# combine_first(): keep df1's values and fill its missing entries, plus the
# extra row 5 that only df2 has, from df2. Cells missing in both stay NaN.
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [25]:
# General DataFrame combine
def combiner(x, y):
    """Pick values element-wise: keep ``x`` where present, else take ``y``.

    Parameters
    ----------
    x, y
        Array-likes (or scalars) accepted by ``pd.isna`` / ``np.where``.

    Returns
    -------
    numpy.ndarray
        ``y`` at positions where ``x`` is missing, ``x`` everywhere else.
    """
    x_is_missing = pd.isna(x)
    return np.where(x_is_missing, y, x)
# combine() performs a column-wise combine of df1 with df2 using `combiner`;
# with this function the result matches df1.combine_first(df2).
df1.combine(df2, combiner)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [None]:
Function application
To apply your own or another library’s functions to pandas objects, you should be aware of the methods below. The appropriate method to use depends on whether your function expects to operate on an entire DataFrame or Series, row- or column-wise, or elementwise.

Tablewise Function Application: pipe()

Row or Column-wise Function Application: apply()

Aggregation API: agg() and transform()

Applying Elementwise Functions: map()