In [2]:
import pandas as pd
import numpy as np
# Seed NumPy's global RNG before the first random draw so that a fresh
# "Restart & Run All" regenerates the same values in this cell and in every
# later cell that calls np.random.
np.random.seed(0)

# Eight consecutive dates starting 2000-01-01, used as the row index of df.
index = pd.date_range("1/1/2000", periods=8)

# Five random values labelled "a" through "e".
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])

# 8x3 frame of random values indexed by the dates above.
# NOTE: the name df is rebound to a different frame in a later cell.
df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"])

In [3]:
# Head: build a 1000-element Series of random values, then preview only its
# first rows (head() with no argument shows five).
long_series = pd.Series(data=np.random.randn(1_000))
long_series.head()

0   -0.752096
1    0.747226
2    1.413943
3   -0.376787
4    0.084180
dtype: float64

In [4]:
# Tail: pass an explicit count to show just the last 3 rows of the Series.
long_series.tail(3)

997    0.941730
998   -0.538879
999   -1.009390
dtype: float64

In [5]:
# .array exposes the values of the Series (without its index) as a pandas
# extension array wrapping the float64 data.
s.array

<NumpyExtensionArray>
[  np.float64(-1.6657381818523658), np.float64(-0.007138336537014826),
   np.float64(0.39523683700938433),   np.float64(-0.7332888904864809),
    np.float64(-2.383997862379493)]
Length: 5, dtype: float64

In [6]:
# The same accessor works on the Index: here it holds the string labels.
s.index.array

<NumpyExtensionArray>
['a', 'b', 'c', 'd', 'e']
Length: 5, dtype: object

In [7]:
# to_numpy() returns the values as a plain NumPy ndarray.
s.to_numpy()

array([-1.66573818, -0.00713834,  0.39523684, -0.73328889, -2.38399786])

In [8]:
# np.asarray() on a Series yields an ndarray with the same values as to_numpy().
np.asarray(s)

array([-1.66573818, -0.00713834,  0.39523684, -0.73328889, -2.38399786])

In [9]:
# Row labels carried by each column. The labels only partly overlap, so the
# resulting frame contains NaN wherever a column has no value for a row label.
column_labels = {
    "one": ["a", "b", "c"],
    "two": ["a", "b", "c", "d"],
    "three": ["b", "c", "d"],
}
df = pd.DataFrame(
    {
        name: pd.Series(np.random.randn(len(labels)), index=labels)
        for name, labels in column_labels.items()
    }
)
df

Unnamed: 0,one,two,three
a,0.032608,0.091689,
b,1.499013,-0.012183,-0.734969
c,-1.236385,0.749538,-0.031445
d,,-0.481496,-0.98203


In [10]:
# Pull one row and one column out of df as Series to use as operands.
row = df.iloc[1]  # second row of df (label "b")
column = df["two"]  # used by the MultiIndex alignment cell further down
# axis="columns" matches row's labels against df's column labels, so the row
# is subtracted from every row of df (row "b" therefore becomes all zeros).
df.sub(row, axis="columns")

Unnamed: 0,one,two,three
a,-1.466405,0.103872,
b,0.0,0.0,0.0
c,-2.735398,0.761721,0.703524
d,,-0.469312,-0.247061


In [11]:
# Align one level of a MultiIndexed DataFrame with a Series.
# Give a copy of df a two-level row index ("first", "second"), then subtract
# `column` by matching its labels against the "second" level only.
dfmi = df.copy()
dfmi.index = pd.MultiIndex.from_arrays(
    [[1, 1, 1, 2], ["a", "b", "c", "a"]],
    names=["first", "second"],
)
dfmi.sub(column, axis=0, level="second")

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,-0.059081,0.0,
1,b,1.511196,0.0,-0.722785
1,c,-1.985922,0.0,-0.780982
2,a,,-0.573184,-1.073719


In [12]:
# Element-wise test followed by a per-column reduction: a column is True only
# if every one of its values is greater than zero.
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [13]:
# Element-wise equality; positions that hold NaN compare as False.
df + df == df * 2

Unnamed: 0,one,two,three
a,True,True,False
b,True,True,True
c,True,True,True
d,False,True,True


In [14]:
# Reduce the element-wise result per column: a single NaN in a column is
# enough to make that column's result False.
(df + df == df * 2).all()

one      False
two       True
three    False
dtype: bool

In [15]:
# NaN does not compare equal to itself, which is why the NaN positions in the
# element-wise comparison come out False.
np.nan == np.nan

False

In [16]:
# equals() treats NaNs in matching positions as equal, so the whole-frame
# comparison succeeds where the element-wise one did not.
(df + df).equals(df * 2)

True

In [17]:
# Two frames holding the same label -> value pairs, but with the rows stored
# in opposite order; equals() reports them as different.
values = ["foo", 0, np.nan]
df1 = pd.DataFrame({"col": values})
df2 = pd.DataFrame({"col": values[::-1]}, index=[2, 1, 0])
df1.equals(df2)

False

In [19]:
# The Series or DataFrame index must be in the same order for equals() to
# return True; sorting df2's index puts its rows in df1's order.
df1.equals(df2.sort_index())

True

In [20]:
# Comparing array-like objects: a scalar is compared against every element,
# giving a boolean Series.
pd.Series(["foo", "bar", "baz"]) == "foo"

0     True
1    False
2    False
dtype: bool

In [21]:
# The same comparison on an Index returns a plain boolean ndarray.
pd.Index(["foo", "bar", "baz"]) == "foo"

array([ True, False, False])

In [22]:
# Element-wise comparison between a Series and an Index of the same length.
pd.Series(["foo", "bar", "baz"]) == pd.Index(["foo", "bar", "qux"])

0     True
1     True
2    False
dtype: bool

In [23]:
# Element-wise comparison between a Series and a NumPy array of the same length.
pd.Series(["foo", "bar", "baz"]) == np.array(["foo", "bar", "qux"])

0     True
1     True
2    False
dtype: bool

In [24]:
# Combining overlapping data sets.
# Two frames with the same columns whose missing values sit in different
# places; df2 also has one extra row.
df1 = pd.DataFrame(
    dict(
        A=[1.0, np.nan, 3.0, 5.0, np.nan],
        B=[np.nan, 2.0, 3.0, np.nan, 6.0],
    )
)
df2 = pd.DataFrame(
    dict(
        A=[5.0, 2.0, 4.0, np.nan, 3.0, 7.0],
        B=[np.nan, np.nan, 3.0, 4.0, 6.0, 8.0],
    )
)
df1

Unnamed: 0,A,B
0,1.0,
1,,2.0
2,3.0,3.0
3,5.0,
4,,6.0


In [25]:
# Show the second frame; it has one more row (label 5) than df1.
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [26]:
# Fill df1's missing values with df2's values at the same labels; the row that
# exists only in df2 (label 5) is carried over, and a cell missing in both
# frames stays NaN.
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [27]:
# General DataFrame combine
def combiner(x, y):
    """Return the values of x, falling back to y wherever x is missing.

    The result is whatever np.where produces for the two inputs (a NumPy
    array for array-like arguments).
    """
    return np.where(pd.notna(x), x, y)
# Merge df1 and df2 using the custom combiner; with this function the result
# is the same as the combine_first() output shown earlier.
df1.combine(df2, combiner)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0
