In [1]:
import pandas as pd
import numpy as np

In [2]:
long_series = pd.Series(np.random.randn(1000))

In [3]:
long_series.head()

0   -0.381837
1   -1.276574
2    0.678281
3    1.518029
4   -2.205800
dtype: float64

In [6]:
long_series.tail()

995    0.251807
996   -0.837322
997   -0.797342
998   -0.220923
999    0.195656
dtype: float64

In [47]:
# Three columns with deliberately different row labels: the DataFrame
# constructor aligns them on the union of the labels ("a".."d") and fills the
# positions a column does not cover with NaN.
df = pd.DataFrame(
    {
        "one": pd.Series(np.random.randn(3), index=list("abc")),
        "two": pd.Series(np.random.randn(4), index=list("abcd")),
        "three": pd.Series(np.random.randn(3), index=list("bcd")),
    }
)

In [17]:
df

Unnamed: 0,one,two,three
a,-0.134873,0.833167,
b,-0.660981,1.149629,0.103997
c,1.32511,0.684285,-0.482162
d,,0.554305,1.454564


In [19]:
df.columns = [x.upper() for x in df.columns]

In [20]:
df

Unnamed: 0,ONE,TWO,THREE
a,-0.134873,0.833167,
b,-0.660981,1.149629,0.103997
c,1.32511,0.684285,-0.482162
d,,0.554305,1.454564


In [22]:
df.columns = [x.lower() for x in df.columns]

In [48]:
df

Unnamed: 0,one,two,three
a,0.70836,-0.375225,
b,0.931497,-0.221574,-0.328956
c,-0.331133,1.584805,1.707875
d,,-0.655138,-1.352264


In [50]:
row = df.iloc[1]
row

one      0.931497
two     -0.221574
three   -0.328956
Name: b, dtype: float64

In [51]:
df.sub(row, axis="columns")

Unnamed: 0,one,two,three
a,-0.223137,-0.15365,
b,0.0,0.0,0.0
c,-1.26263,1.806379,2.036832
d,,-0.433564,-1.023308


In [52]:
df.sub(row, axis=1)

Unnamed: 0,one,two,three
a,-0.223137,-0.15365,
b,0.0,0.0,0.0
c,-1.26263,1.806379,2.036832
d,,-0.433564,-1.023308


In [53]:
column = df["two"]
column

a   -0.375225
b   -0.221574
c    1.584805
d   -0.655138
Name: two, dtype: float64

In [54]:
df.sub(column, axis="index")

Unnamed: 0,one,two,three
a,1.083585,0.0,
b,1.153071,0.0,-0.107382
c,-1.915938,0.0,0.123071
d,,0.0,-0.697125


In [55]:
df.sub(column, axis=0)

Unnamed: 0,one,two,three
a,1.083585,0.0,
b,1.153071,0.0,-0.107382
c,-1.915938,0.0,0.123071
d,,0.0,-0.697125


In [60]:
df.gt(0)

Unnamed: 0,one,two,three
a,True,False,False
b,True,False,False
c,False,True,True
d,False,False,False


In [61]:
df.ge(0)

Unnamed: 0,one,two,three
a,True,False,False
b,True,False,False
c,False,True,True
d,False,False,False


In [62]:
df.lt(0)

Unnamed: 0,one,two,three
a,False,True,False
b,False,True,True
c,True,False,False
d,False,True,True


In [63]:
df.le(0)

Unnamed: 0,one,two,three
a,False,True,False
b,False,True,True
c,True,False,False
d,False,True,True


In [64]:
df.eq(0)

Unnamed: 0,one,two,three
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [65]:
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [66]:
(df > 0).any()

one      True
two      True
three    True
dtype: bool

In [67]:
(df > 0).any().any()

True

In [69]:
pd.DataFrame(columns=list("ABC")).empty

True

In [70]:
# Series.bool() is deprecated (pandas 2.1) and removed in pandas 3.0.
# .item() returns the lone element as a Python scalar and raises ValueError
# unless the Series holds exactly one value.
pd.Series([True]).item()

True

In [72]:
# Series.bool() is deprecated (pandas 2.1) and removed in pandas 3.0.
# .item() returns the lone element as a Python scalar and raises ValueError
# unless the Series holds exactly one value.
pd.Series([False]).item()

False

In [73]:
# DataFrame.bool() is deprecated (pandas 2.1) and removed in pandas 3.0.
# Pull the single cell out through NumPy instead: ndarray.item() returns a
# Python scalar and raises ValueError unless there is exactly one element.
pd.DataFrame([[True]]).to_numpy().item()

True

In [74]:
# DataFrame.bool() is deprecated (pandas 2.1) and removed in pandas 3.0.
# Pull the single cell out through NumPy instead: ndarray.item() returns a
# Python scalar and raises ValueError unless there is exactly one element.
pd.DataFrame([[False]]).to_numpy().item()

False

In [75]:
np.nan == np.nan

False

In [77]:
(df + df) == df * 2

Unnamed: 0,one,two,three
a,True,True,False
b,True,True,True
c,True,True,True
d,False,True,True


In [79]:
(df + df).equals(df * 2)

True

In [80]:
df1 = pd.DataFrame({"col": ["foo", 0, np.nan]})
df2 = pd.DataFrame({"col": [np.nan, 0, "foo"]}, index=[2, 1, 0])

In [81]:
df1

Unnamed: 0,col
0,foo
1,0
2,


In [82]:
df2

Unnamed: 0,col
2,
1,0
0,foo


In [83]:
df1.equals(df2)

False

In [84]:
df2.sort_index()

Unnamed: 0,col
0,foo
1,0
2,


In [85]:
df1.equals(df2.sort_index())

True

In [86]:
df1 = pd.DataFrame(
    {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]}
)


df2 = pd.DataFrame(
    {
        "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0],
        "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0],
    }
)

In [87]:
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [90]:
def combiner(x, y):
    """Element-wise choice between two aligned inputs, preferring ``x``.

    Wherever ``x`` is missing (as reported by ``pd.isna``) the value from ``y``
    is taken; everywhere else the value from ``x`` is kept. The result is
    whatever ``np.where`` produces for those inputs.
    """
    x_is_missing = pd.isna(x)
    filled = np.where(x_is_missing, y, x)
    return filled
df1.combine(df2, func=combiner)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [93]:
df

Unnamed: 0,one,two,three
a,0.70836,-0.375225,
b,0.931497,-0.221574,-0.328956
c,-0.331133,1.584805,1.707875
d,,-0.655138,-1.352264


In [98]:
df.mean(axis=0)

one      0.436241
two      0.083217
three    0.008885
dtype: float64

In [102]:
df.cumsum(axis=0)

Unnamed: 0,one,two,three
a,0.70836,-0.375225,
b,1.639857,-0.596799,-0.328956
c,1.308724,0.988006,1.378919
d,,0.332868,0.026655


In [103]:
df.cumsum(axis=0, skipna=False)

Unnamed: 0,one,two,three
a,0.70836,-0.375225,
b,1.639857,-0.596799,
c,1.308724,0.988006,
d,,0.332868,


In [105]:
s1 = pd.Series(np.random.randn(5))

In [106]:
s1

0   -0.673882
1    0.199215
2   -0.923475
3    0.221200
4   -0.480320
dtype: float64

In [108]:
s1.idxmin(), s1.idxmax()

(2, 3)

In [109]:
arr = np.random.randn(30)

In [110]:
arr

array([ 1.72427447,  1.08119299, -0.41845572, -0.18474442,  0.04937488,
       -1.10012399,  0.09898024, -1.67916255, -1.05573019, -0.85252715,
        1.05678666,  0.93648109, -1.79231195,  0.5850092 ,  1.12107442,
       -0.94058895, -1.43041595, -0.37490163, -0.0980306 ,  0.39407125,
        0.2179638 ,  0.15867345,  0.4318084 ,  0.31323525, -0.39094217,
        0.39168837,  0.48752716,  0.59123102,  0.85576259, -2.07183709])

In [124]:
factor = pd.cut(arr, 4)
factor

[(0.775, 1.724], (0.775, 1.724], (-1.123, -0.174], (-1.123, -0.174], (-0.174, 0.775], ..., (-0.174, 0.775], (-0.174, 0.775], (-0.174, 0.775], (0.775, 1.724], (-2.076, -1.123]]
Length: 30
Categories (4, interval[float64, right]): [(-2.076, -1.123] < (-1.123, -0.174] < (-0.174, 0.775] < (0.775, 1.724]]

In [125]:
factor.value_counts()

(-2.076, -1.123]     4
(-1.123, -0.174]     8
(-0.174, 0.775]     12
(0.775, 1.724]       6
Name: count, dtype: int64

In [113]:
factor = pd.cut(arr, [-5, -1, 0, 1, 5])
factor

[(1, 5], (1, 5], (-1, 0], (-1, 0], (0, 1], ..., (0, 1], (0, 1], (0, 1], (0, 1], (-5, -1]]
Length: 30
Categories (4, interval[int64, right]): [(-5, -1] < (-1, 0] < (0, 1] < (1, 5]]

In [118]:
factor.value_counts()

(-5, -1]     6
(-1, 0]      7
(0, 1]      13
(1, 5]       4
Name: count, dtype: int64

In [122]:
factor = pd.qcut(arr, [0, 0.5, 1])
factor

[(0.129, 1.724], (0.129, 1.724], (-2.073, 0.129], (-2.073, 0.129], (-2.073, 0.129], ..., (0.129, 1.724], (0.129, 1.724], (0.129, 1.724], (0.129, 1.724], (-2.073, 0.129]]
Length: 30
Categories (2, interval[float64, right]): [(-2.073, 0.129] < (0.129, 1.724]]

In [123]:
factor.value_counts()

(-2.073, 0.129]    15
(0.129, 1.724]     15
Name: count, dtype: int64

In [127]:
factor = pd.cut(arr, [-np.inf, 0, np.inf])
factor.value_counts()

(-inf, 0.0]    13
(0.0, inf]     17
Name: count, dtype: int64

In [133]:
def extract_city_name(df):
    """
    Chicago, IL -> Chicago for city_name column

    Returns a new DataFrame holding the columns of ``df`` plus ``city_name``,
    which is the text before the first comma of ``city_and_code``. The frame
    passed in is left unmodified, so chaining this through ``DataFrame.pipe``
    does not alter the caller's data.
    """
    # assign() builds a new frame instead of writing the column into the input.
    return df.assign(city_name=df["city_and_code"].str.split(",").str.get(0))


def add_country_name(df, country_name=None):
    """
    Chicago -> ChicagoUS for city_and_country column

    Returns a new DataFrame with a ``city_and_country`` column formed by
    appending ``country_name`` directly (no separator) to each ``city_name``
    value. The frame passed in is left unmodified.

    NOTE(review): the ``None`` default is kept for interface compatibility;
    callers are expected to pass a string -- confirm whether a usable default
    is wanted.
    """
    col = "city_name"
    # assign() builds a new frame instead of writing the column into the input.
    return df.assign(city_and_country=df[col] + country_name)


df_p = pd.DataFrame({"city_and_code": ["Chicago, IL"]})

In [134]:
df_p

Unnamed: 0,city_and_code
0,"Chicago, IL"


In [137]:
df_p.pipe(extract_city_name).pipe(add_country_name, country_name="US")

Unnamed: 0,city_and_code,city_name,city_and_country
0,"Chicago, IL",Chicago,ChicagoUS
