In [1]:
import pandas as pd
import numpy as np

In [2]:
# Six consecutive daily timestamps starting 2025-01-01; used as the row index
# of the first demo frame.
dates = pd.date_range(start="20250101", periods=6, freq="D")
dates

DatetimeIndex(['2025-01-01', '2025-01-02', '2025-01-03', '2025-01-04',
               '2025-01-05', '2025-01-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# 6x4 frame of standard-normal draws, indexed by `dates`, columns A-D.
# A locally seeded Generator replaces the unseeded global np.random.randn so
# that Restart & Run All yields the same frame every time.
# NOTE(review): the outputs saved in this notebook were produced before the
# seed was added; re-run the notebook to refresh them.
df = pd.DataFrame(
    np.random.default_rng(0).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2025-01-01,-0.026527,-0.015031,0.677819,-0.788622
2025-01-02,1.041708,-1.650464,1.26782,-0.92516
2025-01-03,1.220726,0.738887,-0.97061,-1.693516
2025-01-04,1.091509,1.967964,1.785499,-2.316161
2025-01-05,2.632787,1.058175,1.275991,1.025387
2025-01-06,0.631203,-0.386878,0.327058,-0.746183


In [5]:
# Frame built from a dict of heterogeneous values: scalars ("A", "B", "F") are
# broadcast to the length of the array-like columns (four rows, index 0-3).
df2 = pd.DataFrame(
    {
        "A": 1.0,  # float scalar
        "B": pd.Timestamp("20130102"),  # timestamp scalar
        "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",  # string scalar
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
# NumPy representation of the frame's values; the index and column labels are
# not part of the resulting array.
df.to_numpy()

array([[-0.02652688, -0.01503079,  0.67781916, -0.78862161],
       [ 1.04170774, -1.65046447,  1.26782042, -0.92516   ],
       [ 1.22072607,  0.73888694, -0.97061012, -1.69351596],
       [ 1.09150872,  1.96796356,  1.78549914, -2.31616086],
       [ 2.63278733,  1.05817544,  1.27599111,  1.02538669],
       [ 0.63120292, -0.38687787,  0.32705765, -0.74618287]])

In [8]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,1.098568,0.285442,0.727263,-0.907376
std,0.878214,1.25886,0.975556,1.129483
min,-0.026527,-1.650464,-0.97061,-2.316161
25%,0.733829,-0.293916,0.414748,-1.501427
50%,1.066608,0.361928,0.97282,-0.856891
75%,1.188422,0.978353,1.273948,-0.756793
max,2.632787,1.967964,1.785499,1.025387


In [9]:
# Transpose: column labels A-D become the rows and the dates become the columns.
df.T

Unnamed: 0,2025-01-01,2025-01-02,2025-01-03,2025-01-04,2025-01-05,2025-01-06
A,-0.026527,1.041708,1.220726,1.091509,2.632787,0.631203
B,-0.015031,-1.650464,0.738887,1.967964,1.058175,-0.386878
C,0.677819,1.26782,-0.97061,1.785499,1.275991,0.327058
D,-0.788622,-0.92516,-1.693516,-2.316161,1.025387,-0.746183


In [12]:
# Sort along axis=1 (the column labels) in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2025-01-01,-0.788622,0.677819,-0.015031,-0.026527
2025-01-02,-0.92516,1.26782,-1.650464,1.041708
2025-01-03,-1.693516,-0.97061,0.738887,1.220726
2025-01-04,-2.316161,1.785499,1.967964,1.091509
2025-01-05,1.025387,1.275991,1.058175,2.632787
2025-01-06,-0.746183,0.327058,-0.386878,0.631203


In [13]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2025-01-02,1.041708,-1.650464,1.26782,-0.92516
2025-01-06,0.631203,-0.386878,0.327058,-0.746183
2025-01-01,-0.026527,-0.015031,0.677819,-0.788622
2025-01-03,1.220726,0.738887,-0.97061,-1.693516
2025-01-05,2.632787,1.058175,1.275991,1.025387
2025-01-04,1.091509,1.967964,1.785499,-2.316161


In [14]:
# Selecting a single column label with [] returns that column as a Series.
df["A"]

2025-01-01   -0.026527
2025-01-02    1.041708
2025-01-03    1.220726
2025-01-04    1.091509
2025-01-05    2.632787
2025-01-06    0.631203
Freq: D, Name: A, dtype: float64

In [15]:
# Passing a slice to [] selects rows: here the first three.
df[0:3]

Unnamed: 0,A,B,C,D
2025-01-01,-0.026527,-0.015031,0.677819,-0.788622
2025-01-02,1.041708,-1.650464,1.26782,-0.92516
2025-01-03,1.220726,0.738887,-0.97061,-1.693516


In [16]:
# Label-based lookup: the row whose index label equals the first entry of
# `dates`, returned as a Series keyed by column name.
df.loc[dates[0]]

A   -0.026527
B   -0.015031
C    0.677819
D   -0.788622
Name: 2025-01-01 00:00:00, dtype: float64

In [18]:
# Scalar access by label: the single value at (row label, column label).
df.at[dates[0], "A"]

-0.026526876795842803

In [19]:
# Position-based lookup: the row at integer position 3 (i.e. the fourth row).
df.iloc[3]

A    1.091509
B    1.967964
C    1.785499
D   -2.316161
Name: 2025-01-04 00:00:00, dtype: float64

# Boolean indexing

In [21]:
# Row filter: keep only the rows where column A is strictly positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2025-01-02,1.041708,-1.650464,1.26782,-0.92516
2025-01-03,1.220726,0.738887,-0.97061,-1.693516
2025-01-04,1.091509,1.967964,1.785499,-2.316161
2025-01-05,2.632787,1.058175,1.275991,1.025387
2025-01-06,0.631203,-0.386878,0.327058,-0.746183


In [22]:
# Element-wise mask: the frame keeps its shape, and every value that fails the
# condition is shown as missing.
df[df > 0]

Unnamed: 0,A,B,C,D
2025-01-01,,,0.677819,
2025-01-02,1.041708,,1.26782,
2025-01-03,1.220726,0.738887,,
2025-01-04,1.091509,1.967964,1.785499,
2025-01-05,2.632787,1.058175,1.275991,1.025387
2025-01-06,0.631203,,0.327058,


In [23]:
# Build a new frame from df with an extra string column E, leaving df itself
# untouched. Note that this rebinds df2, replacing the frame built earlier.
df2 = df.assign(E=["one", "one", "two", "three", "four", "three"])
df2

Unnamed: 0,A,B,C,D,E
2025-01-01,-0.026527,-0.015031,0.677819,-0.788622,one
2025-01-02,1.041708,-1.650464,1.26782,-0.92516,one
2025-01-03,1.220726,0.738887,-0.97061,-1.693516,two
2025-01-04,1.091509,1.967964,1.785499,-2.316161,three
2025-01-05,2.632787,1.058175,1.275991,1.025387,four
2025-01-06,0.631203,-0.386878,0.327058,-0.746183,three


In [24]:
# Keep only the rows whose E value is one of the listed labels.
df2[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2025-01-03,1.220726,0.738887,-0.97061,-1.693516,two
2025-01-05,2.632787,1.058175,1.275991,1.025387,four


In [25]:
# Mean of each column (one value per column label).
df.mean()

A    1.098568
B    0.285442
C    0.727263
D   -0.907376
dtype: float64

In [26]:
# axis=1: mean taken across the columns, giving one value per row (date).
df.mean(axis=1)

2025-01-01   -0.038090
2025-01-02   -0.066524
2025-01-03   -0.176128
2025-01-04    0.632203
2025-01-05    1.498085
2025-01-06   -0.043700
Freq: D, dtype: float64

In [28]:
# Series on the same date index, shifted down by one position: the first slot
# becomes NaN and the last original value (8) drops off the end.
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(periods=1)
s

2025-01-01    NaN
2025-01-02    1.0
2025-01-03    3.0
2025-01-04    5.0
2025-01-05    NaN
2025-01-06    6.0
Freq: D, dtype: float64

In [29]:
# Subtract s from every column, aligning on the row index; rows where s is NaN
# come out as all-missing.
df.sub(s, axis="index")

Unnamed: 0,A,B,C,D
2025-01-01,,,,
2025-01-02,0.041708,-2.650464,0.26782,-1.92516
2025-01-03,-1.779274,-2.261113,-3.97061,-4.693516
2025-01-04,-3.908491,-3.032036,-3.214501,-7.316161
2025-01-05,,,,
2025-01-06,-5.368797,-6.386878,-5.672942,-6.746183


In [30]:
# Reduce each column to a single number: its mean scaled by 5.6.
df.agg(lambda column: np.mean(column) * 5.6)

A    6.151979
B    1.598476
C    4.072672
D   -5.081304
dtype: float64

In [31]:
# Element-wise transform: every value multiplied by 101.2; the result has the
# same index and columns as df.
df.transform(lambda column: column * 101.2)

Unnamed: 0,A,B,C,D
2025-01-01,-2.68452,-1.521116,68.595299,-79.808507
2025-01-02,105.420824,-167.027004,128.303427,-93.626192
2025-01-03,123.537478,74.775358,-98.225744,-171.383815
2025-01-04,110.460682,199.157913,180.692513,-234.395479
2025-01-05,266.438078,107.087354,129.1303,103.769133
2025-01-06,63.877736,-39.15204,33.098234,-75.513706


In [32]:
# Ten random integers in [0, 7) (upper bound exclusive), drawn from a locally
# seeded Generator so the series is identical on every Restart & Run All
# (the unseeded np.random.randint produced new values on each run).
# NOTE(review): the outputs saved in this notebook were produced before the
# seed was added; re-run the notebook to refresh them.
s = pd.Series(np.random.default_rng(1).integers(0, 7, size=10))
s

0    6
1    1
2    1
3    1
4    5
5    2
6    6
7    4
8    4
9    6
dtype: int64

In [33]:
# How often each distinct value occurs in s, most frequent first.
s.value_counts()

6    3
1    3
4    2
5    1
2    1
Name: count, dtype: int64

# Merge: concatenating with `pd.concat`

In [34]:
# 10x4 frame of standard-normal draws with default integer row/column labels.
# A locally seeded Generator replaces the unseeded np.random.randn so the
# frame is identical on every Restart & Run All.
# Note that this rebinds `df`, replacing the date-indexed frame used above.
# NOTE(review): the outputs saved in this notebook were produced before the
# seed was added; re-run the notebook to refresh them.
df = pd.DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
df

Unnamed: 0,0,1,2,3
0,1.455533,0.644116,0.860738,0.772719
1,0.964607,-0.756267,0.329009,0.437942
2,-1.217523,1.034979,1.036112,-1.209801
3,0.142338,-1.236631,-0.486688,0.37011
4,-0.604451,-1.65337,-0.524062,0.393779
5,0.161852,1.473377,-0.040677,-0.65503
6,0.647458,-0.290724,0.195938,-0.467784
7,-1.568411,-1.290597,-1.94783,1.127255
8,-2.074071,1.644518,2.03214,-0.132579
9,-2.047475,-0.509086,-0.076457,1.276365


In [35]:
# Split the frame into three consecutive row blocks by position:
# rows 0-2, rows 3-6 and rows 7-9.
pieces = [df.iloc[:3], df.iloc[3:7], df.iloc[7:]]
pieces

[          0         1         2         3
 0  1.455533  0.644116  0.860738  0.772719
 1  0.964607 -0.756267  0.329009  0.437942
 2 -1.217523  1.034979  1.036112 -1.209801,
           0         1         2         3
 3  0.142338 -1.236631 -0.486688  0.370110
 4 -0.604451 -1.653370 -0.524062  0.393779
 5  0.161852  1.473377 -0.040677 -0.655030
 6  0.647458 -0.290724  0.195938 -0.467784,
           0         1         2         3
 7 -1.568411 -1.290597 -1.947830  1.127255
 8 -2.074071  1.644518  2.032140 -0.132579
 9 -2.047475 -0.509086 -0.076457  1.276365]

In [36]:
# Stack the pieces back together row-wise, recovering all ten rows in order.
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,1.455533,0.644116,0.860738,0.772719
1,0.964607,-0.756267,0.329009,0.437942
2,-1.217523,1.034979,1.036112,-1.209801
3,0.142338,-1.236631,-0.486688,0.37011
4,-0.604451,-1.65337,-0.524062,0.393779
5,0.161852,1.473377,-0.040677,-0.65503
6,0.647458,-0.290724,0.195938,-0.467784
7,-1.568411,-1.290597,-1.94783,1.127255
8,-2.074071,1.644518,2.03214,-0.132579
9,-2.047475,-0.509086,-0.076457,1.276365


In [38]:
# Concatenate only the first and last pieces: rows 3-6 are absent and the
# remaining rows keep their original index labels (0-2, 7-9).
pd.concat([pieces[0], pieces[2]])

Unnamed: 0,0,1,2,3
0,1.455533,0.644116,0.860738,0.772719
1,0.964607,-0.756267,0.329009,0.437942
2,-1.217523,1.034979,1.036112,-1.209801
7,-1.568411,-1.290597,-1.94783,1.127255
8,-2.074071,1.644518,2.03214,-0.132579
9,-2.047475,-0.509086,-0.076457,1.276365


# Join: SQL-style merge with `pd.merge`

In [39]:
# Two small frames sharing the column "key"; both carry the same key value
# "foo" twice, which is what the merge example is built around.
left = pd.DataFrame({"key": ["foo"] * 2, "lval": [1, 2]})
right = pd.DataFrame({"key": ["foo"] * 2, "rval": [4, 5]})
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [40]:
# Display the right-hand frame defined in the previous cell.
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [41]:
# Merge on the shared "key" column. Because "foo" occurs twice on each side,
# every left row pairs with every right row, giving 2 x 2 = 4 result rows.
pd.merge(left, right, on="key")

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5
