In [1]:
import pandas as pd
import numpy as np

# Basic data structures

- Series - a one-dimensional labeled array holding data of any type, such as integers, strings, Python objects, etc.


- DataFrame - a two-dimensional data structure that holds data like a two-dimensional array or a table with rows and columns.

# Object creation


When a Series is created from a list of values, pandas automatically assigns a default RangeIndex.

In [3]:
# A Series built from a plain list; the NaN entry makes the whole Series float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [5]:
# Six consecutive calendar days starting on 2026-11-01.
dates = pd.date_range(start="20261101", periods=6)
dates

DatetimeIndex(['2026-11-01', '2026-11-02', '2026-11-03', '2026-11-04',
               '2026-11-05', '2026-11-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# 6x4 table of standard-normal samples: one row per date, columns A-D.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2026-11-01,1.743922,-0.595428,0.859537,-1.392572
2026-11-02,-1.325845,0.69447,0.323627,0.794893
2026-11-03,0.067001,-0.071717,-0.819771,0.130103
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927
2026-11-05,0.023841,-1.028412,1.876223,2.143507
2026-11-06,1.011514,0.613764,0.102417,-0.358097


In [9]:
# Each column is built a different way; scalar values (A, B, F) are broadcast
# to the four rows defined by the array-like columns.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20261105"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        D=np.full(4, 3, dtype='int32'),
        E=pd.Categorical(['test', 'train', 'test', 'train']),
        F='foo',
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2026-11-05,1.0,3,test,foo
1,1.0,2026-11-05,1.0,3,train,foo
2,1.0,2026-11-05,1.0,3,test,foo
3,1.0,2026-11-05,1.0,3,train,foo


In [10]:
# Each column of a DataFrame carries its own dtype.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

# Viewing Data

In [11]:
# First rows of the frame (five when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2026-11-01,1.743922,-0.595428,0.859537,-1.392572
2026-11-02,-1.325845,0.69447,0.323627,0.794893
2026-11-03,0.067001,-0.071717,-0.819771,0.130103
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927
2026-11-05,0.023841,-1.028412,1.876223,2.143507


In [12]:
# Last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927
2026-11-05,0.023841,-1.028412,1.876223,2.143507
2026-11-06,1.011514,0.613764,0.102417,-0.358097


In [13]:
# Row labels.
df.index

DatetimeIndex(['2026-11-01', '2026-11-02', '2026-11-03', '2026-11-04',
               '2026-11-05', '2026-11-06'],
              dtype='datetime64[ns]', freq='D')

In [14]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [15]:
# Show the whole frame again for comparison with the NumPy view that follows.
df

Unnamed: 0,A,B,C,D
2026-11-01,1.743922,-0.595428,0.859537,-1.392572
2026-11-02,-1.325845,0.69447,0.323627,0.794893
2026-11-03,0.067001,-0.071717,-0.819771,0.130103
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927
2026-11-05,0.023841,-1.028412,1.876223,2.143507
2026-11-06,1.011514,0.613764,0.102417,-0.358097


In [16]:
# The values as a plain NumPy array; index and column labels are not included.
df.to_numpy()

array([[ 1.74392204, -0.59542763,  0.85953665, -1.39257214],
       [-1.32584508,  0.69447024,  0.32362739,  0.79489345],
       [ 0.06700107, -0.07171656, -0.81977136,  0.13010254],
       [-0.64811805, -1.1791929 ,  0.60431187, -1.14592661],
       [ 0.0238414 , -1.02841166,  1.87622292,  2.14350712],
       [ 1.01151416,  0.61376412,  0.10241731, -0.35809741]])

Important! A NumPy array has a single data type for the entire array, while a pandas DataFrame requires a single data type only within each column.

In [17]:
# Columns with different dtypes collapse into a single object-dtype array.
df2.to_numpy()

array([[1.0, Timestamp('2026-11-05 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2026-11-05 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2026-11-05 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2026-11-05 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [18]:
# Per-column summary statistics: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.145386,-0.261086,0.491057,0.028651
std,1.106398,0.807193,0.890464,1.312945
min,-1.325845,-1.179193,-0.819771,-1.392572
25%,-0.480128,-0.920166,0.15772,-0.948969
50%,0.045421,-0.333572,0.46397,-0.113997
75%,0.775386,0.442394,0.79573,0.628696
max,1.743922,0.69447,1.876223,2.143507


In [19]:
# Transpose: the dates become columns and A-D become rows.
df.T

Unnamed: 0,2026-11-01,2026-11-02,2026-11-03,2026-11-04,2026-11-05,2026-11-06
A,1.743922,-1.325845,0.067001,-0.648118,0.023841,1.011514
B,-0.595428,0.69447,-0.071717,-1.179193,-1.028412,0.613764
C,0.859537,0.323627,-0.819771,0.604312,1.876223,0.102417
D,-1.392572,0.794893,0.130103,-1.145927,2.143507,-0.358097


In [None]:
df.sort_index(axis=1, ascending=False)  # sort by the column labels (axis=1), in descending order

Unnamed: 0,D,C,B,A
2026-11-01,-1.392572,0.859537,-0.595428,1.743922
2026-11-02,0.794893,0.323627,0.69447,-1.325845
2026-11-03,0.130103,-0.819771,-0.071717,0.067001
2026-11-04,-1.145927,0.604312,-1.179193,-0.648118
2026-11-05,2.143507,1.876223,-1.028412,0.023841
2026-11-06,-0.358097,0.102417,0.613764,1.011514


In [None]:
df.sort_values(by='B', ascending=False)  # sort the rows by the values in column B, largest first

Unnamed: 0,A,B,C,D
2026-11-02,-1.325845,0.69447,0.323627,0.794893
2026-11-06,1.011514,0.613764,0.102417,-0.358097
2026-11-03,0.067001,-0.071717,-0.819771,0.130103
2026-11-01,1.743922,-0.595428,0.859537,-1.392572
2026-11-05,0.023841,-1.028412,1.876223,2.143507
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927


# Selection

## Getitem

In [22]:
# Passing a single column label returns that column as a Series.
df['A']

2026-11-01    1.743922
2026-11-02   -1.325845
2026-11-03    0.067001
2026-11-04   -0.648118
2026-11-05    0.023841
2026-11-06    1.011514
Freq: D, Name: A, dtype: float64

In [23]:
# A positional slice inside [] selects rows; the end position is excluded.
df[0:3]

Unnamed: 0,A,B,C,D
2026-11-01,1.743922,-0.595428,0.859537,-1.392572
2026-11-02,-1.325845,0.69447,0.323627,0.794893
2026-11-03,0.067001,-0.071717,-0.819771,0.130103


In [24]:
# A label slice selects rows too, but here both endpoints are included.
df['20261102':'20261105']

Unnamed: 0,A,B,C,D
2026-11-02,-1.325845,0.69447,0.323627,0.794893
2026-11-03,0.067001,-0.071717,-0.819771,0.130103
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927
2026-11-05,0.023841,-1.028412,1.876223,2.143507


## Selection by Label

In [25]:
# The row labelled with the first date, returned as a Series.
df.loc[dates[0]]

A    1.743922
B   -0.595428
C    0.859537
D   -1.392572
Name: 2026-11-01 00:00:00, dtype: float64

In [26]:
# All rows, columns A and B only.
df.loc[:,["A","B"]]

Unnamed: 0,A,B
2026-11-01,1.743922,-0.595428
2026-11-02,-1.325845,0.69447
2026-11-03,0.067001,-0.071717
2026-11-04,-0.648118,-1.179193
2026-11-05,0.023841,-1.028412
2026-11-06,1.011514,0.613764


In [27]:
# Rows from 2026-11-02 through 2026-11-05 (both included), columns A and B.
df.loc['20261102':'20261105', ['A', 'B']]

Unnamed: 0,A,B
2026-11-02,-1.325845,0.69447
2026-11-03,0.067001,-0.071717
2026-11-04,-0.648118,-1.179193
2026-11-05,0.023841,-1.028412


In [28]:
# A single row label plus a single column label yields a scalar.
df.loc[dates[0], 'A']

np.float64(1.743922042022371)

In [29]:
df.at[dates[0], 'A']  # .at gives faster access to a single scalar by label

np.float64(1.743922042022371)

## Selection by Position

In [30]:
# The row at integer position 3 (positions are zero-based), returned as a Series.
df.iloc[3]

A   -0.648118
B   -1.179193
C    0.604312
D   -1.145927
Name: 2026-11-04 00:00:00, dtype: float64

In [None]:
df.iloc[3:5, 0:2]  # positional slices exclude the end: rows 3-4, columns 0-1

Unnamed: 0,A,B
2026-11-04,-0.648118,-1.179193
2026-11-05,0.023841,-1.028412


In [32]:
# Explicit lists of integer positions: rows 1, 3, 5 and columns 0, 3.
df.iloc[[1,3,5],[0,3]]

Unnamed: 0,A,D
2026-11-02,-1.325845,0.794893
2026-11-04,-0.648118,-1.145927
2026-11-06,1.011514,-0.358097


In [34]:
# All rows, columns at positions 1-3.
df.iloc[:, 1:4]

Unnamed: 0,B,C,D
2026-11-01,-0.595428,0.859537,-1.392572
2026-11-02,0.69447,0.323627,0.794893
2026-11-03,-0.071717,-0.819771,0.130103
2026-11-04,-1.179193,0.604312,-1.145927
2026-11-05,-1.028412,1.876223,2.143507
2026-11-06,0.613764,0.102417,-0.358097


In [35]:
# Rows at positions 3-5, all columns.
df.iloc[3:6, :]

Unnamed: 0,A,B,C,D
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927
2026-11-05,0.023841,-1.028412,1.876223,2.143507
2026-11-06,1.011514,0.613764,0.102417,-0.358097


In [36]:
# The scalar at row position 1, column position 1.
df.iloc[1,1]

np.float64(0.6944702422935074)

In [37]:
df.iat[1,1]  # .iat gives faster access to a single scalar by position

np.float64(0.6944702422935074)

## Boolean indexing

In [None]:
df[df['A'] > 0]  # keep the rows where column A is positive; equivalent to df[df.A > 0]

Unnamed: 0,A,B,C,D
2026-11-01,1.743922,-0.595428,0.859537,-1.392572
2026-11-03,0.067001,-0.071717,-0.819771,0.130103
2026-11-05,0.023841,-1.028412,1.876223,2.143507
2026-11-06,1.011514,0.613764,0.102417,-0.358097


In [40]:
# Element-wise mask: values that fail the condition are replaced by NaN.
df[df >  0]

Unnamed: 0,A,B,C,D
2026-11-01,1.743922,,0.859537,
2026-11-02,,0.69447,0.323627,0.794893
2026-11-03,0.067001,,,0.130103
2026-11-04,,,0.604312,
2026-11-05,0.023841,,1.876223,2.143507
2026-11-06,1.011514,0.613764,0.102417,


In [41]:
# Work on a copy so that changes made to df3 leave df untouched.
df3 = df.copy()

In [43]:
# Add a string column E to the copy, one label per row.
df3["E"] =  ['one', 'one', 'two', 'three', 'four', 'three']
df3

Unnamed: 0,A,B,C,D,E
2026-11-01,1.743922,-0.595428,0.859537,-1.392572,one
2026-11-02,-1.325845,0.69447,0.323627,0.794893,one
2026-11-03,0.067001,-0.071717,-0.819771,0.130103,two
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927,three
2026-11-05,0.023841,-1.028412,1.876223,2.143507,four
2026-11-06,1.011514,0.613764,0.102417,-0.358097,three


In [44]:
# isin() builds a boolean mask: keep the rows whose E value is 'two' or 'four'.
df3[df3['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2026-11-03,0.067001,-0.071717,-0.819771,0.130103,two
2026-11-05,0.023841,-1.028412,1.876223,2.143507,four


# Setting

In [52]:
# The integers 1-6, labelled with six consecutive days starting on 2026-11-01.
s1 = pd.Series(
    data=np.arange(1, 7),
    index=pd.date_range(start='20261101', periods=6),
)
s1

2026-11-01    1
2026-11-02    2
2026-11-03    3
2026-11-04    4
2026-11-05    5
2026-11-06    6
Freq: D, dtype: int64

In [66]:
# Assigning a Series as a new column aligns its values with df on the index labels.
df["F"] = s1
df

Unnamed: 0,A,B,C,D,F
2026-11-01,1.743922,-0.595428,0.859537,-1.392572,1
2026-11-02,-1.325845,0.69447,0.323627,0.794893,2
2026-11-03,0.067001,-0.071717,-0.819771,0.130103,3
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927,4
2026-11-05,0.023841,-1.028412,1.876223,2.143507,5
2026-11-06,1.011514,0.613764,0.102417,-0.358097,6


In [69]:
# Set a single value by label: first date, column A.
df.at[dates[0], "A"] = 0
df

Unnamed: 0,A,B,C,D,F
2026-11-01,0.0,-0.595428,0.859537,-1.392572,1
2026-11-02,-1.325845,0.69447,0.323627,0.794893,2
2026-11-03,0.067001,-0.071717,-0.819771,0.130103,3
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927,4
2026-11-05,0.023841,-1.028412,1.876223,2.143507,5
2026-11-06,1.011514,0.613764,0.102417,-0.358097,6


In [70]:
# Set a single value by position: row 1, column 2 (column C).
df.iloc[1,2] =1
df

Unnamed: 0,A,B,C,D,F
2026-11-01,0.0,-0.595428,0.859537,-1.392572,1
2026-11-02,-1.325845,0.69447,1.0,0.794893,2
2026-11-03,0.067001,-0.071717,-0.819771,0.130103,3
2026-11-04,-0.648118,-1.179193,0.604312,-1.145927,4
2026-11-05,0.023841,-1.028412,1.876223,2.143507,5
2026-11-06,1.011514,0.613764,0.102417,-0.358097,6


In [71]:
# Overwrite every value in column D with 5, using a NumPy array with one entry per row.
df.loc[:, 'D'] = np.full(len(df), 5)
df

Unnamed: 0,A,B,C,D,F
2026-11-01,0.0,-0.595428,0.859537,5.0,1
2026-11-02,-1.325845,0.69447,1.0,5.0,2
2026-11-03,0.067001,-0.071717,-0.819771,5.0,3
2026-11-04,-0.648118,-1.179193,0.604312,5.0,4
2026-11-05,0.023841,-1.028412,1.876223,5.0,5
2026-11-06,1.011514,0.613764,0.102417,5.0,6


# Missing values

In [77]:
# Keep the first four dates and append a new, empty column E;
# then fill E for the first two rows only, leaving the rest missing.
df1 = df.reindex(index=dates[:4], columns=[*df.columns, 'E'])
df1.loc[dates[0]:dates[1], 'E'] = 1
df1

Unnamed: 0,A,B,C,D,F,E
2026-11-01,0.0,-0.595428,0.859537,5.0,1,1.0
2026-11-02,-1.325845,0.69447,1.0,5.0,2,1.0
2026-11-03,0.067001,-0.071717,-0.819771,5.0,3,
2026-11-04,-0.648118,-1.179193,0.604312,5.0,4,


In [78]:
# Drop every row that contains at least one missing value.
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,F,E
2026-11-01,0.0,-0.595428,0.859537,5.0,1,1.0
2026-11-02,-1.325845,0.69447,1.0,5.0,2,1.0


In [79]:
# Replace the missing values with 5.
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2026-11-01,0.0,-0.595428,0.859537,5.0,1,1.0
2026-11-02,-1.325845,0.69447,1.0,5.0,2,1.0
2026-11-03,0.067001,-0.071717,-0.819771,5.0,3,5.0
2026-11-04,-0.648118,-1.179193,0.604312,5.0,4,5.0


In [80]:
# Boolean mask that is True wherever a value is missing.
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2026-11-01,False,False,False,False,False,False
2026-11-02,False,False,False,False,False,False
2026-11-03,False,False,False,False,False,True
2026-11-04,False,False,False,False,False,True


# Operations

## Stats

In [None]:
df.mean()  # mean of each column

A   -0.145268
B   -0.261086
C    0.603786
D    5.000000
F    3.500000
dtype: float64

In [84]:
df.mean(axis=1)  # mean of each row

2026-11-01    1.252822
2026-11-02    1.473725
2026-11-03    1.435103
2026-11-04    1.555400
2026-11-05    2.174331
2026-11-06    2.545539
Freq: D, dtype: float64

In [86]:
# Shift the values two positions down the date index: the first two slots
# become NaN and the last two values (6 and 8) drop off the end.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8], index=dates).shift(periods=2)
s

2026-11-01    NaN
2026-11-02    NaN
2026-11-03    1.0
2026-11-04    3.0
2026-11-05    5.0
2026-11-06    NaN
Freq: D, dtype: float64

In [87]:
# Show the frame again before subtracting the shifted Series from it.
df

Unnamed: 0,A,B,C,D,F
2026-11-01,0.0,-0.595428,0.859537,5.0,1
2026-11-02,-1.325845,0.69447,1.0,5.0,2
2026-11-03,0.067001,-0.071717,-0.819771,5.0,3
2026-11-04,-0.648118,-1.179193,0.604312,5.0,4
2026-11-05,0.023841,-1.028412,1.876223,5.0,5
2026-11-06,1.011514,0.613764,0.102417,5.0,6


In [89]:
df.sub(s, axis='index')  # subtract s from every column, aligning on the row index

Unnamed: 0,A,B,C,D,F
2026-11-01,,,,,
2026-11-02,,,,,
2026-11-03,-0.932999,-1.071717,-1.819771,4.0,2.0
2026-11-04,-3.648118,-4.179193,-2.395688,2.0,1.0
2026-11-05,-4.976159,-6.028412,-3.123777,0.0,0.0
2026-11-06,,,,,


## User-defined functions

In [None]:
df.agg(lambda x: np.mean(x) * 5.6)  # agg reduces the data: each column collapses to a single value

A    -0.813499
B    -1.462080
C     3.381203
D    28.000000
F    19.600000
dtype: float64

In [None]:
df.transform(lambda x: x * 1.02)  # transform keeps the shape: same number of rows out as in

Unnamed: 0,A,B,C,D,F
2026-11-01,0.0,-0.607336,0.876727,5.1,1.02
2026-11-02,-1.352362,0.70836,1.02,5.1,2.04
2026-11-03,0.068341,-0.073151,-0.836167,5.1,3.06
2026-11-04,-0.66108,-1.202777,0.616398,5.1,4.08
2026-11-05,0.024318,-1.04898,1.913747,5.1,5.1
2026-11-06,1.031744,0.626039,0.104466,5.1,6.12


In [97]:
# Toy frame with two groups ('a' and 'b'), two values in each.
s3 = pd.DataFrame(
    {
        'A': [1, 2, 3, 4],
        'group': ['a', 'a', 'b', 'b'],
    }
)

# agg collapses each group to one value: its mean.
s3.groupby(by='group')['A'].agg('mean')

group
a    1.5
b    3.5
Name: A, dtype: float64

In [None]:
# transform returns one value per original row: each row receives its group's mean.
s3.groupby('group')['A'].transform('mean')

0    1.5
1    1.5
2    3.5
3    3.5
Name: A, dtype: float64

## Value Counts

In [100]:
# Ten random integers from 0 to 6 (the upper bound 7 is exclusive).
s4 = pd.Series(np.random.randint(low=0, high=7, size=10))
s4

0    1
1    4
2    5
3    0
4    4
5    0
6    5
7    3
8    4
9    1
dtype: int32

In [102]:
# How often each distinct value occurs, most frequent first.
s4.value_counts()

4    3
1    2
5    2
0    2
3    1
Name: count, dtype: int64

In [105]:
# Mixed-case strings with one missing entry; the .str accessor leaves the NaN as is.
s5 = pd.Series(
    data=["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"]
)
s5.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

# Merge

## Concat

In [107]:
# 10x4 frame of standard-normal samples with default integer labels on both axes.
df = pd.DataFrame(data=np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,0.570203,-1.57482,0.507968,0.841703
1,-0.340312,2.167531,-0.911802,0.153415
2,0.050443,-0.048493,-0.317088,-1.539992
3,0.574533,1.046458,-0.589003,-0.190717
4,1.235322,1.120261,-2.291991,0.078823
5,-1.481849,-1.104775,-0.689896,-0.108587
6,-1.493286,0.8153,-0.815978,-1.462325
7,-0.436694,-1.069699,-0.425618,0.434712
8,0.346597,1.827017,-0.509202,-0.568368
9,0.005046,-1.271399,0.161494,-1.948027


In [109]:
# Split the frame into three consecutive row chunks (rows 0-2, 3-6, 7-9).
pieces = [df[:3], df[3:7], df[7:]]

In [110]:
# Concatenate the chunks row-wise, which reproduces the original frame.
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,0.570203,-1.57482,0.507968,0.841703
1,-0.340312,2.167531,-0.911802,0.153415
2,0.050443,-0.048493,-0.317088,-1.539992
3,0.574533,1.046458,-0.589003,-0.190717
4,1.235322,1.120261,-2.291991,0.078823
5,-1.481849,-1.104775,-0.689896,-0.108587
6,-1.493286,0.8153,-0.815978,-1.462325
7,-0.436694,-1.069699,-0.425618,0.434712
8,0.346597,1.827017,-0.509202,-0.568368
9,0.005046,-1.271399,0.161494,-1.948027


## Join

In [111]:
# Both frames repeat the same key value, so the key column is not unique.
left = pd.DataFrame(data={"key": ["foo"] * 2, "lval": [1, 2]})
right = pd.DataFrame(data={"key": ["foo"] * 2, "rval": [4, 5]})

In [113]:
pd.merge(left, right, on='key')  # the keys are not unique, so every left row pairs with every right row (Cartesian product)

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [None]:
# This time each key value occurs exactly once in each frame.
left = pd.DataFrame(data={"key": ["foo", "bar"], "lval": [1, 2]})
right = pd.DataFrame(data={"key": ["foo", "bar"], "rval": [4, 5]})

# Unique keys match one-to-one, so the result has only two rows.
pd.merge(left=left, right=right, on="key")

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


# Grouping

In [116]:
# Two label columns to group by (A, B) and two columns of random values (C, D).
df = pd.DataFrame(
    dict(
        A=["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        B=["one", "one", "two", "three", "two", "two", "one", "three"],
        C=np.random.randn(8),
        D=np.random.randn(8),
    )
)
df

Unnamed: 0,A,B,C,D
0,foo,one,0.839249,-1.214042
1,bar,one,-0.599335,-0.753517
2,foo,two,-1.029322,-2.685474
3,bar,three,-0.584513,1.943948
4,foo,two,-2.506221,1.193113
5,bar,two,2.003868,0.024368
6,foo,one,-0.662711,0.820017
7,foo,three,-0.404874,-1.395872


In [118]:
# Mean of C and D within each group of A.
df.groupby('A')[['C','D']].mean()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,0.27334,0.404933
foo,-0.752776,-0.656452


In [120]:
# Group by each (A, B) pair and sum the remaining columns; the result has a two-level index.
df.groupby(['A', 'B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.599335,-0.753517
bar,three,-0.584513,1.943948
bar,two,2.003868,0.024368
foo,one,0.176538,-0.394025
foo,three,-0.404874,-1.395872
foo,two,-3.535543,-1.492361


# Reshaping

In [121]:
# Two parallel label lists: the outer level first, the inner level second.
arrays = [
    [outer for outer in ("bar", "baz", "foo", "qux") for _ in (0, 1)],
    ["one", "two"] * 4,
]

In [123]:
# Build a two-level index from the parallel lists; the levels are named 'first' and 'second'.
index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second'])
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [125]:
# 8x2 frame of standard-normal samples, with the two-level index as row labels.
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.88816,-0.153148
bar,two,0.654081,0.858413
baz,one,1.162841,-0.945861
baz,two,0.21594,0.546898
foo,one,-0.106056,0.931404
foo,two,-0.497826,-1.403459
qux,one,-1.297754,-0.096799
qux,two,0.594402,-0.401439


In [None]:
# stack() moves the column labels (A, B) into a new innermost index level,
# turning the wide frame into a longer Series.
# future_stack=True selects the newer stack implementation (available from
# pandas 2.1; NOTE(review): confirm the installed version). This frame has no
# missing values, so the result matches the legacy implementation.
stacked = df.stack(future_stack=True)
stacked

first  second   
bar    one     A    0.888160
               B   -0.153148
       two     A    0.654081
               B    0.858413
baz    one     A    1.162841
               B   -0.945861
       two     A    0.215940
               B    0.546898
foo    one     A   -0.106056
               B    0.931404
       two     A   -0.497826
               B   -1.403459
qux    one     A   -1.297754
               B   -0.096799
       two     A    0.594402
               B   -0.401439
dtype: float64

In [129]:
# unstack() reverses the stack: the innermost index level moves back to the columns.
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.88816,-0.153148
bar,two,0.654081,0.858413
baz,one,1.162841,-0.945861
baz,two,0.21594,0.546898
foo,one,-0.106056,0.931404
foo,two,-0.497826,-1.403459
qux,one,-1.297754,-0.096799
qux,two,0.594402,-0.401439


# Pivot tables

In [131]:
# Twelve rows: three repeating label columns (A, B, C) and two columns of random values (D, E).
df = pd.DataFrame(
    dict(
        A=["one", "one", "two", "three"] * 3,
        B=["A", "B", "C"] * 4,
        C=["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        D=np.random.randn(12),
        E=np.random.randn(12),
    )
)
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-1.197207,-0.686328
1,one,B,foo,2.10209,0.531339
2,two,C,foo,-0.983886,-0.640461
3,three,A,bar,-0.253265,0.622043
4,one,B,bar,-0.413624,0.443078
5,one,C,bar,0.064678,2.728387
6,two,A,foo,0.202602,-0.241307
7,three,B,foo,1.393796,-0.393744
8,one,C,foo,0.34384,0.672023
9,one,A,bar,-0.269919,0.689591


In [135]:
# Rows are the (A, B) pairs, columns are the values of C, and the cells hold D;
# combinations that never occur in the data show up as NaN.
pd.pivot_table(df, values='D', index=['A','B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.269919,-1.197207
one,B,-0.413624,2.10209
one,C,0.064678,0.34384
three,A,-0.253265,
three,B,,1.393796
three,C,0.235219,
two,A,,0.202602
two,B,1.102195,
two,C,,-0.983886


# Time Series

In [138]:
# 100 timestamps one second apart, starting at 2026-11-01 00:00:00.
# NOTE(review): the name `range` shadows Python's built-in range() for the rest
# of the session; consider renaming it together with its uses in the next cell.
range = pd.date_range('11/1/2026', periods=100, freq='s')
range

DatetimeIndex(['2026-11-01 00:00:00', '2026-11-01 00:00:01',
               '2026-11-01 00:00:02', '2026-11-01 00:00:03',
               '2026-11-01 00:00:04', '2026-11-01 00:00:05',
               '2026-11-01 00:00:06', '2026-11-01 00:00:07',
               '2026-11-01 00:00:08', '2026-11-01 00:00:09',
               '2026-11-01 00:00:10', '2026-11-01 00:00:11',
               '2026-11-01 00:00:12', '2026-11-01 00:00:13',
               '2026-11-01 00:00:14', '2026-11-01 00:00:15',
               '2026-11-01 00:00:16', '2026-11-01 00:00:17',
               '2026-11-01 00:00:18', '2026-11-01 00:00:19',
               '2026-11-01 00:00:20', '2026-11-01 00:00:21',
               '2026-11-01 00:00:22', '2026-11-01 00:00:23',
               '2026-11-01 00:00:24', '2026-11-01 00:00:25',
               '2026-11-01 00:00:26', '2026-11-01 00:00:27',
               '2026-11-01 00:00:28', '2026-11-01 00:00:29',
               '2026-11-01 00:00:30', '2026-11-01 00:00:31',
               '2026-11-

In [141]:
# One random integer from 0 to 499 per timestamp (the upper bound 500 is exclusive).
ts = pd.Series(np.random.randint(0,500, len(range)), index=range)
ts

2026-11-01 00:00:00    267
2026-11-01 00:00:01    488
2026-11-01 00:00:02    471
2026-11-01 00:00:03    188
2026-11-01 00:00:04    491
                      ... 
2026-11-01 00:01:35    365
2026-11-01 00:01:36    367
2026-11-01 00:01:37      0
2026-11-01 00:01:38    294
2026-11-01 00:01:39    373
Freq: s, Length: 100, dtype: int32