In [20]:
import numpy as np
import pandas as pd

In [21]:
# Build a Series from a plain list. No index is given, so positions 0..5 are
# used as labels; the np.nan entry makes the values float64 (see output).
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [22]:
# Six consecutive daily timestamps starting at 2013-01-01; used below as the
# row index of df.
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [23]:
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# NOTE(review): the random generator is not seeded, so re-running this cell
# produces different numbers than the outputs recorded in this notebook.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.073775,1.802566,0.54983,0.04416
2013-01-02,0.728844,-1.356209,0.041041,0.373683
2013-01-03,-1.293694,-0.98494,0.15009,0.287151
2013-01-04,-0.009731,-1.012103,1.713468,1.621567
2013-01-05,-0.044049,-0.351983,0.045208,-1.507044
2013-01-06,-0.132429,-0.33447,-0.89958,0.194614


In [24]:
# Build a frame from a dict of column-like objects, one dtype per column.
# The scalar entries (A, B, F) are repeated on every row; the row labels
# 0..3 come from the index of the Series used for column C (see output).
df2 = pd.DataFrame(
    {
        "A": 1.0,                                  # float scalar
        "B": pd.Timestamp("20130102"),             # timestamp scalar
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3, 3, 3, 3], dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",                                # string scalar
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [25]:
# Show the dtype of each column of df2 (one entry per column).
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [26]:
# Preview the first rows of df (no count is passed; the output shows five).
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.073775,1.802566,0.54983,0.04416
2013-01-02,0.728844,-1.356209,0.041041,0.373683
2013-01-03,-1.293694,-0.98494,0.15009,0.287151
2013-01-04,-0.009731,-1.012103,1.713468,1.621567
2013-01-05,-0.044049,-0.351983,0.045208,-1.507044


In [27]:
# Show the last three rows of df.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.009731,-1.012103,1.713468,1.621567
2013-01-05,-0.044049,-0.351983,0.045208,-1.507044
2013-01-06,-0.132429,-0.33447,-0.89958,0.194614


In [28]:
# Row labels of df: the DatetimeIndex `dates` passed at construction.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [29]:
# Column labels of df.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [30]:
# Values of df as a NumPy array; the row and column labels are not part of
# the result.
df.to_numpy()

array([[ 0.07377461,  1.80256586,  0.54983047,  0.04416049],
       [ 0.72884364, -1.35620947,  0.04104092,  0.37368287],
       [-1.293694  , -0.98493983,  0.1500895 ,  0.28715106],
       [-0.00973119, -1.01210336,  1.71346807,  1.62156702],
       [-0.044049  , -0.35198295,  0.04520806, -1.50704373],
       [-0.1324294 , -0.33447018, -0.89958005,  0.19461383]])

In [31]:
# Repeats the earlier dtypes check: df2 has a different dtype per column,
# in contrast to the all-float df converted above.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [32]:
# With mixed column dtypes the result is a single array of dtype=object
# (see the output below).
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [33]:
# Per-column summary statistics: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.112881,-0.372857,0.266676,0.169022
std,0.656265,1.138827,0.85406,0.999117
min,-1.293694,-1.356209,-0.89958,-1.507044
25%,-0.110334,-1.005312,0.042083,0.081774
50%,-0.02689,-0.668461,0.097649,0.240882
75%,0.052898,-0.338848,0.449895,0.35205
max,0.728844,1.802566,1.713468,1.621567


In [34]:
# Transpose: the columns A-D become rows and the dates become column labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.073775,0.728844,-1.293694,-0.009731,-0.044049,-0.132429
B,1.802566,-1.356209,-0.98494,-1.012103,-0.351983,-0.33447
C,0.54983,0.041041,0.15009,1.713468,0.045208,-0.89958
D,0.04416,0.373683,0.287151,1.621567,-1.507044,0.194614


In [35]:
# Sort along axis=1 (the column labels) in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.04416,0.54983,1.802566,0.073775
2013-01-02,0.373683,0.041041,-1.356209,0.728844
2013-01-03,0.287151,0.15009,-0.98494,-1.293694
2013-01-04,1.621567,1.713468,-1.012103,-0.009731
2013-01-05,-1.507044,0.045208,-0.351983,-0.044049
2013-01-06,0.194614,-0.89958,-0.33447,-0.132429


In [36]:
# Reorder the rows by the values in column B (smallest first in the output).
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-02,0.728844,-1.356209,0.041041,0.373683
2013-01-04,-0.009731,-1.012103,1.713468,1.621567
2013-01-03,-1.293694,-0.98494,0.15009,0.287151
2013-01-05,-0.044049,-0.351983,0.045208,-1.507044
2013-01-06,-0.132429,-0.33447,-0.89958,0.194614
2013-01-01,0.073775,1.802566,0.54983,0.04416


In [37]:
# Select a single column by label; the result is a Series named A.
df["A"]

2013-01-01    0.073775
2013-01-02    0.728844
2013-01-03   -1.293694
2013-01-04   -0.009731
2013-01-05   -0.044049
2013-01-06   -0.132429
Freq: D, Name: A, dtype: float64

In [38]:
# An integer slice inside [] selects rows by position (rows 0, 1 and 2).
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.073775,1.802566,0.54983,0.04416
2013-01-02,0.728844,-1.356209,0.041041,0.373683
2013-01-03,-1.293694,-0.98494,0.15009,0.287151


In [39]:
# A date-string slice inside [] selects rows by label; both endpoints are
# included in the result.
df["20130102":"20130104"]

Unnamed: 0,A,B,C,D
2013-01-02,0.728844,-1.356209,0.041041,0.373683
2013-01-03,-1.293694,-0.98494,0.15009,0.287151
2013-01-04,-0.009731,-1.012103,1.713468,1.621567


In [40]:
# Select one row by its label; the result is a Series indexed by column name.
df.loc[dates[0]]

A    0.073775
B    1.802566
C    0.549830
D    0.044160
Name: 2013-01-01 00:00:00, dtype: float64

In [41]:
# All rows (:), restricted to columns A and B, selected by label.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,0.073775,1.802566
2013-01-02,0.728844,-1.356209
2013-01-03,-1.293694,-0.98494
2013-01-04,-0.009731,-1.012103
2013-01-05,-0.044049,-0.351983
2013-01-06,-0.132429,-0.33447


In [42]:
# Label slice on the rows (both endpoints included) combined with a list of
# column labels.
df.loc["20130102":"20130104", ["A", "B"]]

Unnamed: 0,A,B
2013-01-02,0.728844,-1.356209
2013-01-03,-1.293694,-0.98494
2013-01-04,-0.009731,-1.012103


In [47]:
# Single value by row label and column label; .item() returns it as a plain
# Python scalar (see output).
df.loc[dates[0], "A"].item()

0.07377460713717493

In [49]:
# Same label-based single-value lookup through .at; the output matches the
# .loc lookup of the same cell.
df.at[dates[0], "A"].item()

0.07377460713717493

In [50]:
# Select the row at integer position 3 (the fourth row, 2013-01-04).
df.iloc[3]

A   -0.009731
B   -1.012103
C    1.713468
D    1.621567
Name: 2013-01-04 00:00:00, dtype: float64

In [51]:
# Positional slices: rows 3-4 and columns 0-1; the end positions (5 and 2)
# are excluded.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,-0.009731,-1.012103
2013-01-05,-0.044049,-0.351983


In [52]:
# Lists of integer positions pick specific rows (1, 2, 4) and columns (0, 2).
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,0.728844,0.041041
2013-01-03,-1.293694,0.15009
2013-01-05,-0.044049,0.045208


In [53]:
# Rows at positions 1 and 2, all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,0.728844,-1.356209,0.041041,0.373683
2013-01-03,-1.293694,-0.98494,0.15009,0.287151


In [54]:
# All rows, columns at positions 1 and 2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,1.802566,0.54983
2013-01-02,-1.356209,0.041041
2013-01-03,-0.98494,0.15009
2013-01-04,-1.012103,1.713468
2013-01-05,-0.351983,0.045208
2013-01-06,-0.33447,-0.89958


In [58]:
# Single value at row position 1, column position 1; .item() returns it as a
# plain Python scalar (see output).
df.iloc[1, 1].item()

-1.3562094660025705

In [59]:
# Same positional single-value lookup through .iat; the output matches the
# .iloc lookup of the same cell.
df.iat[1, 1].item()

-1.3562094660025705

In [60]:
# Boolean mask built from column A: keep only the rows where A is positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.073775,1.802566,0.54983,0.04416
2013-01-02,0.728844,-1.356209,0.041041,0.373683


In [61]:
# Element-wise mask: the shape is kept, and entries that are not > 0 come
# back as missing values (blank cells in the output).
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.073775,1.802566,0.54983,0.04416
2013-01-02,0.728844,,0.041041,0.373683
2013-01-03,,,0.15009,0.287151
2013-01-04,,,1.713468,1.621567
2013-01-05,,,0.045208,
2013-01-06,,,,0.194614


In [62]:
# Work on a copy of df and add a string column E with one label per row.
# NOTE(review): this rebinds `df2`, which earlier in the notebook named the
# mixed-dtype frame; from here on `df2` is this copy of df plus column E.
df2 = df.copy()
df2["E"] = ["one", "one", "two", "three", "four", "three"]
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.073775,1.802566,0.54983,0.04416,one
2013-01-02,0.728844,-1.356209,0.041041,0.373683,one
2013-01-03,-1.293694,-0.98494,0.15009,0.287151,two
2013-01-04,-0.009731,-1.012103,1.713468,1.621567,three
2013-01-05,-0.044049,-0.351983,0.045208,-1.507044,four
2013-01-06,-0.132429,-0.33447,-0.89958,0.194614,three


In [63]:
# Keep only the rows whose E value is one of the listed labels.
df2[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-1.293694,-0.98494,0.15009,0.287151,two
2013-01-05,-0.044049,-0.351983,0.045208,-1.507044,four


In [64]:
# Integer Series labelled with six daily dates starting 2013-01-02, i.e. one
# day later than the index of df.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start="20130102", periods=6),
)
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [65]:
# This cell modifies df itself rather than a copy.
# Add column F from s1, matched on the index labels: s1 starts on 2013-01-02,
# so the first row of df gets no value for F (blank in the output).
df["F"] = s1
# Set a single value by label (row dates[0], column A).
df.at[dates[0], "A"] = 0
# Set a single value by position (row 0, column 1, i.e. B).
df.iat[0, 1] = 0
# Overwrite every value of column D from a NumPy array of 5s, one per row.
df.loc[:, "D"] = np.array([5] * len(df))
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.54983,5.0,
2013-01-02,0.728844,-1.356209,0.041041,5.0,1.0
2013-01-03,-1.293694,-0.98494,0.15009,5.0,2.0
2013-01-04,-0.009731,-1.012103,1.713468,5.0,3.0
2013-01-05,-0.044049,-0.351983,0.045208,5.0,4.0
2013-01-06,-0.132429,-0.33447,-0.89958,5.0,5.0
