In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain Python list that contains one missing value (np.nan).
# The displayed result has a default 0..5 integer index and dtype float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting at 2013-01-01 (the output shows freq='D').
# This index is reused as the row labels of df in the next cell.
dates = pd.date_range(start="20130101", periods=6)
dates


DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Draw the 6x4 block of standard-normal values from a *seeded* generator so that
# Restart Kernel -> Run All reproduces exactly the same frame every time.
# Changing the seed changes every number displayed for df in this notebook, so
# re-run all cells afterwards to refresh the stored outputs.
rng = np.random.default_rng(seed=0)
# Rows are labelled by the DatetimeIndex `dates`; columns are labelled A, B, C, D.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,1.007352,-0.480961,0.732697,-2.786114
2013-01-02,0.19423,1.157815,-1.194514,-1.616823
2013-01-03,-0.355552,1.283856,-1.986582,-0.584996
2013-01-04,-0.456244,0.187752,-0.992779,1.472911
2013-01-05,0.475658,-0.681852,-0.786859,0.278812
2013-01-06,-0.29169,0.119414,-0.315311,-1.465715


In [5]:
# Build a frame from a mapping of column name -> column-like object.
# Each value is a different kind of input: scalars (A, B, F) appear in every one of
# the four rows in the output, while C, D and E each supply four values of their own.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F="foo",
    )
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [6]:
# Each column of df2 keeps its own dtype; the result lists one dtype per column.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [7]:
# Show the top of df; with no argument, five rows are displayed.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,1.007352,-0.480961,0.732697,-2.786114
2013-01-02,0.19423,1.157815,-1.194514,-1.616823
2013-01-03,-0.355552,1.283856,-1.986582,-0.584996
2013-01-04,-0.456244,0.187752,-0.992779,1.472911
2013-01-05,0.475658,-0.681852,-0.786859,0.278812


In [8]:
# Show the last three rows of df.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.456244,0.187752,-0.992779,1.472911
2013-01-05,0.475658,-0.681852,-0.786859,0.278812
2013-01-06,-0.29169,0.119414,-0.315311,-1.465715


In [9]:
# Row labels of df: the DatetimeIndex that was passed as index=dates at construction.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Column labels of df, created from list("ABCD") at construction.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [11]:
# Same call as In [6]: df2 mixes float, datetime, int, category and object columns.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [12]:
# Convert df2 to a NumPy array. Its columns have different dtypes, and the array
# shown below has dtype=object and holds scalars such as Timestamp and str.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [13]:
# Per-column summary statistics: count, mean, std, min, 25%/50%/75% quantiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.095625,0.264337,-0.757225,-0.783654
std,0.573328,0.814289,0.91368,1.511927
min,-0.456244,-0.681852,-1.986582,-2.786114
25%,-0.339586,-0.330867,-1.14408,-1.579046
50%,-0.04873,0.153583,-0.889819,-1.025356
75%,0.405301,0.915299,-0.433198,0.06286
max,1.007352,1.283856,0.732697,1.472911


In [14]:
# Transpose: the dates become the columns and A-D become the row labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,1.007352,0.19423,-0.355552,-0.456244,0.475658,-0.29169
B,-0.480961,1.157815,1.283856,0.187752,-0.681852,0.119414
C,0.732697,-1.194514,-1.986582,-0.992779,-0.786859,-0.315311
D,-2.786114,-1.616823,-0.584996,1.472911,0.278812,-1.465715


In [15]:
# Sort along axis=1, i.e. by column label, descending (D, C, B, A); row order is unchanged.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-2.786114,0.732697,-0.480961,1.007352
2013-01-02,-1.616823,-1.194514,1.157815,0.19423
2013-01-03,-0.584996,-1.986582,1.283856,-0.355552
2013-01-04,1.472911,-0.992779,0.187752,-0.456244
2013-01-05,0.278812,-0.786859,-0.681852,0.475658
2013-01-06,-1.465715,-0.315311,0.119414,-0.29169


In [16]:
# Order the rows by the values in column B, smallest first. The result is only
# displayed, not assigned; later cells still show df in its original date order.
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-05,0.475658,-0.681852,-0.786859,0.278812
2013-01-01,1.007352,-0.480961,0.732697,-2.786114
2013-01-06,-0.29169,0.119414,-0.315311,-1.465715
2013-01-04,-0.456244,0.187752,-0.992779,1.472911
2013-01-02,0.19423,1.157815,-1.194514,-1.616823
2013-01-03,-0.355552,1.283856,-1.986582,-0.584996


In [17]:
# Selecting a single column with [] yields a Series named after that column.
df["A"]

2013-01-01    1.007352
2013-01-02    0.194230
2013-01-03   -0.355552
2013-01-04   -0.456244
2013-01-05    0.475658
2013-01-06   -0.291690
Freq: D, Name: A, dtype: float64

In [18]:
# A slice inside [] selects ROWS (not columns); with integers it is positional, so
# 0:3 returns the first three rows. df.iloc[0:3] spells the same selection explicitly.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,1.007352,-0.480961,0.732697,-2.786114
2013-01-02,0.19423,1.157815,-1.194514,-1.616823
2013-01-03,-0.355552,1.283856,-1.986582,-0.584996


In [19]:
# Label-based lookup: the row whose index label is dates[0], returned as a Series
# indexed by column name.
df.loc[dates[0]]

A    1.007352
B   -0.480961
C    0.732697
D   -2.786114
Name: 2013-01-01 00:00:00, dtype: float64

In [20]:
# Label-based selection of all rows (:) restricted to columns A and B.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,1.007352,-0.480961
2013-01-02,0.19423,1.157815
2013-01-03,-0.355552,1.283856
2013-01-04,-0.456244,0.187752
2013-01-05,0.475658,-0.681852
2013-01-06,-0.29169,0.119414


In [21]:
# Label slices include BOTH endpoints: rows 2013-01-02 through 2013-01-04 are returned.
df.loc["20130102":"20130104", ["A", "B"]]

Unnamed: 0,A,B
2013-01-02,0.19423,1.157815
2013-01-03,-0.355552,1.283856
2013-01-04,-0.456244,0.187752


In [22]:
# A single row label plus a single column label yields one scalar value.
df.loc[dates[0], "A"]

1.007351843874593

In [23]:
# .at takes a row/column label pair; it returns the same scalar as the .loc lookup
# in In [22].
df.at[dates[0], "A"]

1.007351843874593

In [24]:
# Position-based lookup: the row at integer position 3 (the fourth row, 2013-01-04).
df.iloc[3]

A   -0.456244
B    0.187752
C   -0.992779
D    1.472911
Name: 2013-01-04 00:00:00, dtype: float64

In [25]:
# Positional slices exclude the end: rows at positions 3-4, columns at positions 0-1 (A, B).
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,-0.456244,0.187752
2013-01-05,0.475658,-0.681852


In [26]:
# Lists of integer positions pick specific rows (1, 2, 4) and columns (0, 2 -> A, C).
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,0.19423,-1.194514
2013-01-03,-0.355552,-1.986582
2013-01-05,0.475658,-0.786859


In [27]:
# Rows at positions 1-2, every column.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,0.19423,1.157815,-1.194514,-1.616823
2013-01-03,-0.355552,1.283856,-1.986582,-0.584996


In [28]:
# Every row, columns at positions 1-2 (B, C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,-0.480961,0.732697
2013-01-02,1.157815,-1.194514
2013-01-03,1.283856,-1.986582
2013-01-04,0.187752,-0.992779
2013-01-05,-0.681852,-0.786859
2013-01-06,0.119414,-0.315311


In [29]:
# Scalar at row position 1, column position 1 (row 2013-01-02, column B).
df.iloc[1, 1]

1.1578145980812333

In [30]:
# .iat takes an integer row/column position pair; it returns the same scalar as the
# .iloc lookup in In [29].
df.iat[1, 1]

1.1578145980812333

In [31]:
# Boolean mask built from one column: keep only the rows where A is positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,1.007352,-0.480961,0.732697,-2.786114
2013-01-02,0.19423,1.157815,-1.194514,-1.616823
2013-01-05,0.475658,-0.681852,-0.786859,0.278812


In [32]:
# Element-wise boolean mask: the frame keeps its shape, and every entry that is not
# > 0 comes back as missing (the blank cells in the output below).
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,1.007352,,0.732697,
2013-01-02,0.19423,1.157815,,
2013-01-03,,1.283856,,
2013-01-04,,0.187752,,1.472911
2013-01-05,0.475658,,,0.278812
2013-01-06,,0.119414,,


In [33]:
df2 = df.copy()