In [24]:
import numpy as np
import pandas as pd

### Initialize Series and DataFrames

In [25]:
# Build a Series from a plain list; the np.nan entry is kept as a missing
# value and the result is displayed with dtype float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [26]:
# Six consecutive daily timestamps starting 2013-01-01, used as the row index.
dates = pd.date_range(start="20130101", periods=6)

# 6x4 frame of random values with columns A-D. No seed is set in this cell,
# so the numbers change on every run unless a seed was set elsewhere.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.591305,-1.021271,0.403952,-0.096755
2013-01-02,-0.343819,1.90058,-0.144319,0.24878
2013-01-03,1.237456,-0.35344,-0.062738,0.495192
2013-01-04,0.371814,1.526031,1.41147,0.306119
2013-01-05,-0.221033,0.773045,0.523985,-0.712593
2013-01-06,-1.58035,-1.165516,-0.929063,1.853594


In [27]:
# Each keyword becomes one column. The scalar entries (A, B, F) are repeated
# on every row; the 4-element entries (C, D, E) determine the row count.
# C carries an explicit integer index 0..3, which the frame displays as its
# row labels.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [28]:
# Show the dtype of each df2 column; every column here has a different one.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### Viewing DataFrames

In [29]:
# Show the leading rows of df2; it has only 4 rows, so the whole frame appears.
df2.head()

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [30]:
# Show the last two rows of df2.
df2.tail(2)

Unnamed: 0,A,B,C,D,E,F
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [31]:
# Column labels of df2.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [32]:
# Row labels of df2 (the integers 0..3 taken from column C's index).
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [33]:
# Summary statistics for df2; only the numeric columns A, C and D appear in
# the result shown below.
df2.describe()

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.0,3.0
std,0.0,0.0,0.0
min,1.0,1.0,3.0
25%,1.0,1.0,3.0
50%,1.0,1.0,3.0
75%,1.0,1.0,3.0
max,1.0,1.0,3.0


### DataFrame Selection

In [34]:
# Select the single column "A"; the result keeps the date index of df.
df["A"]

2013-01-01    0.591305
2013-01-02   -0.343819
2013-01-03    1.237456
2013-01-04    0.371814
2013-01-05   -0.221033
2013-01-06   -1.580350
Freq: D, Name: A, dtype: float64

In [35]:
# Slicing with [] selects rows, not columns: here the first three rows by position.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.591305,-1.021271,0.403952,-0.096755
2013-01-02,-0.343819,1.90058,-0.144319,0.24878
2013-01-03,1.237456,-0.35344,-0.062738,0.495192


In [36]:
# Slice rows by date-string labels; both endpoints are included
# (2013-01-02 through 2013-01-04 in the output).
df["20130102":"20130104"]

Unnamed: 0,A,B,C,D
2013-01-02,-0.343819,1.90058,-0.144319,0.24878
2013-01-03,1.237456,-0.35344,-0.062738,0.495192
2013-01-04,0.371814,1.526031,1.41147,0.306119


In [37]:
# Select the row labelled by the first date; the result is indexed by column name.
df.loc[dates[0]]

A    0.591305
B   -1.021271
C    0.403952
D   -0.096755
Name: 2013-01-01 00:00:00, dtype: float64

In [38]:
# Keep every row (":") but only columns A and B.
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,0.591305,-1.021271
2013-01-02,-0.343819,1.90058
2013-01-03,1.237456,-0.35344
2013-01-04,0.371814,1.526031
2013-01-05,-0.221033,0.773045
2013-01-06,-1.58035,-1.165516
