### Object creation

In [1]:
import pandas as pd
import numpy as np
# A Series built from a plain list: pandas supplies the default 0..5 integer
# index, and np.nan marks the one missing observation.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [2]:
# Six consecutive calendar days starting 2013-01-01 (daily frequency).
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# 6x4 frame of standard-normal draws, indexed by `dates` with columns A-D.
# NOTE(review): no seed is set in the visible cells, so re-running this cell
# produces different numbers from the output recorded below.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.028373,0.189288,0.397805,-0.038911
2013-01-02,-1.272149,0.878156,0.130835,1.916757
2013-01-03,-0.390442,-1.197286,-1.162059,-0.678051
2013-01-04,1.082627,-0.491576,-0.936435,1.400679
2013-01-05,-1.418523,2.471154,-0.682607,-0.268379
2013-01-06,-1.477718,-0.018321,-0.022115,-0.00096


In [4]:
# Each keyword becomes one column. The scalars (A, B, F) are repeated on every
# row of the 0..3 index carried by the Series in column C.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="foo",
    )
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [5]:
# Each column of df2 keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [6]:
# Row labels of df2: the integer index 0..3 supplied by the Series in column C.
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [7]:
# Column labels, in the order the dict keys were given.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

##### For df, our DataFrame of all floating-point values, DataFrame.to_numpy() is fast and doesn’t require copying data:


In [8]:
# The result is a plain 2-D float array; index and column labels are not included.
df.to_numpy()

array([[ 2.83730547e-02,  1.89287585e-01,  3.97804763e-01,
        -3.89109044e-02],
       [-1.27214856e+00,  8.78155688e-01,  1.30834554e-01,
         1.91675655e+00],
       [-3.90441692e-01, -1.19728637e+00, -1.16205949e+00,
        -6.78051250e-01],
       [ 1.08262709e+00, -4.91576311e-01, -9.36435220e-01,
         1.40067879e+00],
       [-1.41852337e+00,  2.47115355e+00, -6.82606562e-01,
        -2.68379433e-01],
       [-1.47771763e+00, -1.83207936e-02, -2.21150445e-02,
        -9.60182019e-04]])

##### For df2, the DataFrame with multiple dtypes, DataFrame.to_numpy() is relatively expensive:

In [9]:
# df2 mixes several column dtypes, so the result is one array with dtype=object.
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

##### describe() shows a quick statistical summary of your data:


In [10]:
# Per-column summary: count, mean, std, min, quartiles (25%/50%/75%) and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.574639,0.305236,-0.379096,0.388522
std,1.015706,1.267285,0.633532,1.026007
min,-1.477718,-1.197286,-1.162059,-0.678051
25%,-1.38193,-0.373262,-0.872978,-0.211012
50%,-0.831295,0.085483,-0.352361,-0.019936
75%,-0.076331,0.705939,0.092597,1.050269
max,1.082627,2.471154,0.397805,1.916757


##### Transposing your data:


In [11]:
# Transpose: columns A-D become the rows and the dates become the columns.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.028373,-1.272149,-0.390442,1.082627,-1.418523,-1.477718
B,0.189288,0.878156,-1.197286,-0.491576,2.471154,-0.018321
C,0.397805,0.130835,-1.162059,-0.936435,-0.682607,-0.022115
D,-0.038911,1.916757,-0.678051,1.400679,-0.268379,-0.00096


In [12]:
# Sort by column labels (axis=1) in descending order, giving D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.038911,0.397805,0.189288,0.028373
2013-01-02,1.916757,0.130835,0.878156,-1.272149
2013-01-03,-0.678051,-1.162059,-1.197286,-0.390442
2013-01-04,1.400679,-0.936435,-0.491576,1.082627
2013-01-05,-0.268379,-0.682607,2.471154,-1.418523
2013-01-06,-0.00096,-0.022115,-0.018321,-1.477718


In [13]:
# Sort the rows by the values in column B (ascending, the default order).
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-03,-0.390442,-1.197286,-1.162059,-0.678051
2013-01-04,1.082627,-0.491576,-0.936435,1.400679
2013-01-06,-1.477718,-0.018321,-0.022115,-0.00096
2013-01-01,0.028373,0.189288,0.397805,-0.038911
2013-01-02,-1.272149,0.878156,0.130835,1.916757
2013-01-05,-1.418523,2.471154,-0.682607,-0.268379


### Selection


In [14]:
# Selecting a single column by label yields a Series named after that column.
df["A"]

2013-01-01    0.028373
2013-01-02   -1.272149
2013-01-03   -0.390442
2013-01-04    1.082627
2013-01-05   -1.418523
2013-01-06   -1.477718
Freq: D, Name: A, dtype: float64

In [15]:
# An integer slice selects rows by position: here the first three rows.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.028373,0.189288,0.397805,-0.038911
2013-01-02,-1.272149,0.878156,0.130835,1.916757
2013-01-03,-0.390442,-1.197286,-1.162059,-0.678051


In [16]:
# Slicing with date strings selects rows by index label; both endpoints are
# included (2013-01-02 through 2013-01-04).
df["20130102":"20130104"]

Unnamed: 0,A,B,C,D
2013-01-02,-1.272149,0.878156,0.130835,1.916757
2013-01-03,-0.390442,-1.197286,-1.162059,-0.678051
2013-01-04,1.082627,-0.491576,-0.936435,1.400679
