### Object creation

In [2]:
import pandas as pd
import numpy as np
# Build a Series from a plain list and let pandas create the default integer
# index. The np.nan entry marks a missing value.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive dates beginning 2013-01-01, used below as a row index.
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Fill a 6x4 frame (rows: `dates`, columns: A-D) with standard-normal samples.
# The samples come from a seeded generator so that "Restart & Run All" yields
# the same frame on every run; the unseeded np.random.randn call produced
# different numbers each time the cell was executed.
# NOTE: outputs saved before this change came from an unseeded run; re-execute
# the notebook to refresh them.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.250644,-0.534361,0.95434,0.656682
2013-01-02,0.178218,1.345802,-0.561345,-0.568141
2013-01-03,1.587934,-0.431034,0.812915,-0.986012
2013-01-04,0.698823,1.334608,0.972331,-0.251313
2013-01-05,0.523233,2.411277,1.112896,-0.087835
2013-01-06,0.531764,0.091027,0.199342,-0.182397


In [7]:
# Column name -> column contents. The values deliberately mix a float scalar,
# a Timestamp, a float32 Series, an int32 array, a Categorical and a string,
# so the resulting frame has a different dtype in each column.
df2_columns = dict(
    A=1.0,
    B=pd.Timestamp("20130102"),
    C=pd.Series(1, index=list(range(4)), dtype="float32"),
    D=np.array([3] * 4, dtype="int32"),
    E=pd.Categorical(["test", "train", "test", "train"]),
    F="foo",
)
df2 = pd.DataFrame(df2_columns)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [8]:
# One dtype per column: each column of df2 was built from a different kind of
# value, so the dtypes differ from column to column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [9]:
# Row labels of df2 (the integers 0 through 3 here).
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [10]:
# Column labels of df2.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

##### For df, our DataFrame of all floating-point values, DataFrame.to_numpy() is fast and doesn’t require copying data:


In [11]:
# Returns only the values as a 2-D NumPy array; the date index and the column
# labels are not part of the result.
df.to_numpy()

array([[-1.25064438, -0.53436068,  0.95433968,  0.6566823 ],
       [ 0.17821788,  1.34580228, -0.56134526, -0.56814078],
       [ 1.587934  , -0.43103433,  0.81291514, -0.98601154],
       [ 0.69882254,  1.33460792,  0.97233141, -0.25131321],
       [ 0.52323328,  2.41127715,  1.1128965 , -0.08783539],
       [ 0.5317641 ,  0.09102699,  0.1993421 , -0.18239664]])

##### For df2, the DataFrame with multiple dtypes, DataFrame.to_numpy() is relatively expensive:

In [12]:
# df2's columns have different dtypes, so the result is a single object-dtype
# array whose elements are Python objects (floats, Timestamps, strings, ...).
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

##### describe() shows a quick statistical summary of your data:


In [13]:
# Per-column count, mean, standard deviation, min, quartiles (25%/50%/75%) and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.378221,0.702887,0.581747,-0.236503
std,0.927793,1.1766,0.644955,0.546768
min,-1.250644,-0.534361,-0.561345,-0.986012
25%,0.264472,-0.300519,0.352735,-0.488934
50%,0.527499,0.712817,0.883627,-0.216855
75%,0.657058,1.343004,0.967833,-0.111476
max,1.587934,2.411277,1.112896,0.656682


##### Transposing your data:


In [14]:
# Swap rows and columns: the column labels A-D become the index and the dates
# become the column labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-1.250644,0.178218,1.587934,0.698823,0.523233,0.531764
B,-0.534361,1.345802,-0.431034,1.334608,2.411277,0.091027
C,0.95434,-0.561345,0.812915,0.972331,1.112896,0.199342
D,0.656682,-0.568141,-0.986012,-0.251313,-0.087835,-0.182397


In [15]:
# Order the columns by their labels, descending (D, C, B, A); the rows keep
# their original order.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.656682,0.95434,-0.534361,-1.250644
2013-01-02,-0.568141,-0.561345,1.345802,0.178218
2013-01-03,-0.986012,0.812915,-0.431034,1.587934
2013-01-04,-0.251313,0.972331,1.334608,0.698823
2013-01-05,-0.087835,1.112896,2.411277,0.523233
2013-01-06,-0.182397,0.199342,0.091027,0.531764


In [17]:
# Sort the rows by the values in column "B"; the result is in ascending order.
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-01,-1.250644,-0.534361,0.95434,0.656682
2013-01-03,1.587934,-0.431034,0.812915,-0.986012
2013-01-06,0.531764,0.091027,0.199342,-0.182397
2013-01-04,0.698823,1.334608,0.972331,-0.251313
2013-01-02,0.178218,1.345802,-0.561345,-0.568141
2013-01-05,0.523233,2.411277,1.112896,-0.087835


### Selection
