In [1]:
import numpy as np
import pandas as pd

In [2]:
# Object creation: build a Series from a plain list. pandas supplies the
# default integer index, and the np.nan entry makes the dtype float64.
s = pd.Series([1, 3, 5, np.nan, 6, 8])

s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting 2013-01-01, as a DatetimeIndex.
# Later cells use it as the row index of `df`.
dates = pd.date_range(start='20130101', periods=6, freq='D')

dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of random normal draws, one row per entry of `dates`, columns A-D.
# NOTE(review): no random seed is set, so the numbers recorded in this
# notebook's outputs will not be reproduced when the cell is re-run.
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.769549,-0.292417,-0.25548,1.072737
2013-01-02,0.106198,-0.132677,0.005047,-1.083091
2013-01-03,-0.509943,1.355943,1.708975,-0.415815
2013-01-04,0.292912,0.890165,0.908447,-0.127216
2013-01-05,-1.243808,1.104641,-2.053229,0.071053
2013-01-06,-0.348326,-0.156388,0.754032,0.639921


In [5]:
# Build a frame from a dict of column-likes with different dtypes. The scalar
# entries ('A', 'B', 'F') are broadcast to the four rows defined by the
# Series / array / Categorical entries.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
# Per-column dtypes: each column of a DataFrame keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [19]:
# Row labels of df: the DatetimeIndex passed as `index=dates` at construction.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [20]:
# Underlying values as a NumPy array; index and column labels are dropped.
# All columns are float64, so the result is a single float array.
df.to_numpy()

array([[-0.76954902, -0.29241684, -0.25547958,  1.07273715],
       [ 0.1061983 , -0.13267732,  0.00504735, -1.0830913 ],
       [-0.50994315,  1.35594277,  1.70897452, -0.4158151 ],
       [ 0.29291158,  0.89016519,  0.90844696, -0.12721632],
       [-1.24380785,  1.10464093, -2.0532287 ,  0.07105281],
       [-0.34832569, -0.15638838,  0.7540319 ,  0.63992061]])

In [21]:
# With mixed column dtypes there is no common numeric type, so the result
# falls back to an object array (see dtype=object in the output).
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [22]:
# Quick per-column summary: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.412086,0.461544,0.177965,0.026265
std,0.565477,0.734933,1.296568,0.764637
min,-1.243808,-0.292417,-2.053229,-1.083091
25%,-0.704648,-0.150461,-0.190348,-0.343665
50%,-0.429134,0.378744,0.37954,-0.028082
75%,-0.007433,1.051022,0.869843,0.497704
max,0.292912,1.355943,1.708975,1.072737


In [24]:
# Transpose: columns A-D become rows and the dates become column labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-0.769549,0.106198,-0.509943,0.292912,-1.243808,-0.348326
B,-0.292417,-0.132677,1.355943,0.890165,1.104641,-0.156388
C,-0.25548,0.005047,1.708975,0.908447,-2.053229,0.754032
D,1.072737,-1.083091,-0.415815,-0.127216,0.071053,0.639921


In [27]:
# Sort along axis=1, i.e. reorder the columns by label, descending (D..A).
# Row order is unchanged and a new frame is returned.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,1.072737,-0.25548,-0.292417,-0.769549
2013-01-02,-1.083091,0.005047,-0.132677,0.106198
2013-01-03,-0.415815,1.708975,1.355943,-0.509943
2013-01-04,-0.127216,0.908447,0.890165,0.292912
2013-01-05,0.071053,-2.053229,1.104641,-1.243808
2013-01-06,0.639921,0.754032,-0.156388,-0.348326


In [31]:
# Reorder rows by the values in column A, largest first; returns a new frame.
df.sort_values(by='A', ascending=False)

Unnamed: 0,A,B,C,D
2013-01-04,0.292912,0.890165,0.908447,-0.127216
2013-01-02,0.106198,-0.132677,0.005047,-1.083091
2013-01-06,-0.348326,-0.156388,0.754032,0.639921
2013-01-03,-0.509943,1.355943,1.708975,-0.415815
2013-01-01,-0.769549,-0.292417,-0.25548,1.072737
2013-01-05,-1.243808,1.104641,-2.053229,0.071053


In [32]:
# Structural overview: index range, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6 entries, 2013-01-01 to 2013-01-06
Freq: D
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       6 non-null      float64
 1   B       6 non-null      float64
 2   C       6 non-null      float64
 3   D       6 non-null      float64
dtypes: float64(4)
memory usage: 240.0 bytes


In [39]:
# First three rows by position. `.iloc` makes the positional intent explicit
# instead of relying on how `[]` treats an integer slice.
df.iloc[:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.769549,-0.292417,-0.25548,1.072737
2013-01-02,0.106198,-0.132677,0.005047,-1.083091
2013-01-03,-0.509943,1.355943,1.708975,-0.415815


In [40]:
# Label-based row slice on the DatetimeIndex; both end points are included.
df.loc['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,0.106198,-0.132677,0.005047,-1.083091
2013-01-03,-0.509943,1.355943,1.708975,-0.415815
2013-01-04,0.292912,0.890165,0.908447,-0.127216


In [41]:
# Display df2 again for reference.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [46]:
# Replace df2's row labels with the values of its 'B' column. This mutates
# df2 in place, so any earlier display of df2 no longer reflects its state.
# NOTE(review): if 'B' still holds one repeated timestamp, the resulting
# index is non-unique - confirm that is intended.
df2.index = df2['B']

In [49]:
# Display the DatetimeIndex again for reference.
# NOTE(review): this cell's execution count (49) is higher than the next
# cell's (48), so the notebook was not run top to bottom.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [48]:
# Display df again for reference before the label-based selections below.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.769549,-0.292417,-0.25548,1.072737
2013-01-02,0.106198,-0.132677,0.005047,-1.083091
2013-01-03,-0.509943,1.355943,1.708975,-0.415815
2013-01-04,0.292912,0.890165,0.908447,-0.127216
2013-01-05,-1.243808,1.104641,-2.053229,0.071053
2013-01-06,-0.348326,-0.156388,0.754032,0.639921


In [51]:
# Label-based lookup of a single row: the second date in `dates`.
# A single label returns the row as a Series indexed by column name.
df.loc[dates[1]]

A    0.106198
B   -0.132677
C    0.005047
D   -1.083091
Name: 2013-01-02 00:00:00, dtype: float64

In [52]:
# Same row as above, restricted to columns A and B (still a Series).
df.loc[dates[1], ['A', 'B']]

A    0.106198
B   -0.132677
Name: 2013-01-02 00:00:00, dtype: float64

In [53]:
# Label slice on rows (both end points included) plus a column list; the
# result is a DataFrame.
df.loc['20130102':'20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,0.106198,-0.132677
2013-01-03,-0.509943,1.355943
2013-01-04,0.292912,0.890165


In [54]:
# Display df again for reference before the positional selections below.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.769549,-0.292417,-0.25548,1.072737
2013-01-02,0.106198,-0.132677,0.005047,-1.083091
2013-01-03,-0.509943,1.355943,1.708975,-0.415815
2013-01-04,0.292912,0.890165,0.908447,-0.127216
2013-01-05,-1.243808,1.104641,-2.053229,0.071053
2013-01-06,-0.348326,-0.156388,0.754032,0.639921


In [56]:
# Positional selection: rows 3-4 and columns 0-1. Unlike label slices with
# .loc, the end of each range is excluded.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.292912,0.890165
2013-01-05,-1.243808,1.104641


In [57]:
# Positional row slice (rows 1-2), keeping every column.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,0.106198,-0.132677,0.005047,-1.083091
2013-01-03,-0.509943,1.355943,1.708975,-0.415815


In [58]:
 df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,-0.292417,-0.25548
2013-01-02,-0.132677,0.005047
2013-01-03,1.355943,1.708975
2013-01-04,0.890165,0.908447
2013-01-05,1.104641,-2.053229
2013-01-06,-0.156388,0.754032


In [65]:
# Keep only the strictly positive entries; every other cell becomes NaN.
# The shape of the frame is preserved.
df.where(df > 0)

Unnamed: 0,A,B,C,D
2013-01-01,,,,1.072737
2013-01-02,0.106198,,0.005047,
2013-01-03,,1.355943,1.708975,
2013-01-04,0.292912,0.890165,0.908447,
2013-01-05,,1.104641,,0.071053
2013-01-06,,,0.754032,0.639921


In [66]:
# Derive a new frame from df with an extra string column 'E'. `assign`
# returns a new object, so df itself is left untouched.
# NOTE(review): this rebinds the name df2, which an earlier cell used for an
# unrelated mixed-dtype frame.
df2 = df.assign(E=['one', 'one', 'two', 'three', 'four', 'three'])

df2

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.769549,-0.292417,-0.25548,1.072737,one
2013-01-02,0.106198,-0.132677,0.005047,-1.083091,one
2013-01-03,-0.509943,1.355943,1.708975,-0.415815,two
2013-01-04,0.292912,0.890165,0.908447,-0.127216,three
2013-01-05,-1.243808,1.104641,-2.053229,0.071053,four
2013-01-06,-0.348326,-0.156388,0.754032,0.639921,three


In [69]:
# Row filter by membership: keep the rows whose 'E' value is in the list.
df2.loc[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-0.509943,1.355943,1.708975,-0.415815,two
2013-01-05,-1.243808,1.104641,-2.053229,0.071053,four
