In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

###### Series

In [4]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [6]:
dates = pd.date_range('20180101', periods=6)
dates

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06'],
              dtype='datetime64[ns]', freq='D')

###### Dataframes

In [8]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182
2018-01-02,-0.829532,0.950785,-0.383044,0.745718
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968
2018-01-05,0.332823,1.178461,0.228959,0.481295
2018-01-06,-0.059197,0.635927,-0.025163,0.196176


In [9]:
df2 = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo'
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2013-01-02,1.0,3,test,foo
1,1,2013-01-02,1.0,3,train,foo
2,1,2013-01-02,1.0,3,test,foo
3,1,2013-01-02,1.0,3,train,foo


In [10]:
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [11]:
df.head()

Unnamed: 0,A,B,C,D
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182
2018-01-02,-0.829532,0.950785,-0.383044,0.745718
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968
2018-01-05,0.332823,1.178461,0.228959,0.481295


In [14]:
df.tail(3)

Unnamed: 0,A,B,C,D
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968
2018-01-05,0.332823,1.178461,0.228959,0.481295
2018-01-06,-0.059197,0.635927,-0.025163,0.196176


In [15]:
df.index

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06'],
              dtype='datetime64[ns]', freq='D')

In [17]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [18]:
df.values

array([[ 0.18789662, -0.92802588, -0.49390571, -0.11819995],
       [-0.82953241,  0.95078476, -0.38304411,  0.74571778],
       [-0.27399288, -0.64400808,  1.26544969, -0.66235544],
       [-0.65763268, -1.15509414, -0.15415872, -0.02896754],
       [ 0.33282297,  1.17846137,  0.22895929,  0.48129454],
       [-0.05919748,  0.63592744, -0.02516285,  0.19617623]])

In [21]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.216606,0.006341,0.073023,0.102278
std,0.461248,1.030257,0.638256,0.493353
min,-0.829532,-1.155094,-0.493906,-0.662355
25%,-0.561723,-0.857021,-0.325823,-0.095892
50%,-0.166595,-0.00404,-0.089661,0.083604
75%,0.126123,0.87207,0.165429,0.410015
max,0.332823,1.178461,1.26545,0.745718


In [51]:
df.T

Unnamed: 0,2018-01-01 00:00:00,2018-01-02 00:00:00,2018-01-03 00:00:00,2018-01-04 00:00:00,2018-01-05 00:00:00,2018-01-06 00:00:00
A,0.187897,-0.829532,-0.273993,-0.657633,0.332823,-0.059197
B,-0.928026,0.950785,-0.644008,-1.155094,1.178461,0.635927
C,-0.493906,-0.383044,1.26545,-0.154159,0.228959,-0.025163
D,-0.1182,0.745718,-0.662355,-0.028968,0.481295,0.196176


In [52]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2018-01-01,-0.1182,-0.493906,-0.928026,0.187897
2018-01-02,0.745718,-0.383044,0.950785,-0.829532
2018-01-03,-0.662355,1.26545,-0.644008,-0.273993
2018-01-04,-0.028968,-0.154159,-1.155094,-0.657633
2018-01-05,0.481295,0.228959,1.178461,0.332823
2018-01-06,0.196176,-0.025163,0.635927,-0.059197


In [53]:
df.sort_values("B")

Unnamed: 0,A,B,C,D
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355
2018-01-06,-0.059197,0.635927,-0.025163,0.196176
2018-01-02,-0.829532,0.950785,-0.383044,0.745718
2018-01-05,0.332823,1.178461,0.228959,0.481295


In [54]:
df.A.values.reshape((6,1))

array([[ 0.18789662],
       [-0.82953241],
       [-0.27399288],
       [-0.65763268],
       [ 0.33282297],
       [-0.05919748]])

###### Indexing

In [55]:
df['A']

2018-01-01    0.187897
2018-01-02   -0.829532
2018-01-03   -0.273993
2018-01-04   -0.657633
2018-01-05    0.332823
2018-01-06   -0.059197
Freq: D, Name: A, dtype: float64

In [56]:
df[0:3]

Unnamed: 0,A,B,C,D
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182
2018-01-02,-0.829532,0.950785,-0.383044,0.745718
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355


In [60]:
df.loc[dates[0]]

A    0.187897
B   -0.928026
C   -0.493906
D   -0.118200
Name: 2018-01-01 00:00:00, dtype: float64

In [61]:
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2018-01-01,0.187897,-0.928026
2018-01-02,-0.829532,0.950785
2018-01-03,-0.273993,-0.644008
2018-01-04,-0.657633,-1.155094
2018-01-05,0.332823,1.178461
2018-01-06,-0.059197,0.635927


In [62]:
df.loc[dates[0], 'A']

0.18789661528091223

In [63]:
df.at[dates[0], 'A']

0.18789661528091223

In [70]:
df.iloc[3:5, ]

Unnamed: 0,A,B,C,D
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968
2018-01-05,0.332823,1.178461,0.228959,0.481295


In [71]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2018-01-04,-0.657633,-1.155094
2018-01-05,0.332823,1.178461


In [72]:
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2018-01-02,-0.829532,-0.383044
2018-01-03,-0.273993,1.26545
2018-01-05,0.332823,0.228959


In [73]:
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2018-01-02,-0.829532,0.950785,-0.383044,0.745718
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355


In [76]:
df.iloc[1, 1]

0.95078475639670046

In [77]:
df.iat[1, 1]

0.95078475639670046

In [79]:
df[df.A > 0]

Unnamed: 0,A,B,C,D
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182
2018-01-05,0.332823,1.178461,0.228959,0.481295


In [80]:
df[df > 0]

Unnamed: 0,A,B,C,D
2018-01-01,0.187897,,,
2018-01-02,,0.950785,,0.745718
2018-01-03,,,1.26545,
2018-01-04,,,,
2018-01-05,0.332823,1.178461,0.228959,0.481295
2018-01-06,,0.635927,,0.196176


In [81]:
df2 = df.copy()
df2

Unnamed: 0,A,B,C,D
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182
2018-01-02,-0.829532,0.950785,-0.383044,0.745718
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968
2018-01-05,0.332823,1.178461,0.228959,0.481295
2018-01-06,-0.059197,0.635927,-0.025163,0.196176


In [82]:
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']

In [83]:
df2

Unnamed: 0,A,B,C,D,E
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182,one
2018-01-02,-0.829532,0.950785,-0.383044,0.745718,one
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355,two
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968,three
2018-01-05,0.332823,1.178461,0.228959,0.481295,four
2018-01-06,-0.059197,0.635927,-0.025163,0.196176,three


In [84]:

df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355,two
2018-01-05,0.332823,1.178461,0.228959,0.481295,four


###### Setting Values

In [85]:
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [91]:
df['F'] = s1
df

Unnamed: 0,A,B,C,D,F
2018-01-01,0.187897,-0.928026,-0.493906,-0.1182,
2018-01-02,-0.829532,0.950785,-0.383044,0.745718,
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355,
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968,
2018-01-05,0.332823,1.178461,0.228959,0.481295,
2018-01-06,-0.059197,0.635927,-0.025163,0.196176,


In [92]:
df.at[dates[0], 'A'] = 0

In [93]:
df

Unnamed: 0,A,B,C,D,F
2018-01-01,0.0,-0.928026,-0.493906,-0.1182,
2018-01-02,-0.829532,0.950785,-0.383044,0.745718,
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355,
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968,
2018-01-05,0.332823,1.178461,0.228959,0.481295,
2018-01-06,-0.059197,0.635927,-0.025163,0.196176,


In [95]:
df.iat[0, 0] = 11
df

Unnamed: 0,A,B,C,D,F
2018-01-01,11.0,1.0,-0.493906,-0.1182,
2018-01-02,-0.829532,0.950785,-0.383044,0.745718,
2018-01-03,-0.273993,-0.644008,1.26545,-0.662355,
2018-01-04,-0.657633,-1.155094,-0.154159,-0.028968,
2018-01-05,0.332823,1.178461,0.228959,0.481295,
2018-01-06,-0.059197,0.635927,-0.025163,0.196176,


In [97]:
df.loc[:, 'D'] = np.array([5] * len(df))

In [98]:
df

Unnamed: 0,A,B,C,D,F
2018-01-01,11.0,1.0,-0.493906,5,
2018-01-02,-0.829532,0.950785,-0.383044,5,
2018-01-03,-0.273993,-0.644008,1.26545,5,
2018-01-04,-0.657633,-1.155094,-0.154159,5,
2018-01-05,0.332823,1.178461,0.228959,5,
2018-01-06,-0.059197,0.635927,-0.025163,5,


In [106]:
df2 = df.copy()
df2

Unnamed: 0,A,B,C,D,F
2018-01-01,11.0,1.0,-0.493906,5,
2018-01-02,-0.829532,0.950785,-0.383044,5,
2018-01-03,-0.273993,-0.644008,1.26545,5,
2018-01-04,-0.657633,-1.155094,-0.154159,5,
2018-01-05,0.332823,1.178461,0.228959,5,
2018-01-06,-0.059197,0.635927,-0.025163,5,


In [107]:
df2[df2 > 0 ] = -df2
df2

Unnamed: 0,A,B,C,D,F
2018-01-01,-11.0,-1.0,-0.493906,-5,
2018-01-02,-0.829532,-0.950785,-0.383044,-5,
2018-01-03,-0.273993,-0.644008,-1.26545,-5,
2018-01-04,-0.657633,-1.155094,-0.154159,-5,
2018-01-05,-0.332823,-1.178461,-0.228959,-5,
2018-01-06,-0.059197,-0.635927,-0.025163,-5,


###### Missing Data