In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
index = pd.date_range('1/1/2000', periods=8)

In [3]:
# Seed NumPy's global RNG before the first random draw so that a fresh
# "Restart & Run All" reproduces the same random data on every run.
np.random.seed(42)
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [4]:
df = pd.DataFrame(np.random.randn(8, 3), index=index,
    columns=['A', 'B', 'C'])

In [5]:
long_series = pd.Series(np.random.randn(1000))

In [6]:
long_series.head()

0   -1.306052
1    1.311655
2   -1.448971
3    0.397081
4   -1.478924
dtype: float64

In [7]:
long_series.tail(3)

997   -0.817051
998    0.553671
999   -0.876898
dtype: float64

In [8]:
df[:2]

Unnamed: 0,A,B,C
2000-01-01,0.543146,0.768146,-0.224688
2000-01-02,1.453651,0.105877,-0.675718


In [9]:
df.columns = [x.lower() for x in df.columns]

In [10]:
df

Unnamed: 0,a,b,c
2000-01-01,0.543146,0.768146,-0.224688
2000-01-02,1.453651,0.105877,-0.675718
2000-01-03,1.978981,-0.384107,-1.390631
2000-01-04,0.092735,-1.36246,0.358917
2000-01-05,0.562135,0.068743,0.531018
2000-01-06,0.525816,-2.372307,1.217148
2000-01-07,2.894966,1.921026,-0.367852
2000-01-08,-1.909868,0.566768,-0.485984


In [11]:
s.array

<PandasArray>
[ -1.2154691544397906,   2.5116437443566215, -0.17809932824525326,
  0.13590952137127646,  -0.6327054768601116]
Length: 5, dtype: float64

In [12]:
s.index.array

<PandasArray>
['a', 'b', 'c', 'd', 'e']
Length: 5, dtype: object

In [13]:
s.to_numpy()

array([-1.21546915,  2.51164374, -0.17809933,  0.13590952, -0.63270548])

In [14]:
np.asarray(s)

array([-1.21546915,  2.51164374, -0.17809933,  0.13590952, -0.63270548])

In [15]:
ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))

In [16]:
ser.to_numpy(dtype=object)

array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
       Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
      dtype=object)

In [17]:
ser.to_numpy(dtype="datetime64[ns]")

array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'],
      dtype='datetime64[ns]')

In [18]:
df.to_numpy()

array([[ 0.54314568,  0.76814593, -0.22468842],
       [ 1.4536509 ,  0.10587665, -0.67571757],
       [ 1.97898081, -0.38410686, -1.39063142],
       [ 0.09273464, -1.36246046,  0.35891705],
       [ 0.56213499,  0.06874292,  0.5310182 ],
       [ 0.52581644, -2.37230698,  1.21714809],
       [ 2.89496646,  1.92102601, -0.36785188],
       [-1.90986767,  0.56676751, -0.48598385]])

In [19]:
# Three random columns defined on overlapping label sets; the constructor
# aligns them on the union of the labels, leaving NaN where a column has no
# value for a label.
df = pd.DataFrame({
    'one': pd.Series(np.random.randn(3), index=list('abc')),
    'two': pd.Series(np.random.randn(4), index=list('abcd')),
    'three': pd.Series(np.random.randn(3), index=list('bcd')),
})

In [20]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [21]:
row = df.iloc[1]

In [22]:
column = df['two']

In [23]:
df.sub(row, axis='columns')

Unnamed: 0,one,two,three
a,-0.822047,-0.986914,
b,0.0,0.0,0.0
c,-1.089531,-1.464632,-0.605727
d,,-0.629614,1.800386


In [24]:
df.sub(row, axis=1)

Unnamed: 0,one,two,three
a,-0.822047,-0.986914,
b,0.0,0.0,0.0
c,-1.089531,-1.464632,-0.605727
d,,-0.629614,1.800386


In [25]:
df.sub(column, axis='index')

Unnamed: 0,one,two,three
a,0.540631,0.0,
b,0.375764,0.0,-0.621453
c,0.750866,0.0,0.237452
d,,0.0,1.808547


In [26]:
df.sub(column, axis=0)

Unnamed: 0,one,two,three
a,0.540631,0.0,
b,0.375764,0.0,-0.621453
c,0.750866,0.0,0.237452
d,,0.0,1.808547


In [27]:
dfmi = df.copy()

In [28]:
dfmi

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [29]:
dfmi.index = pd.MultiIndex.from_tuples([(1, 'a'), (1, 'b'),
                                        (1, 'c'), (2, 'a')],
                                        names=['first', 'second'])

In [30]:
dfmi.sub(column, axis=0, level='second')

Unnamed: 0_level_0,Unnamed: 1_level_0,one,two,three
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,0.540631,0.0,
1,b,0.375764,0.0,-0.621453
1,c,0.750866,0.0,0.237452
2,a,,0.3573,2.165847


In [31]:
s = pd.Series(np.arange(10))

In [32]:
s

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32

In [33]:
div, rem = divmod(s, 3)

In [34]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    2
7    2
8    2
9    3
dtype: int32

In [35]:
rem

0    0
1    1
2    2
3    0
4    1
5    2
6    0
7    1
8    2
9    0
dtype: int32

In [36]:
divmod(s, 3)

(0    0
 1    0
 2    0
 3    1
 4    1
 5    1
 6    2
 7    2
 8    2
 9    3
 dtype: int32,
 0    0
 1    1
 2    2
 3    0
 4    1
 5    2
 6    0
 7    1
 8    2
 9    0
 dtype: int32)

In [37]:
idx = pd.Index(np.arange(10))

In [38]:
idx

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [39]:
div, rem = divmod(idx, 3)

In [40]:
div

Int64Index([0, 0, 0, 1, 1, 1, 2, 2, 2, 3], dtype='int64')

In [41]:
rem

Int64Index([0, 1, 2, 0, 1, 2, 0, 1, 2, 0], dtype='int64')

In [42]:
div, rem = divmod(s, [2, 2, 3, 3, 4, 4, 5, 5, 6, 6])

In [43]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    1
7    1
8    1
9    1
dtype: int32

In [44]:
rem

0    0
1    1
2    2
3    0
4    0
5    1
6    1
7    2
8    2
9    3
dtype: int32

In [45]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [46]:
# Work on an independent copy so that `df` itself is left untouched.
df2 = df.copy()
# Overwrite the first row of column 'three' with one .loc call.  The chained
# form df2['three'][0] = 1 assigns through an intermediate Series, which pandas
# does not guarantee writes back to df2, and it also relies on [0] being
# treated as a position on a label-based index.
df2.loc[df2.index[0], 'three'] = 1
df2

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,1.0
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [47]:
df + df2

Unnamed: 0,one,two,three
a,-0.426665,-1.507926,
b,1.217429,0.465901,-0.777005
c,-0.961632,-2.463364,-1.988459
d,,-0.793326,2.823768


In [48]:
df.add(df2, fill_value=0)

Unnamed: 0,one,two,three
a,-0.426665,-1.507926,1.0
b,1.217429,0.465901,-0.777005
c,-0.961632,-2.463364,-1.988459
d,,-0.793326,2.823768


In [49]:
df.gt(df2)

Unnamed: 0,one,two,three
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [50]:
df2.ne(df)

Unnamed: 0,one,two,three
a,False,False,True
b,False,False,False
c,False,False,False
d,True,False,False


In [51]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [52]:
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [53]:
(df > 0).any()

one      True
two      True
three    True
dtype: bool

In [54]:
(df > 0).any().any()

True

In [55]:
df.empty

False

In [56]:
pd.DataFrame(columns=list('ABC')).empty

True

In [57]:
pd.Series([True]).bool()

True

In [58]:
pd.Series([False]).bool()

False

In [59]:
pd.DataFrame([[True]]).bool()

True

In [60]:
pd.DataFrame([[False]]).bool()

False

In [61]:
df + df == df * 2

Unnamed: 0,one,two,three
a,True,True,False
b,True,True,True
c,True,True,True
d,False,True,True


In [62]:
(df + df == df * 2).all()

one      False
two       True
three    False
dtype: bool

In [63]:
np.nan == np.nan

False

In [64]:
(df + df).equals(df * 2)

True

In [65]:
df1 = pd.DataFrame({'col': ['foo', 0, np.nan]})

In [66]:
df2 = pd.DataFrame({'col': [np.nan, 0, 'foo']}, index=[2, 1, 0])

In [67]:
df1.equals(df2)

False

In [68]:
df1.equals(df2.sort_index())

True

In [69]:
pd.Series(['foo', 'bar', 'baz']) == 'foo'

0     True
1    False
2    False
dtype: bool

In [70]:
pd.Series(['foo', 'bar', 'baz']) == 'foo'

0     True
1    False
2    False
dtype: bool

In [71]:
pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux'])

0     True
1     True
2    False
dtype: bool

In [72]:
pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux'])

0     True
1     True
2    False
dtype: bool

In [73]:
# Comparing Index or Series objects of different lengths raises a ValueError,
# so the two examples below are intentionally left commented out:
# pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar'])
# pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo'])

In [74]:
np.array([1, 2, 3]) == np.array([2])

array([False,  True, False])

In [75]:
np.array([1, 2, 3]) == np.array([1, 2])

  np.array([1, 2, 3]) == np.array([1, 2])


False

In [76]:
df1 = pd.DataFrame({'A': [1., np.nan, 3., 5., np.nan],
    'B': [np.nan, 2., 3., np.nan, 6.]})

In [77]:
df2 = pd.DataFrame({'A': [5., 2., 4., np.nan, 3., 7.],
    'B': [np.nan, np.nan, 3., 4., 6., 8.]})

In [78]:
df1

Unnamed: 0,A,B
0,1.0,
1,,2.0
2,3.0,3.0
3,5.0,
4,,6.0


In [79]:
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [80]:
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [81]:
def combiner(x, y):
    """Element-wise coalesce: keep ``x`` where it is present, else take ``y``.

    Parameters
    ----------
    x, y : array-like
        Inputs that ``np.where`` can broadcast against each other.

    Returns
    -------
    numpy.ndarray
        Values of ``x``, with every position where ``pd.isna(x)`` is true
        replaced by the value of ``y`` at that position.
    """
    x_is_missing = pd.isna(x)
    return np.where(x_is_missing, y, x)

In [82]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [83]:
df.mean(0)

one     -0.028478
two     -0.537339
three    0.009717
dtype: float64

In [84]:
df.mean()

one     -0.028478
two     -0.537339
three    0.009717
dtype: float64

In [85]:
df.mean(1)

a   -0.483648
b    0.151054
c   -0.902243
d    0.507610
dtype: float64

In [86]:
df.sum(0, skipna=False)

one           NaN
two     -2.149358
three         NaN
dtype: float64

In [87]:
df.sum(axis=1, skipna=True)

a   -0.967296
b    0.453163
c   -2.706728
d    1.015221
dtype: float64

In [88]:
# Combined with the broadcasting / arithmetic behavior, one can describe various statistical procedures,
# like standardization (rendering data zero mean and standard deviation 1), very concisely:
ts_stand = (df - df.mean()) / df.std()

In [89]:
ts_stand.std()

one      1.0
two      1.0
three    1.0
dtype: float64

In [90]:
xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0)

In [91]:
xs_stand.std(1)

a    1.0
b    1.0
c    1.0
d    1.0
dtype: float64

In [92]:
df.cumsum()

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.395382,-0.521013,-0.388502
c,-0.085434,-1.752695,-1.382732
d,,-2.149358,0.029152


In [93]:
np.mean(df['one'])

-0.028477982584976674

In [94]:
np.mean(df['one'].to_numpy())

nan

In [95]:
series = pd.Series(np.random.randn(500))

In [96]:
series[20:500] = np.nan

In [97]:
series[10:20] = 5

In [98]:
series.nunique()

11

In [99]:
series = pd.Series(np.random.randn(1000))

In [100]:
series[::2] = np.nan

In [101]:
series.describe()

count    500.000000
mean       0.071950
std        1.014958
min       -2.565662
25%       -0.595953
50%        0.042926
75%        0.722688
max        4.253415
dtype: float64

In [102]:
frame = pd.DataFrame(np.random.randn(1000, 5),
    columns=['a', 'b', 'c', 'd', 'e'])

In [103]:
frame.iloc[::2] = np.nan

In [104]:
frame.describe()

Unnamed: 0,a,b,c,d,e
count,500.0,500.0,500.0,500.0,500.0
mean,-0.026726,-0.04489,-0.01626,-0.035301,0.031132
std,1.036037,0.995452,1.018477,1.03986,1.045293
min,-2.680764,-3.295566,-2.781046,-3.375889,-2.748553
25%,-0.735326,-0.704714,-0.626919,-0.780521,-0.675794
50%,-0.08329,-0.014428,0.037217,-0.05154,0.026708
75%,0.618399,0.578704,0.631853,0.621775,0.725943
max,2.907992,3.32311,3.09916,3.200064,3.581784


In [105]:
series.describe(percentiles=[.05, .25, .75, .95])

count    500.000000
mean       0.071950
std        1.014958
min       -2.565662
5%        -1.503289
25%       -0.595953
50%        0.042926
75%        0.722688
95%        1.774851
max        4.253415
dtype: float64

In [106]:
s = pd.Series(['a', 'a', 'b', 'b', 'a', 'a', np.nan, 'c', 'd', 'a'])

In [107]:
s.describe()

count     9
unique    4
top       a
freq      5
dtype: object

In [108]:
frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)})

In [109]:
frame.describe()

Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [110]:
frame.describe(include=['object'])

Unnamed: 0,a
count,4
unique,2
top,Yes
freq,2


In [111]:
frame.describe(include=['number'])

Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [112]:
frame.describe(include='all')

Unnamed: 0,a,b
count,4,4.0
unique,2,
top,Yes,
freq,2,
mean,,1.5
std,,1.290994
min,,0.0
25%,,0.75
50%,,1.5
75%,,2.25


In [113]:
s1 = pd.Series(np.random.randn(5))

In [114]:
s1

0    1.029677
1   -0.173711
2    1.414088
3   -0.853765
4   -0.246685
dtype: float64

In [115]:
s1.idxmin(), s1.idxmax()

(3, 2)

In [116]:
df1 = pd.DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C'])

In [117]:
df1

Unnamed: 0,A,B,C
0,0.510683,0.686446,1.04986
1,3.107683,1.350535,1.53576
2,-1.026353,-0.587211,-1.316563
3,1.706752,1.753125,0.943893
4,-0.612302,0.450708,1.211484


In [118]:
df1.idxmin(axis=0)

A    2
B    2
C    2
dtype: int64

In [119]:
df1.idxmax(axis=1)

0    C
1    A
2    B
3    B
4    C
dtype: object

In [120]:
df3 = pd.DataFrame([2, 1, 1, 3, np.nan], columns=['A'], index=list('edcba'))

In [121]:
df3

Unnamed: 0,A
e,2.0
d,1.0
c,1.0
b,3.0
a,


In [122]:
df3['A'].idxmin()

'd'

In [123]:
data = np.random.randint(0, 7, size=50)

In [124]:
data

array([1, 0, 3, 3, 3, 6, 4, 5, 4, 0, 4, 0, 4, 4, 6, 6, 6, 6, 6, 5, 6, 4,
       1, 0, 3, 5, 5, 0, 0, 0, 5, 2, 6, 5, 1, 4, 2, 6, 3, 6, 5, 6, 0, 3,
       1, 3, 6, 4, 2, 1])

In [125]:
s = pd.Series(data)

In [126]:
s.value_counts()

6    12
4     8
0     8
5     7
3     7
1     5
2     3
dtype: int64

In [127]:
# Count occurrences of each distinct value in the raw array.  The top-level
# pd.value_counts() function is deprecated in recent pandas releases; wrapping
# the array in a Series and calling the method gives the same result.
pd.Series(data).value_counts()

6    12
4     8
0     8
5     7
3     7
1     5
2     3
dtype: int64

In [128]:
s5 = pd.Series([1, 1, 3, 3, 3, 5, 5, 7, 7, 7])

In [129]:
s5.mode()

0    3
1    7
dtype: int64

In [130]:
df5 = pd.DataFrame({"A": np.random.randint(0, 7, size=50),
    "B": np.random.randint(-10, 15, size=50)})

In [131]:
df5.mode()

Unnamed: 0,A,B
0,2,14


In [132]:
arr = np.random.randn(20)

In [133]:
factor = pd.cut(arr, 4)

In [134]:
factor

[(1.167, 2.066], (0.267, 1.167], (0.267, 1.167], (0.267, 1.167], (0.267, 1.167], ..., (-1.534, -0.632], (-0.632, 0.267], (-1.534, -0.632], (-1.534, -0.632], (-0.632, 0.267]]
Length: 20
Categories (4, interval[float64]): [(-1.534, -0.632] < (-0.632, 0.267] < (0.267, 1.167] < (1.167, 2.066]]

In [135]:
factor = pd.cut(arr, [-5, -1, 0, 1, 5])

In [136]:
factor

[(1, 5], (0, 1], (0, 1], (1, 5], (0, 1], ..., (-1, 0], (-1, 0], (-5, -1], (-1, 0], (-1, 0]]
Length: 20
Categories (4, interval[int64]): [(-5, -1] < (-1, 0] < (0, 1] < (1, 5]]

In [137]:
arr = np.random.randn(30)

In [138]:
factor = pd.qcut(arr, [0, .25, .5, .75, 1])

In [139]:
factor

[(-0.312, 0.248], (-1.821, -0.727], (0.248, 1.086], (-0.727, -0.312], (-0.727, -0.312], ..., (-0.312, 0.248], (-1.821, -0.727], (0.248, 1.086], (-0.727, -0.312], (-1.821, -0.727]]
Length: 30
Categories (4, interval[float64]): [(-1.821, -0.727] < (-0.727, -0.312] < (-0.312, 0.248] < (0.248, 1.086]]

In [140]:
# Count how many values fall into each quantile bin.  The top-level
# pd.value_counts() function is deprecated in recent pandas releases, so the
# categorical is wrapped in a Series and the method is used instead.
pd.Series(factor).value_counts()

(0.248, 1.086]      8
(-1.821, -0.727]    8
(-0.312, 0.248]     7
(-0.727, -0.312]    7
dtype: int64

In [141]:
arr = np.random.randn(20)

In [142]:
factor = pd.cut(arr, [-np.inf, 0, np.inf])

In [143]:
factor

[(-inf, 0.0], (-inf, 0.0], (0.0, inf], (0.0, inf], (0.0, inf], ..., (-inf, 0.0], (0.0, inf], (-inf, 0.0], (0.0, inf], (-inf, 0.0]]
Length: 20
Categories (2, interval[float64]): [(-inf, 0.0] < (0.0, inf]]

In [144]:
def extract_city_name(df):
    """
    Chicago, IL -> Chicago for city_name column

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column ``city_and_code``.

    Returns
    -------
    pandas.DataFrame
        A new frame with an added (or replaced) ``city_name`` column holding
        the text before the first comma of ``city_and_code``.  The input
        frame is not modified, so the function can be re-run or used in a
        ``.pipe`` chain without side effects on the caller's data.
    """
    city_name = df['city_and_code'].str.split(",").str.get(0)
    # assign() returns a copy with the extra column instead of writing into
    # the caller's frame.
    return df.assign(city_name=city_name)

In [145]:
def add_country_name(df, country_name=None):
    """
    Chicago -> ChicagoUS for city_and_country column

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column ``city_name``.
    country_name : str, optional
        Text appended directly (no separator) to every city name.
        NOTE(review): the ``None`` default is kept for interface
        compatibility; callers are presumably expected to pass a string.

    Returns
    -------
    pandas.DataFrame
        A new frame with an added (or replaced) ``city_and_country`` column.
        The input frame is not modified.
    """
    col = 'city_name'
    # assign() returns a copy with the extra column instead of writing into
    # the caller's frame.
    return df.assign(city_and_country=df[col] + country_name)

In [146]:
df_p = pd.DataFrame({'city_and_code': ['Chicago, IL']})

In [147]:
add_country_name(extract_city_name(df_p), country_name='US')

Unnamed: 0,city_and_code,city_name,city_and_country
0,"Chicago, IL",Chicago,ChicagoUS


In [148]:
(df_p.pipe(extract_city_name)
     .pipe(add_country_name, country_name="US"))

Unnamed: 0,city_and_code,city_name,city_and_country
0,"Chicago, IL",Chicago,ChicagoUS


In [149]:
import statsmodels.formula.api as sm

In [150]:
bb = pd.read_csv('baseball.csv', index_col='id')

In [151]:
# Fit an OLS regression in a single method chain on the `bb` frame and
# display the fitted model's summary table.
(bb.query('h > 0')  # keep rows with positive h, so np.log(h) below is defined
     .assign(ln_h=lambda df: np.log(df.h))  # add ln_h = log(h); `df` here is the lambda's own parameter
     # Passing a (callable, 'data') tuple tells pipe() to hand the frame to the
     # callable as its `data` keyword; the formula string is passed positionally.
     .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)')
     .fit()
     .summary())

0,1,2,3
Dep. Variable:,hr,R-squared:,0.685
Model:,OLS,Adj. R-squared:,0.665
Method:,Least Squares,F-statistic:,34.28
Date:,"Tue, 14 Jul 2020",Prob (F-statistic):,3.48e-15
Time:,15:26:31,Log-Likelihood:,-205.92
No. Observations:,68,AIC:,421.8
Df Residuals:,63,BIC:,432.9
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-8484.7720,4664.146,-1.819,0.074,-1.78e+04,835.780
C(lg)[T.NL],-2.2736,1.325,-1.716,0.091,-4.922,0.375
ln_h,-1.3542,0.875,-1.547,0.127,-3.103,0.395
year,4.2277,2.324,1.819,0.074,-0.417,8.872
g,0.1841,0.029,6.258,0.000,0.125,0.243

0,1,2,3
Omnibus:,10.875,Durbin-Watson:,1.999
Prob(Omnibus):,0.004,Jarque-Bera (JB):,17.298
Skew:,0.537,Prob(JB):,0.000175
Kurtosis:,5.225,Cond. No.,14900000.0


In [152]:
df.apply(np.mean)

one     -0.028478
two     -0.537339
three    0.009717
dtype: float64

In [153]:
    df.apply(np.mean, axis=1)

a   -0.483648
b    0.151054
c   -0.902243
d    0.507610
dtype: float64

In [154]:
df.apply(lambda x: x.max() - x.min())

one      1.089531
two      1.464632
three    2.406113
dtype: float64

In [155]:
df.apply(np.cumsum)

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.395382,-0.521013,-0.388502
c,-0.085434,-1.752695,-1.382732
d,,-2.149358,0.029152


In [156]:
df.apply(np.exp)

Unnamed: 0,one,two,three
a,0.807888,0.470498,
b,1.838067,1.262319,0.678072
c,0.618279,0.291801,0.370008
d,,0.67256,4.103679


In [157]:
df.apply('mean')

one     -0.028478
two     -0.537339
three    0.009717
dtype: float64

In [158]:
df.apply('mean', axis=1)

a   -0.483648
b    0.151054
c   -0.902243
d    0.507610
dtype: float64

In [159]:
tsdf = pd.DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],
     index=pd.date_range('1/1/2000', periods=1000))

In [160]:
tsdf.apply(lambda x: x.idxmax())

A   2000-08-03
B   2002-01-19
C   2000-12-13
dtype: datetime64[ns]

In [161]:
tsdf

Unnamed: 0,A,B,C
2000-01-01,0.925500,1.518568,0.897616
2000-01-02,-1.618418,1.243352,0.006450
2000-01-03,1.664407,0.818687,-0.292438
2000-01-04,-0.028670,-0.792822,-0.501131
2000-01-05,0.512050,0.635529,-1.216886
...,...,...,...
2002-09-22,0.408789,0.894483,-1.368637
2002-09-23,-0.082352,-1.671566,1.472456
2002-09-24,1.228412,-0.684500,0.017480
2002-09-25,0.840688,0.722835,-0.042347


In [162]:
tsdf.apply(pd.Series.interpolate)

Unnamed: 0,A,B,C
2000-01-01,0.925500,1.518568,0.897616
2000-01-02,-1.618418,1.243352,0.006450
2000-01-03,1.664407,0.818687,-0.292438
2000-01-04,-0.028670,-0.792822,-0.501131
2000-01-05,0.512050,0.635529,-1.216886
...,...,...,...
2002-09-22,0.408789,0.894483,-1.368637
2002-09-23,-0.082352,-1.671566,1.472456
2002-09-24,1.228412,-0.684500,0.017480
2002-09-25,0.840688,0.722835,-0.042347


In [163]:
tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
     index=pd.date_range('1/1/2000', periods=10))

In [164]:
tsdf.iloc[3:7] = np.nan

In [165]:
tsdf

Unnamed: 0,A,B,C
2000-01-01,-1.280646,-0.927039,0.915895
2000-01-02,0.019234,-0.900983,0.1443
2000-01-03,-0.406648,0.736574,0.606714
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,-0.377693,-1.02044,-0.141957
2000-01-09,-0.818214,-0.975896,-0.188471
2000-01-10,-0.395222,1.029308,-0.891476


In [166]:
tsdf.agg(np.sum)

A   -3.259190
B   -2.058476
C    0.445004
dtype: float64

In [167]:
tsdf.agg('sum')

A   -3.259190
B   -2.058476
C    0.445004
dtype: float64

In [168]:
tsdf.sum()

A   -3.259190
B   -2.058476
C    0.445004
dtype: float64

In [169]:
tsdf['A'].agg('sum')

-3.2591896737718486

In [170]:
tsdf.agg(['sum'])

Unnamed: 0,A,B,C
sum,-3.25919,-2.058476,0.445004


In [171]:
tsdf.agg(['sum', 'mean'])

Unnamed: 0,A,B,C
sum,-3.25919,-2.058476,0.445004
mean,-0.543198,-0.343079,0.074167


In [172]:
tsdf['A'].agg(['sum', 'mean'])

sum    -3.259190
mean   -0.543198
Name: A, dtype: float64

In [173]:
tsdf['A'].agg(['sum', lambda x: x.mean()])

sum        -3.259190
<lambda>   -0.543198
Name: A, dtype: float64

In [174]:
def mymean(x):
    """Return the mean of ``x`` by delegating to its own ``mean()`` method.

    Defined as a named function (rather than a lambda) so that aggregation
    results are labelled ``mymean`` instead of ``<lambda>``.
    """
    average = x.mean()
    return average

In [175]:
tsdf['A'].agg(['sum', mymean])

sum      -3.259190
mymean   -0.543198
Name: A, dtype: float64

In [176]:
tsdf.agg({'A': 'mean', 'B': 'sum'})

A   -0.543198
B   -2.058476
dtype: float64

In [177]:
tsdf.agg({'A': ['mean', 'min'], 'B': 'sum'})

Unnamed: 0,A,B
mean,-0.543198,
min,-1.280646,
sum,,-2.058476


In [178]:
# Mixed-dtype frame: one integer, one float, one string and one datetime column.
mdf = pd.DataFrame(dict(
    A=[1, 2, 3],
    B=[1.0, 2.0, 3.0],
    C=['foo', 'bar', 'baz'],
    D=pd.date_range('20130101', periods=3),
))

In [179]:
mdf.dtypes

A             int64
B           float64
C            object
D    datetime64[ns]
dtype: object

In [180]:
mdf.agg(['min', 'sum'])

Unnamed: 0,A,B,C,D
min,1,1.0,bar,2013-01-01
sum,6,6.0,foobarbaz,NaT


In [181]:
from functools import partial

In [182]:
q_25 = partial(pd.Series.quantile, q=0.25)

In [183]:
q_25.__name__ = '25%'

In [184]:
q_75 = partial(pd.Series.quantile, q=0.75)

In [185]:
q_75.__name__ = '75%'

In [186]:
tsdf.agg(['count', 'mean', 'std', 'min', q_25, 'median', q_75, 'max'])

Unnamed: 0,A,B,C
count,6.0,6.0,6.0
mean,-0.543198,-0.343079,0.074167
std,0.448046,0.955054,0.63956
min,-1.280646,-1.02044,-0.891476
25%,-0.715322,-0.963682,-0.176843
median,-0.400935,-0.914011,0.001171
75%,-0.382076,0.327185,0.49111
max,0.019234,1.029308,0.915895


In [187]:
tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
    index=pd.date_range('1/1/2000', periods=10))

In [188]:
tsdf.iloc[3:7] = np.nan

In [189]:
tsdf

Unnamed: 0,A,B,C
2000-01-01,-0.072521,-1.602907,-1.817496
2000-01-02,-0.760877,-0.761678,-0.355662
2000-01-03,-1.928893,0.188674,0.594604
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.888475,0.22752,1.003392
2000-01-09,0.851337,1.395346,-0.833021
2000-01-10,0.372485,0.347526,-0.319641


In [190]:
tsdf.transform(np.abs)

Unnamed: 0,A,B,C
2000-01-01,0.072521,1.602907,1.817496
2000-01-02,0.760877,0.761678,0.355662
2000-01-03,1.928893,0.188674,0.594604
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.888475,0.22752,1.003392
2000-01-09,0.851337,1.395346,0.833021
2000-01-10,0.372485,0.347526,0.319641


In [191]:
tsdf.transform('abs')

Unnamed: 0,A,B,C
2000-01-01,0.072521,1.602907,1.817496
2000-01-02,0.760877,0.761678,0.355662
2000-01-03,1.928893,0.188674,0.594604
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.888475,0.22752,1.003392
2000-01-09,0.851337,1.395346,0.833021
2000-01-10,0.372485,0.347526,0.319641


In [192]:
tsdf.transform(lambda x: x.abs())

Unnamed: 0,A,B,C
2000-01-01,0.072521,1.602907,1.817496
2000-01-02,0.760877,0.761678,0.355662
2000-01-03,1.928893,0.188674,0.594604
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.888475,0.22752,1.003392
2000-01-09,0.851337,1.395346,0.833021
2000-01-10,0.372485,0.347526,0.319641


In [193]:
np.abs(tsdf)

Unnamed: 0,A,B,C
2000-01-01,0.072521,1.602907,1.817496
2000-01-02,0.760877,0.761678,0.355662
2000-01-03,1.928893,0.188674,0.594604
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.888475,0.22752,1.003392
2000-01-09,0.851337,1.395346,0.833021
2000-01-10,0.372485,0.347526,0.319641


In [194]:
tsdf['A'].transform(np.abs)

2000-01-01    0.072521
2000-01-02    0.760877
2000-01-03    1.928893
2000-01-04         NaN
2000-01-05         NaN
2000-01-06         NaN
2000-01-07         NaN
2000-01-08    0.888475
2000-01-09    0.851337
2000-01-10    0.372485
Freq: D, Name: A, dtype: float64

In [195]:
tsdf.transform([np.abs, lambda x: x + 1])

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,absolute,<lambda>,absolute,<lambda>,absolute,<lambda>
2000-01-01,0.072521,0.927479,1.602907,-0.602907,1.817496,-0.817496
2000-01-02,0.760877,0.239123,0.761678,0.238322,0.355662,0.644338
2000-01-03,1.928893,-0.928893,0.188674,1.188674,0.594604,1.594604
2000-01-04,,,,,,
2000-01-05,,,,,,
2000-01-06,,,,,,
2000-01-07,,,,,,
2000-01-08,0.888475,1.888475,0.22752,1.22752,1.003392,2.003392
2000-01-09,0.851337,1.851337,1.395346,2.395346,0.833021,0.166979
2000-01-10,0.372485,1.372485,0.347526,1.347526,0.319641,0.680359


In [196]:
tsdf['A'].transform([np.abs, lambda x: x + 1])

Unnamed: 0,absolute,<lambda>
2000-01-01,0.072521,0.927479
2000-01-02,0.760877,0.239123
2000-01-03,1.928893,-0.928893
2000-01-04,,
2000-01-05,,
2000-01-06,,
2000-01-07,,
2000-01-08,0.888475,1.888475
2000-01-09,0.851337,1.851337
2000-01-10,0.372485,1.372485


In [197]:
tsdf.transform({'A': np.abs, 'B': lambda x: x + 1})

Unnamed: 0,A,B
2000-01-01,0.072521,-0.602907
2000-01-02,0.760877,0.238322
2000-01-03,1.928893,1.188674
2000-01-04,,
2000-01-05,,
2000-01-06,,
2000-01-07,,
2000-01-08,0.888475,1.22752
2000-01-09,0.851337,2.395346
2000-01-10,0.372485,1.347526


In [198]:
tsdf.transform({'A': np.abs, 'B': [lambda x: x + 1, 'sqrt']})

Unnamed: 0_level_0,A,B,B
Unnamed: 0_level_1,absolute,<lambda>,sqrt
2000-01-01,0.072521,-0.602907,
2000-01-02,0.760877,0.238322,
2000-01-03,1.928893,1.188674,0.434366
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.888475,1.22752,0.47699
2000-01-09,0.851337,2.395346,1.181248
2000-01-10,0.372485,1.347526,0.589513


In [199]:
tsdf.transform({'A': np.abs, 'B': [lambda x: x + 1, 'sqrt']})

Unnamed: 0_level_0,A,B,B
Unnamed: 0_level_1,absolute,<lambda>,sqrt
2000-01-01,0.072521,-0.602907,
2000-01-02,0.760877,0.238322,
2000-01-03,1.928893,1.188674,0.434366
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.888475,1.22752,0.47699
2000-01-09,0.851337,2.395346,1.181248
2000-01-10,0.372485,1.347526,0.589513


In [200]:
s = pd.Series(['six', 'seven', 'six', 'seven', 'six'],
    index=['a', 'b', 'c', 'd', 'e'])

In [201]:
t = pd.Series({'six': 6., 'seven': 7.})

In [202]:
s

a      six
b    seven
c      six
d    seven
e      six
dtype: object

In [203]:
s.map(t)

a    6.0
b    7.0
c    6.0
d    7.0
e    6.0
dtype: float64

In [204]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [205]:
s

a   -0.187896
b    1.668859
c    0.794210
d    0.162515
e   -0.140466
dtype: float64

In [206]:
s.reindex(['e', 'b', 'f', 'd'])

e   -0.140466
b    1.668859
f         NaN
d    0.162515
dtype: float64

In [207]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [208]:
df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one'])

Unnamed: 0,three,two,one
c,-0.99423,-1.231682,-0.480816
f,,,
b,-0.388502,0.232951,0.608714


In [209]:
df.reindex(['c', 'f', 'b'], axis='index')

Unnamed: 0,one,two,three
c,-0.480816,-1.231682,-0.99423
f,,,
b,0.608714,0.232951,-0.388502


In [210]:
rs = s.reindex(df.index)    

In [211]:
rs

a   -0.187896
b    1.668859
c    0.794210
d    0.162515
dtype: float64

In [212]:
rs.index is df.index

True

In [213]:
df.reindex(['c', 'f', 'b'], axis='index')

Unnamed: 0,one,two,three
c,-0.480816,-1.231682,-0.99423
f,,,
b,0.608714,0.232951,-0.388502


In [214]:
df.reindex(['three', 'two', 'one'], axis='columns')

Unnamed: 0,three,two,one
a,,-0.753963,-0.213332
b,-0.388502,0.232951,0.608714
c,-0.99423,-1.231682,-0.480816
d,1.411884,-0.396663,


In [215]:
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [216]:
df3

Unnamed: 0,A
e,2.0
d,1.0
c,1.0
b,3.0
a,


In [217]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [218]:
df.reindex_like(df2)

Unnamed: 0,A,B
0,,
1,,
2,,
3,,
4,,
5,,


In [219]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [220]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [221]:
s1 = s[:4]

In [222]:
s2 = s[1:]

In [223]:
s1.align(s2)

(a   -1.787614
 b    0.572646
 c    0.836128
 d   -1.030443
 e         NaN
 dtype: float64,
 a         NaN
 b    0.572646
 c    0.836128
 d   -1.030443
 e   -0.579169
 dtype: float64)

In [224]:
s1.align(s2, join='inner')

(b    0.572646
 c    0.836128
 d   -1.030443
 dtype: float64,
 b    0.572646
 c    0.836128
 d   -1.030443
 dtype: float64)

In [225]:
s1.align(s2, join='left')

(a   -1.787614
 b    0.572646
 c    0.836128
 d   -1.030443
 dtype: float64,
 a         NaN
 b    0.572646
 c    0.836128
 d   -1.030443
 dtype: float64)

In [226]:
df.align(df2, join='inner')

(Empty DataFrame
 Columns: []
 Index: [],
 Empty DataFrame
 Columns: []
 Index: [])

In [227]:
df.align(df2, join='inner', axis=0)

(Empty DataFrame
 Columns: [one, two, three]
 Index: [],
 Empty DataFrame
 Columns: [A, B]
 Index: [])

In [228]:
df.align(df2.iloc[0], axis=1)

(    A   B       one     three       two
 a NaN NaN -0.213332       NaN -0.753963
 b NaN NaN  0.608714 -0.388502  0.232951
 c NaN NaN -0.480816 -0.994230 -1.231682
 d NaN NaN       NaN  1.411884 -0.396663,
 A        5.0
 B        NaN
 one      NaN
 three    NaN
 two      NaN
 Name: 0, dtype: float64)

In [229]:
rng = pd.date_range('1/3/2000', periods=8)

In [230]:
ts = pd.Series(np.random.randn(8), index=rng)

In [231]:
# Pick the 1st, 4th and 7th observations by position.  `ts` has a datetime
# index, so integer keys passed to plain [] only work through pandas'
# deprecated positional fallback; .iloc states the positional intent explicitly.
ts2 = ts.iloc[[0, 3, 6]]

In [232]:
ts

2000-01-03    0.185708
2000-01-04    0.359841
2000-01-05    0.249419
2000-01-06    0.480607
2000-01-07   -0.558119
2000-01-08    1.121059
2000-01-09    0.462782
2000-01-10   -0.324913
Freq: D, dtype: float64

In [233]:
ts2

2000-01-03    0.185708
2000-01-06    0.480607
2000-01-09    0.462782
dtype: float64

In [234]:
ts2.reindex(ts.index)

2000-01-03    0.185708
2000-01-04         NaN
2000-01-05         NaN
2000-01-06    0.480607
2000-01-07         NaN
2000-01-08         NaN
2000-01-09    0.462782
2000-01-10         NaN
Freq: D, dtype: float64

In [235]:
ts2.reindex(ts.index, method='ffill')

2000-01-03    0.185708
2000-01-04    0.185708
2000-01-05    0.185708
2000-01-06    0.480607
2000-01-07    0.480607
2000-01-08    0.480607
2000-01-09    0.462782
2000-01-10    0.462782
Freq: D, dtype: float64

In [236]:
ts2.reindex(ts.index, method='bfill')

2000-01-03    0.185708
2000-01-04    0.480607
2000-01-05    0.480607
2000-01-06    0.480607
2000-01-07    0.462782
2000-01-08    0.462782
2000-01-09    0.462782
2000-01-10         NaN
Freq: D, dtype: float64

In [237]:
ts2.reindex(ts.index, method='nearest')

2000-01-03    0.185708
2000-01-04    0.185708
2000-01-05    0.480607
2000-01-06    0.480607
2000-01-07    0.480607
2000-01-08    0.462782
2000-01-09    0.462782
2000-01-10    0.462782
Freq: D, dtype: float64

In [238]:
# Reindex first (introducing NaN for the missing dates), then forward-fill.
# .ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases; the result is the same.
ts2.reindex(ts.index).ffill()

2000-01-03    0.185708
2000-01-04    0.185708
2000-01-05    0.185708
2000-01-06    0.480607
2000-01-07    0.480607
2000-01-08    0.480607
2000-01-09    0.462782
2000-01-10    0.462782
Freq: D, dtype: float64

In [239]:
ts2.reindex(ts.index, method='ffill', limit=1)

2000-01-03    0.185708
2000-01-04    0.185708
2000-01-05         NaN
2000-01-06    0.480607
2000-01-07    0.480607
2000-01-08         NaN
2000-01-09    0.462782
2000-01-10    0.462782
Freq: D, dtype: float64

In [240]:
ts2.reindex(ts.index, method='ffill', tolerance='1 day')

2000-01-03    0.185708
2000-01-04    0.185708
2000-01-05         NaN
2000-01-06    0.480607
2000-01-07    0.480607
2000-01-08         NaN
2000-01-09    0.462782
2000-01-10    0.462782
Freq: D, dtype: float64

In [241]:
df

Unnamed: 0,one,two,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [242]:
df.drop(['a', 'd'], axis=0)

Unnamed: 0,one,two,three
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423


In [243]:
df.drop(['one'], axis=1)

Unnamed: 0,two,three
a,-0.753963,
b,0.232951,-0.388502
c,-1.231682,-0.99423
d,-0.396663,1.411884


In [244]:
df.reindex(df.index.difference(['a', 'd']))

Unnamed: 0,one,two,three
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423


In [245]:
s

a   -1.787614
b    0.572646
c    0.836128
d   -1.030443
e   -0.579169
dtype: float64

In [246]:
s.rename(str.upper)

A   -1.787614
B    0.572646
C    0.836128
D   -1.030443
E   -0.579169
dtype: float64

In [247]:
df.rename(columns={'one': 'foo', 'two': 'bar'},
    index={'a': 'apple', 'b': 'banana', 'd': 'durian'})

Unnamed: 0,foo,bar,three
apple,-0.213332,-0.753963,
banana,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
durian,,-0.396663,1.411884


In [248]:
df.rename({'one': 'foo', 'two': 'bar'}, axis='columns')

Unnamed: 0,foo,bar,three
a,-0.213332,-0.753963,
b,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
d,,-0.396663,1.411884


In [249]:
df.rename({'a': 'apple', 'b': 'banana', 'd': 'durian'}, axis='index')

Unnamed: 0,one,two,three
apple,-0.213332,-0.753963,
banana,0.608714,0.232951,-0.388502
c,-0.480816,-1.231682,-0.99423
durian,,-0.396663,1.411884


In [250]:
s.rename("scalar-name")

a   -1.787614
b    0.572646
c    0.836128
d   -1.030443
e   -0.579169
Name: scalar-name, dtype: float64

In [251]:
# Frame with a two-level (let, num) row index: each letter paired with 1 and 2.
df = pd.DataFrame(
    dict(x=[1, 2, 3, 4, 5, 6], y=[10, 20, 30, 40, 50, 60]),
    index=pd.MultiIndex.from_product(
        [list('abc'), [1, 2]],
        names=['let', 'num'],
    ),
)

In [252]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y
let,num,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,10
a,2,2,20
b,1,3,30
b,2,4,40
c,1,5,50
c,2,6,60


In [253]:
df.rename_axis(index={'let': 'abc'})

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y
abc,num,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,10
a,2,2,20
b,1,3,30
b,2,4,40
c,1,5,50
c,2,6,60


In [254]:
df.rename_axis(index=str.upper)

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y
LET,NUM,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,10
a,2,2,20
b,1,3,30
b,2,4,40
c,1,5,50
c,2,6,60


In [255]:
df = pd.DataFrame({'col1': np.random.randn(3),
                   'col2': np.random.randn(3)}, index=['a', 'b', 'c'])

In [256]:
for col in df:
    print(col)

col1
col2


In [257]:
# Warning: never modify something you are iterating over; this is not guaranteed to work in all cases.
# Depending on the data types, the iterator returns a copy rather than a view, and writing to that copy has no effect.

df = pd.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']})

In [258]:
# Example: assigning to `row` inside the loop has no effect on `df`
# (the frame is displayed unchanged in the next cell).
# NOTE(review): the loop variable `index` rebinds the notebook-level `index`
# created in cell 2 (the date_range) — confirm no later cell relies on it.
for index, row in df.iterrows():
    row['a'] = 10

In [259]:
df

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


In [260]:
for label, ser in df.items():
    print(label)
    print(ser)

a
0    1
1    2
2    3
Name: a, dtype: int64
b
0    a
1    b
2    c
Name: b, dtype: object


In [261]:
for row_index, row in df.iterrows():
    print(row_index, row, sep='\n')

0
a    1
b    a
Name: 0, dtype: object
1
a    2
b    b
Name: 1, dtype: object
2
a    3
b    c
Name: 2, dtype: object


In [262]:
df

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


In [263]:
df_orig = pd.DataFrame([[1, 1.5]], columns=['int', 'float'])

In [264]:
df_orig.dtypes

int        int64
float    float64
dtype: object

In [265]:
df_orig

Unnamed: 0,int,float
0,1,1.5


In [266]:
row = next(df_orig.iterrows())[1]

In [267]:
row

int      1.0
float    1.5
Name: 0, dtype: float64

In [268]:
row['int'].dtype

dtype('float64')

In [269]:
df_orig['int'].dtype

dtype('int64')

In [270]:
df2 = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

In [271]:
print(df2)

   x  y
0  1  4
1  2  5
2  3  6


In [272]:
print(df2.T)

   0  1  2
x  1  2  3
y  4  5  6


In [273]:
# Rebuild the transpose by hand: each (row label, row Series) pair yielded by
# iterrows becomes one column of the new frame.
df2_t = pd.DataFrame(dict(df2.iterrows()))

In [274]:
print(df2_t)

   0  1  2
x  1  2  3
y  4  5  6


In [275]:
df

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


In [276]:
for row in df.itertuples():
    print(row)

Pandas(Index=0, a=1, b='a')
Pandas(Index=1, a=2, b='b')
Pandas(Index=2, a=3, b='c')


In [277]:
s = pd.Series(pd.date_range('20130101 09:10:12', periods=4))

In [278]:
s

0   2013-01-01 09:10:12
1   2013-01-02 09:10:12
2   2013-01-03 09:10:12
3   2013-01-04 09:10:12
dtype: datetime64[ns]

In [279]:
s.dt.hour

0    9
1    9
2    9
3    9
dtype: int64

In [280]:
s.dt.second

0    12
1    12
2    12
3    12
dtype: int64

In [281]:
s.dt.day

0    1
1    2
2    3
3    4
dtype: int64

In [282]:
s[s.dt.day == 2]

1   2013-01-02 09:10:12
dtype: datetime64[ns]

In [283]:
stz = s.dt.tz_localize('US/Eastern')

In [284]:
stz

0   2013-01-01 09:10:12-05:00
1   2013-01-02 09:10:12-05:00
2   2013-01-03 09:10:12-05:00
3   2013-01-04 09:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [285]:
stz.dt.tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

In [286]:
s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

0   2013-01-01 04:10:12-05:00
1   2013-01-02 04:10:12-05:00
2   2013-01-03 04:10:12-05:00
3   2013-01-04 04:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [287]:
s = pd.Series(pd.date_range('20130101', periods=4))

In [288]:
s

0   2013-01-01
1   2013-01-02
2   2013-01-03
3   2013-01-04
dtype: datetime64[ns]

In [289]:
s.dt.strftime('%Y/%m/%d')

0    2013/01/01
1    2013/01/02
2    2013/01/03
3    2013/01/04
dtype: object

In [290]:
s = pd.Series(pd.period_range('20130101', periods=4))

In [291]:
s

0    2013-01-01
1    2013-01-02
2    2013-01-03
3    2013-01-04
dtype: period[D]

In [292]:
s.dt.strftime('%Y/%m/%d')

0    2013/01/01
1    2013/01/02
2    2013/01/03
3    2013/01/04
dtype: object

In [293]:
s = pd.Series(pd.period_range('20130101', periods=4, freq='D'))

In [294]:
s

0    2013-01-01
1    2013-01-02
2    2013-01-03
3    2013-01-04
dtype: period[D]

In [295]:
s.dt.year

0    2013
1    2013
2    2013
3    2013
dtype: int64

In [296]:
s.dt.day

0    1
1    2
2    3
3    4
dtype: int64

In [297]:
s = pd.Series(pd.timedelta_range('1 day 00:00:05', periods=4, freq='s'))

In [298]:
s

0   1 days 00:00:05
1   1 days 00:00:06
2   1 days 00:00:07
3   1 days 00:00:08
dtype: timedelta64[ns]

In [299]:
s.dt.days

0    1
1    1
2    1
3    1
dtype: int64

In [300]:
s.dt.seconds

0    5
1    6
2    7
3    8
dtype: int64

In [301]:
s.dt.components

Unnamed: 0,days,hours,minutes,seconds,milliseconds,microseconds,nanoseconds
0,1,0,0,5,0,0,0
1,1,0,0,6,0,0,0
2,1,0,0,7,0,0,0
3,1,0,0,8,0,0,0


In [302]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'],
    dtype="string")

In [303]:
s

0       A
1       B
2       C
3    Aaba
4    Baca
5    <NA>
6    CABA
7     dog
8     cat
dtype: string

In [304]:
df = pd.DataFrame({
    'one': pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
    'two': pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
    'three': pd.Series(np.random.randn(3), index=['b', 'c', 'd'])})

In [305]:
df

Unnamed: 0,one,two,three
a,1.059741,-0.618103,
b,0.540305,2.996829,-1.198914
c,-0.453099,-0.849338,1.201644
d,,0.747429,1.478799


In [306]:
unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'],
    columns=['three', 'two', 'one'])

In [307]:
unsorted_df

Unnamed: 0,three,two,one
a,,-0.618103,1.059741
d,1.478799,0.747429,
c,1.201644,-0.849338,-0.453099
b,-1.198914,2.996829,0.540305


In [308]:
unsorted_df.sort_index()

Unnamed: 0,three,two,one
a,,-0.618103,1.059741
b,-1.198914,2.996829,0.540305
c,1.201644,-0.849338,-0.453099
d,1.478799,0.747429,


In [309]:
unsorted_df.sort_index(ascending=False)

Unnamed: 0,three,two,one
d,1.478799,0.747429,
c,1.201644,-0.849338,-0.453099
b,-1.198914,2.996829,0.540305
a,,-0.618103,1.059741


In [310]:
unsorted_df.sort_index(axis=1)

Unnamed: 0,one,three,two
a,1.059741,,-0.618103
d,,1.478799,0.747429
c,-0.453099,1.201644,-0.849338
b,0.540305,-1.198914,2.996829


In [311]:
unsorted_df['three'].sort_index()

a         NaN
b   -1.198914
c    1.201644
d    1.478799
Name: three, dtype: float64

In [312]:
df1 = pd.DataFrame({'one': [2, 1, 1, 1],
    'two': [1, 3, 2, 4],
    'three': [5, 4, 3, 2]})

In [313]:
df1

Unnamed: 0,one,two,three
0,2,1,5
1,1,3,4
2,1,2,3
3,1,4,2


In [314]:
df1.sort_values(by='two')

Unnamed: 0,one,two,three
0,2,1,5
2,1,2,3
1,1,3,4
3,1,4,2


In [315]:
df1[['one', 'two', 'three']].sort_values(by=['one', 'two'])

Unnamed: 0,one,two,three
2,1,2,3
1,1,3,4
3,1,4,2
0,2,1,5


In [316]:
s[2] = np.nan

In [317]:
s

0       A
1       B
2    <NA>
3    Aaba
4    Baca
5    <NA>
6    CABA
7     dog
8     cat
dtype: string

In [318]:
s.sort_values()

0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
2    <NA>
5    <NA>
dtype: string

In [319]:
s.sort_values(na_position='first')

2    <NA>
5    <NA>
0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
dtype: string

In [320]:
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2),
    ('b', 2), ('b', 1), ('b', 1)])

In [321]:
idx

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 2),
            ('b', 2),
            ('b', 1),
            ('b', 1)],
           )

In [322]:
idx.names = ['first', 'second']

In [323]:
idx

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 2),
            ('b', 2),
            ('b', 1),
            ('b', 1)],
           names=['first', 'second'])

In [324]:
df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)},
    index=idx)

In [325]:
df_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
a,1,6
a,2,5
a,2,4
b,2,3
b,1,2
b,1,1


In [326]:
df_multi.sort_values(by=['second', 'A'])

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
b,1,1
b,1,2
a,1,6
b,2,3
a,2,4
a,2,5


In [327]:
ser = pd.Series([1, 2, 3])

In [328]:
ser.searchsorted([0, 3])

array([0, 2], dtype=int32)

In [329]:
ser.searchsorted([0, 4])

array([0, 3], dtype=int32)

In [330]:
ser.searchsorted([1, 3], side='right')

array([1, 3], dtype=int32)

In [331]:
ser.searchsorted([1, 3], side='left')

array([0, 2], dtype=int32)

In [332]:
ser = pd.Series([3, 1, 2])

In [333]:
ser.searchsorted([0, 3], sorter=np.argsort(ser))

array([0, 2], dtype=int32)

In [334]:
s = pd.Series(np.random.permutation(10))

In [335]:
s

0    9
1    6
2    8
3    3
4    0
5    4
6    7
7    5
8    1
9    2
dtype: int32

In [336]:
s.sort_values()

4    0
8    1
9    2
3    3
5    4
7    5
1    6
6    7
2    8
0    9
dtype: int32

In [337]:
s.nsmallest(3)

4    0
8    1
9    2
dtype: int32

In [338]:
s.nlargest(3)

0    9
2    8
6    7
dtype: int32

In [339]:
# Fixture for the nlargest / nsmallest examples: an integer column with a
# repeated value (-1), a label column, and a float column holding one NaN.
df = pd.DataFrame(
    data={
        'a': [-2, -1, 1, 10, 8, 11, -1],
        'b': ['a', 'b', 'd', 'c', 'e', 'f', 'f'],
        'c': [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0],
    },
)

In [340]:
df.nlargest(3, 'a')

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,


In [341]:
df

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
2,1,d,4.0
3,10,c,3.2
4,8,e,
5,11,f,3.0
6,-1,f,4.0


In [342]:
df.nlargest(5, ['a', 'c'])

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,
2,1,d,4.0
6,-1,f,4.0


In [343]:
df.nsmallest(3, 'a')

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
6,-1,f,4.0


In [344]:
df.nsmallest(5, ['a', 'c'])

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
6,-1,f,4.0
2,1,d,4.0
4,8,e,


In [345]:
df1.columns = pd.MultiIndex.from_tuples([('a', 'one'),
    ('a', 'two'),
    ('b', 'three')])

In [346]:
df1

Unnamed: 0_level_0,a,a,b
Unnamed: 0_level_1,one,two,three
0,2,1,5
1,1,3,4
2,1,2,3
3,1,4,2


In [347]:
df1.sort_values(by=('a', 'two'))

Unnamed: 0_level_0,a,a,b
Unnamed: 0_level_1,one,two,three
0,2,1,5
2,1,2,3
1,1,3,4
3,1,4,2


In [348]:
dft = pd.DataFrame({'A': np.random.rand(3),
    'B': 1,
    'C': 'foo',
    'D': pd.Timestamp('20010102'),
    'E': pd.Series([1.0] * 3).astype('float32'),
    'F': False,
    'G': pd.Series([1] * 3, dtype='int8')})

In [349]:
dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.89724,1,foo,2001-01-02,1.0,False,1
1,0.934752,1,foo,2001-01-02,1.0,False,1
2,0.968552,1,foo,2001-01-02,1.0,False,1


In [350]:
dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [351]:
dft['A'].dtype

dtype('float64')

In [352]:
pd.Series([1, 2, 3, 4, 5, 6.])

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [353]:
pd.Series([1, 2, 3, 6., 'foo'])

0      1
1      2
2      3
3      6
4    foo
dtype: object

In [354]:
dft.dtypes.value_counts()

datetime64[ns]    1
float32           1
object            1
float64           1
int64             1
int8              1
bool              1
dtype: int64

In [355]:
df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')

In [356]:
df1

Unnamed: 0,A
0,-0.446075
1,0.536757
2,1.703488
3,0.927097
4,0.10597
5,-0.463611
6,-1.359791
7,0.292342


In [357]:
df1.dtypes

A    float32
dtype: object

In [358]:
# Three columns with different dtypes: float16, float64 (the default) and uint8.
# The random draws are consumed in column order (A, then B, then C).
df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'),
    'B': pd.Series(np.random.randn(8)),
    # NOTE(review): randn produces negative floats; forcing them into uint8
    # looks platform-dependent (the displayed output shows 255 in some rows) —
    # confirm this is acceptable for the example.
    'C': pd.Series(np.array(np.random.randn(8),
    dtype='uint8'))})

In [359]:
df2

Unnamed: 0,A,B,C
0,0.949219,2.249729,0
1,0.155151,0.288224,0
2,0.067749,-0.862326,255
3,-0.851074,-0.975482,0
4,-0.810059,0.316473,1
5,-1.144531,-0.749903,0
6,-0.182373,-0.754199,255
7,3.193359,1.117544,1


In [360]:
df2.dtypes

A    float16
B    float64
C      uint8
dtype: object

In [361]:
pd.DataFrame([1, 2], columns=['a']).dtypes

a    int64
dtype: object

In [362]:
pd.DataFrame({'a': [1, 2]}).dtypes

a    int64
dtype: object

In [363]:
pd.DataFrame({'a': 1}, index=list(range(2))).dtypes

a    int64
dtype: object

In [364]:
frame = pd.DataFrame(np.array([1, 2]))

In [365]:
df3 = df1.reindex_like(df2).fillna(value=0.0) + df2

In [366]:
df3

Unnamed: 0,A,B,C
0,0.503144,2.249729,0.0
1,0.691908,0.288224,0.0
2,1.771237,-0.862326,255.0
3,0.076023,-0.975482,0.0
4,-0.704089,0.316473,1.0
5,-1.608142,-0.749903,0.0
6,-1.542164,-0.754199,255.0
7,3.485702,1.117544,1.0


In [367]:
df3.dtypes

A    float32
B    float64
C    float64
dtype: object

In [368]:
df3.to_numpy().dtype

dtype('float64')

In [369]:
df3

Unnamed: 0,A,B,C
0,0.503144,2.249729,0.0
1,0.691908,0.288224,0.0
2,1.771237,-0.862326,255.0
3,0.076023,-0.975482,0.0
4,-0.704089,0.316473,1.0
5,-1.608142,-0.749903,0.0
6,-1.542164,-0.754199,255.0
7,3.485702,1.117544,1.0


In [370]:
df3.dtypes

A    float32
B    float64
C    float64
dtype: object

In [371]:
df3.astype('float32').dtypes

A    float32
B    float32
C    float32
dtype: object

In [372]:
dft = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})

In [373]:
dft[['a', 'b']] = dft[['a', 'b']].astype(np.uint8)

In [374]:
dft

Unnamed: 0,a,b,c
0,1,4,7
1,2,5,8
2,3,6,9


In [375]:
dft.dtypes

a    uint8
b    uint8
c    int64
dtype: object

In [376]:
dft1 = pd.DataFrame({'a': [1, 0, 1], 'b': [4, 5, 6], 'c': [7, 8, 9]})

In [377]:
# Cast per column with a {column: dtype} mapping; 'b' is not listed and keeps
# its dtype. Use the builtin `bool`: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
dft1 = dft1.astype({'a': bool, 'c': np.float64})

In [378]:
dft1

Unnamed: 0,a,b,c
0,True,4,7.0
1,False,5,8.0
2,True,6,9.0


In [379]:
dft1.dtypes

a       bool
b      int64
c    float64
dtype: object

In [380]:
dft = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})

In [381]:
dft

Unnamed: 0,a,b,c
0,1,4,7
1,2,5,8
2,3,6,9


In [382]:
dft.loc[:, ['a', 'b']].astype(np.uint8).dtypes

a    uint8
b    uint8
dtype: object

In [383]:
# Gotcha: the right-hand side is uint8 (see the previous cell), but after
# assigning it back through .loc the columns still report int64 in the
# dtypes shown by the next cell.
dft.loc[:, ['a', 'b']] = dft.loc[:, ['a', 'b']].astype(np.uint8)

In [384]:
dft.dtypes

a    int64
b    int64
c    int64
dtype: object

In [385]:
import datetime

In [386]:
df = pd.DataFrame([[1, 2],
    ['a', 'b'],
    [datetime.datetime(2016, 3, 2),
    datetime.datetime(2016, 3, 2)]])

In [387]:
df = df.T

In [388]:
df

Unnamed: 0,0,1,2
0,1,a,2016-03-02
1,2,b,2016-03-02


In [389]:
df.dtypes

0            object
1            object
2    datetime64[ns]
dtype: object

In [390]:
df.infer_objects().dtypes

0             int64
1            object
2    datetime64[ns]
dtype: object

In [391]:
m = ['1.1', 2, 3]

In [392]:
pd.to_numeric(m)

array([1.1, 2. , 3. ])

In [393]:
m = ['2016-07-09', datetime.datetime(2016, 3, 2)]

In [394]:
pd.to_datetime(m)

DatetimeIndex(['2016-07-09', '2016-03-02'], dtype='datetime64[ns]', freq=None)

In [395]:
m = ['5us', pd.Timedelta('1day')]

In [396]:
pd.to_timedelta(m)

TimedeltaIndex(['0 days 00:00:00.000005', '1 days 00:00:00'], dtype='timedelta64[ns]', freq=None)

In [397]:
m = ['apple', datetime.datetime(2016, 3, 2)]

In [398]:
m

['apple', datetime.datetime(2016, 3, 2, 0, 0)]

In [399]:
pd.to_datetime(m, errors='coerce')

DatetimeIndex(['NaT', '2016-03-02'], dtype='datetime64[ns]', freq=None)

In [400]:
m = ['apple', 2, 3]

In [401]:
pd.to_numeric(m, errors='coerce')

array([nan,  2.,  3.])

In [402]:
m = ['apple', pd.Timedelta('1day')]

In [403]:
pd.to_timedelta(m, errors='coerce')

TimedeltaIndex([NaT, '1 days'], dtype='timedelta64[ns]', freq=None)

In [404]:
m = ['apple', datetime.datetime(2016, 3, 2)]

In [405]:
pd.to_datetime(m, errors='ignore')

Index(['apple', 2016-03-02 00:00:00], dtype='object')

In [406]:
m = ['apple', 2, 3]

In [407]:
pd.to_numeric(m, errors='ignore')

array(['apple', 2, 3], dtype=object)

In [408]:
m = ['apple', pd.Timedelta('1day')]

In [409]:
pd.to_timedelta(m, errors='ignore')

array(['apple', Timedelta('1 days 00:00:00')], dtype=object)

In [410]:
m = ['1', 2, 3]

In [411]:
pd.to_numeric(m, downcast='integer')

array([1, 2, 3], dtype=int8)

In [412]:
pd.to_numeric(m, downcast='signed')

array([1, 2, 3], dtype=int8)

In [413]:
pd.to_numeric(m, downcast='unsigned')

array([1, 2, 3], dtype=uint8)

In [414]:
pd.to_numeric(m, downcast='float')

array([1., 2., 3.], dtype=float32)

In [415]:
df = pd.DataFrame([
    ['2016-07-09', datetime.datetime(2016, 3, 2)]] * 2, dtype='O')

In [416]:
df

Unnamed: 0,0,1
0,2016-07-09,2016-03-02 00:00:00
1,2016-07-09,2016-03-02 00:00:00


In [417]:
df.apply(pd.to_datetime)

Unnamed: 0,0,1
0,2016-07-09,2016-03-02
1,2016-07-09,2016-03-02


In [418]:
df = pd.DataFrame([['1.1', 2, 3]] * 2, dtype='O')

In [419]:
df

Unnamed: 0,0,1,2
0,1.1,2,3
1,1.1,2,3


In [420]:
df.apply(pd.to_numeric)

Unnamed: 0,0,1,2
0,1.1,2,3
1,1.1,2,3


In [421]:
df = pd.DataFrame([['5us', pd.Timedelta('1day')]] * 2, dtype='O')

In [422]:
df

Unnamed: 0,0,1
0,5us,1 days 00:00:00
1,5us,1 days 00:00:00


In [423]:
df.apply(pd.to_timedelta)

Unnamed: 0,0,1
0,00:00:00.000005,1 days
1,00:00:00.000005,1 days


In [424]:
df3

Unnamed: 0,A,B,C
0,0.503144,2.249729,0.0
1,0.691908,0.288224,0.0
2,1.771237,-0.862326,255.0
3,0.076023,-0.975482,0.0
4,-0.704089,0.316473,1.0
5,-1.608142,-0.749903,0.0
6,-1.542164,-0.754199,255.0
7,3.485702,1.117544,1.0


In [425]:
dfi = df3.astype('int32')

In [426]:
dfi['E'] = 1

In [427]:
dfi

Unnamed: 0,A,B,C,E
0,0,2,0,1
1,0,0,0,1
2,1,0,255,1
3,0,0,0,1
4,0,0,1,1
5,-1,0,0,1
6,-1,0,255,1
7,3,1,1,1


In [428]:
dfi.dtypes

A    int32
B    int32
C    int32
E    int64
dtype: object

In [429]:
casted = dfi[dfi > 0]

In [430]:
casted

Unnamed: 0,A,B,C,E
0,,2.0,,1
1,,,,1
2,1.0,,255.0,1
3,,,,1
4,,,1.0,1
5,,,,1
6,,,255.0,1
7,3.0,1.0,1.0,1


In [431]:
casted.dtypes

A    float64
B    float64
C    float64
E      int64
dtype: object

In [432]:
dfa = df3.copy()

In [433]:
dfa['A'] = dfa['A'].astype('float32')

In [434]:
dfa.dtypes

A    float32
B    float64
C    float64
dtype: object

In [435]:
casted = dfa[df2 > 0]

In [436]:
casted

Unnamed: 0,A,B,C
0,0.503144,2.249729,
1,0.691908,0.288224,
2,1.771237,,255.0
3,,,
4,,0.316473,1.0
5,,,
6,,,255.0
7,3.485702,1.117544,1.0


In [437]:
casted.dtypes

A    float32
B    float64
C    float64
dtype: object

In [438]:
# One column per dtype family, used below to demonstrate select_dtypes.
# NOTE: 'dates' is anchored at 'now', so its values differ on every run.
df = pd.DataFrame(
    {
        'string': ['a', 'b', 'c'],
        'int64': [1, 2, 3],
        'uint8': np.array([3, 4, 5], dtype='u1'),
        'float64': np.array([4.0, 5.0, 6.0]),
        'bool1': [True, False, True],
        'bool2': [False, True, False],
        'dates': pd.date_range('now', periods=3),
        'category': pd.Series(['A', 'B', 'C']).astype('category'),
    }
)

In [439]:
df['tdeltas'] = df.dates.diff()

In [440]:
df['uint64'] = np.arange(3, 6).astype('u8')

In [441]:
df['other_dates'] = pd.date_range('20130101', periods=3)

In [442]:
df['tz_aware_dates'] = pd.date_range('20130101', periods=3, tz='US/Eastern')

In [443]:
df

Unnamed: 0,string,int64,uint8,float64,bool1,bool2,dates,category,tdeltas,uint64,other_dates,tz_aware_dates
0,a,1,3,4.0,True,False,2020-07-14 15:26:36.460704,A,NaT,3,2013-01-01,2013-01-01 00:00:00-05:00
1,b,2,4,5.0,False,True,2020-07-15 15:26:36.460704,B,1 days,4,2013-01-02,2013-01-02 00:00:00-05:00
2,c,3,5,6.0,True,False,2020-07-16 15:26:36.460704,C,1 days,5,2013-01-03,2013-01-03 00:00:00-05:00


In [444]:
df.dtypes

string                                object
int64                                  int64
uint8                                  uint8
float64                              float64
bool1                                   bool
bool2                                   bool
dates                         datetime64[ns]
category                            category
tdeltas                      timedelta64[ns]
uint64                                uint64
other_dates                   datetime64[ns]
tz_aware_dates    datetime64[ns, US/Eastern]
dtype: object

In [445]:
df.select_dtypes(include=[bool])

Unnamed: 0,bool1,bool2
0,True,False
1,False,True
2,True,False


In [446]:
df.select_dtypes(include=['bool'])

Unnamed: 0,bool1,bool2
0,True,False
1,False,True
2,True,False


In [447]:
df.select_dtypes(include=['number', 'bool'], exclude=['unsignedinteger'])

Unnamed: 0,int64,float64,bool1,bool2,tdeltas
0,1,4.0,True,False,NaT
1,2,5.0,False,True,1 days
2,3,6.0,True,False,1 days


In [448]:
df.select_dtypes(include=['object'])

Unnamed: 0,string
0,a
1,b
2,c


In [449]:
df

Unnamed: 0,string,int64,uint8,float64,bool1,bool2,dates,category,tdeltas,uint64,other_dates,tz_aware_dates
0,a,1,3,4.0,True,False,2020-07-14 15:26:36.460704,A,NaT,3,2013-01-01,2013-01-01 00:00:00-05:00
1,b,2,4,5.0,False,True,2020-07-15 15:26:36.460704,B,1 days,4,2013-01-02,2013-01-02 00:00:00-05:00
2,c,3,5,6.0,True,False,2020-07-16 15:26:36.460704,C,1 days,5,2013-01-03,2013-01-03 00:00:00-05:00


In [450]:
def subdtypes(dtype):
    """Return the subclass tree rooted at ``dtype`` as nested lists.

    A class with no subclasses is returned as-is. Otherwise the result is
    ``[dtype, [subtree, ...]]`` with one subtree per direct subclass, in the
    order reported by ``dtype.__subclasses__()``.
    """
    children = dtype.__subclasses__()
    if children:
        # Recurse into every direct subclass to build its own subtree.
        return [dtype, list(map(subdtypes, children))]
    return dtype

In [451]:
subdtypes(np.generic)

[numpy.generic,
 [[numpy.number,
   [[numpy.integer,
     [[numpy.signedinteger,
       [numpy.int8,
        numpy.int16,
        numpy.intc,
        numpy.int32,
        numpy.int64,
        numpy.timedelta64]],
      [numpy.unsignedinteger,
       [numpy.uint8, numpy.uint16, numpy.uintc, numpy.uint32, numpy.uint64]]]],
    [numpy.inexact,
     [[numpy.floating,
       [numpy.float16, numpy.float32, numpy.float64, numpy.longdouble]],
      [numpy.complexfloating,
       [numpy.complex64, numpy.complex128, numpy.clongdouble]]]]]],
  [numpy.flexible,
   [[numpy.character, [numpy.bytes_, numpy.str_]],
    [numpy.void, [numpy.record]]]],
  numpy.bool_,
  numpy.datetime64,
  numpy.object_]]