In [72]:
import pandas as pd
import numpy as np

In [73]:
long_series = pd.Series(np.random.randn(1000))
long_series.head()

0    0.412910
1    0.938876
2   -0.274993
3    0.191322
4   -0.725818
dtype: float64

In [74]:
long_series.tail()

995    0.035804
996   -1.680818
997   -0.707842
998   -0.612729
999    1.257640
dtype: float64

# Matching / broadcasting behavior

In [75]:
df = pd.DataFrame({ 'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
                    'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
                    'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])})
df

Unnamed: 0,one,two,three
a,0.145989,2.004197,
b,0.24323,1.157022,0.59342
c,-0.223351,-0.887341,-0.060299
d,,1.186453,2.212427


In [76]:
row = df.iloc[1]
row

one      0.243230
two      1.157022
three    0.593420
Name: b, dtype: float64

In [77]:
column = df['two']
column

a    2.004197
b    1.157022
c   -0.887341
d    1.186453
Name: two, dtype: float64

In [78]:
df.sub(row,axis='columns')

Unnamed: 0,one,two,three
a,-0.097241,0.847175,
b,0.0,0.0,0.0
c,-0.46658,-2.044363,-0.65372
d,,0.029431,1.619007


In [79]:
df.sub(row, axis=1)

Unnamed: 0,one,two,three
a,-0.097241,0.847175,
b,0.0,0.0,0.0
c,-0.46658,-2.044363,-0.65372
d,,0.029431,1.619007


In [80]:
df.sub(column,axis='index')

Unnamed: 0,one,two,three
a,-1.858208,0.0,
b,-0.913792,0.0,-0.563602
c,0.66399,0.0,0.827041
d,,0.0,1.025974


In [81]:
df

Unnamed: 0,one,two,three
a,0.145989,2.004197,
b,0.24323,1.157022,0.59342
c,-0.223351,-0.887341,-0.060299
d,,1.186453,2.212427


# Missing values / Fill values

In [82]:
df2 = pd.DataFrame({ 'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
                    'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
                    'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])})
df2

Unnamed: 0,one,two,three
a,0.858434,0.54975,
b,-0.50158,0.685276,0.383565
c,-0.871467,0.04166,1.038308
d,,0.346324,-2.257662


In [83]:
df2.add(df, fill_value=999)

Unnamed: 0,one,two,three
a,1.004423,2.553947,
b,-0.25835,1.842298,0.976985
c,-1.094818,-0.845681,0.978008
d,,1.532777,-0.045235


# Boolean Reductions

In [84]:
(df > 0).any()

one      True
two      True
three    True
dtype: bool

In [85]:
(df.loc['a']['two']) > -0.5

True

In [86]:
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [87]:
pd.Series([True]).bool()

True

In [88]:
pd.DataFrame([[True]]).bool()

True

# Comparing if objects are equivalent

In [89]:
df+df == df*2

Unnamed: 0,one,two,three
a,True,True,False
b,True,True,True
c,True,True,True
d,False,True,True


In [90]:
(df+df == df*2).all()

one      False
two       True
three    False
dtype: bool

### Series or DataFrame index needs to be in the same order for equality to be True:

In [91]:
df1 = pd.DataFrame({'col':['foo', 0, np.nan]})
df2 = pd.DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0])
df1.equals(df2)

False

In [92]:
df1.equals(df2.sort_index())

True

## Comparing array-like objects

In [93]:
df.index == 'b'

array([False,  True, False, False])

In [94]:
np.array([1, 2, 3]) == np.array([2])

array([False,  True, False])

In [95]:
pd.Series(['foo', 'bar', 'baz']) == np.array(['foo', 'bar', 'qux'])

0     True
1     True
2    False
dtype: bool

## Combining overlapping data sets

In [96]:
df1 = pd.DataFrame({'A' : [1., np.nan, 3., 5., np.nan],
                    'B' : [np.nan, 2., 3., np.nan, 6.]})
df2 = pd.DataFrame({'A' : [5., 2., 4., np.nan, 3., 7.],
                    'B' : [np.nan, np.nan, 3., 4., 6., 8.]})

In [97]:
df1

Unnamed: 0,A,B
0,1.0,
1,,2.0
2,3.0,3.0
3,5.0,
4,,6.0


In [98]:
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [99]:
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


# Descriptive statistics

In [100]:
df3 = pd.DataFrame({'A' : [2.0, 2.0, 4.0, 0.0, 2.0],
                    'B' : [2.0, 3.0, np.nan, 5.0, 6.0],
                    'C' : [2.0, 3.0, 4.0, 5.0, 6.0]
                   }
                   ,index=list('vwxyz')
                  )
df3

Unnamed: 0,A,B,C
v,2.0,2.0,2.0
w,2.0,3.0,3.0
x,4.0,,4.0
y,0.0,5.0,5.0
z,2.0,6.0,6.0


### **__DataFrame: “index” (axis=0, default), “columns” (axis=1)__**

In [101]:
df3.mean(0)

A    2.0
B    4.0
C    4.0
dtype: float64

In [102]:
df3.std()

A    1.414214
B    1.825742
C    1.581139
dtype: float64

In [103]:
df3.mean(1)

v    2.000000
w    2.666667
x    4.000000
y    3.333333
z    4.666667
dtype: float64

In [104]:
df3.cumsum()

Unnamed: 0,A,B,C
v,2.0,2.0,2.0
w,4.0,5.0,5.0
x,8.0,,9.0
y,8.0,10.0,14.0
z,10.0,16.0,20.0


### **_missing data_**

In [105]:
df3.sum(1, skipna=False)

v     6.0
w     8.0
x     NaN
y    10.0
z    14.0
dtype: float64

# Summarizing data: describe

## Series

In [106]:
series = pd.Series(np.random.randn(10))

In [107]:
series[::2]

0   -0.520024
2   -0.361462
4   -0.202174
6    0.614444
8    0.559100
dtype: float64

In [108]:
series[::2] = np.nan

In [109]:
series

0         NaN
1   -0.054401
2         NaN
3   -0.081151
4         NaN
5    1.948706
6         NaN
7   -0.445121
8         NaN
9    0.698569
dtype: float64

In [110]:
series.describe()

count    5.000000
mean     0.413321
std      0.953803
min     -0.445121
25%     -0.081151
50%     -0.054401
75%      0.698569
max      1.948706
dtype: float64

In [111]:
series.describe(percentiles=[.25, .50, .95])

count    5.000000
mean     0.413321
std      0.953803
min     -0.445121
25%     -0.081151
50%     -0.054401
95%      1.698679
max      1.948706
dtype: float64

## DataFrame

In [112]:
frame = pd.DataFrame(np.random.randn(10, 5), columns=['a', 'b', 'c', 'd', 'e'])
frame.iloc[::2] = np.nan
frame

Unnamed: 0,a,b,c,d,e
0,,,,,
1,-0.183802,1.836662,-1.284203,0.064864,0.130745
2,,,,,
3,0.698689,-0.890003,1.614556,1.195417,-0.055026
4,,,,,
5,0.858656,0.764957,1.126264,0.519718,0.963181
6,,,,,
7,-2.325727,-0.325276,-0.397012,0.35125,-0.149977
8,,,,,
9,-0.847118,2.176424,0.20319,-0.449138,2.883858


In [113]:
frame.describe()

Unnamed: 0,a,b,c,d,e
count,5.0,5.0,5.0,5.0,5.0
mean,-0.35986,0.712553,0.252559,0.336422,0.754556
std,1.297917,1.328021,1.16237,0.604677,1.268877
min,-2.325727,-0.890003,-1.284203,-0.449138,-0.149977
25%,-0.847118,-0.325276,-0.397012,0.064864,-0.055026
50%,-0.183802,0.764957,0.20319,0.35125,0.130745
75%,0.698689,1.836662,1.126264,0.519718,0.963181
max,0.858656,2.176424,1.614556,1.195417,2.883858


## INDEX min/max Value

In [114]:
df3 = pd.DataFrame([2, 1, 1, 3, np.nan], columns=['A'], index=list('edcba'))
df3

Unnamed: 0,A
e,2.0
d,1.0
c,1.0
b,3.0
a,


In [115]:
df3['A'].idxmin()

'd'

## Value counts (histogramming) / Mode

In [116]:
data = np.random.randint(0,7, size=50)
data

array([3, 6, 3, 1, 5, 5, 6, 3, 5, 3, 6, 4, 3, 5, 1, 0, 2, 3, 2, 4, 1, 0,
       5, 3, 5, 4, 0, 2, 1, 0, 2, 1, 2, 2, 2, 6, 4, 4, 5, 3, 3, 4, 4, 2,
       3, 5, 2, 5, 6, 6])

In [117]:
num = pd.value_counts(data)
num

3    10
5     9
2     9
4     7
6     6
1     5
0     4
dtype: int64

In [118]:
s = pd.Series(data)
s.value_counts()

3    10
5     9
2     9
4     7
6     6
1     5
0     4
dtype: int64

In [119]:
df6 = pd.DataFrame({'A' : [2, 2, 4, 0, 2],
                    'B' : [2, 3, 3, 5, 6],
                    'C' : [1, 3, 4, 2, 3]
                   })
df6

Unnamed: 0,A,B,C
0,2,2,1
1,2,3,3
2,4,3,4
3,0,5,2
4,2,6,3


In [120]:
df6.mode()

Unnamed: 0,A,B,C
0,2,3,3


### _Row or Column wise function application_

In [121]:
df6.apply(np.mean)

A    2.0
B    3.8
C    2.6
dtype: float64

In [122]:
df6.apply('mean', axis=1)

0    1.666667
1    2.666667
2    3.666667
3    2.333333
4    3.666667
dtype: float64

## Discretization

In [123]:
arr = np.random.randn(5)
arr

array([ 0.99066162,  0.3826152 , -0.26756129,  0.32533997,  0.54043049])

In [124]:
factor = pd.cut(arr,2)
factor

[(0.362, 0.991], (0.362, 0.991], (-0.269, 0.362], (-0.269, 0.362], (0.362, 0.991]]
Categories (2, interval[float64]): [(-0.269, 0.362] < (0.362, 0.991]]

# Row or Column-wise function application

**_functions can be applied along the axes of a DataFrame_**

_If the applied function returns a Series, the final output is a DataFrame. The columns match the index of the Series returned by the applied function._

_If the applied function returns any other type, the final output is a Series._


In [125]:
tsdf = pd.DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C'],
                    index=pd.date_range('1/1/2000', periods=5))
tsdf

Unnamed: 0,A,B,C
2000-01-01,-0.199551,1.440627,-1.555569
2000-01-02,1.271645,-0.923588,-0.230834
2000-01-03,-1.761236,-1.385148,-0.968003
2000-01-04,-1.161726,0.00012,-1.661901
2000-01-05,-0.087539,2.472424,-0.991983


In [126]:
tsdf.apply(lambda x: x.idxmax())

A   2000-01-02
B   2000-01-05
C   2000-01-02
dtype: datetime64[ns]

In [127]:
tsdf.iloc[[1,3]] = np.nan
tsdf

Unnamed: 0,A,B,C
2000-01-01,-0.199551,1.440627,-1.555569
2000-01-02,,,
2000-01-03,-1.761236,-1.385148,-0.968003
2000-01-04,,,
2000-01-05,-0.087539,2.472424,-0.991983


In [128]:
tsdf.apply(pd.Series.interpolate)

Unnamed: 0,A,B,C
2000-01-01,-0.199551,1.440627,-1.555569
2000-01-02,-0.980393,0.02774,-1.261786
2000-01-03,-1.761236,-1.385148,-0.968003
2000-01-04,-0.924387,0.543638,-0.979993
2000-01-05,-0.087539,2.472424,-0.991983


frame

In [129]:
frame.apply(pd.Series.interpolate)

Unnamed: 0,a,b,c,d,e
0,,,,,
1,-0.183802,1.836662,-1.284203,0.064864,0.130745
2,0.257443,0.47333,0.165177,0.63014,0.03786
3,0.698689,-0.890003,1.614556,1.195417,-0.055026
4,0.778672,-0.062523,1.37041,0.857567,0.454077
5,0.858656,0.764957,1.126264,0.519718,0.963181
6,-0.733535,0.219841,0.364626,0.435484,0.406602
7,-2.325727,-0.325276,-0.397012,0.35125,-0.149977
8,-1.586422,0.925574,-0.096911,-0.048944,1.366941
9,-0.847118,2.176424,0.20319,-0.449138,2.883858


In [130]:
def subtract_and_divide(x, sub, divide=1):
    """Shift ``x`` down by ``sub``, then scale the result by ``1 / divide``.

    Parameters
    ----------
    x : value supporting ``-`` and ``/``
        Input to transform.
    sub : value subtracted from ``x``.
    divide : divisor applied after the subtraction (defaults to 1).

    Returns
    -------
    The result of ``(x - sub) / divide``.
    """
    shifted = x - sub
    return shifted / divide

tsdf.apply(subtract_and_divide, args=(5,), divide=3)

Unnamed: 0,A,B,C
2000-01-01,-1.733184,-1.186458,-2.18519
2000-01-02,,,
2000-01-03,-2.253745,-2.128383,-1.989334
2000-01-04,,,
2000-01-05,-1.695846,-0.842525,-1.997328


# Aggregation

In [131]:
df7 = pd.DataFrame({'A' : [2.0, 2.0, 4.0, 0.0, 2.0],
                    'B' : [2.0, 3.0, np.nan, 5.0, 6.0],
                    'C' : [2.0, 3.0, 4.0, 5.0, 6.0]
                   })
df7

Unnamed: 0,A,B,C
0,2.0,2.0,2.0
1,2.0,3.0,3.0
2,4.0,,4.0
3,0.0,5.0,5.0
4,2.0,6.0,6.0


In [132]:
df7.agg(np.sum)

A    10.0
B    16.0
C    20.0
dtype: float64

In [133]:
df7.agg('sum')

A    10.0
B    16.0
C    20.0
dtype: float64

In [134]:
df7.sum()

A    10.0
B    16.0
C    20.0
dtype: float64

In [135]:
df7.A.agg('sum')

10.0

In [136]:
df7.A.sum()

10.0

## Aggregation with functions

In [137]:
df7.agg(['sum'])

Unnamed: 0,A,B,C
sum,10.0,16.0,20.0


In [138]:
df7.agg(['sum','mean'])

Unnamed: 0,A,B,C
sum,10.0,16.0,20.0
mean,2.0,4.0,4.0


In [139]:
df7.agg(['sum', (lambda x: x.sum())])

Unnamed: 0,A,B,C
sum,10.0,16.0,20.0
<lambda>,10.0,16.0,20.0


In [140]:
def lambdasum(x):
    """Return the total of ``x`` by delegating to its own ``sum()`` method.

    A named function is used so the aggregation result is labelled
    ``lambdasum`` rather than ``<lambda>``.
    """
    total = x.sum()
    return total

df7.agg(['sum', lambdasum])

Unnamed: 0,A,B,C
sum,10.0,16.0,20.0
lambdasum,10.0,16.0,20.0


## Aggregating with a dictionary

In [141]:
def mymean(x):
    """Return the mean of ``x`` by delegating to its own ``mean()`` method.

    Defined here because the aggregation below refers to ``mymean`` and no
    earlier cell provides it (the cell previously failed with a NameError).
    """
    return x.mean()


# Dictionary aggregation: a built-in reduction (by name) for column 'A' and
# the user-defined reduction for column 'B'.
df7.agg({'A': 'sum', 'B': mymean})

In [None]:
# Per-column aggregation: two reductions for 'A', one for 'B'. The string
# 'sum' is used instead of the builtin ``sum`` so that pandas' own
# ``Series.sum`` is requested explicitly ('B' contains a NaN).
df7.agg({'A': ['sum', 'mean'], 'B': 'sum'})

# Custom describe

In [None]:
from functools import partial

In [None]:
q_25 = partial(pd.Series.quantile,q=0.25)
q_25.__name__ = '25% Percentile Rank'

tsdf.agg(['count', 'mean', 'std', 'min', q_25, 'median', 'max'])

# Transform

### **_as numpy function_**

In [None]:
tsdf.transform(np.abs)

In [None]:
np.abs(tsdf)

### **_as string function_**

In [None]:
tsdf.transform('abs')

### **_as user defined function_**

In [None]:
tsdf.transform( lambda x: x.abs())

In [None]:
tsdf.A.transform(np.abs)

In [None]:
# Transform with dictionary

In [None]:
tsdf.transform({'A': np.abs, 'B': lambda x: x+2 })

In [None]:
#tsdf.transform({'A': np.abs, 'B': [lambda x: x+2, np.sum]})
tsdf.transform({'A': np.abs, 'B': [lambda x: x+1, np.cumsum]})


# Element-wise function

**_taking a single value and returning a single value_**

## DataFrame element-wise

In [None]:
ewdf = pd.DataFrame({'A' : [2.32310, 12.0, 4222.0, 10.0, 22.0],
                    'B' : [2.01, 3.0, 3.2, 5.0, 6.0],
                    'C' : [2.1110, 3.0, 4.0, 5.0, 6.0]
                   })
ewdf

In [None]:
f = lambda x: len(str(x))

ewdf['A'].map(f)

In [None]:
ewdf.applymap(f)

## Series Element-wise

In [None]:
s = pd.Series(['six', 'five', 'six', 'seven', 'six'],
              index=['a', 'b', 'c', 'd', 'e'])
t = pd.Series({'six' : 6., 'seven' : 7.})
s

In [None]:
s.map(t)

# Reindexing and altering labels

## **_Reindex with Series_**

In [None]:
s = pd.Series(np.random.randn(5), index=list('abcde'))
s.head()

In [None]:
s.reindex(['e','f','a'])

## **_Reindexing with DataFrames_**

In [None]:
ridf = pd.DataFrame({ 'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
                    'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
                    'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])})
ridf

In [None]:
ridf.reindex(index=['d','a'],columns=['three','one'])

In [None]:
ridf.reindex(list('da'), axis='index')

In [None]:
ridf.reindex(['three','one'], axis='columns')

In [None]:
ridf.reindex(ridf.columns, axis='columns')

In [None]:
ridf2 = ridf.reindex(['three','one'], axis='columns')
ridf3 = ridf.reindex(ridf.columns, axis='columns')
ridf2.reindex_like(ridf3)

# Aligning objects

In [None]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s1 = s[:4]
s1

In [None]:
s2 = s[1:]
s2

In [None]:
s3 = s1.align(s2)

# Filling while reindexing

| Method           | Action |
| ---------------- | ------- |
| pad / ffill      | Fill values forward |
| bfill / backfill | Fill values backward |
| nearest          | Fill from the nearest index value |

### _fillna() and interpolate() will not perform any checks on the order of the index._
### _reindex() with a fill method requires the index to be ordered, either increasing or decreasing._

In [None]:
rng = pd.date_range('1/3/2000', periods=11, freq='3D')
ts = pd.Series(np.random.randn(11), index=rng)
ts

In [None]:
# Pick the 1st, 6th and 11th observations by position. ``.iloc`` makes the
# positional intent explicit; plain ``ts[[0, 5, 10]]`` relies on the deprecated
# fallback that treats integer keys as positions on a non-integer index.
ts2 = ts.iloc[[0, 5, 10]]
ts2

In [None]:
ts2.reindex(ts.index)

In [None]:
ts2.reindex(ts.index, method='ffill')

In [None]:
ts2.reindex(ts.index, method="bfill")

In [None]:
ts2.reindex(ts.index, method="nearest")

In [None]:
# Reindex first (introducing NaN for the dates missing from ts2), then
# back-fill. ``.bfill()`` replaces the deprecated ``fillna(method="bfill")``.
ts2.reindex(ts.index).bfill()

In [None]:
ts2.reindex(ts.index).fillna(value=10)

In [None]:
ts2.reindex(ts.index).interpolate()

### _tolerance specifies the maximum distance between the index and indexer values_

In [None]:
ts2.reindex(ts.index, method='nearest', tolerance='3 Day')

In [None]:
ts2.reindex(ts.index, method='nearest', limit=2)

# Dropping and Renaming axis label

In [None]:
dpdf = pd.DataFrame({ 'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']),
                    'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']),
                    'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])})
dpdf

In [None]:
dpdf.drop(['a','d'], axis='index')

In [None]:
dpdf.drop(['three'], axis='columns')

In [None]:
dpdf.rename(columns={'one': 'foo', 'three': 'bar'},
            index={'a': 'apple', 'b': 'banana', 'd': 'durian'})

In [None]:
dpdf.rename({'one': 'foo', 'three': 'bar'}, axis='columns')

# DateTime accessor

In [None]:
s = pd.Series(pd.date_range('20130101 09:10:12', periods=6))
s

In [None]:
stz = s.dt.tz_localize('US/Eastern')
stz

In [None]:
stz.dt.tz

In [None]:
s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

In [None]:
s.dt.strftime('%Y/%m/%d')

# Sorting

In [None]:
sdf = pd.DataFrame({'A':[2,1,1,1],'C':[5,4,3,2], 'B':[1,3,2,4],})
sdf

In [None]:
# Sort rows by column 'A'. ``sdf`` only has columns 'A', 'C' and 'B', so the
# previous key "one" does not exist and raised a KeyError.
sdf.sort_values(by="A")

In [None]:
sdf.sort_values(by=["A","B"])

In [None]:
sdf.sort_index(ascending=False)

In [None]:
sdf.sort_index(axis="columns")

# Sort By Indexes and Values

In [None]:
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2),('b', 2), ('b', 1), ('b', 1)])
idx.names = ['first', 'second']
idx

In [None]:
# Build a two-column frame of descending integers, indexed by the
# ('first', 'second') MultiIndex ``idx`` created in the previous cell.
df_multi = pd.DataFrame(
    {
        'A': np.arange(6, 0, -1),
        'B': np.arange(9, 3, -1),
    },
    index=idx,
)
df_multi

In [None]:
df_multi.sort_values(by=['second', 'A'])

In [None]:
df_multi.sort_values(by=['A', 'B'])

In [None]:
srs = pd.Series(np.random.randn(20))
srs

## Sorting by Series

In [None]:
srs.nsmallest(4)

In [None]:
srs.nlargest(3)

## Sorting by DataFrames

In [None]:
srdf = pd.DataFrame({'a': [-2, -1, 1, 10, 8, 11, -1],
                    'b': list('abdceff'),
                    'c': [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0]})
srdf

In [None]:
srdf.nsmallest(3, 'a')

In [None]:
srdf.nsmallest(2, ['a', 'c'])

# Dtypes

In [None]:
dft = pd.DataFrame(dict(
                    A = np.random.rand(3),
                    B = 1,
                    C = 'foo',
                    D = pd.Timestamp('20010102'),
                    E = pd.Series([1.0]*3).astype('float32'),
                    F = pd.Series([3.0]*3).astype('float32'),
                    G = False,
                    H = pd.Series([1]*3,dtype='int8'))
                   
                  )
                  
dft

In [None]:
dft.dtypes

In [None]:
dft.A.dtype

In [None]:
dft.B

In [None]:
dft.B.astype('float64')

# Type Casting

In [None]:
dft[['A','B']].dtypes

In [None]:
dft[['A','B']].astype('float64').dtypes

In [None]:
dft1 = pd.DataFrame({'a': [1,0,1], 'b': [4,5,6], 'c': [7, 8, 9]})
dft1

In [None]:
dft1.dtypes

In [None]:
# Cast each column to its own target dtype. The builtins ``bool`` and ``str``
# are used because the ``np.bool`` / ``np.str`` aliases were deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (accessing them raises AttributeError).
dft2 = dft1.astype({'a': bool, 'b': str, 'c': np.float64})
dft2

In [None]:
dft2.dtypes

## Numeric Typecasting

In [None]:
m = ['1', 2, 3]
pd.to_numeric(m, downcast='integer')

In [None]:
pd.to_numeric(m, downcast='float')

## Converting Object by inferring

In [None]:
import datetime

In [None]:
indf = pd.DataFrame([[1.0, 29.0],
                     ['a', 'b'],
                     [datetime.datetime(2016, 3, 2), datetime.datetime(2016, 3, 2)]])
indf.T

In [None]:
indf.T.dtypes

In [None]:
indf.T.infer_objects().dtypes

## Converting Date

In [None]:
m = ['2016-07-09', datetime.datetime(2016, 3, 2)]
m

In [None]:
pd.to_datetime(m)

## Conversion Force and Coerce

In [None]:
m = ['apple', datetime.datetime(2016, 3, 2)]
#pd.to_datetime(m, errors='raise')
#pd.to_datetime(m, errors='coerce')
pd.to_datetime(m, errors='ignore')

In [None]:
m = ['apple', 2, 3]
pd.to_numeric(m, errors='coerce')

# Converting Multi-dimensional Objects or DataFrames

In [None]:
cndf = pd.DataFrame([['2016-07-09', datetime.datetime(2016, 3, 2)]] * 2, dtype='O')
cndf

In [None]:
cndf.dtypes

In [None]:
cndf.apply(pd.to_datetime)

In [None]:
indf = pd.DataFrame([['1.1', 2, 3]] * 2, dtype='O')
indf

In [None]:
indf.dtypes

In [None]:
indf = indf.apply(pd.to_numeric)
indf

In [None]:
indf.dtypes

In [None]:
to_float = lambda y: pd.to_numeric(y, downcast='float')
indf.loc[0].map(to_float)

In [None]:
fldf = indf.apply(to_float)
fldf

In [None]:
fldf.dtypes