# Gotchas in Pandas

In [8]:
import pandas as pd
import numpy as np

## *Using If/Truth Statements with pandas*

In [7]:
#1
# A Series has no single truth value, so using one directly as an `if`
# condition raises ValueError. The error is the point of this cell, so it is
# caught and printed instead of aborting a "Restart & Run All".
try:
    if pd.Series([False, True, False]):
        print("i was true")
except ValueError as err:
    truth_error = str(err)  # kept so the message can be inspected afterwards
    print("ValueError:", truth_error)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [9]:
#Solution
# `is not None` is an identity check on the Series object itself, not on its
# values, so it yields a plain bool and never triggers the ambiguity error.
# Note that it is True for any Series, whatever the Series contains.
if pd.Series([False, True, False]) is not None:
    print("i was something")

i was something


In [10]:
#using any Function
#2
# .any() collapses the Series to a single bool: True when at least one
# element is truthy, which is the case here.
if pd.Series([False, True, False]).any():
    print("i am any")

i am any


## *NA Gotchas*

In [11]:
# pandas uses the special value NaN (Not-A-Number) as its NA marker everywhere.
# The API functions isnull and notnull detect NA values across all dtypes.

#2
# Start from an all-integer Series labelled 'a' through 'f'.
s = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=['a', 'b', 'c', 'd', 'e', 'f'],
)
s

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [12]:
# No value is missing yet, so the Series keeps its integer dtype.
s.dtype

dtype('int64')

In [13]:
#3
# Reindex against labels that are partly absent from s: 'h' and 'r' have no
# match, so they come back as NaN and the result is upcast to float64.
s2 = s.reindex(['a', 'b', 'c', 'h', 'e', 'r'])
s2

a    1.0
b    2.0
c    3.0
h    NaN
e    5.0
r    NaN
dtype: float64

In [14]:
#4
# The original Series is untouched by reindex: it is still int64.
s.dtype

dtype('int64')

In [15]:
# The reindexed copy was upcast to float64 so it can hold NaN.
s2.dtype

dtype('float64')

In [16]:
# Assigning a Series to a column aligns on index labels, not on position.
# series is labelled 0..3 and df is labelled 3..6, so only label 3 matches;
# every other row becomes NaN.
series=pd.Series([1,2,3,4])
df=pd.DataFrame(index=[3,4,5,6])
df['col']=series
df

Unnamed: 0,col
3,4.0
4,
5,
6,


In [17]:
#Solution
# .values hands over the raw array without an index, so the four values are
# assigned by position instead of being aligned on labels.
df['col']=series.values
df

Unnamed: 0,col
3,1
4,2
5,3
6,4


In [18]:
# NaN never compares equal to anything, itself included, so an equality test
# against np.nan is False in every cell and cannot be used to find NA values.
df = pd.DataFrame([1, np.nan], columns=['col'])
df == np.nan

Unnamed: 0,col
0,False
1,False


In [19]:
# isnull() is the supported way to detect NA values: it flags the NaN row.
df = pd.DataFrame([1, np.nan], columns=['col'])
df.isnull()

Unnamed: 0,col
0,False
1,True


In [None]:
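### *Reason: Python and NumPy have no native NA value, so pandas falls back on the float NaN as its missing-value marker, and NaN never compares equal to anything, including itself.*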

## Reindexing Gotchas

In [None]:
# Draw the demo values from a locally seeded generator so the frame is
# identical on every "Restart & Run All" (and the global NumPy random state
# is left alone).
df = pd.DataFrame(
    np.random.RandomState(0).randn(6, 4),
    columns=['one', 'two', 'three', 'four'],
    index=list('abcdef'),
)
df

In [None]:
# Label-based selection of rows 'b', 'c' and 'e'. `.ix` was deprecated in
# pandas 0.20 and removed in 1.0; `.loc` is the explicit label-based indexer.
df.loc[['b', 'c', 'e']]

In [None]:
# With labels that all exist in the index, reindex returns the rows carrying
# those labels.
df.reindex(['b', 'c', 'e'])

In [None]:
# Positional selection of the 2nd, 3rd and 5th rows. `.ix` (removed in
# pandas 1.0) fell back to positions here because the index holds strings;
# `.iloc` states that intent explicitly.
df.iloc[[1, 2, 4]]

In [None]:
# The index holds the labels 'a'..'f', so the labels 1, 2 and 4 are not found
# and reindex returns three rows filled with NaN.
df.reindex([1, 2, 4])

In [None]:
# reindex is strictly label-based, so it cannot select rows by integer position: labels that are missing from the index come back as NaN rows.

In [3]:
# Six integers on the default 0..5 index; used below as the reindexing target.
series = pd.Series(data=list(range(1, 7)))
series

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [4]:
# A one-element boolean Series keeps the compact bool dtype.
true = pd.Series([True])
true.dtype

dtype('bool')

In [5]:
# reindex_like conforms the one-element Series to the six labels of `series`;
# the five labels it lacks are filled with NaN, which bool cannot hold, so the
# result is upcast to object dtype.
true = pd.Series([True]).reindex_like(series)
true.dtype

dtype('O')

### The dtype changes to Python `object` because `reindex_like` silently inserts NaNs for the labels that are missing, and a `bool` Series cannot hold NaN.

## Some other tips
*  Avoid multithreading while using pandas: it may drop some frames, or `DataFrame.copy` might skip some frames, while handling very large-scale data sets.
* Avoid using pandas together with BS4 (BeautifulSoup 4).


# links
* <http://pandas.pydata.org/pandas-docs/stable/gotchas.html>
* <http://docs.python-guide.org/en/latest/writing/gotchas/#late-binding-closures>
* <https://gist.github.com/manojpandey/41b90cba1fd62095e247d1b2448ef85b>