In [3]:
import pandas as pd
import numpy as np

In [4]:
# Build a 5x3 frame of standard-normal floats indexed by letters.
# The index skips "b", "d" and "g"; the later reindex onto the full a-h range
# is what produces the all-missing rows used throughout this walkthrough.
# The generator is not seeded, so the displayed values change on every run.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=["a", "c", "e", "f", "h"],
    columns=["one", "two", "three"],
)

df

Unnamed: 0,one,two,three
a,-1.592069,-1.449828,-0.091397
c,-0.179998,-0.873616,0.277267
e,0.179332,0.662626,-1.604805
f,0.181952,0.361031,1.84899
h,-1.65528,-0.48389,-0.527139


In [4]:
# Assign a scalar string: every row of the new "four" column receives "bar".
df["four"] = "bar"

In [5]:
# Add a boolean column that is True where "one" is strictly positive.
df["five"] = df["one"] > 0

In [6]:
# Display the frame with the two added columns.
# NOTE(review): the numeric values rendered below differ from the first display
# of df, so the two outputs come from different runs of the unseeded
# np.random.randn cell — re-run top to bottom to make them agree.
df

Unnamed: 0,one,two,three,four,five
a,0.451126,1.669191,-0.122225,bar,True
c,-2.438164,0.842862,0.219011,bar,False
e,0.086412,1.574959,-1.750693,bar,True
f,-0.290275,0.87651,-1.184469,bar,False
h,1.396183,-1.3819,-0.980313,bar,True


In [8]:
# Reindex onto the full a-h label range. Labels that df does not have
# ("b", "d", "g") become rows in which every column is missing.
df2 = df.reindex(["a", "b", "c", "d", "e", "f", "g", "h"])
df2

Unnamed: 0,one,two,three,four,five
a,0.451126,1.669191,-0.122225,bar,True
b,,,,,
c,-2.438164,0.842862,0.219011,bar,False
d,,,,,
e,0.086412,1.574959,-1.750693,bar,True
f,-0.290275,0.87651,-1.184469,bar,False
g,,,,,
h,1.396183,-1.3819,-0.980313,bar,True


In [9]:
# Select a single column as a Series; its missing entries display as NaN.
df2["one"]

a    0.451126
b         NaN
c   -2.438164
d         NaN
e    0.086412
f   -0.290275
g         NaN
h    1.396183
Name: one, dtype: float64

In [10]:
# Function form of the missing-value test: an element-wise boolean mask that
# is True exactly on the rows where "one" is missing (b, d, g).
pd.isna(df2["one"])

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool

In [11]:
# Method form of the inverse test: True where "four" holds a value,
# False on the rows introduced by the reindex.
df2["four"].notna()

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: four, dtype: bool

In [12]:
# On a whole DataFrame, isna() yields a boolean frame with the same index and
# columns, marking each missing cell with True.
df2.isna()

Unnamed: 0,one,two,three,four,five
a,False,False,False,False,False
b,True,True,True,True,True
c,False,False,False,False,False
d,True,True,True,True,True
e,False,False,False,False,False
f,False,False,False,False,False
g,True,True,True,True,True
h,False,False,False,False,False


In [13]:
# Python's None compares equal to itself.
None == None

True

In [14]:
# NaN does not compare equal even to itself, so == cannot detect it.
np.nan == np.nan

False

In [15]:
# Pitfall demonstration: comparing a Series to np.nan with == is False for
# every row, including the rows that actually are NaN (b, d, g).
# Use isna() / notna() to locate missing values instead.
df2["one"] == np.nan

a    False
b    False
c    False
d    False
e    False
f    False
g    False
h    False
Name: one, dtype: bool

### Integer dtypes and missing data

In [16]:
# Request the nullable integer dtype explicitly: the np.nan input is shown as
# <NA> and the resulting Series keeps dtype Int64.
pd.Series([1, 2, np.nan, 4], dtype=pd.Int64Dtype())

0       1
1       2
2    <NA>
3       4
dtype: Int64

### Datetimes

In [5]:
# Take a copy of df as the starting point for the datetime examples.
# NOTE(review): this rebinds df2 (previously the reindexed frame), the
# execution counter restarts at In [5] here, and the outputs that follow lack
# the "four"/"five" columns added to df earlier — this section appears to come
# from a different kernel run; re-run the notebook top to bottom to confirm.
df2 = df.copy()

In [6]:
# Assign a single Timestamp (2012-01-01) to every row of a new column.
df2["timestamp"] = pd.Timestamp("20120101")

In [7]:
# Display the copy with its new "timestamp" column.
df2

Unnamed: 0,one,two,three,timestamp
a,-1.592069,-1.449828,-0.091397,2012-01-01
c,-0.179998,-0.873616,0.277267,2012-01-01
e,0.179332,0.662626,-1.604805,2012-01-01
f,0.181952,0.361031,1.84899,2012-01-01
h,-1.65528,-0.48389,-0.527139,2012-01-01


In [8]:
# Write the same np.nan sentinel into a float column and the datetime column
# for rows a, c and h; the next display shows it rendered as a missing float
# in "one" and as NaT in "timestamp".
df2.loc[["a", "c", "h"], ["one", "timestamp"]] = np.nan

In [9]:
# Display the result: rows a, c, h are now missing in "one" and NaT in "timestamp".
df2

Unnamed: 0,one,two,three,timestamp
a,,-1.449828,-0.091397,NaT
c,,-0.873616,0.277267,NaT
e,0.179332,0.662626,-1.604805,2012-01-01
f,0.181952,0.361031,1.84899,2012-01-01
h,,-0.48389,-0.527139,NaT


In [10]:
# Count columns per dtype. "timestamp" is still datetime64[ns] after receiving
# missing values, alongside the three float64 columns.
df2.dtypes.value_counts()

float64           3
datetime64[ns]    1
dtype: int64

### Inserting missing data