In [1]:
import pandas as pd
import xlrd
import numpy as np

In [2]:
# Load the workbook into a DataFrame (the path is relative to the working directory).
df = pd.read_excel("Canada.xlsx")
# Show the column labels as a NumPy array; note that the year labels are integers, not strings.
df.columns.values

array(['Type', 'Coverage', 'OdName', 'AREA', 'AreaName', 'REG', 'RegName',
       'DEV', 'DevName', 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987,
       1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
       1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013], dtype=object)

In [3]:
# Keep only the region name and the 1980 column (an integer label) for the first 10 rows.
# .copy() makes df_new an independent frame, so the .loc assignments in the next cell
# modify df_new only and cannot raise SettingWithCopyWarning or touch df.
df_new = df[['AreaName', 1980]].head(10).copy()

#### Adding some null values

In [4]:
# Assigning through .loc to a row label that is not in the index appends a new row
# ("setting with enlargement"). Only the 1980 column is assigned, so AreaName is
# NaN for the new rows as well.
for new_label in (10, 11):
    df_new.loc[new_label, 1980] = np.nan

In [5]:
# Display the working frame after the NaN rows (labels 10 and 11) were added.
df_new

Unnamed: 0,AreaName,1980
0,Asia,16.0
1,Europe,1.0
2,Africa,80.0
3,Oceania,0.0
4,Europe,0.0
5,Africa,1.0
6,Latin America and the Caribbean,0.0
7,Latin America and the Caribbean,368.0
8,Asia,0.0
9,Oceania,702.0


# value_counts()  --- Counts occurrences of each unique value in a column

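#### Here value_counts() is applied to a single column (a Series). Note: since pandas 1.1 there is also DataFrame.value_counts(), but it counts unique rows rather than the values of one column.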

In [6]:
df_new["AreaName"].value_counts()  # dropna=True by default, so NaN entries are not counted

Europe                             2
Oceania                            2
Latin America and the Caribbean    2
Africa                             2
Asia                               2
Name: AreaName, dtype: int64

In [7]:
df_new["AreaName"].value_counts(dropna=False)  # dropna=False adds a NaN entry with the number of missing values

Europe                             2
Oceania                            2
Latin America and the Caribbean    2
Africa                             2
Asia                               2
NaN                                2
Name: AreaName, dtype: int64

In [8]:
# Same call on the numeric 1980 column; NaN is excluded by default (dropna=True).
df_new[1980].value_counts()

0.0      4
1.0      2
702.0    1
368.0    1
80.0     1
16.0     1
Name: 1980, dtype: int64

In [9]:
# Include missing values: NaN appears as its own entry in the result.
df_new[1980].value_counts(dropna=False)

0.0      4
NaN      2
1.0      2
702.0    1
368.0    1
80.0     1
16.0     1
Name: 1980, dtype: int64

#### Count how many null values there are in a column

In [10]:
# The index of the value_counts() result holds the distinct values themselves;
# with dropna=False, NaN is one of those index labels.
df_new[1980].value_counts(dropna=False).index.tolist()

[0.0, nan, 1.0, 702.0, 368.0, 80.0, 16.0]

In [11]:
# NaN is an index label of the value_counts(dropna=False) result, so its count can be
# looked up by that label. .get(np.nan, 0) is used instead of [np.nan] because plain
# indexing raises KeyError when the column has no NaN at all; .get returns 0 instead.
# (A direct alternative is df_new[1980].isnull().sum().)
df_new[1980].value_counts(dropna=False).get(np.nan, 0)

2

#### Find the index labels of the rows that contain NULL values

In [12]:
# Re-display the frame for reference before locating the rows with NaN.
df_new

Unnamed: 0,AreaName,1980
0,Asia,16.0
1,Europe,1.0
2,Africa,80.0
3,Oceania,0.0
4,Europe,0.0
5,Africa,1.0
6,Latin America and the Caribbean,0.0
7,Latin America and the Caribbean,368.0
8,Asia,0.0
9,Oceania,702.0


In [13]:
# Select the rows whose 1980 value is missing via an explicit boolean .loc mask,
# then return their index labels as a plain Python list.
df_new.loc[df_new[1980].isna()].index.tolist()

[10, 11]

#### Get unique values using the value_counts() approach

In [14]:
# Re-display the frame for reference before extracting the unique values.
df_new

Unnamed: 0,AreaName,1980
0,Asia,16.0
1,Europe,1.0
2,Africa,80.0
3,Oceania,0.0
4,Europe,0.0
5,Africa,1.0
6,Latin America and the Caribbean,0.0
7,Latin America and the Caribbean,368.0
8,Asia,0.0
9,Oceania,702.0


In [15]:
# The index of value_counts() lists each distinct value once, ordered by frequency;
# NaN is left out because dropna defaults to True.
df_new[1980].value_counts().index.tolist()

[0.0, 1.0, 702.0, 368.0, 80.0, 16.0]

#### To get unique values we already have another method unique()

In [16]:
# unique() returns the distinct values in order of first appearance and, unlike
# value_counts() with its default dropna=True, keeps NaN in the result.
df_new[1980].unique().tolist()

[16.0, 1.0, 80.0, 0.0, 368.0, 702.0, nan]