In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample index used throughout the notebook: it deliberately contains repeated
# values and two NaN entries so the duplicate/null helpers have something to find.
test_index = pd.Index(
    data=[1, 1, 2, 3, 5, 8, np.nan, 13, np.nan, 2, 3],
    name='test_index',
)
test_index

Float64Index([1.0, 1.0, 2.0, 3.0, 5.0, 8.0, nan, 13.0, nan, 2.0, 3.0], dtype='float64', name='test_index')

**Get name, shape and dimensions of the index**

In [3]:
# Report the index's basic metadata; sep='\n' puts each item on its own line,
# matching three separate print() calls.
print(
    'Name: {}'.format(test_index.name),
    'Shape: {}'.format(test_index.shape),
    'Dimensions: {}'.format(test_index.ndim),
    sep='\n',
)

Name: test_index
Shape: (11,)
Dimensions: 1


**Get values in the index**
<br>
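The values can be retrieved either by accessing the *values* attribute or by invoking the *get_values()* method. Note that *get_values()* was deprecated in pandas 0.25 and removed in pandas 1.0; on newer versions use *to_numpy()* instead.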

In [4]:
# .values exposes the index's underlying data as a NumPy array
test_index.values

array([  1.,   1.,   2.,   3.,   5.,   8.,  nan,  13.,  nan,   2.,   3.])

In [5]:
# Index.get_values() was deprecated in pandas 0.25 and removed in 1.0;
# to_numpy() (available since pandas 0.24) returns the same NumPy array.
test_index.to_numpy()

array([  1.,   1.,   2.,   3.,   5.,   8.,  nan,  13.,  nan,   2.,   3.])

**Get datatype of elements**

In [6]:
# dtype reports the element type shared by every entry of the index
test_index.dtype

dtype('float64')

**Check if index is empty**

In [7]:
# Re-display the index for reference before checking whether it is empty
test_index

Float64Index([1.0, 1.0, 2.0, 3.0, 5.0, 8.0, nan, 13.0, nan, 2.0, 3.0], dtype='float64', name='test_index')

In [8]:
# empty is False here because the index holds elements
test_index.empty

False

In [9]:
# An index built from an empty list has no elements, so empty is True
empty_index = pd.Index([])
empty_index.empty

True

**Check for duplicates**
<br>
Use either the *has_duplicates* or the *is_unique* attribute to check whether the index contains duplicates.

In [10]:
# Re-display the index for reference before checking for duplicates
test_index

Float64Index([1.0, 1.0, 2.0, 3.0, 5.0, 8.0, nan, 13.0, nan, 2.0, 3.0], dtype='float64', name='test_index')

In [11]:
# True when at least one value occurs more than once
test_index.has_duplicates

True

In [12]:
# The complementary check: False here because the index contains repeated values
test_index.is_unique

False

**Get count of unique elements**

In [13]:
# Re-display the index for reference before counting unique elements
test_index

Float64Index([1.0, 1.0, 2.0, 3.0, 5.0, 8.0, nan, 13.0, nan, 2.0, 3.0], dtype='float64', name='test_index')

In [14]:
# By default nunique() leaves null (NaN) elements out of the count,
# so only the distinct non-null values are counted here
test_index.nunique()

6

In [15]:
# Passing dropna=False includes nulls in the count: NaN adds one more distinct value
test_index.nunique(dropna=False)

7

**Unique elements in the index**

In [16]:
# unique() returns a new index with each distinct value (NaN included) listed once,
# in order of first appearance
test_index.unique()

Float64Index([1.0, 2.0, 3.0, 5.0, 8.0, nan, 13.0], dtype='float64', name='test_index')

**Get duplicated values**

In [17]:
# Index.get_duplicates() was deprecated in pandas 0.23 and removed in 1.0.
# Equivalent: select the repeated entries, drop NaN (the recorded result does not
# report NaN as a duplicate), de-duplicate, and sort into a plain list.
sorted(test_index[test_index.duplicated()].dropna().unique())

[1.0, 2.0, 3.0]

**Get a boolean array with duplicated elements marked as such**

In [18]:
# Re-display the index so the boolean masks below can be compared position by position
test_index

Float64Index([1.0, 1.0, 2.0, 3.0, 5.0, 8.0, nan, 13.0, nan, 2.0, 3.0], dtype='float64', name='test_index')

In [19]:
# keep=False flags every occurrence of a repeated value as True
# (the two NaN entries are treated as duplicates of each other)
test_index.duplicated(keep=False)

array([ True,  True,  True,  True, False, False,  True, False,  True,
        True,  True], dtype=bool)

In [20]:
# keep='first' leaves the first occurrence of each value unflagged
# and marks every later repeat as a duplicate
test_index.duplicated(keep='first')

array([False,  True, False, False, False, False, False, False,  True,
        True,  True], dtype=bool)

In [21]:
# keep='last' leaves the last occurrence of each value unflagged
# and marks every earlier repeat as a duplicate
test_index.duplicated(keep='last')

array([ True, False,  True,  True, False, False,  True, False, False,
       False, False], dtype=bool)

**Check for nulls or missing values**

In [22]:
# hasnans is True when the index contains at least one NaN
test_index.hasnans

True

**Get non-null elements**

In [23]:
# notnull() builds a boolean mask that is True at every non-null position;
# indexing with the mask keeps only those elements
non_nulls_mask = test_index.notnull()
test_index[non_nulls_mask]

Float64Index([1.0, 1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 2.0, 3.0], dtype='float64', name='test_index')

**Get null elements**

In [24]:
# isnull() builds the opposite mask (True at every NaN position);
# indexing with the mask keeps only the null elements
nulls_mask = test_index.isnull()
test_index[nulls_mask]

Float64Index([nan, nan], dtype='float64', name='test_index')

**Get count of occurrences of each element in the index**

In [25]:
# value_counts() tallies how often each value occurs; note that the displayed
# result has no row for NaN
test_index.value_counts()

3.0     2
2.0     2
1.0     2
13.0    1
8.0     1
5.0     1
Name: test_index, dtype: int64

**Check if element is in the index**
<br>
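A membership test can be performed using either the standard Python *in* operator or the *contains(key)* method of the index object. Note that *contains(key)* was deprecated in pandas 0.25 and removed in pandas 1.0; on newer versions use the *in* operator.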

In [26]:
# Index.contains(key) was deprecated in pandas 0.25 and removed in 1.0; the `in`
# operator is the supported membership test. isin() is the vectorised
# alternative, handy when several candidate values are checked at once.
5 in test_index, bool(test_index.isin([5]).any())

(True, True)

**Verify if elements are of a certain type**

In [27]:
# Index.is_integer() is deprecated since pandas 2.0; the dtype-introspection
# helper in pandas.api.types is the recommended replacement.
int_index = pd.Index([1, 3, 5, 7, 9, 11])
pd.api.types.is_integer_dtype(int_index)

True

In [28]:
# Index.is_object() is deprecated since pandas 2.0; use the dtype-introspection
# helper from pandas.api.types instead.
# NOTE(review): the check is True only while strings are stored with object dtype;
# pandas versions that infer a dedicated string dtype would report False — verify
# against the pandas version in use.
str_index = pd.Index(['One', 'Two', 'Three'])
pd.api.types.is_object_dtype(str_index)

True

In [29]:
# Index.is_floating() is deprecated since pandas 2.0; use the dtype-introspection
# helper from pandas.api.types instead.
float_index = pd.Index([1.0, 2.0, 3.0, 5.0])
pd.api.types.is_float_dtype(float_index)

True

In [30]:
# Index.is_numeric() is deprecated since pandas 2.0; use a dtype-introspection
# helper from pandas.api.types instead. is_numeric_dtype is available on old and
# new pandas alike; pandas >= 2.0 also offers the stricter
# is_any_real_numeric_dtype, which rejects boolean and complex data.
# Mixing ints and floats yields a float index, which counts as numeric.
numeric_index = pd.Index([1, 2.0, 3, 4.0])
pd.api.types.is_numeric_dtype(numeric_index)

True

In [31]:
# Index.is_boolean() is deprecated since pandas 2.0; use the dtype-introspection
# helper from pandas.api.types instead.
bool_index = pd.Index([True, True, False, False, False])
pd.api.types.is_bool_dtype(bool_index)

True

**Insert element into index**
<br>
This can be done using the *insert(loc, item)* method.
<br><br>
Note that this does not modify the index in-place; it returns a new index object instead.

In [32]:
# Re-display the index for reference before demonstrating insert()
test_index

Float64Index([1.0, 1.0, 2.0, 3.0, 5.0, 8.0, nan, 13.0, nan, 2.0, 3.0], dtype='float64', name='test_index')

In [33]:
# The following command returns a new index object. Index test_index is left unchanged
# insert() returns a new index object with 'Hello' placed at position 2;
# test_index itself is left unchanged. Because a string is mixed into the
# float values, the displayed result has dtype 'object'.
test_index.insert(2, 'Hello')

Index([1.0, 1.0, 'Hello', 2.0, 3.0, 5.0, 8.0, nan, 13.0, nan, 2.0, 3.0], dtype='object', name='test_index')

**Union and intersection of two index objects**

In [34]:
# Two indexes of city names; 'Toronto' and 'Mississauga' appear in both
cities_idx_1 = pd.Index(['Toronto', 'Mississauga', 'Waterloo', 'Brampton'])
cities_idx_2 = pd.Index(['London', 'Ottawa', 'Toronto', 'Hamilton', 'Mississauga'])

In [35]:
# union() combines the labels of both indexes, listing shared labels only once;
# the displayed result is in alphabetical order
cities_idx_1.union(cities_idx_2)

Index(['Brampton', 'Hamilton', 'London', 'Mississauga', 'Ottawa', 'Toronto',
       'Waterloo'],
      dtype='object')

In [36]:
# intersection() keeps only the labels that are present in both indexes
cities_idx_1.intersection(cities_idx_2)

Index(['Toronto', 'Mississauga'], dtype='object')

**Drop NA/NaN elements from an index**
<br>
Call the *dropna()* method on an index object to return a new index with the NA/NaN values removed.

In [37]:
# Example index containing two NaN entries; the mix of ints and NaN is stored as float64
index_with_nulls = pd.Index([1, 2, 3, np.nan, 5, 6, np.nan, 8])
index_with_nulls

Float64Index([1.0, 2.0, 3.0, nan, 5.0, 6.0, nan, 8.0], dtype='float64')

In [38]:
# The original index remains unchanged.
index_with_nulls.dropna()

Float64Index([1.0, 2.0, 3.0, 5.0, 6.0, 8.0], dtype='float64')

**Drop duplicates from an index**
<br>
Call the *drop_duplicates()* method to return a new index with the duplicate values removed.

In [39]:
# Example index in which 1, 2, 3 and 4 each appear twice.
# NOTE(review): the variable name is misspelled ("dulicates"); it is kept as-is
# because the following cell refers to it by this exact name.
index_with_dulicates = pd.Index([1, 2, 1, 2, 3, 3, 4, 5, 6, 4])

In [40]:
# drop_duplicates() keeps one entry per distinct value; the displayed result
# lists each value once, in order of first appearance
index_with_dulicates.drop_duplicates()

Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')