# Chapter 6: Index Alignment
## Recipes
* [Examining the Index object](#Examining-the-index)
* [Producing Cartesian products](#Producing-Cartesian-Products)
* [Exploding indexes](#Exploding-Indexes)
* [Filling values with unequal indexes](#Filling-values-with-unequal-indexes)
* [Appending columns from different DataFrames](#Appending-columns-from-different-DataFrames)
* [Highlighting the maximum value from each column](#Highlighting-maximum-value-from-each-column)
* [Replicating idxmax with method chaining](#Replicating-idxmax-with-method-chaining)
* [Finding the most common maximum](#Finding-the-most-common-maximum)

In [1]:
import numpy as np
import pandas as pd

# Examining the index

In [2]:
# Load the college dataset and keep a reference to its column Index so the
# following cells can explore the Index object on its own.
college = pd.read_csv('data/college.csv')
columns = college.columns
columns

Index(['INSTNM', 'CITY', 'STABBR', 'HBCU', 'MENONLY', 'WOMENONLY', 'RELAFFIL',
       'SATVRMID', 'SATMTMID', 'DISTANCEONLY', 'UGDS', 'UGDS_WHITE',
       'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN', 'UGDS_AIAN', 'UGDS_NHPI',
       'UGDS_2MOR', 'UGDS_NRA', 'UGDS_UNKN', 'PPTUG_EF', 'CURROPER', 'PCTPELL',
       'PCTFLOAN', 'UG25ABV', 'MD_EARN_WNE_P10', 'GRAD_DEBT_MDN_SUPP'],
      dtype='object')

In [3]:
# The DataFrame's data as a NumPy array; the columns mix strings and numbers,
# so the array comes back with dtype=object.
college.values

array([['Alabama A & M University', 'Normal', 'AL', ..., 0.1049, '30300',
        '33888'],
       ['University of Alabama at Birmingham', 'Birmingham', 'AL', ...,
        0.2422, '39700', '21941.5'],
       ['Amridge University', 'Montgomery', 'AL', ...,
        0.8540000000000001, '40100', '23370'],
       ...,
       ['National Personal Training Institute of Cleveland',
        'Highland Heights', 'OH', ..., nan, nan, '6333'],
       ['Bay Area Medical Academy - San Jose Satellite Location',
        'San Jose', 'CA', ..., nan, nan, 'PrivacySuppressed'],
       ['Excel Learning Center-San Antonio South', 'San Antonio', 'TX',
        ..., nan, nan, '12125']], dtype=object)

In [4]:
# A single integer position returns the label itself (a plain string here),
# not an Index.
columns[5]

'WOMENONLY'

In [5]:
# A list of integer positions returns a new Index holding just those labels.
columns[[1, 8, 10]]

Index(['CITY', 'SATMTMID', 'UGDS'], dtype='object')

In [6]:
# Slice notation, including negative positions, also works and yields an Index.
columns[-7: -4]

Index(['PPTUG_EF', 'CURROPER', 'PCTPELL'], dtype='object')

In [8]:
# Index supports reductions such as min/max and missing-value checks.
# On string labels min/max compare lexicographically; no label is missing.
columns.min(), columns.max(), columns.isnull().sum()

('CITY', 'WOMENONLY', 0)

In [9]:
# Adding a string to an Index is element-wise: every label gets the suffix and
# a new Index is returned (`columns` itself is left as it was).
columns + '_A'

Index(['INSTNM_A', 'CITY_A', 'STABBR_A', 'HBCU_A', 'MENONLY_A', 'WOMENONLY_A',
       'RELAFFIL_A', 'SATVRMID_A', 'SATMTMID_A', 'DISTANCEONLY_A', 'UGDS_A',
       'UGDS_WHITE_A', 'UGDS_BLACK_A', 'UGDS_HISP_A', 'UGDS_ASIAN_A',
       'UGDS_AIAN_A', 'UGDS_NHPI_A', 'UGDS_2MOR_A', 'UGDS_NRA_A',
       'UGDS_UNKN_A', 'PPTUG_EF_A', 'CURROPER_A', 'PCTPELL_A', 'PCTFLOAN_A',
       'UG25ABV_A', 'MD_EARN_WNE_P10_A', 'GRAD_DEBT_MDN_SUPP_A'],
      dtype='object')

In [11]:
# Replace all column labels of `college` at once. This mutates `college`;
# `columns` still refers to the original, unsuffixed Index.
college.columns = columns + '_A'

In [12]:
# Confirm that the DataFrame now carries the '_A'-suffixed column names.
college.head()

Unnamed: 0,INSTNM_A,CITY_A,STABBR_A,HBCU_A,MENONLY_A,WOMENONLY_A,RELAFFIL_A,SATVRMID_A,SATMTMID_A,DISTANCEONLY_A,...,UGDS_2MOR_A,UGDS_NRA_A,UGDS_UNKN_A,PPTUG_EF_A,CURROPER_A,PCTPELL_A,PCTFLOAN_A,UG25ABV_A,MD_EARN_WNE_P10_A,GRAD_DEBT_MDN_SUPP_A
0,Alabama A & M University,Normal,AL,1.0,0.0,0.0,0,424.0,420.0,0.0,...,0.0,0.0059,0.0138,0.0656,1,0.7356,0.8284,0.1049,30300,33888.0
1,University of Alabama at Birmingham,Birmingham,AL,0.0,0.0,0.0,0,570.0,565.0,0.0,...,0.0368,0.0179,0.01,0.2607,1,0.346,0.5214,0.2422,39700,21941.5
2,Amridge University,Montgomery,AL,0.0,0.0,0.0,1,,,1.0,...,0.0,0.0,0.2715,0.4536,1,0.6801,0.7795,0.854,40100,23370.0
3,University of Alabama in Huntsville,Huntsville,AL,0.0,0.0,0.0,0,595.0,590.0,0.0,...,0.0172,0.0332,0.035,0.2146,1,0.3072,0.4596,0.264,45500,24097.0
4,Alabama State University,Montgomery,AL,1.0,0.0,0.0,0,425.0,430.0,0.0,...,0.0098,0.0243,0.0137,0.0892,1,0.7347,0.7554,0.127,26600,33118.5


In [13]:
# Comparison against a scalar is element-wise and returns a boolean array.
# Strings compare lexicographically, so labels starting with 'C' or 'D' are False.
columns > 'G'

array([ True, False,  True,  True,  True,  True,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True])

In [14]:
# Index objects are immutable: assigning to a position raises TypeError.
# The error is the point of this cell, so catch it and print the message
# instead of letting it abort a Restart & Run All.
try:
    columns[1] = 'City'
except TypeError as err:
    print('TypeError:', err)

TypeError: Index does not support mutable operations

In [15]:
# First four labels; used below as the left operand of the set operations.
c1 = columns[:4]
c1

Index(['INSTNM', 'CITY', 'STABBR', 'HBCU'], dtype='object')

In [16]:
# Labels at positions 2-4; overlaps c1 on 'STABBR' and 'HBCU'.
c2 = columns[2:5]
c2

Index(['STABBR', 'HBCU', 'MENONLY'], dtype='object')

In [17]:
# Set union: every label that appears in either Index, without duplicates.
c1.union(c2)

Index(['CITY', 'HBCU', 'INSTNM', 'MENONLY', 'STABBR'], dtype='object')

In [18]:
# Union again, spelled with the method. The operator form `c1 | c2` used to be
# an alias for this, but it was deprecated as a set operation in pandas 1.x and
# is an element-wise logical operation from pandas 2.0, so it no longer
# produces the union.
c1.union(c2)

Index(['CITY', 'HBCU', 'INSTNM', 'MENONLY', 'STABBR'], dtype='object')

In [19]:
# Symmetric difference: labels that appear in exactly one of the two Indexes.
c1.symmetric_difference(c2)

Index(['CITY', 'INSTNM', 'MENONLY'], dtype='object')

In [20]:
# Symmetric difference again, spelled with the method. The operator form
# `c1 ^ c2` used to be an alias for this, but it was deprecated as a set
# operation in pandas 1.x and is an element-wise logical operation from
# pandas 2.0.
c1.symmetric_difference(c2)

Index(['CITY', 'INSTNM', 'MENONLY'], dtype='object')

# Producing Cartesian Products

In [21]:
# Four consecutive integers labelled with a deliberately non-unique index
# ('a' appears three times).
s1 = pd.Series(np.arange(4), index=['a', 'a', 'a', 'b'])
s1

a    0
a    1
a    2
b    3
dtype: int32

In [24]:
# Six consecutive integers whose index repeats 'a' and 'b' and adds a label,
# 'c', that s1 does not have.
s2 = pd.Series(np.arange(6), index=['c', 'a', 'b', 'a', 'b', 'b'])
s2

c    0
a    1
b    2
a    3
b    4
b    5
dtype: int32

In [25]:
# The same 0..5 array that was passed as `data` when building s2, kept under
# its own name so it can be inspected by itself.
data = np.arange(6)

In [26]:
# Display the raw array of values.
data

array([0, 1, 2, 3, 4, 5])

In [27]:
# Addition aligns on index labels. Because the labels repeat, every 'a' in s1
# is paired with every 'a' in s2 (3 x 2 rows) and likewise for 'b' (1 x 3 rows).
# 'c' exists only in s2, so its result is NaN and the dtype becomes float64.
s1 + s2

a    1.0
a    3.0
a    2.0
a    4.0
a    3.0
a    5.0
b    5.0
b    7.0
b    8.0
c    NaN
dtype: float64

## There's more

In [30]:
# Both Series hold the same labels, but in a different order, so the indexes
# are not identical and the sum is again a Cartesian product per label
# (3 x 3 'a' rows plus 2 x 2 'b' rows).
s1 = pd.Series(np.arange(5), index=['a', 'a', 'a', 'b', 'b'])
s2 = pd.Series(np.arange(5), index=['b', 'b', 'a', 'a', 'a'])
s1 + s2

a    2
a    3
a    4
a    3
a    4
a    5
a    4
a    5
a    6
b    3
b    4
b    4
b    5
dtype: int32