In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the raw CSV (siena2018-pres.csv) in the mattharrison/datasets GitHub repository.
url = 'https://github.com/mattharrison/datasets/raw/master/data/siena2018-pres.csv'

In [3]:
# Load the CSV over the network; the first CSV column becomes the row index.
df = pd.read_csv(url, index_col=0)

In [4]:
def tweak_siena_pres(df):
    def int64_to_uint8(df_):
        cols = df_.select_dtypes('int64')
        return (df_
                .astype({col:'uint8' for col in cols}))


    return (df
     .rename(columns={'Seq.':'Seq'})    # 1
     .rename(columns={k:v.replace(' ', '_') for k,v in
        {'Bg': 'Background',
         'PL': 'Party leadership', 'CAb': 'Communication ability',
         'RC': 'Relations with Congress', 'CAp': 'Court appointments',
         'HE': 'Handling of economy', 'L': 'Luck',
         'AC': 'Ability to compromise', 'WR': 'Willing to take risks',
         'EAp': 'Executive appointments', 'OA': 'Overall ability',
         'Im': 'Imagination', 'DA': 'Domestic accomplishments',
         'Int': 'Integrity', 'EAb': 'Executive ability',
         'FPA': 'Foreign policy accomplishments',
         'LA': 'Leadership ability',
         'IQ': 'Intelligence', 'AM': 'Avoid crucial mistakes',
         'EV': "Experts' view", 'O': 'Overall'}.items()})
     .astype({'Party':'category'})  # 2
     .pipe(int64_to_uint8)  # 3
     .assign(Average_rank=lambda df_:(df_.select_dtypes('uint8') # 4
                 .sum(axis=1).rank(method='dense').astype('uint8')),
             Quartile=lambda df_:pd.qcut(df_.Average_rank, 4,
                 labels='1st 2nd 3rd 4th'.split())
            )
    )

In [5]:
# Run the cleanup pipeline on the raw frame; the cells below all work on `pres`.
pres = tweak_siena_pres(df)

In [6]:
# Boolean frame with the same shape as `pres`: True wherever a value is missing.
pres.isna()

Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
10,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [7]:
# Column labels of the cleaned frame.
pres.columns

Index(['Seq', 'President', 'Party', 'Background', 'Imagination', 'Integrity',
       'Intelligence', 'Luck', 'Willing_to_take_risks',
       'Ability_to_compromise', 'Executive_ability', 'Leadership_ability',
       'Communication_ability', 'Overall_ability', 'Party_leadership',
       'Relations_with_Congress', 'Court_appointments', 'Handling_of_economy',
       'Executive_appointments', 'Domestic_accomplishments',
       'Foreign_policy_accomplishments', 'Avoid_crucial_mistakes',
       "Experts'_view", 'Overall', 'Average_rank', 'Quartile'],
      dtype='object')

In [8]:
# Boolean-mask selection: keep only the rows whose Integrity value is missing.
pres[pres.Integrity.isna()]

Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile


In [9]:
# Number of missing values per column (each True counts as 1).
pres.isna().sum()

Seq                               0
President                         0
Party                             0
Background                        0
Imagination                       0
Integrity                         0
Intelligence                      0
Luck                              0
Willing_to_take_risks             0
Ability_to_compromise             0
Executive_ability                 0
Leadership_ability                0
Communication_ability             0
Overall_ability                   0
Party_leadership                  0
Relations_with_Congress           0
Court_appointments                0
Handling_of_economy               0
Executive_appointments            0
Domestic_accomplishments          0
Foreign_policy_accomplishments    0
Avoid_crucial_mistakes            0
Experts'_view                     0
Overall                           0
Average_rank                      0
Quartile                          0
dtype: int64

In [10]:
# Fraction of missing values per column (mean of the boolean mask).
pres.isna().mean()

Seq                               0.0
President                         0.0
Party                             0.0
Background                        0.0
Imagination                       0.0
Integrity                         0.0
Intelligence                      0.0
Luck                              0.0
Willing_to_take_risks             0.0
Ability_to_compromise             0.0
Executive_ability                 0.0
Leadership_ability                0.0
Communication_ability             0.0
Overall_ability                   0.0
Party_leadership                  0.0
Relations_with_Congress           0.0
Court_appointments                0.0
Handling_of_economy               0.0
Executive_appointments            0.0
Domestic_accomplishments          0.0
Foreign_policy_accomplishments    0.0
Avoid_crucial_mistakes            0.0
Experts'_view                     0.0
Overall                           0.0
Average_rank                      0.0
Quartile                          0.0
dtype: float64

In [11]:
# Per column: True if at least one value is missing.
pres.isna().any()

Seq                               False
President                         False
Party                             False
Background                        False
Imagination                       False
Integrity                         False
Intelligence                      False
Luck                              False
Willing_to_take_risks             False
Ability_to_compromise             False
Executive_ability                 False
Leadership_ability                False
Communication_ability             False
Overall_ability                   False
Party_leadership                  False
Relations_with_Congress           False
Court_appointments                False
Handling_of_economy               False
Executive_appointments            False
Domestic_accomplishments          False
Foreign_policy_accomplishments    False
Avoid_crucial_mistakes            False
Experts'_view                     False
Overall                           False
Average_rank                      False
Quartile                          False
dtype: bool


In [12]:
# Drop rows that duplicate an earlier row across all columns (the first occurrence is kept).
pres.drop_duplicates()

Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
1,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd
3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,7,20,4,6,9,7,5,5,5,1st
4,4,James Madison,Democratic-Republican,4,6,7,3,16,15,6,...,6,14,7,11,19,11,8,7,7,1st
5,5,James Monroe,Democratic-Republican,9,14,11,18,6,16,7,...,11,9,9,10,5,6,9,8,8,1st
6,6,John Quincy Adams,Democratic-Republican,1,9,6,5,29,19,24,...,15,17,18,21,15,14,18,18,18,2nd
7,7,Andrew Jackson,Democratic,37,15,29,28,4,4,38,...,30,25,25,17,23,20,19,19,19,2nd
8,8,Martin Van Buren,Democratic,23,22,27,25,34,28,20,...,25,31,26,29,27,24,28,25,25,3rd
9,9,William Henry Harrison,Whig,22,38,28,37,44,32,41,...,42,41,40,42,44,37,39,39,38,4th
10,10,John Tyler,Independent,34,33,35,34,22,26,37,...,38,34,36,36,26,32,36,37,37,4th


In [13]:
# Compare rows on Party only: keeps the first row encountered for each party.
pres.drop_duplicates(subset='Party')

Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
1,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,1,2,2,1,2,1,1,1st
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd
3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,7,20,4,6,9,7,5,5,5,1st
7,7,Andrew Jackson,Democratic,37,15,29,28,4,4,38,...,30,25,25,17,23,20,19,19,19,2nd
9,9,William Henry Harrison,Whig,22,38,28,37,44,32,41,...,42,41,40,42,44,37,39,39,38,4th
16,16,Abraham Lincoln,Republican,28,1,2,2,18,1,1,...,3,4,2,1,6,2,1,3,3,1st


In [14]:
# Same comparison on Party, but keep the last row encountered for each party.
pres.drop_duplicates(subset='Party', keep='last')

Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd
6,6,John Quincy Adams,Democratic-Republican,1,9,6,5,29,19,24,...,15,17,18,21,15,14,18,18,18,2nd
10,10,John Tyler,Independent,34,33,35,34,22,26,37,...,38,34,36,36,26,32,36,37,37,4th
13,13,Millard Fillmore,Whig,40,37,36,38,35,38,32,...,39,37,37,37,37,33,37,38,39,4th
43,44,Barack Obama,Democratic,24,11,13,9,15,23,16,...,14,10,13,13,20,10,11,17,17,2nd
44,45,Donald Trump,Republican,43,40,44,44,10,25,42,...,40,39,44,40,42,41,42,42,42,4th


In [15]:
# keep=False drops every row whose Party occurs more than once, leaving only
# parties that appear exactly once.
pres.drop_duplicates(subset='Party', keep=False)

Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Court_appointments,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,4,13,15,19,13,16,10,14,13,2nd


In [16]:
# Flag each row whose Party differs from the row directly above it, then keep
# only the flagged rows: the first president of every consecutive same-party run.
(
    pres
    .assign(first_in_party_seq=lambda frame: frame['Party'] != frame['Party'].shift())
    .loc[lambda frame: frame['first_in_party_seq']]
)

Unnamed: 0,Seq,President,Party,Background,Imagination,Integrity,Intelligence,Luck,Willing_to_take_risks,Ability_to_compromise,...,Handling_of_economy,Executive_appointments,Domestic_accomplishments,Foreign_policy_accomplishments,Avoid_crucial_mistakes,Experts'_view,Overall,Average_rank,Quartile,first_in_party_seq
1,1,George Washington,Independent,7,7,1,10,1,6,2,...,1,1,2,2,1,2,1,1,1st,True
2,2,John Adams,Federalist,3,13,4,4,24,14,31,...,13,15,19,13,16,10,14,13,2nd,True
3,3,Thomas Jefferson,Democratic-Republican,2,2,14,1,8,5,14,...,20,4,6,9,7,5,5,5,1st,True
7,7,Andrew Jackson,Democratic,37,15,29,28,4,4,38,...,25,25,17,23,20,19,19,19,2nd,True
9,9,William Henry Harrison,Whig,22,38,28,37,44,32,41,...,41,40,42,44,37,39,39,38,4th,True
10,10,John Tyler,Independent,34,33,35,34,22,26,37,...,34,36,36,26,32,36,37,37,4th,True
11,11,James K. Polk,Democratic,19,10,23,23,9,7,18,...,15,16,12,8,8,13,12,11,1st,True
12,12,Zachary Taylor,Whig,30,26,22,32,37,24,26,...,27,33,27,30,26,30,30,30,3rd,True
14,14,Franklin Pierce,Democratic,38,39,38,40,39,38,39,...,40,39,41,39,38,40,40,41,4th,True
16,16,Abraham Lincoln,Republican,28,1,2,2,18,1,1,...,4,2,1,6,2,1,3,3,1st,True
