In [None]:
'''
AI\ ASSIGNMENT.ipynb
AI Assignment 1: Data Prep & Data Analysis
Nana Kwaku Amoako
Tue 28 May, 2024
'''

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [None]:
# Get URL of wikipedia page with data
url = "https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States"
page = requests.get(url)

In [None]:
# test that page is loaded successfully
page.status_code

In [None]:
# # parse page
soup = BeautifulSoup(page.content, 'html.parser')

In [None]:
# find the table in the webpage
table = soup.find('table', {'class': 'wikitable'})

In [None]:
# read table data into pandas DataFrame
df = pd.read_html(str(table))[0]

### Question 1

In [None]:
# view that page read into df
df.head(46)

### DATA CLEANING

In [None]:
# make a new birthyear column
# Initialize the Birth Year column with None
df['Birth Year'] = None

# Extract the birth year based on the format
for index, row in df.iterrows():
    name_birth_death = row['Name (Birth–Death)']
    if 'b. ' in name_birth_death:
        birth_year = name_birth_death.split('b. ')[1].split(')')[0].strip()
    else:
        birth_year = name_birth_death.split('(')[1].split('–')[0].strip()
    
    df.at[index, 'Birth Year'] = birth_year


# for birth in df['Name (Birth–Death)']:
#     if 'b. ' in birth:
#         birthyear = birth.split('b. ')[1].split('–')[0]
#         df['Birth Year'] = birthyear
#     else:
#         df['Birth Year'] = df['Name (Birth–Death)'].str.extract(r'\((?:b\.\s)?(\d{4})[–-]')

In [None]:
# make a death year column
df['Death Year'] = df['Name (Birth–Death)'].str.extract(r'[–-](\d{4})\)')
df['Death Year'].fillna('Alive', inplace=True)

In [None]:
# make the name column just names
df['Name'] = df['Name (Birth–Death)'].str.replace(r'\s*\(.*\)\s*', '', regex=True)


In [None]:
# drop the old name column 
df.drop(columns=['Name (Birth–Death)'], inplace=True)

In [None]:
# Drop the 'Portrait' column
df.columns = df.columns.str.strip() # clean trailing whitespaces
df.drop(columns=['Portrait'], axis=0, inplace=True)

In [None]:
# drop party[b][15] column
df.columns = df.columns.str.strip() # clean trailing whitespaces
df.drop(columns=['Party[b][15]'], axis=0, inplace=True)

In [None]:
# drop No.[a] column
df.drop(columns=['No.[a]'], axis=0, inplace=True)

In [None]:
df['Name'] = df['Name'].str.split('[').str[0].str.strip()

In [None]:
df['Vice President'] = df['Vice President[16]'].str.split('[').str[0].str.strip()
# drop old column
df = df.drop('Vice President[16]', axis=1)

In [None]:
df['Party'] = df['Party[b][15].1'].str.split('[').str[0].str.strip()
# drop old column
df = df.drop('Party[b][15].1', axis=1)

In [None]:
df['Term'] = df['Term[14]'].str.split('[').str[0].str.strip()
# drop old column
df = df.drop('Term[14]', axis=1)

In [None]:
# fill na's in election column
df.replace('–', np.nan, inplace=True)
df['Election'].fillna('No Election', inplace=True)

adding year entered and left office

In [190]:
from datetime import datetime

# Define date format
date = "%B %d, %Y"

# Initialize lists to hold the new column data
years_entered_office = []
years_left_office = []

# Loop through terms to extract start and end years
for term in df['Term']:
    try:
        # Split term into start and end date
        start_date, end_date = term.split(" – ")
        start = datetime.strptime(start_date.strip(), date)
        years_entered_office.append(start.year)

        # Incumbent workaround
        if end_date.strip() == 'Incumbent':
            end = datetime.now()
        else:
            end = datetime.strptime(end_date.strip(), date)
        years_left_office.append(end.year)
    except ValueError:
        # Handle terms with only one date (assuming it's the start date)
        start = datetime.strptime(term.strip(), date)
        years_entered_office.append(start.year)
        years_left_office.append(None)

# Add new columns to the dataframe
df['year entered office'] = years_entered_office
df['year left office'] = years_left_office


df


Unnamed: 0,Election,Birth Year,Death Year,Name,Vice President,Party,Term,year entered office,year left office,Years in Office,Forename,Surname,VP,VP more than once
0,1788–1789 1792,1732,1799,George Washington,John Adams,Unaffiliated,"April 30, 1789 – March 4, 1797",1789,1797.0,8,George,Washington,,John Adams
1,1796,1735,1826,John Adams,Thomas Jefferson,Federalist,"March 4, 1797 – March 4, 1801",1797,1801.0,4,John,Adams,Short,
2,1800 1804,1743,1826,Thomas Jefferson,Aaron Burr George Clinton,Democratic- Republican,"March 4, 1801 – March 4, 1809",1801,1809.0,8,Thomas,Jefferson,,Aaron Burr George Clinton
3,1808 1812,1751,1836,James Madison,George Clinton,Democratic- Republican,"March 4, 1809 – March 4, 1817",1809,1817.0,8,James,Madison,,George Clinton
4,1816 1820,1758,1831,James Monroe,Daniel D. Tompkins,Democratic- Republican,"March 4, 1817 – March 4, 1825",1817,1825.0,8,James,Monroe,,Daniel D. Tompkins
5,1824,1767,1848,John Quincy Adams,John C. Calhoun,Democratic- Republican,"March 4, 1825 – March 4, 1829",1825,1829.0,4,John,Adams,Short,
6,1828 1832,1767,1845,Andrew Jackson,John C. Calhoun,Democratic,"March 4, 1829 – March 4, 1837",1829,1837.0,8,Andrew,Jackson,,John C. Calhoun
7,1836,1782,1862,Martin Van Buren,Richard Mentor Johnson,Democratic,"March 4, 1837 – March 4, 1841",1837,1841.0,4,Martin,Buren,Short,
8,1840,1773,1841,William Henry Harrison,John Tyler,Whig,"March 4, 1841 – April 4, 1841",1841,1841.0,0,William,Harrison,Short,
9,No Election,1790,1862,John Tyler,Vacant throughout presidency,Whig,"April 4, 1841",1841,,4,John,Tyler,Short,


### Question 2: How many presidents are on the web page ?

In [None]:
# 2: Presidents on webpage
presidents_num = df['Name'].nunique()
print(f"The number of presidents on the webpage is: {presidents_num}")

### Question 3: How many presidencies ever existed in the United states ?

In [None]:
# 3: Presidencies existing in the US
presidencies_num = len(df)
print(f"The number of presidencies in the US is: {presidencies_num}")

### Question 4: Which presidents served more than one term and had only one vice president for the whole duration of their presidency? 

Cleaning vp column further

In [130]:
# 4: More than one term
from datetime import datetime

# date format
date = "%B %d, %Y"

years_in_office = []

# loop through terms to calculate how long mandem been in office
for i in range(len(df['Term'])):
    # get start and end date
    try:
        term = df['Term'][i] # Term will be the index we are traversing at 
        start_date, end_date = term.split(" – ")
        start = datetime.strptime(start_date.strip(), date) # convert to datetime object

        # Incumbent workaround
        if end_date.strip() == 'Incumbent':
            end = datetime.now()
        else:
            end = datetime.strptime(end_date.strip(), date)


        # Calculating years in office
        office_years = (end.year - start.year)
        years_in_office.append(office_years) # adding the years in office to the list
    except ValueError:
        # terms with no end date
        start = datetime.strptime(term.strip(), date)   # take the start date from the row as it is 

        # Logic: use start date of next term
        # i, i+1

        if i+1 < len(df['Term']):
            next_term = df['Term'][i+1]
            next_start_date, _ = next_term.split(" – ")
            end = datetime.strptime(next_start_date.strip(), date)
        else:
            end = datetime.now() 
        
        office_years = (end.year - start.year)
        years_in_office.append(office_years) # adding the years in office to the list

In [131]:
# Add years_in_office df
df['Years in Office'] = years_in_office

In [132]:
vp_list = []

for vp in df['Vice President']:
    if 'Vacant' in vp:
        if 'through' in vp:
            # handle partial vacancies
            parts = vp.split('through')
            date = parts[1].split()[:3] 
            remaining_vp = ' '.join(parts[1].split()[3:]) 
            vp_list.append(f"Vacant through {' '.join(date)} - {remaining_vp}")
        else:
            # handle complete vacancies
            vp_list.append('Vacant throughout presidency')
    else:
        # split names 
        vp_names = vp.split()
        if len(vp_names) > 3:
            vp_list.append(' - '.join([' '.join(vp_names[:3]), ' '.join(vp_names[3:])]))
        elif len(vp_names) == 4:
            vp_list.append(' - '.join([' '.join(vp_names[:2]), ' '.join(vp_names[2:])]))
        else:
            vp_list.append(' '.join(vp_names))


df['VP'] = vp_list

In [133]:
# new column with the vice presidents who served more than once

df['VP'] = ''
df['VP more than once'] = ''

for i in range(len(df)):
    if df['Years in Office'][i] > 4:
        if 'Vacant' in df['Vice President'][i]:
            df['VP'][i] = 'Vacant'
        else:
            df['VP more than once'][i] = df['Vice President'][i]
    else:
        df['VP'][i] = 'Short' # vice president didn't serve for long


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['VP more than once'][i] = df['Vice President'][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['VP'][i] = 'Short' # vice president didn't serve for long
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['VP'][i] = 'Vacant'


In [140]:
# print stuff

print("Presidents who only served for more than one term: ")
for i in range(len(df)):
    if df['Years in Office'][i] > 4:
        print(df['Name'][i])

Presidents who only served for more than one term: 
George Washington
Thomas Jefferson
James Madison
James Monroe
Andrew Jackson
Ulysses S. Grant
Theodore Roosevelt
Woodrow Wilson
Calvin Coolidge
Franklin D. Roosevelt
Harry S. Truman
Dwight D. Eisenhower
Lyndon B. Johnson
Richard Nixon
Ronald Reagan
Bill Clinton
George W. Bush
Barack Obama


In [141]:
print("Presidents who only had one VP: ")
for i in range(len(df)):
    if len(df['VP more than once'][i]) > 2:
        print(df['Name'][i])

Presidents who only had one VP: 
George Washington
Thomas Jefferson
James Madison
James Monroe
Andrew Jackson
Ulysses S. Grant
Woodrow Wilson
Franklin D. Roosevelt
Dwight D. Eisenhower
Richard Nixon
Ronald Reagan
Bill Clinton
George W. Bush
Barack Obama


### Question 5: Which president was in office longer than others and when did he get in office?

In [142]:
max_year = df['Years in Office'].max()
pres_max_year = df[df['Years in Office'] == max_year]['Name'].to_string(index=False)
election_year =  df[df['Years in Office'] == max_year]['Election'].values[0]
election_year_value = election_year.split(' ')[0]

# print(max_year)
# print(pres_max_year)
# print(election_year_value)

print(f"The president who was in the office longest was {pres_max_year} with {max_year} years in office. He got into office in {election_year_value}")

The president who was in the office longest was Franklin D. Roosevelt with 12 years in office. He got into office in 1932


### Question 6: Which presidents were elected after serving as vice presidents?

In [144]:
# loop through vp list
# if vp name exists in pres list
# print it

# Goal: The code managed to pick the names of the presidents who once served as vice presidents and the periods they served as vice president and president
vp_pres = []

for vp in df['Vice President']:
    for name in df['Name']:
        if name in vp:
            vp_pres.append(vp)


print("The presidents who were elected after serving as vice presidents are:", ', '.join(vp_pres))
print(f"and they are {len(vp_pres)} in number.")

The presidents who were elected after serving as vice presidents are: John Adams, Thomas Jefferson, John Tyler, Millard Fillmore, Hannibal Hamlin Andrew Johnson, Chester A. Arthur, Calvin Coolidge, John Nance Garner Henry A. Wallace Harry S. Truman, Richard Nixon, Lyndon B. Johnson, George H. W. Bush, Joe Biden
and they are 12 in number.


### Question 7: How many presidents were never elected, and what are their names?

In [146]:
# no election
# picks the president
# prints the presido

no_elect_guys = []

no_elect_guys = df[df['Election'] == 'No Election']['Name'].to_list()

print("The presidents who were never elected are:",', '.join(no_elect_guys))
print(f"and they are {len(no_elect_guys)}")


The presidents who were never elected are: John Tyler, Millard Fillmore, Andrew Johnson, Chester A. Arthur, Gerald Ford
and they are 5


### Question 8: How many presidents have the same forenames and/or surnames and who are they? 

In [149]:
# cleaning
df['Forename'] = df['Name'].str.split().str[0]
df['Surname'] = df['Name'].str.split().str[-1]

In [152]:
# fisrt name
dupli_fname = df.duplicated(subset=["Forename"], keep=False)
dupli_fname_pres = df[dupli_fname]['Name'].tolist()
print("List of presidents with the same first name:")
print(f"{dupli_fname_pres}")
print(f"and they are {len(dupli_fname_pres)} in number")


List of presidents with the same first name:
['George Washington', 'John Adams', 'James Madison', 'James Monroe', 'John Quincy Adams', 'Andrew Jackson', 'William Henry Harrison', 'John Tyler', 'James K. Polk', 'Franklin Pierce', 'James Buchanan', 'Andrew Johnson', 'James A. Garfield', 'Grover Cleveland', 'Grover Cleveland', 'William McKinley', 'William Howard Taft', 'Franklin D. Roosevelt', 'John F. Kennedy', 'George H. W. Bush', 'George W. Bush']
and they are 21 in number


In [155]:
# LAST NAME
dupli_lname = df.duplicated(subset=["Surname"], keep=False)
dupli_lname_pres = df[dupli_lname]['Name'].tolist()
print("List of presidents with the same last name:")
print(f"{dupli_lname_pres}")
print(f"and they are {len(dupli_lname_pres)} in number")

List of presidents with the same last name:
['John Adams', 'John Quincy Adams', 'William Henry Harrison', 'Andrew Johnson', 'Grover Cleveland', 'Benjamin Harrison', 'Grover Cleveland', 'Theodore Roosevelt', 'Franklin D. Roosevelt', 'Lyndon B. Johnson', 'George H. W. Bush', 'George W. Bush']
and they are 12 in number


In [154]:
# displays the presidents with the same first name and their term
print("Presidents with the same first name and the term they served")
df[df['Name'].isin(dupli_fname_pres)][['Name', 'Term']]

Presidents with the same first name and the term they served


Unnamed: 0,Name,Term
0,George Washington,"April 30, 1789 – March 4, 1797"
1,John Adams,"March 4, 1797 – March 4, 1801"
3,James Madison,"March 4, 1809 – March 4, 1817"
4,James Monroe,"March 4, 1817 – March 4, 1825"
5,John Quincy Adams,"March 4, 1825 – March 4, 1829"
6,Andrew Jackson,"March 4, 1829 – March 4, 1837"
8,William Henry Harrison,"March 4, 1841 – April 4, 1841"
9,John Tyler,"April 4, 1841"
10,James K. Polk,"March 4, 1845 – March 4, 1849"
13,Franklin Pierce,"March 4, 1853 – March 4, 1857"


In [148]:
print("Presidents with the same first name and the term they served")
df[df['Name'].isin(dupli_lname_pres)][['Name', 'Term']]

Presidents with the same first name and the term they served


Unnamed: 0,Name,Term
1,John Adams,"March 4, 1797 – March 4, 1801"
5,John Quincy Adams,"March 4, 1825 – March 4, 1829"
8,William Henry Harrison,"March 4, 1841 – April 4, 1841"
16,Andrew Johnson,"April 15, 1865"
21,Grover Cleveland,"March 4, 1885 – March 4, 1889"
22,Benjamin Harrison,"March 4, 1889 – March 4, 1893"
23,Grover Cleveland,"March 4, 1893 – March 4, 1897"
25,Theodore Roosevelt,"September 14, 1901"
31,Franklin D. Roosevelt,"March 4, 1933 – April 12, 1945"
35,Lyndon B. Johnson,"November 22, 1963"


In [157]:
df

Unnamed: 0,Election,Birth Year,Death Year,Name,Vice President,Party,Term,year entered office,year left office,Years in Office,Forename,Surname,VP,VP more than once
0,1788–1789 1792,1732,1799,George Washington,John Adams,Unaffiliated,"April 30, 1789 – March 4, 1797",1789,1797.0,8,George,Washington,,John Adams
1,1796,1735,1826,John Adams,Thomas Jefferson,Federalist,"March 4, 1797 – March 4, 1801",1797,1801.0,4,John,Adams,Short,
2,1800 1804,1743,1826,Thomas Jefferson,Aaron Burr George Clinton,Democratic- Republican,"March 4, 1801 – March 4, 1809",1801,1809.0,8,Thomas,Jefferson,,Aaron Burr George Clinton
3,1808 1812,1751,1836,James Madison,George Clinton,Democratic- Republican,"March 4, 1809 – March 4, 1817",1809,1817.0,8,James,Madison,,George Clinton
4,1816 1820,1758,1831,James Monroe,Daniel D. Tompkins,Democratic- Republican,"March 4, 1817 – March 4, 1825",1817,1825.0,8,James,Monroe,,Daniel D. Tompkins
5,1824,1767,1848,John Quincy Adams,John C. Calhoun,Democratic- Republican,"March 4, 1825 – March 4, 1829",1825,1829.0,4,John,Adams,Short,
6,1828 1832,1767,1845,Andrew Jackson,John C. Calhoun,Democratic,"March 4, 1829 – March 4, 1837",1829,1837.0,8,Andrew,Jackson,,John C. Calhoun
7,1836,1782,1862,Martin Van Buren,Richard Mentor Johnson,Democratic,"March 4, 1837 – March 4, 1841",1837,1841.0,4,Martin,Buren,Short,
8,1840,1773,1841,William Henry Harrison,John Tyler,Whig,"March 4, 1841 – April 4, 1841",1841,1841.0,0,William,Harrison,Short,
9,No Election,1790,1862,John Tyler,Vacant throughout presidency,Whig,"April 4, 1841",1841,,4,John,Tyler,Short,


### Question 9: Who was the oldest president ever elected for office? 

In [158]:
oldest_pres = 0
oldest_president = ''
year_entered_office = 0
year_left_office = 0

In [159]:
for i in range(len(df)):
    election_years = df['Election'][i].split()
    for year in election_years:
        if year.isdigit():  # Check if the election year is a digit
            election_age = int(year) - int(df['Birth Year'][i])
            if election_age > oldest_pres:
                oldest_pres = election_age
                oldest_president = df['Name'][i]
                year_entered_office = df['year entered office'][i]
                year_left_office = df['year left office'][i]

In [162]:
print(f"The oldest president ever elected for office is {oldest_president} at {oldest_pres} years old.")
print(f"he assumed office in {year_entered_office} and left {int(year_left_office)}")
print(f"Duration of presidency: {int(year_left_office - year_entered_office)} years")


The oldest president ever elected for office is Joe Biden at 78 years old.
he assumed office in 2021 and left 2024
Duration of presidency: 3 years


### Question 10: Who was the youngest president ever elected for office?

In [170]:
youngest_pres = float('inf')
youngest_president = ''
year_entered_office = 0
year_left_office = 0

In [171]:
for i in range(len(df)):
    election_years = df['Election'][i].split()
    for year in election_years:
        if year.isdigit():  # Check if the election year is a digit
            election_age = int(year) - int(df['Birth Year'][i])
            if election_age < youngest_pres:
                youngest_pres = election_age
                youngest_president = df['Name'][i]
                young_year_entered_office = df['year entered office'][i]
                young_year_left_office = df['year left office'][i]

In [172]:
print(f"The oldest president ever elected for office is {youngest_president} at {youngest_pres} years old.")
print(f"he assumed office in {young_year_entered_office} and left {int(young_year_left_office)}")
print(f"Duration of presidency: {int(young_year_left_office - young_year_entered_office)} years")


The oldest president ever elected for office is John F. Kennedy at 43 years old.
he assumed office in 1961 and left 1963
Duration of presidency: 2 years


### Question 11: Which president lived up to a very old age, after leaving office?

In [174]:
oldest_age = 0
presname = ''
tenure_began = ''
tenure_end = ''

In [180]:
df

Unnamed: 0,Election,Birth Year,Death Year,Name,Vice President,Party,Term,year entered office,year left office,Years in Office,Forename,Surname,VP,VP more than once
0,1788–1789 1792,1732,1799,George Washington,John Adams,Unaffiliated,"April 30, 1789 – March 4, 1797",1789,1797.0,8,George,Washington,,John Adams
1,1796,1735,1826,John Adams,Thomas Jefferson,Federalist,"March 4, 1797 – March 4, 1801",1797,1801.0,4,John,Adams,Short,
2,1800 1804,1743,1826,Thomas Jefferson,Aaron Burr George Clinton,Democratic- Republican,"March 4, 1801 – March 4, 1809",1801,1809.0,8,Thomas,Jefferson,,Aaron Burr George Clinton
3,1808 1812,1751,1836,James Madison,George Clinton,Democratic- Republican,"March 4, 1809 – March 4, 1817",1809,1817.0,8,James,Madison,,George Clinton
4,1816 1820,1758,1831,James Monroe,Daniel D. Tompkins,Democratic- Republican,"March 4, 1817 – March 4, 1825",1817,1825.0,8,James,Monroe,,Daniel D. Tompkins
5,1824,1767,1848,John Quincy Adams,John C. Calhoun,Democratic- Republican,"March 4, 1825 – March 4, 1829",1825,1829.0,4,John,Adams,Short,
6,1828 1832,1767,1845,Andrew Jackson,John C. Calhoun,Democratic,"March 4, 1829 – March 4, 1837",1829,1837.0,8,Andrew,Jackson,,John C. Calhoun
7,1836,1782,1862,Martin Van Buren,Richard Mentor Johnson,Democratic,"March 4, 1837 – March 4, 1841",1837,1841.0,4,Martin,Buren,Short,
8,1840,1773,1841,William Henry Harrison,John Tyler,Whig,"March 4, 1841 – April 4, 1841",1841,1841.0,0,William,Harrison,Short,
9,No Election,1790,1862,John Tyler,Vacant throughout presidency,Whig,"April 4, 1841",1841,,4,John,Tyler,Short,


In [185]:
for i in range(len(df)):
    if (df["Death Year"][i] == 'Alive'):
        df["Death Year"][i] = 0
    else:
        if pd.isna(df["year left office"][i]):
            df["year left office"][i] = 0

        oldest = int(df["Death Year"][i]) - int(df["year left office"][i])
        if oldest > oldest_age:
            oldest_age = oldest
            presname = df['Name'][i]
            tenure_began = df['Election'][i]
            tenure_end = df['year left office'][i]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["year left office"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Death Year"][i] = 0


In [187]:
print(f"Name: {presname}")
print(f"Age: {oldest_age}")
print(f"Year elected: {tenure_began}")
print(f"Year tenure ended: {int(tenure_end)}")


Name: Gerald Ford
Age: 2006
Year elected: No Election
Year tenure ended: 0


### Question 12: Draw a graph that shows the number of presidents and their respective political parties which they came from, and which party have many elected presidents?

In [None]:
df

In [None]:
# no of pres
# political parties
# party with the most presidents
import matplotlib.pyplot as plt


# df.plot()

df.plot(kind = 'scatter', x = 'Name', y = 'Party')

# df["Years in office"].plot(kind = 'hist')

plt.show()

In [None]:
# `# Plotting - from web
# In [3]: np.random.seed(123456)

# In [4]: ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000))

# In [5]: ts = ts.cumsum()

# In [6]: ts.plot();`

# https://pandas.pydata.org/docs/user_guide/visualization.html

code below works. requires further cleaning of vacant columns.
we will strip vacant through
then we will add the cleaning for the commas again
then we can finally perform the operation

In [191]:
df

Unnamed: 0,Election,Birth Year,Death Year,Name,Vice President,Party,Term,year entered office,year left office,Years in Office,Forename,Surname,VP,VP more than once
0,1788–1789 1792,1732,1799,George Washington,John Adams,Unaffiliated,"April 30, 1789 – March 4, 1797",1789,1797.0,8,George,Washington,,John Adams
1,1796,1735,1826,John Adams,Thomas Jefferson,Federalist,"March 4, 1797 – March 4, 1801",1797,1801.0,4,John,Adams,Short,
2,1800 1804,1743,1826,Thomas Jefferson,Aaron Burr George Clinton,Democratic- Republican,"March 4, 1801 – March 4, 1809",1801,1809.0,8,Thomas,Jefferson,,Aaron Burr George Clinton
3,1808 1812,1751,1836,James Madison,George Clinton,Democratic- Republican,"March 4, 1809 – March 4, 1817",1809,1817.0,8,James,Madison,,George Clinton
4,1816 1820,1758,1831,James Monroe,Daniel D. Tompkins,Democratic- Republican,"March 4, 1817 – March 4, 1825",1817,1825.0,8,James,Monroe,,Daniel D. Tompkins
5,1824,1767,1848,John Quincy Adams,John C. Calhoun,Democratic- Republican,"March 4, 1825 – March 4, 1829",1825,1829.0,4,John,Adams,Short,
6,1828 1832,1767,1845,Andrew Jackson,John C. Calhoun,Democratic,"March 4, 1829 – March 4, 1837",1829,1837.0,8,Andrew,Jackson,,John C. Calhoun
7,1836,1782,1862,Martin Van Buren,Richard Mentor Johnson,Democratic,"March 4, 1837 – March 4, 1841",1837,1841.0,4,Martin,Buren,Short,
8,1840,1773,1841,William Henry Harrison,John Tyler,Whig,"March 4, 1841 – April 4, 1841",1841,1841.0,0,William,Harrison,Short,
9,No Election,1790,1862,John Tyler,Vacant throughout presidency,Whig,"April 4, 1841",1841,,4,John,Tyler,Short,
