# White House visitor logs

### Import Python tools and Jupyter configuration

In [1]:
# Load the lab_black IPython extension for this kernel session
# (presumably Black auto-formatting of code cells — confirm against the project setup).
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import datetime as dt

In [3]:
import altair as alt
import altair_latimes as lat
import numpy as np

In [4]:
# Make the theme object from altair_latimes available to Altair under one name,
# then activate it. The enable() call stays last so the cell still echoes its result.
LATIMES_THEME = "latimes"
alt.themes.register(LATIMES_THEME, lat.theme)
alt.themes.enable(LATIMES_THEME)

ThemeRegistry.enable('latimes')

In [5]:
# Display configuration for the notebook session.
# pandas: render up to 1,000 columns and 1,000 rows before truncating.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
# Altair: turn off the max-rows limit on chart data.
alt.data_transformers.disable_max_rows()
# pandas: None removes the column-width cap, so long cell text is shown in full.
pd.set_option("display.max_colwidth", None)

### Data dictionary

In [6]:
# Data dictionary (key to the WAVES visitor-log fields):
# https://obamawhitehouse.archives.gov/files/disclosures/visitors/WhiteHouse-WAVES-Key-1209.txt

In [7]:
# Today's date as a zero-padded MM-DD-YYYY string.
today = format(dt.date.today(), "%m-%d-%Y")

In [8]:
# Columns in the processed log that should be parsed as datetimes on load.
DATE_COLUMNS = [
    "appt_made_date",
    "appt_start_date",
    "appt_end_date",
    "appt_cancel_date",
    "releasedate",
]

# Read the latest processed visitor log into the working frame.
# NOTE(review): the loaded frame carries an "Unnamed: 0" column (visible in the
# outputs below) — presumably an index saved by an earlier to_csv; confirm whether
# it is needed before dropping it or switching to index_col=0.
df = pd.read_csv("data/processed/log_latest.csv", parse_dates=DATE_COLUMNS)

In [9]:
# Peek at the five records with the highest total_people value.
df.sort_values(by="total_people", ascending=False).head(5)

Unnamed: 0,namelast,namefirst,namemid,uin,bdgnbr,access_type,toa,poa,tod,pod,appt_made_date,appt_start_date,appt_end_date,appt_cancel_date,total_people,last_updatedby,post,lastentrydate,terminal_suffix,visitee_namelast,visitee_namefirst,meeting_loc,meeting_room,caller_name_last,caller_name_first,caller_room,description,releasedate
3210,CORTEZ,TRISTEN,R,U23376,,VA,4/16/2021 11:57,SOS,NAN,NAN,2021-04-10 14:16:00,2021-04-16,2021-04-16 23:59:00,NaT,95.0,ET,WIN,4/10/2021 14:16,ET,TELEKY,ED,WH,EW 216,TELEKY,EDWARD,,NAN,2021-07-29
3197,BELLOT,ALEIA,T,U23376,,VA,4/16/2021 12:04,SOS,NAN,NAN,2021-04-10 14:16:00,2021-04-16,2021-04-16 23:59:00,NaT,95.0,ET,WIN,4/10/2021 14:16,ET,TELEKY,ED,WH,EW 216,TELEKY,EDWARD,,NAN,2021-07-29
3253,MAYS,OMARI,J,U23376,,VA,4/16/2021 12:09,SOS,NAN,NAN,2021-04-10 14:16:00,2021-04-16,2021-04-16 23:59:00,NaT,95.0,ET,WIN,4/10/2021 14:16,ET,TELEKY,ED,WH,EW 216,TELEKY,EDWARD,,NAN,2021-07-29
3271,ROY,KEVIN,M,U23376,,VA,NAN,NAN,NAN,NAN,2021-04-12 09:56:00,2021-04-16,2021-04-16 23:59:00,NaT,95.0,ET,WIN,4/12/2021 9:56,ET,TELEKY,ED,WH,EW 216,TELEKY,EDWARD,,NAN,2021-07-29
3272,SANTIAGO,ANDREW,B,U23376,,VA,4/16/2021 11:54,SOS,NAN,NAN,2021-04-12 09:56:00,2021-04-16,2021-04-16 23:59:00,NaT,95.0,ET,WIN,4/12/2021 9:56,ET,TELEKY,ED,WH,EW 216,TELEKY,EDWARD,,NAN,2021-07-29


### Buildings

In [10]:
# Number of visitor records per meeting location (building), most frequent first.
df["meeting_loc"].value_counts()

OEOB    1579
WH      1398
EEOB     837
NEOB     374
Name: meeting_loc, dtype: int64

### Rooms 

In [16]:
# The five most frequently listed meeting rooms.
df["meeting_room"].value_counts().head(5)

234           316
EW206         231
472           209
97            137
EEOB 210/2    125
Name: meeting_room, dtype: int64

### Who's visited the Oval?

In [12]:
# Count the visitor records whose meeting room mentions the Oval Office.
# The room is recorded in this data as the clipped string "OVAL OFFIC", so it is
# matched as a literal substring (regex=False).
# na=False treats rows with a missing meeting_room as non-matches; without it a
# single NaN leaves NaN in the boolean mask and the indexing raises ValueError.
len(df[df["meeting_room"].str.contains("OVAL OFFIC", regex=False, na=False)])

108

In [13]:
# Preview the first records whose meeting room contains "OVAL OFFIC".
# Literal substring match (regex=False); na=False counts rows with a missing
# meeting_room as non-matches instead of letting NaN break the boolean mask.
df[df["meeting_room"].str.contains("OVAL OFFIC", regex=False, na=False)].head()

Unnamed: 0,namelast,namefirst,namemid,uin,bdgnbr,access_type,toa,poa,tod,pod,appt_made_date,appt_start_date,appt_end_date,appt_cancel_date,total_people,last_updatedby,post,lastentrydate,terminal_suffix,visitee_namelast,visitee_namefirst,meeting_loc,meeting_room,caller_name_last,caller_name_first,caller_room,description,releasedate
115,ESHOO,ANNA,G,U21725,,VA,3/3/21 13:31,B0401,NAN,NAN,2021-03-03 11:27:00,2021-03-03 13:30:00,2021-03-03 23:59:00,NaT,9.0,GT,WIN,3/3/2021,GT,NAN,POTUS,WH,OVAL OFFIC,TAYLOR,GABRIELLE,,NAN,2021-06-29
116,VANHOLLEN,CHRISTOPHER,N,U21725,,VA,3/3/21 13:27,B0401,NAN,NAN,2021-03-03 11:27:00,2021-03-03 13:30:00,2021-03-03 23:59:00,NaT,9.0,GT,WIN,3/3/2021,GT,NAN,POTUS,WH,OVAL OFFIC,TAYLOR,GABRIELLE,,NAN,2021-06-29
117,DEGETTE,DIANA,L,U21725,,VA,NAN,NAN,NAN,NAN,2021-03-03 11:27:00,2021-03-03 13:30:00,2021-03-03 23:59:00,NaT,9.0,GT,WIN,3/3/2021,GT,NAN,POTUS,WH,OVAL OFFIC,TAYLOR,GABRIELLE,,NAN,2021-06-29
118,UPTON,FREDERICK,S,U21725,,VA,NAN,NAN,NAN,NAN,2021-03-03 11:27:00,2021-03-03 13:30:00,2021-03-03 23:59:00,NaT,9.0,GT,WIN,3/3/2021,GT,NAN,POTUS,WH,OVAL OFFIC,TAYLOR,GABRIELLE,,NAN,2021-06-29
121,CRAPO,MICHAEL,D,U21725,,VA,NAN,NAN,NAN,NAN,2021-03-03 11:27:00,2021-03-03 13:30:00,2021-03-03 23:59:00,NaT,9.0,GT,WIN,3/3/2021,GT,NAN,POTUS,WH,OVAL OFFIC,TAYLOR,GABRIELLE,,NAN,2021-06-29


### Where meeting room is EW206

In [14]:
# Preview the first records whose meeting room contains "EW206".
# Literal substring match (regex=False); na=False counts rows with a missing
# meeting_room as non-matches instead of letting NaN break the boolean mask.
df[df["meeting_room"].str.contains("EW206", regex=False, na=False)].head()

Unnamed: 0,namelast,namefirst,namemid,uin,bdgnbr,access_type,toa,poa,tod,pod,appt_made_date,appt_start_date,appt_end_date,appt_cancel_date,total_people,last_updatedby,post,lastentrydate,terminal_suffix,visitee_namelast,visitee_namefirst,meeting_loc,meeting_room,caller_name_last,caller_name_first,caller_room,description,releasedate
288,MENDEZ,ANTHONY,A,U21895,,VA,NAN,NAN,NAN,NAN,2021-03-08 08:41:00,2021-03-08 10:30:00,2021-03-08 23:59:00,NaT,5.0,DV,WIN,3/8/2021,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,NAN,2021-06-29
290,CAMPBELL,EVAN,M,U21895,,VA,NAN,NAN,NAN,NAN,2021-03-08 08:41:00,2021-03-08 10:30:00,2021-03-08 23:59:00,NaT,5.0,DV,WIN,3/8/2021,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,NAN,2021-06-29
291,KLIMCZAK,FRANCES,D,U21895,,VA,NAN,NAN,NAN,NAN,2021-03-08 08:41:00,2021-03-08 10:30:00,2021-03-08 23:59:00,NaT,5.0,DV,WIN,3/8/2021,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,NAN,2021-06-29
292,AHRENS,KATHERINE,L,U21895,,VA,NAN,NAN,NAN,NAN,2021-03-08 08:41:00,2021-03-08 10:30:00,2021-03-08 23:59:00,NaT,5.0,DV,WIN,3/8/2021,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,NAN,2021-06-29
440,REGINA,ANTHONY,C,U22043,,VA,3/11/21 13:02,SOS,NAN,NAN,2021-03-10 11:34:00,2021-03-11 13:00:00,2021-03-11 23:59:00,NaT,22.0,DV,WIN,3/10/2021,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,NAN,2021-06-29


### Where 'visitee' is explicitly the Vice President

In [15]:
# Show every record whose visitee first-name field contains "VPOTUS".
# Literal substring match (regex=False); na=False counts rows with a missing
# visitee_namefirst as non-matches instead of letting NaN break the boolean mask.
df[df["visitee_namefirst"].str.contains("VPOTUS", regex=False, na=False)]

In [18]:
# Write the records whose visitee first-name field contains "VPOTUS" to CSV,
# without the DataFrame index.
# Literal substring match (regex=False); na=False counts rows with a missing
# visitee_namefirst as non-matches, so a NaN cannot abort the export with a
# ValueError from an invalid boolean mask.
df[df["visitee_namefirst"].str.contains("VPOTUS", regex=False, na=False)].to_csv(
    "data/processed/vpotus_visitee.csv", index=False
)