# White House visitor logs

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import datetime as dt

In [3]:
import altair as alt
import altair_latimes as lat
import numpy as np

In [4]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.options.display.max_colwidth = None

### Data dictionary

In [6]:
# "https://obamawhitehouse.archives.gov/files/disclosures/visitors/WhiteHouse-WAVES-Key-1209.txt"

In [7]:
today = dt.date.today().strftime("%m-%d-%Y")

In [8]:
df = pd.read_csv(
    "data/processed/log_latest.csv",
    parse_dates=[
        "appt_made_date",
        "appt_start_date",
        "appt_end_date",
        "appt_cancel_date",
        "releasedate",
    ],
)

In [9]:
df.sort_values(["total_people"], ascending=False).head()

Unnamed: 0,namelast,namefirst,namemid,uin,bdgnbr,access_type,toa,poa,tod,pod,appt_made_date,appt_start_date,appt_end_date,appt_cancel_date,total_people,last_updatedby,post,lastentrydate,terminal_suffix,visitee_namelast,visitee_namefirst,meeting_loc,meeting_room,caller_name_last,caller_name_first,caller_room,description,releasedate
198,MCBRIDE,JEREMY,V,U19761,,VA,NAN,NAN,NAN,NAN,2021-01-16 10:06:00,2021-01-20 13:00:00,2021-01-20 23:59:00,NaT,34.0,DV,WIN,1/16/2021 10:06,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,HERALD TRUMPETS,2021-05-07
90,EISENSTADT,ALICIA,M,U19761,,VA,NAN,NAN,NAN,NAN,2021-01-16 10:06:00,2021-01-20 13:00:00,2021-01-20 23:59:00,NaT,34.0,DV,WIN,1/16/2021 10:06,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,HERALD TRUMPETS,2021-05-07
266,POWLISON,JOHN,M,U19761,,VA,NAN,NAN,NAN,NAN,2021-01-16 10:06:00,2021-01-20 13:00:00,2021-01-20 23:59:00,NaT,34.0,DV,WIN,1/16/2021 10:06,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,HERALD TRUMPETS,2021-05-07
332,STOCKTON,TRENT,A,U19761,,VA,NAN,NAN,NAN,NAN,2021-01-16 10:06:00,2021-01-20 13:00:00,2021-01-20 23:59:00,NaT,34.0,DV,WIN,1/16/2021 10:06,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,HERALD TRUMPETS,2021-05-07
140,HOOKE,CHARLES,M,U19761,,VA,NAN,NAN,NAN,NAN,2021-01-16 10:06:00,2021-01-20 13:00:00,2021-01-20 23:59:00,NaT,34.0,DV,WIN,1/16/2021 10:06,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,HERALD TRUMPETS,2021-05-07


### Buildings

In [10]:
df.meeting_loc.value_counts()

EEOB    221
WH      138
NEOB     41
Name: meeting_loc, dtype: int64

### Rooms 

In [11]:
df.meeting_room.value_counts()

EW206         89
EEOB 210/2    50
94            23
3202          15
594           15
3201          14
430           14
210           13
180           12
RES CLINIC    11
EAST EXEC/     7
97             7
415            6
97-WHMU CL     6
INDIAN TRE     6
208            6
OVAL OFFIC     6
PASS OFFIC     5
1B29           5
76             4
474            4
291            4
97-WHMU ME     4
STATE FLOO     4
4000           4
1              3
300            3
96-WHMU ME     3
NEOB LOADI     2
212            2
SITUATION      2
1ST FLOOR      2
21             2
EW TOUR        2
71             2
LAFAYETTE      2
WW G67         2
386            2
79             2
18             2
WEST EXEC      2
LOADING DO     2
USHERS OF      2
RES B26        1
WHITE HOUS     1
148            1
316            1
432            1
WW OVAL OF     1
319            1
19             1
27             1
WW G66         1
RM 22          1
14             1
312            1
RESIDENCE      1
330            1
183           

### Who's visited the Oval?

In [12]:
df[df["meeting_room"].str.contains("OVAL OFFIC")]

Unnamed: 0,namelast,namefirst,namemid,uin,bdgnbr,access_type,toa,poa,tod,pod,appt_made_date,appt_start_date,appt_end_date,appt_cancel_date,total_people,last_updatedby,post,lastentrydate,terminal_suffix,visitee_namelast,visitee_namefirst,meeting_loc,meeting_room,caller_name_last,caller_name_first,caller_room,description,releasedate
8,AUSTIN,CHARLENE,D,U20094,,VA,NAN,NAN,NAN,NAN,2021-01-25 06:42:00,2021-01-25 11:00:00,2021-01-25 23:59:00,NaT,6.0,MS,WIN,1/25/2021 6:42,MS,NAN,POTUS,WH,OVAL OFFIC,STRAUSS,MATTHEW,,NAN,2021-05-07
97,FENTON,BRYAN,P,U20094,,VA,NAN,NAN,NAN,NAN,2021-01-25 06:42:00,2021-01-25 11:00:00,2021-01-25 23:59:00,NaT,6.0,MS,WIN,1/25/2021 6:42,MS,NAN,POTUS,WH,OVAL OFFIC,STRAUSS,MATTHEW,,NAN,2021-05-07
136,HILL,CHRISTOPHER,S,U20094,,VA,NAN,NAN,NAN,NAN,2021-01-25 06:42:00,2021-01-25 11:00:00,2021-01-25 23:59:00,NaT,6.0,MS,WIN,1/25/2021 6:42,MS,NAN,POTUS,WH,OVAL OFFIC,STRAUSS,MATTHEW,,NAN,2021-05-07
137,HILL,REGINALD,A,U20094,,VA,NAN,NAN,NAN,NAN,2021-01-25 06:42:00,2021-01-25 11:00:00,2021-01-25 23:59:00,NaT,6.0,MS,WIN,1/25/2021 6:42,MS,NAN,POTUS,WH,OVAL OFFIC,STRAUSS,MATTHEW,,NAN,2021-05-07
147,HYATT,CALEB,N,U20094,,VA,NAN,NAN,NAN,NAN,2021-01-25 06:42:00,2021-01-25 11:00:00,2021-01-25 23:59:00,NaT,6.0,MS,WIN,1/25/2021 6:42,MS,NAN,POTUS,WH,OVAL OFFIC,STRAUSS,MATTHEW,,NAN,2021-05-07
190,MAGSAMEN,KELLY,E,U20094,,VA,NAN,NAN,NAN,NAN,2021-01-25 06:42:00,2021-01-25 11:00:00,2021-01-25 23:59:00,NaT,6.0,MS,WIN,1/25/2021 6:42,MS,NAN,POTUS,WH,OVAL OFFIC,STRAUSS,MATTHEW,,NAN,2021-05-07


In [13]:
df[df["meeting_room"].str.contains("EW206")].head()

Unnamed: 0,namelast,namefirst,namemid,uin,bdgnbr,access_type,toa,poa,tod,pod,appt_made_date,appt_start_date,appt_end_date,appt_cancel_date,total_people,last_updatedby,post,lastentrydate,terminal_suffix,visitee_namelast,visitee_namefirst,meeting_loc,meeting_room,caller_name_last,caller_name_first,caller_room,description,releasedate
9,BAILEY,JANET,C,U19818,,VA,NAN,NAN,NAN,NAN,2021-01-18 09:14:00,2021-01-20 12:00:00,2021-01-20 23:59:00,NaT,19.0,DV,WIN,1/18/2021 9:14,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,MARINE BAND SUPPORTING EVENT,2021-05-07
13,BARR,ROGER,B,U19950,166794.0,VA,1/21/2021 10:03,A0401,1/21/2021 11:38,A0401,2021-01-21 09:27:00,2021-01-21 09:30:00,2021-01-21 23:59:00,NaT,10.0,ET,WIN,1/21/2021 9:27,ET,TELEKY,ED,WH,EW206,TELEKY,EDWARD,,NAN,2021-05-07
14,BARR,ROGER,B,U19919,,VA,NAN,NAN,NAN,NAN,2021-01-20 19:04:00,2021-01-20 20:00:00,2021-01-20 23:59:00,NaT,10.0,ET,WIN,1/20/2021 19:04,ET,TELEKY,ED,WH,EW206,TELEKY,EDWARD,,NAN,2021-05-07
15,BARR,ROGER,B,U19959,,VA,NAN,NAN,NAN,NAN,2021-01-21 10:08:00,2021-01-21 10:45:00,2021-01-21 23:59:00,NaT,10.0,ET,WIN,1/21/2021 10:08,ET,TELEKY,ED,WH,EW206,TELEKY,EDWARD,,NAN,2021-05-07
24,BISHOP,JAMES,M,U19761,,VA,NAN,NAN,NAN,NAN,2021-01-16 10:06:00,2021-01-20 13:00:00,2021-01-20 23:59:00,NaT,34.0,DV,WIN,1/16/2021 10:06,DV,VIA,DAN,WH,EW206,VIA,DANIEL,,HERALD TRUMPETS,2021-05-07


### Where 'visitee' is 'POTUS'

In [15]:
df[df["visitee_namefirst"].str.contains("")].head()

Unnamed: 0,namelast,namefirst,namemid,uin,bdgnbr,access_type,toa,poa,tod,pod,appt_made_date,appt_start_date,appt_end_date,appt_cancel_date,total_people,last_updatedby,post,lastentrydate,terminal_suffix,visitee_namelast,visitee_namefirst,meeting_loc,meeting_room,caller_name_last,caller_name_first,caller_room,description,releasedate
0,AIYER,KRISHNAN,V,U20190,135620.0,VA,1/26/2021 19:58,B0401,NAN,NAN,2021-01-26 19:51:00,2021-01-26 20:00:00,2021-01-26 23:59:00,NaT,1.0,AL,WIN,1/26/2021 19:51,AL,MCCARTHY,JOHN,WH,EW 121,LIN,AUSTIN,,NAN,2021-05-07
1,AKERLOF,GEORGE,A,U20152,,VA,NAN,NAN,NAN,NAN,2021-01-26 09:04:00,2021-01-26 11:00:00,2021-01-26 23:59:00,NaT,6.0,GT,WIN,1/26/2021 9:04,GT,TAYLOR,GABRIELLE,WH,EAST EXEC/,TAYLOR,GABRIELLE,,NAN,2021-05-07
2,AKERLOF,ROBERT,J,U20152,,VA,NAN,NAN,NAN,NAN,2021-01-26 09:04:00,2021-01-26 11:00:00,2021-01-26 23:59:00,NaT,6.0,GT,WIN,1/26/2021 9:04,GT,TAYLOR,GABRIELLE,WH,EAST EXEC/,TAYLOR,GABRIELLE,,NAN,2021-05-07
3,ALARCON,GARRETT,A,U19302,131412.0,VA,1/25/2021 10:27,K0101,1/25/2021 12:15,K01,2021-01-13 09:11:00,2021-01-25 10:00:00,2021-01-25 23:59:00,NaT,1.0,TM,WIN,1/13/2021 9:11,TM,GARY,CW4,NEOB,3201,MICHEL,THOMAS,,NAN,2021-05-07
4,ALARCON,GARRETT,A,U20036,,VA,NAN,NAN,NAN,NAN,2021-01-22 08:56:00,2021-01-25 11:00:00,2021-01-25 23:59:00,NaT,1.0,TM,WIN,1/22/2021 8:56,TM,MARCUS,MR.,NEOB,3201,MICHEL,THOMAS,,NAN,2021-05-07
