In [19]:
import pandas as pd
import numpy as np
from urllib.request import urlopen
import json
import datetime
import re

In [20]:
fomc_link = 'https://www.federalreserve.gov/json/calendar.json'
re_tags = re.compile(r'(&lt;[//]?p|&gt;|&lt;br.*)')

In [21]:
# read the data from url

with urlopen(fomc_link) as url:
     data = json.loads(url.read())
        
# convert the data to dataframe for easy manipulation
df_events = pd.DataFrame(data['events'])        
        

In [22]:
# filter by type - 'FOMC','Speeches','Beige'
type_filter = ['FOMC','Beige','Speeches']
df_filtered = df_events[df_events.type.isin(type_filter)]

In [23]:
df_filtered

Unnamed: 0,days,description,link,live,location,month,time,title,type
0,17,Bank Supervision,,,At the American Bar Association Banking Law Co...,2020-01,12:45 p.m.,Speech -- Vice Chair Randal K. Quarles,Speeches
1,16,The Outlook for Housing,,,At the Home Builders Association of Greater Ka...,2020-01,10:00 a.m.,Speech -- Governor Michelle Bowman,Speeches
2,9,U.S. Economic Outlook and Monetary Policy,,,At the C. Peter McColough Series on Internatio...,2020-01,8:00 a.m.,Speech - Vice Chair Richard H. Clarida,Speeches
3,8,Modernizing the Community Reinvestment Act,,https://www.urban.org/events/conversation-dr-l...,"At in Conversation with Dr. Lael Brainard, Urb...",2020-01,10:00 a.m.,Speech - Governor Lael Brainard,Speeches
4,25,"&lt;p&gt;Meeting of November 4-5, 2020&lt;/p&gt;",,,,2020-11,2:00 p.m.,FOMC Minutes,FOMC
5,7,"&lt;p&gt;Meeting of September 15-16, 2020&lt;/...",,,,2020-10,2:00 p.m.,FOMC Minutes,FOMC
6,19,"&lt;p&gt;Meeting of July 28-29, 2020&lt;/p&gt;",,,,2020-08,2:00 p.m.,FOMC Minutes,FOMC
7,1,"&lt;p&gt;Meeting of June 9-10, 2020&lt;/p&gt;",,,,2020-07,2:00 p.m.,FOMC Minutes,FOMC
8,20,"&lt;p&gt;Meeting of April 28-29, 2020&lt;/p&gt;",,,,2020-05,2:00 p.m.,FOMC Minutes,FOMC
9,8,"&lt;p&gt;Meeting of March 17-18, 2020&lt;/p&gt;",,,,2020-04,2:00 p.m.,FOMC Minutes,FOMC


In [24]:
def filter_records(events):
    '''
    This function strips out relavent event details from
    each row in the dataframe
    '''
   
    # extract date and time
    current_date = datetime.datetime.now()   
    event_datestr = events['days'] + '-' + events['month'] + ',' \
                    + events['time'].replace(' p.m.','PM').replace(' a.m.','AM')
    event_datetime = datetime.datetime.strptime(event_datestr,'%d-%Y-%m,%I:%M%p')
    
    # if the event is for the future
    if event_datetime > current_date:        
        try:          
            filtered_description = re.sub(re_tags,'',events['description'])
        except:
            filtered_description = str(events['title']) + ' ' + str(event_datetime.date())
            
        final_record = {'date':event_datetime, 'description':filtered_description, \
                        'title':events['title'], 'category':events['type']}
        return final_record
    else:
        return np.nan
       

In [25]:
#apply the filter records function in parallel. get the results
df_result = df_filtered.apply(filter_records,axis=1)

In [26]:
#create the final data frame.
df_final = pd.DataFrame(list(df_result.dropna().values))

In [28]:
df_final


Unnamed: 0,category,date,description,title
0,Speeches,2020-01-17 12:45:00,Bank Supervision,Speech -- Vice Chair Randal K. Quarles
1,Speeches,2020-01-16 10:00:00,The Outlook for Housing,Speech -- Governor Michelle Bowman
2,FOMC,2020-11-25 14:00:00,"Meeting of November 4-5, 2020",FOMC Minutes
3,FOMC,2020-10-07 14:00:00,"Meeting of September 15-16, 2020",FOMC Minutes
4,FOMC,2020-08-19 14:00:00,"Meeting of July 28-29, 2020",FOMC Minutes
5,FOMC,2020-07-01 14:00:00,"Meeting of June 9-10, 2020",FOMC Minutes
6,FOMC,2020-05-20 14:00:00,"Meeting of April 28-29, 2020",FOMC Minutes
7,FOMC,2020-04-08 14:00:00,"Meeting of March 17-18, 2020",FOMC Minutes
8,FOMC,2020-02-19 14:00:00,"Meeting of January 28-29, 2020",FOMC Minutes
9,Beige,2020-10-21 14:00:00,Beige Book 2020-10-21,Beige Book


In [27]:
df_final[df_final['category']=='Beige']

Unnamed: 0,category,date,description,title
9,Beige,2020-10-21 14:00:00,Beige Book 2020-10-21,Beige Book
10,Beige,2020-09-02 14:00:00,Beige Book 2020-09-02,Beige Book
11,Beige,2020-07-15 14:00:00,Beige Book 2020-07-15,Beige Book
12,Beige,2020-05-27 14:00:00,Beige Book 2020-05-27,Beige Book
13,Beige,2020-04-15 14:00:00,Beige Book 2020-04-15,Beige Book
14,Beige,2020-03-04 14:00:00,Beige Book 2020-03-04,Beige Book
21,Beige,2020-12-02 14:00:00,Beige Book 2020-12-02,Beige Book
23,Beige,2020-01-15 14:00:00,Beige Book 2020-01-15,Beige Book


In [328]:
(df_final['title'] + ', ' + df_final['date'].apply(lambda x:str(x.date()))).apply(lambda x: hash(x))  

0    -2026374702601328519
1     1654344810102217206
2     -718781993744106731
3     1202626196065023634
4    -3390781090326368851
5      414631357052511788
6     -971017299656773265
7     2670507746457378305
8     2935457177150396471
9     6324856380931168407
10   -4362039153520736077
11    7136433406417625026
12    3820210284135730010
13    9016251672715358600
14    7174661509664547784
15    2828770357456159497
16    8953210859406729929
17   -1884372741657262840
18    5779061263560891537
19    7209124295930398016
20    5931043555395013655
21    9042063859104547112
22   -8486912756061070848
23   -1145584224880200853
dtype: int64

In [29]:
df_uniquedes = (df_final['title'] + ', ' + df_final['date'].apply(lambda x:str(x.date())))

In [31]:
df_final['indexhash'] = (df_final['title'] + ', ' + df_final['date'].apply(lambda x:str(x.date()))).apply(lambda x: hash(x))  

In [41]:
df_final = df_final.set_index('indexhash',drop=True)

In [42]:
df_final.to_pickle('./events_list.pickle')


In [43]:
df_events = pd.read_pickle('./events_list.pickle')

In [56]:
res = set(df_events.index) - set(df_final.index)

In [57]:
res

set()

In [50]:
if res:
    print (True)
else:
    print (False)

False


In [55]:
df_events.loc[res]

Unnamed: 0_level_0,category,date,description,title
indexhash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-6395719108065583385,Speeches,2020-01-17 12:45:00,Bank Supervision,Speech -- Vice Chair Randal K. Quarles


In [59]:
df_res = df_events.loc[res]

In [71]:
df_res.empty

True

In [67]:
df_events.empty

-1

In [76]:
df_events.iloc[0]['date'].to_datetime()

  exec(code_obj, self.user_global_ns, self.user_ns)


datetime.datetime(2020, 1, 17, 12, 45)

In [79]:
def dummy(x):
    return x['date']

In [80]:
df_events.apply(dummy,axis=1)

indexhash
-6395719108065583385   2020-01-17 12:45:00
-506358195976120439    2020-01-16 10:00:00
-1823001206848760244   2020-11-25 14:00:00
 3109511532584406098   2020-10-07 14:00:00
 6517358195594349697   2020-08-19 14:00:00
-4231919471837110918   2020-07-01 14:00:00
 4991622313091795258   2020-05-20 14:00:00
-9168352783799231704   2020-04-08 14:00:00
-1366888459877643412   2020-02-19 14:00:00
-3964144781925994210   2020-10-21 14:00:00
-1516649023658354743   2020-09-02 14:00:00
-5622975569362766171   2020-07-15 14:00:00
-8528225560398762934   2020-05-27 14:00:00
 2860286917212854601   2020-04-15 14:00:00
-7229245259843503333   2020-03-04 14:00:00
-330515501324040690    2020-11-05 14:00:00
-6853960410240890285   2020-09-16 14:00:00
 6267401264481000136   2020-07-29 14:00:00
-1146594305652809613   2020-06-10 14:00:00
-8502738537393854084   2020-04-29 14:00:00
-3757092196507350174   2020-03-18 14:00:00
 6538604015813403983   2020-12-02 14:00:00
 1870630683592084046   2020-12-16 14:00:00
 