In [1]:
cd ..

/Users/peterhaglich/Dropbox/Work/IARPA/HFC/gfc/gfc/src


In [2]:
import requests
import time
import datetime
from dateutil.parser import parse
import os
import json

from collections import Counter
from gfcapi.gfcapi import GfcApi

In [3]:
DATA_PATH = os.path.join("..", "data", "gfc2_data")
IFP_DATA_PATH = os.path.join(DATA_PATH, "IFPs")
IFP_CORPUS_PATH = os.path.join(DATA_PATH, "IFP Corpus")
AUTH_PATH = os.path.join("resources", "auth.json")
with open(AUTH_PATH, "r") as f:
    auth_dict = json.load(f)
    secret_token = auth_dict['gfc_token']
    print(secret_token)
SERVER = 'https://api.iarpagfchallenge.com'
Q_ENDPOINT = '/api/v1/questions'
url = SERVER + '/api/v1/questions' # The endpoint to retrieve questions
headers = {'Authorization':'Bearer ' + secret_token}
params = {} # More to come on this in a moment
instance='production'
gf=GfcApi(token=secret_token,server=SERVER,verbose=False)
RCT_START = "2018-03-07"

a0a4d8f1a34501875101650ee2d60a2dc9dd27c38f0e1262a57ce7e1af1743a1


Once we create an instance of the `GfcApi` class, we retrieve Individual Forecasting Problems (IFPs). We could limit our queries of IFPs based on date of creation or update (useful for finding clarifications).  We can also limit our query to active (or closed) questions.

In [4]:
params = {"created_after": parse(RCT_START)}

In [5]:
all_ifps = gf.get_questions(status="all", training_data=False, **params)
print("There are {} IFPs in the GF Challenge".format(len(all_ifps)))

There are 13 pages
There are 122 IFPs in the GF Challenge


In [6]:
today = time.strftime("%Y-%m-%d")
_filename = "All IFP Snapshot {}.json".format(today)
_path = os.path.join(DATA_PATH, "IFPs", _filename)
with open(_path, "w") as f:
    json.dump(all_ifps, f, ensure_ascii=False, sort_keys=True, indent=2)

In [7]:
Counter([x["active?"] for x in all_ifps])

Counter({False: 29, True: 93})

In [8]:
[x["name"] for x in all_ifps]

['Before 30 November 2019, will the Ukrainian government lift any cargo restrictions related to the "Donbas blockade"?',
 "Before 1 November 2019, will Belgium's new federal government be sworn in?",
 'Between 27 June 2019 and 30 September 2019, will the parliament of Moldova be dissolved?\r\n',
 'Will the Government of Canada issue a travel advisory of "Avoid non-essential travel" or "Avoid all travel" for Kazakhstan between 27 June 2019 and 23 October 2019?',
 'How many cases of measles will the European Centre for Disease Prevention and Control record in Italy for October 2019?',
 "Will Hong Kong's Chief Executive Carrie Lam experience a significant leadership disruption between 27 June 2019 and 29 November 2019?",
 'Will there be a significant day-over-day increase in worldwide search interest in the term "extremist" reported by Google Trends between 13 October 2019 and 13 November 2019?',
 'How many positive influenza virus detections will FluNet record for Nepal between 29 July 2

Resolved IFPs

In [9]:
resolved_ifps = [x for x in all_ifps if x["resolved_at"] is not None]
print("There are {} resolved IFPs".format(len(resolved_ifps)))
[(x['id'],x["name"],x['resolved_at']) for x in all_ifps if x["resolved_at"] is not None]

There are 15 resolved IFPs


[(2425,
  "What will be the daily closing price of Germany's DAX index on 28 June 2019?",
  '2019-07-01T17:00:06.886Z'),
 (2390,
  "How many seats will New Democracy win in Greece's next legislative election?",
  '2019-07-09T14:45:13.340Z'),
 (2315,
  "Before 1 July 2019, will the European Commission (EC) extend Switzerland's stock market equivalence status?",
  '2019-07-03T13:30:25.926Z'),
 (2255,
  'Between 6 June 2019 and 30 June 2019, will anti-government protests in Albania result in a fatality?',
  '2019-07-03T13:30:03.736Z'),
 (2165,
  'Will ACLED record any riots in Niger between 30 May 2019 and 9 June 2019?\xa0',
  '2019-06-20T13:30:11.794Z'),
 (2150,
  "How many patents will China publish that have International Patent Classification (IPC) code 'G06N' (COMPUTER SYSTEMS BASED ON SPECIFIC COMPUTATIONAL MODELS) in June 2019?",
  '2019-07-03T18:00:09.144Z'),
 (2115,
  "Who will win Istanbul, Turkey's mayoral election?",
  '2019-06-25T17:00:03.438Z'),
 (2035,
  'Before 30 July 201

In [10]:
resolved_ifps[0]

{'active?': False,
 'answers': [{'active?': False,
   'binary?': False,
   'correctness_known_at': '2019-07-01T13:36:50.000Z',
   'created_at': '2019-06-19T16:45:21.009Z',
   'description': None,
   'ended?': True,
   'ends_at': None,
   'id': 6775,
   'name': 'Less than 11,600',
   'normalized_probability': 0.0,
   'probability': 0.0,
   'probability_formatted': '0%',
   'question_id': 2425,
   'question_name': "What will be the daily closing price of Germany's DAX index on 28 June 2019?",
   'resolved?': True,
   'resolved_at': '2019-07-01T17:00:06.217Z',
   'resolving?': False,
   'sort_order': 0,
   'status': None,
   'updated_at': '2019-07-01T17:00:06.255Z'},
  {'active?': False,
   'binary?': False,
   'correctness_known_at': '2019-07-01T13:36:50.000Z',
   'created_at': '2019-06-19T16:45:20.994Z',
   'description': None,
   'ended?': True,
   'ends_at': None,
   'id': 6774,
   'name': 'Between 11,600 and 12,120, inclusive',
   'normalized_probability': 0.0,
   'probability': 0.0,

In [11]:
for ri in resolved_ifps:
    id_ = "IFP {}".format(ri['id'])
    name_ = ri['name']
    desc = ri['description']
    res_date = ri['resolved_at']
    print("{0}: {1}".format(id_, name_))
    print("Description: {}".format(desc))
    print("Resolved: {}".format(res_date))
    if ri['binary?']:
        yes_prob = ri['answers'][0]['probability']
        if yes_prob > 0:
            print("Outcome: Yes")
        else:
            print("Outcome: No")
    else:
        answers = [x['name'] for x in ri['answers']
                   if x['probability'] > 0]
        print("Outcome: {}".format(answers[0]))
        
    print("\n")

IFP 2425: What will be the daily closing price of Germany's DAX index on 28 June 2019?
Description: This question will be resolved using the daily closing price of the market of interest reported by Google Finance <a href="https://www.google.com/search?tbm=fin&ei=absAXaL7Hu6OggeOupa4DQ&q=INDEXDB%3ADAX&oq=INDEXDB%3ADAX&gs_l=finance-immersive.3..81i8k1l3.128627.210914.0.211577.2.2.0.0.0.0.153.153.0j1.1.0....0...1c.2.64.finance-immersive..1.1.152....0.KOAjfTuHWwQ#scso=_PbwAXaSqF46Rgged964I2:0"target="_blank">DAX</a>. Question will be closed the day before the date of interest.
Resolved: 2019-07-01T17:00:06.886Z
Outcome: More than 12,120 but less than 12,570


IFP 2390: How many seats will New Democracy win in Greece's next legislative election?
Description: Greece's next legislative election is scheduled for 7 July 2019 and all 300 seats in the Hellenic Parliament are to be contested (<a href = "https://www.bloomberg.com/news/articles/2019-05-29/greece-s-snap-elections-here-s-all-you-need

In [12]:
len([x for x in resolved_ifps if x['voided_at'] is not None])

0

In [13]:
resolved_ifps[0]['answers']

[{'active?': False,
  'binary?': False,
  'correctness_known_at': '2019-07-01T13:36:50.000Z',
  'created_at': '2019-06-19T16:45:21.009Z',
  'description': None,
  'ended?': True,
  'ends_at': None,
  'id': 6775,
  'name': 'Less than 11,600',
  'normalized_probability': 0.0,
  'probability': 0.0,
  'probability_formatted': '0%',
  'question_id': 2425,
  'question_name': "What will be the daily closing price of Germany's DAX index on 28 June 2019?",
  'resolved?': True,
  'resolved_at': '2019-07-01T17:00:06.217Z',
  'resolving?': False,
  'sort_order': 0,
  'status': None,
  'updated_at': '2019-07-01T17:00:06.255Z'},
 {'active?': False,
  'binary?': False,
  'correctness_known_at': '2019-07-01T13:36:50.000Z',
  'created_at': '2019-06-19T16:45:20.994Z',
  'description': None,
  'ended?': True,
  'ends_at': None,
  'id': 6774,
  'name': 'Between 11,600 and 12,120, inclusive',
  'normalized_probability': 0.0,
  'probability': 0.0,
  'probability_formatted': '0%',
  'question_id': 2425,
  'q

## Recently Changed IFPs

In [14]:
today = time.strftime("%Y-%m-%dT%H:%M%S")
print(today)

2019-07-09T15:3147


In [15]:
RECENT_DATE = "2019-07-08T00:00:00Z"
recent_date = parse(RECENT_DATE)
new_ifps = [x for x in all_ifps if parse(x["created_at"]) >= recent_date]
print("There are {} new IFPs".format(len(new_ifps)))
newly_resolved_ifps = [x for x in all_ifps if x["resolved_at"] is not None
                       and parse(x["resolved_at"]) >= recent_date]
print("There are {} newly resolved IFPs".format(len(newly_resolved_ifps)))
new_updated = [x for x in all_ifps if parse(x["updated_at"]) > recent_date]
other_updates = [x for x in new_updated if x not in new_ifps]
other_updates = [x for x in other_updates if x not in newly_resolved_ifps]
print("There are {} IFPs recently updated otherwise".format(len(other_updates)))
#ne

There are 0 new IFPs
There are 1 newly resolved IFPs
There are 0 IFPs recently updated otherwise


In [16]:
for ri in newly_resolved_ifps:
    id_ = "IFP {}".format(ri['id'])
    name_ = ri['name']
    desc = ri['description']
    res_date = ri['resolved_at']
    print("{0}: {1}".format(id_, name_))
    print("Description: {}".format(desc))
    print("Resolved: {}".format(res_date))
    if ri['binary?']:
        yes_prob = ri['answers'][0]['probability']
        if yes_prob > 0:
            print("Outcome: Yes")
        else:
            print("Outcome: No")
    else:
        answers = [x['name'] for x in ri['answers']
                   if x['probability'] > 0]
        print("Outcome: {}".format(answers[0]))
        
    print("\n")

IFP 2390: How many seats will New Democracy win in Greece's next legislative election?
Description: Greece's next legislative election is scheduled for 7 July 2019 and all 300 seats in the Hellenic Parliament are to be contested (<a href = "https://www.bloomberg.com/news/articles/2019-05-29/greece-s-snap-elections-here-s-all-you-need-to-know-to-keep-up"target="_blank">Bloomberg</a>). Leftist party Syriza has led the government since 2015, but came in behind New Democracy in recent European and local elections (<a href = "https://www.reuters.com/article/uk-eu-election-greece/greek-pm-calls-snap-vote-after-drubbing-for-ruling-syriza-idUKKCN1SW0TA"target="_blank">Reuters1</a>, <a href = "https://www.euractiv.com/section/elections/news/new-democracy-sweeps-greek-local-elections/"target="_blank">Euractiv</a>, <a href = "https://www.reuters.com/article/us-greece-election-leaders/conservatives-tipped-to-prevail-in-greek-local-election-runoffs-idUSKCN1T30DJ"target="_blank">Reuters2</a>). The q

In [17]:
for item in other_updates:
    print(item['clarifications'])

In [20]:
open_ifps=gf.get_questions(status="open", training_data=False)
print("There are {} open IFPs\n".format(len(open_ifps)))

There are 10 pages
There are 93 open IFPs



In [21]:
today = time.strftime("%Y-%m-%d")
_filename = "Open IFP Snapshot {}.json".format(today)
_path = os.path.join(DATA_PATH, "IFPs", _filename)
with open(_path, "w") as f:
    json.dump(open_ifps, f, ensure_ascii=False, sort_keys=True, indent=2)

In [22]:
ifp_ids = [x["id"] for x in open_ifps]
print(len(ifp_ids), len(set(ifp_ids)))

93 93


In [23]:
closed_ifps=gf.get_questions(status="closed", training_data=False)
print("There are {} closed IFPs\n".format(len(closed_ifps)))

for ifp in closed_ifps:
    print("IFP {}: {}".format(ifp['id'],ifp['name']))
    print("Description: {}".format(ifp['description']))
    print("Starts: {}, Ends: {}".format(ifp['starts_at'],ifp['ends_at']))
    print("Options:")
    for answer in ifp['answers']:
        print(' ({}) {}'.format(answer['id'],answer['name']))
        
    if ifp['clarifications']:
        print('Clarifications:')
        print(ifp['clarifications'])
    print("")    


There are 3 pages
There are 28 closed IFPs

IFP 2425: What will be the daily closing price of Germany's DAX index on 28 June 2019?
Description: This question will be resolved using the daily closing price of the market of interest reported by Google Finance <a href="https://www.google.com/search?tbm=fin&ei=absAXaL7Hu6OggeOupa4DQ&q=INDEXDB%3ADAX&oq=INDEXDB%3ADAX&gs_l=finance-immersive.3..81i8k1l3.128627.210914.0.211577.2.2.0.0.0.0.153.153.0j1.1.0....0...1c.2.64.finance-immersive..1.1.152....0.KOAjfTuHWwQ#scso=_PbwAXaSqF46Rgged964I2:0"target="_blank">DAX</a>. Question will be closed the day before the date of interest.
Starts: 2019-06-19T16:30:00.000Z, Ends: 2019-06-27T18:01:00.000Z
Options:
 (6775) Less than 11,600
 (6774) Between 11,600 and 12,120, inclusive
 (6773) More than 12,120 but less than 12,570
 (6772) Between 12,570 and 13,080, inclusive
 (6771) More than 13,080

IFP 2410: What will be the FAO Vegetable Oil Price Index in June 2019?
Description: This question will be resolved

In [24]:
today = time.strftime("%Y-%m-%d")
_filename = "Closed IFP Snapshot {}.json".format(today)
_path = os.path.join(DATA_PATH, "IFPs", _filename)
with open(_path, "w") as f:
    json.dump(closed_ifps, f, ensure_ascii=False, sort_keys=True, indent=2)

In [25]:
ifp_ids = [x["id"] for x in closed_ifps]
print(len(ifp_ids), len(set(ifp_ids)))

28 28


In [26]:
Counter([x["id"] for x in closed_ifps])

Counter({1925: 1,
         1935: 1,
         1940: 1,
         1945: 1,
         1955: 1,
         1970: 1,
         1980: 1,
         1990: 1,
         2005: 1,
         2010: 1,
         2020: 1,
         2025: 1,
         2035: 1,
         2075: 1,
         2115: 1,
         2140: 1,
         2145: 1,
         2150: 1,
         2165: 1,
         2175: 1,
         2220: 1,
         2235: 1,
         2255: 1,
         2315: 1,
         2335: 1,
         2390: 1,
         2410: 1,
         2425: 1})

In [27]:
closed_ifps[-1]

{'active?': False,
 'answers': [{'active?': False,
   'binary?': False,
   'correctness_known_at': '2019-06-19T17:41:14.000Z',
   'created_at': '2019-05-15T16:45:05.782Z',
   'description': None,
   'ended?': True,
   'ends_at': None,
   'id': 5395,
   'name': 'Less than $62.21 ',
   'normalized_probability': 1.0,
   'probability': 1.0,
   'probability_formatted': '100%',
   'question_id': 1925,
   'question_name': 'What will be the daily closing spot price of Brent crude oil (USD per barrel) on 12 June 2019, according to the U.S. EIA?',
   'resolved?': True,
   'resolved_at': '2019-06-20T13:30:10.531Z',
   'resolving?': False,
   'sort_order': 0,
   'status': None,
   'updated_at': '2019-06-20T13:30:10.540Z'},
  {'active?': False,
   'binary?': False,
   'correctness_known_at': '2019-06-19T17:41:14.000Z',
   'created_at': '2019-05-15T16:45:05.760Z',
   'description': None,
   'ended?': True,
   'ends_at': None,
   'id': 5394,
   'name': 'Between $62.21 and $68.51, inclusive',
   'norm

In [28]:
[(x["active?"], x["name"]) for x in all_ifps if x["id"] == 840]

[]

In [29]:
Counter([x["resolved?"] for x in closed_ifps])

Counter({False: 13, True: 15})

In [30]:
all_ifps=gf.get_questions(status="all")
print("There are {} new IFPs\n".format(len(all_ifps)))

There are 13 pages
There are 122 new IFPs



In [29]:
today = time.strftime("%Y-%m-%d")
_filename = "All IFP Snapshot {}.json".format(today)
_path = os.path.join(DATA_PATH, "IFPs", _filename)
with open(_path, "w") as f:
    json.dump(all_ifps, f, ensure_ascii=False, sort_keys=True, indent=2)

In [30]:
ifp_corpus_dict = {"ID {}".format(x["id"]): " ".join((x["name"], x["description"])) 
                   for x in all_ifps}

In [31]:
ifp_corpus_dict.keys()

dict_keys(['ID 630', 'ID 1070', 'ID 1080', 'ID 1180', 'ID 1210', 'ID 665', 'ID 1190', 'ID 790', 'ID 875', 'ID 695', 'ID 800', 'ID 840', 'ID 1130', 'ID 835', 'ID 1330', 'ID 730', 'ID 1135', 'ID 1220', 'ID 1035', 'ID 1155', 'ID 1315', 'ID 1340', 'ID 1015', 'ID 935', 'ID 855', 'ID 1065', 'ID 1020', 'ID 725', 'ID 625', 'ID 1200', 'ID 815', 'ID 1175', 'ID 950', 'ID 910', 'ID 1325', 'ID 880', 'ID 1230', 'ID 1170', 'ID 930', 'ID 1150', 'ID 735', 'ID 1270', 'ID 1100', 'ID 1090', 'ID 1275', 'ID 1245', 'ID 635', 'ID 980', 'ID 1310', 'ID 1060', 'ID 1235', 'ID 750', 'ID 1367', 'ID 685', 'ID 775', 'ID 805', 'ID 1030', 'ID 785', 'ID 850', 'ID 1225', 'ID 1362', 'ID 1195', 'ID 1205', 'ID 1300', 'ID 1250', 'ID 680', 'ID 765', 'ID 760', 'ID 870', 'ID 1165', 'ID 660', 'ID 1055', 'ID 650', 'ID 655', 'ID 1125', 'ID 845', 'ID 915', 'ID 830', 'ID 1355', 'ID 715', 'ID 925', 'ID 1025', 'ID 1045', 'ID 755', 'ID 975', 'ID 740', 'ID 610', 'ID 1290', 'ID 1240', 'ID 860', 'ID 995', 'ID 985', 'ID 865', 'ID 745', 'ID

In [32]:
ifp_corpus_dict["ID 101"]

KeyError: 'ID 101'

In [None]:
for ifp in ifp_corpus_dict:
    _filename = "{}.txt".format(ifp)
    _path = os.path.join(IFP_CORPUS_PATH, _filename)
    with open(_path, "w") as f:
        f.write(ifp_corpus_dict[ifp])