# Events Example

In [3]:
import signal_api.signalAPI
import pandas as pd
from itertools import groupby, islice
import os
from tqdm import tqdm
from textwrap import wrap

# Notebook-wide pandas display settings.
# max_colwidth = 0: presumably so long text cells (descriptions, headlines) are
# not truncated in the rendered tables -- confirm against the pandas version in use.
pd.options.display.max_colwidth = 0
# Keep MathJax from processing table contents in the HTML rendering of DataFrames.
pd.set_option('display.html.use_mathjax', False)

## Connect to the API

In [2]:
# `import signal_api.signalAPI` binds only the top-level name `signal_api`;
# the bare name `signalAPI` used below has to be imported explicitly, otherwise
# this cell fails with a NameError on a fresh kernel.
from signal_api import signalAPI

# Credentials come from the environment so they never appear in the notebook.
# A missing variable raises KeyError here rather than failing later.
client_id = os.environ['SIGNAL_API_ID']
client_secret = os.environ['SIGNAL_API_SECRET']

# NOTE: from here on `signal_api` refers to the connection object (it shadows
# the imported package of the same name); every later cell uses it as the client.
signal_api = signalAPI.Connection(client_id=client_id, client_secret=client_secret)

## Search For Entity And Topic IDs 

In [3]:
# Query the entities endpoint by name and keep both the full records (shown
# below) and just their ids (used later to filter events and documents).
long_covid_entities = [*signal_api.entities({'name': 'Long COVID'})]
long_covid_entity_ids = [record['id'] for record in long_covid_entities]
long_covid_entities

[{'id': '1021d2f6-e57a-308c-b232-d6c6f2c2588e',
  'type': 'disease',
  'name': 'Long COVID'}]

In [4]:
# Query the topics endpoint by name and keep both the full records (shown
# below) and just their ids (used later to filter events and documents).
healthcare_topics = [*signal_api.topics({'name': 'Healthcare'})]
healthcare_topic_ids = [record['id'] for record in healthcare_topics]
healthcare_topics

[{'id': 'ec838d2b-49db-457a-b42f-2889fb86b2eb',
  'name': 'Healthcare',
  'description': 'The topic "healthcare" includes all coverage related to the healthcare industry. Keywords include Healthcare, treatment of disease, preventive healthcare, healthcare issues, health insurance, health benefits & coverage, health reforms, publicly funded national healthcare system,  healthcare crisis, mental health care, healthcare quality improvement, healthcare cost containment, healthcare right or privilege, healthcare patient safety, healthcare Infection control practices, pervasive healthcare, health care challenges, healthcare cost, healthcare-associated infections.',
  'private': False},
 {'id': '0d6c8ffc-d2fd-47bc-b304-fff55e76cd19',
  'name': 'Healthcare Crisis',
  'private': False},
 {'id': 'c3f7b9d6-9420-46fb-abb2-490382212189',
  'name': 'Transformation of Healthcare',
  'description': 'Transformation of healthcare is the topic that covers content related to changing healthcare due to adv

## Choose A Set of Entities and Topics

In [5]:
# entities = {
#     'NatWest': 'cae9004a-75c2-49ae-a993-6085bfc59965',
#     'HSBC': 'ab82444f-efed-499c-9c7f-eaa4df9a3479',
#     'Lloyds Bank': 'ea02b0ae-718e-48b9-8fd8-ccfb6988bad1',
#     'Santander Group': 'be600517-817f-48e3-86f3-a9d3233d7cdc',
#     'Santander UK': '5bec1c96-ad87-3804-adc9-0bd1b10588c2',
#     'Barclays': '7de7a01d-259d-47b2-a01e-02ee234591ee',

# }

# # uncomment line below to use all signal entities
# # entities = {}

# topics = {
#     'Corporate fines': 'ef171339-3fc5-4913-834e-33159376303b',
#     'Banking': 'e3d48271-94d7-4502-9a71-f462428ec731'
# }

# # uncomment line below to use all signal topics
# # topics = {} 

## Use the last year as a date range

In [6]:
# The window ends on yesterday (today's daily period minus one) and starts
# 365 daily periods before that.
end_date = pd.Period(pd.to_datetime('today'), freq='D') - 1
start_date = end_date - 365
(start_date, end_date)

(Period('2022-04-04', 'D'), Period('2023-04-04', 'D'))

### Query the Events API

In [7]:
# Base query: only events dated inside [start_date, end_date].
# NOTE(review): "size" presumably caps how many events the endpoint returns -- confirm.
query = {
    "where": {
        "date": {
            "gte": str(start_date),
            "lte": str(end_date)
        },
    },
    "size": 1000
}


query['where']["entities"] = {
    "id": {
        # include events about any of the entities
        "any": long_covid_entity_ids

        # note you can search for events about all of the entities too
        # "all": list(entities.values())
    }
}


query['where']["topics"] = {
    "id": {
        # include events about any of the topics
        "any": healthcare_topic_ids

        # note you can search for events about all of the topics too
        # "all": list(topics.values())
    }
}

response = signal_api.events(query)
# remove results that come from a small number of sources: keep an event only
# when its global source count is at least one fifth of its global story count
response = (event for event in response if event['source-count-global'] >= (event['story-count-global'] / 5))
# take the first 10 events; islice stops consuming the response after the tenth
# match instead of materialising every remaining result just to discard it
events = list(islice(response, 10))

## Define function for searching documents by story id

In [8]:
def search_by_story_id(story_ids: list, entity_ids: list, topic_ids: list, n=3):
    """
    Return one document per story, for at most ``n`` stories.

    Documents are requested through the notebook-level ``signal_api``
    connection, filtered to the given story ids (and optionally to entities
    and topics). The results follow the order of ``story_ids``; for each story
    only the first document returned by the search is kept.

    Args:
        story_ids: story ids to look up; their position defines the ordering
            of the result.
        entity_ids: when non-empty, adds an entities filter matching any of
            these ids with ``'salient-only'`` enabled.
        topic_ids: when non-empty, adds a topics filter matching any of
            these ids.
        n: maximum number of stories (and therefore documents) to return.

    Returns:
        A list of document dicts, at most ``n`` long. Stories for which the
        search returned no document are skipped. An empty ``story_ids`` yields
        an empty list without calling the API.

    Raises:
        KeyError: if the search returns a document whose ``'story-id'`` is not
            in ``story_ids``.
    """
    # Snapshot the ids so a one-shot iterable can be both ranked and sent.
    story_ids = list(story_ids)
    if not story_ids:
        # Nothing to look up; avoid sending an empty "any" filter to the API.
        return []

    # record the ordering of the story_ids
    rank = {story_id: rnk for rnk, story_id in enumerate(story_ids)}

    # query the search endpoint
    # NOTE(review): 'size' is fixed at 500 -- presumably documents beyond that
    # are not returned, so some stories may be missing from the result; confirm.
    params = {
        'where': {
            'story-id': {
                'any': story_ids
            },
        },
        'size': 500
    }

    if entity_ids:
        params['where']['entities'] = {
            'id': {'any': entity_ids},
            'salient-only': True
        }

    if topic_ids:
        params['where']['topics'] = {
            'id': {'any': topic_ids}
        }

    response = (item['document'] for item in signal_api.search(params))

    # put the results in the correct order (the sort is stable, so documents of
    # the same story keep the order in which the search returned them)
    ordered = sorted(response, key=lambda document: rank[document['story-id']])

    # deduplicate the results: one group per story, limited to the first n
    # stories, keeping only the first document of each group
    grouped = islice(groupby(ordered, lambda x: x['story-id']), n)
    return [next(group) for _, group in grouped]

### Get the documents from the Search API

In [9]:
# Attach sample documents to every event (in place, under the 'documents' key),
# restricted to the same entities and topics that were used to find the events.
for event in tqdm(events):
    sample_documents = search_by_story_id(
        story_ids=event['story-ids'],
        entity_ids=long_covid_entity_ids,
        topic_ids=healthcare_topic_ids,
    )
    event['documents'] = sample_documents
    

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:31<00:00,  3.11s/it]


### Display the Results

In [10]:
# Flatten each event into one table row: identifiers, counts, entity names,
# topic names, labels and the headlines of the documents attached earlier.
event_rows = []
for event_record in events:
    event_rows.append({
        'hash': event_record['hash'],
        'date': event_record['date'],
        'story-count-global': event_record['story-count-global'],
        'source-count-global': event_record['source-count-global'],
        'entities': [linked_entity['name'] for linked_entity in event_record['entities']],
        # only the first 3 topic names are shown
        'topics': [linked_topic['name'] for linked_topic in event_record['topics']][:3],
        'labels': event_record['labels'],
        'headlines': [document['title'] for document in event_record['documents']],
    })

events_df = pd.DataFrame(event_rows)
events_df

Unnamed: 0,hash,date,story-count-global,source-count-global,entities,topics,labels,headlines
0,02e5-8f8b9366-932bc9df-b1aa3367-f6f9b7c7,2023-01-12,196,496,"[Long COVID, COVID 19]","[Health Technology, Health Systems, R&D (Kelp) [Kelp Exclusive]]","[mild, long covid, symptoms, long covid symptoms, infection, study, year]","[Study found that symptoms of long COVID disappear within a year, in mild cases, Long COVID symptoms go away within a year, in mild cases, Majority of long covid symptoms in mild cases may disappear after one year | coronavirus]"
1,02c8-d3d8bb2e-0f21c57f-9d4c3e16-fea17fc3,2022-12-14,78,531,"[COVID 19, Long COVID, Centers for Disease Control and Prevention]","[Healthcare Crisis, Health Systems, Public safety]","[long covid, cdc, deaths, 500]","[CDC: Long COVID behind more than 3,500 deaths, CDC: Long COVID behind more than 3,500 deaths, Long Covid has contributed to more than 3,500 deaths in the U.S. since start of pandemic, CDC says]"
2,031d-1fda3b3e-072bc96f-954abb27-efa06fc6,2023-03-09,41,211,"[COVID 19, Long COVID]","[Health Technology, Thought leadership [Kelp Exclusive], Healthcare]","[long covid, covid 19, symptoms]","[Report says long COVID could impact economy, be 'mass disabling event', Report says long COVID could impact economy and be 'mass disabling event' in Canada, Report says long COVID could impact economy and be 'mass disabling event' in Canada]"
3,0245-0b1afd6a-33b3cd86-91ebaa33-edc1ebd2,2022-08-05,40,233,"[COVID 19, Long COVID]","[Medical Research, Health Systems, R&D (Kelp) [Kelp Exclusive]]","[long covid, symptoms, study, infected, coronavirus]","[1 in 8 patients develop long Covid symptoms: Lancet, One in eight COVID patients likely to develop long COVID: Large study, 1 In 8 COVID Patients Likely to Develop Long COVID]"
4,032b-dfdae906-033acdcb-95fbad33-c4a0efe2,2023-03-23,40,82,"[COVID 19, Long COVID]","[Risk Management, Health Technology, Medical Research]","[long covid, risk]","[“Shield” in Long Covid o vaccination - The results of new research, Vaccination Halves Risk of Long COVID, Largest Study to Date Shows, Long Covid Risk Factors Are Tied to Gender, Age and Weight]"
5,02f1-9b9fb90e-133b442d-95caae33-aab16fe1,2023-01-24,39,315,"[Long COVID, COVID 19]","[Labour Market, Employee Remuneration, Health Technology]","[long covid, new york state insurance fund, analysis of workers compensation claims, work, workers compensation]","[Long COVID Sidelined Many American Workers, Long COVID Sidelined Many American Workers, Long COVID Sidelined Many American Workers]"
6,02e4-0b8a93ee-832bcddf-91aa0bf7-e0d9bbc5,2023-01-11,37,349,"[COVID 19, Long COVID]","[R&D (Kelp) [Kelp Exclusive], Health Systems, Health Technology]","[long covid, mild, symptoms, infection, study]","[Long Covid could disappear after a year in case of mild illness, according to a study, Most long covid effects resolve within a year after a mild infection, Most long COVID cases clear up within a year following mild infections, study reveals]"
7,02e8-0d9bb3ee-972be9df-99aa3067-b4f9f7c4,2023-01-15,36,100,"[COVID 19, Long COVID]","[Health Technology, Health Systems, Healthcare]","[long covid, symptoms, mild, study, infection, year, resolve, term]","[Most long-term COVID symptoms disappear in a year for mild cases: study, Long COVID: Most symptoms resolve within a year after mild infection, study finds - National | Globalnews.ca, Long COVID: Most symptoms resolve within a year after mild infection, study finds]"
8,01cb-154b9f06-0f2be9cd-9f4fbf8f-b7b07bf8,2022-04-05,34,734,"[COVID 19, Long COVID, Joe Biden]","[Healthcare, Health Technology, Regulation]","[long covid, biden administration, health and human services, biden, treat]","[Biden administration taking new steps to prevent, detect and treat long Covid, Biden administration launches national research plan to understand and treat long Covid, Biden announces additional steps to treat, research long covid]"
9,0307-5fdbbba6-972b0d9e-9de8be37-ae80fbe1,2023-02-15,33,114,"[COVID 19, Long COVID]","[Health Systems, Healthcare, Health Technology]","[organ damage, long covid patients, long covid, patients, study, symptoms]","[Organ damage persists in nearly 60% of COVID-19 patients for a year after initial diagnosis: study, Organ damage persists in nearly 60% of long-term COVID-19 patients one year after initial diagnosis: study, Organ damage persists in 59% of long Covid patients a year after diagnosis | Health]"


## Search event by hash

In [15]:
# Fetching a single event by its hash returns the event record again; the
# fields used here are its source ids (the full record also carries further
# metadata such as additional story ids).
event_hash = events_df['hash'].iloc[0]
event = signal_api.get_event(event_hash)

# Resolve the first 10 source ids of this event to source names, one lookup
# per source, and display the resulting list.
first_source_ids = event['source-ids'][:10]
[signal_api.get_source(source_id)['source']['name'] for source_id in tqdm(first_source_ids)]



  0%|                                                                                                                                                                                                          | 0/10 [00:00<?, ?it/s][A[A

 10%|███████████████████▍                                                                                                                                                                              | 1/10 [00:00<00:01,  6.15it/s][A[A

 20%|██████████████████████████████████████▊                                                                                                                                                           | 2/10 [00:00<00:01,  6.33it/s][A[A

 30%|██████████████████████████████████████████████████████████▏                                                                                                                                       | 3/10 [00:00<00:01,  6.36it/s][A[A

 40%|█████████████████████████████████████████

['Advertiser (Australia)',
 'Yahoo News UK',
 'Headlines Today',
 'Isle of Wight Radio',
 'Head Topics',
 'Yahoo! Noticias',
 'T13.cl',
 'Radio Essex',
 'Knowledia News (IL)',
 'The Windsor Star']