# Events Example

In [1]:
import signal_api.signalAPI as signalAPI
import pandas as pd
from itertools import groupby, islice
import os
from tqdm import tqdm
from textwrap import wrap

# Display options for the result tables rendered later in the notebook.
# NOTE(review): a max_colwidth of 0 appears intended to stop long headline text
# from being truncated - confirm; pandas documents None as the "unlimited" value.
pd.options.display.max_colwidth = 0
# Turn off MathJax processing of table contents in the HTML output.
pd.set_option('display.html.use_mathjax', False)

## Connect to the API

In [2]:
# Read the API credentials from the environment (a missing variable raises
# KeyError) so that no secret is stored in the notebook itself.
client_id, client_secret = (
    os.environ[variable] for variable in ('SIGNAL_API_ID', 'SIGNAL_API_SECRET')
)
signal_api = signalAPI.Connection(client_id=client_id, client_secret=client_secret)

## Search For Entity And Topic IDs 

In [3]:
# Look up entities by name to find the ID to filter on.
entity_lookup = {'name': 'Long COVID'}
list(signal_api.entities(entity_lookup))

[{'id': '1021d2f6-e57a-308c-b232-d6c6f2c2588e',
  'type': 'disease',
  'name': 'Long COVID'}]

In [4]:
# Look up topics by name to find the IDs to filter on.
topic_lookup = {'name': 'Healthcare'}
list(signal_api.topics(topic_lookup))

[{'id': 'ec838d2b-49db-457a-b42f-2889fb86b2eb',
  'name': 'Healthcare',
  'description': 'The topic "healthcare" includes all coverage related to the healthcare industry. Keywords include Healthcare, treatment of disease, preventive healthcare, healthcare issues, health insurance, health benefits & coverage, health reforms, publicly funded national healthcare system,  healthcare crisis, mental health care, healthcare quality improvement, healthcare cost containment, healthcare right or privilege, healthcare patient safety, healthcare Infection control practices, pervasive healthcare, health care challenges, healthcare cost, healthcare-associated infections.',
  'private': False},
 {'id': '0d6c8ffc-d2fd-47bc-b304-fff55e76cd19',
  'name': 'Healthcare Crisis',
  'private': False},
 {'id': 'c3f7b9d6-9420-46fb-abb2-490382212189',
  'name': 'Transformation of Healthcare',
  'description': 'Transformation of healthcare is the topic that covers content related to changing healthcare due to adv

## Search events with the following filters (Entities, Topics, Labels and Sources)

In [5]:
# Filters applied to the events query.
# `entities`, `topics` and `sources` map a display name to a Signal ID; only the
# IDs (the dict values) are sent to the API. An empty filter is left out of the
# query altogether.
entities = {
    'Long Covid': '1021d2f6-e57a-308c-b232-d6c6f2c2588e',
}

# uncomment the line below to use all signal entities
# entities = {}

topics = {
    'Healthcare': 'ec838d2b-49db-457a-b42f-2889fb86b2eb',
    'Healthcare Crisis': '0d6c8ffc-d2fd-47bc-b304-fff55e76cd19',
    'Transformation of Healthcare': 'c3f7b9d6-9420-46fb-abb2-490382212189'
}

# uncomment the line below to use all signal topics
# topics = {}

# Labels are plain strings (no IDs), so they are kept in a list. A list keeps
# the same type whether it is empty or populated, whereas `{}` is an empty dict
# that would become a set once an entry is uncommented.
# Uncomment an entry to search events for specific labels.
# A label can contain a maximum of 6 words.
labels = [
    # 'analysis of workers compensation claims',
]

# uncomment entries to search for events mentioned by specific sources
sources = {
    # 'Yahoo News UK': '008a1423-3f16-4de1-8b7e-b0ee2b793655',
    # 'Washington Post': '38663047-4bae-42e8-9474-bd6201f1f492',
}

## Use the last year as a date range

In [6]:
# Date window for the query, at day resolution: it ends yesterday and starts
# LOOKBACK_DAYS before that.
LOOKBACK_DAYS = 365
today = pd.to_datetime('today').to_period('D')
end_date = today - 1
start_date = end_date - LOOKBACK_DAYS
(start_date, end_date)

(Period('2022-04-05', 'D'), Period('2023-04-05', 'D'))

### Query the Events API

In [7]:
# Build the Events API query: the date window plus every non-empty filter.
query = {
    "where": {
        "date": {
            "gte": str(start_date),
            "lte": str(end_date)
        },
    },
    "size": 1000
}

if entities:
    query['where']["entities"] = {
        "id": {
            # include events about any of the entities
            "any": list(entities.values())

            # note you can search for events about all of the entities too
            # "all": list(entities.values())
        }
    }

if topics:
    query['where']["topics"] = {
        "id": {
            # include events about any of the topics
            "any": list(topics.values())

            # note you can search for events about all of the topics too
            # "all": list(topics.values())
        }
    }

if labels:
    # labels are plain strings, so they are passed directly (no "id" level)
    query['where']["labels"] = {
        # include events about any of the labels
        "any": list(labels)

        # note you can search for events about all of the labels too
        # "all": list(labels)
    }

if sources:
    query['where']["sources"] = {
        "id": {
            # include events about any of the sources
            "any": list(sources.values())

            # note you can search for events about all of the sources too
            # "all": list(sources.values())
        }
    }


response = signal_api.events(query)
# remove results that come from a small number of sources:
# keep an event only if its source count is at least a fifth of its story count
response = (event for event in response if event['source-count-global'] >= (event['story-count-global'] / 5))
# take the first 10 events; islice stops reading the response as soon as it has
# them, rather than building a list of every matching event and then slicing it
# NOTE(review): this only saves API work if signal_api.events yields lazily - confirm
events = list(islice(response, 10))

## Define a function for searching documents by story ID

In [8]:
def search_by_story_id(story_ids: list, entity_ids: list, topic_ids: list, source_ids: list = None,  n=3):
    """
    Fetch representative documents for the given stories from the search endpoint.

    Returns at most ``n`` documents: one document for each of the first ``n``
    stories (taken in ``story_ids`` order) for which the search returned a
    document. Stories with no matching document are skipped.

    Parameters
    ----------
    story_ids : list
        Story ids to look up; their order defines the order of the result.
    entity_ids : list
        If non-empty, restrict the search to documents matching any of these
        entity ids (sent with ``'salient-only': True``).
    topic_ids : list
        If non-empty, restrict the search to documents matching any of these
        topic ids.
    source_ids : list, optional
        If given and non-empty, restrict the search to these source ids.
    n : int
        Maximum number of stories (and therefore documents) to return.

    Returns
    -------
    list
        Document dicts, each belonging to a different story. Empty when
        ``story_ids`` is empty.
    """
    story_ids = list(story_ids)
    if not story_ids:
        # Nothing to look up. Skip the API call: it would be sent with an empty
        # story filter, and any document it returned would have no rank below.
        return []

    # record the ordering of the story_ids
    rank = {story_id: rnk for rnk, story_id in enumerate(story_ids)}

    # query the search endpoint
    params = {
        'where': {
            'story-id': {
                'any': story_ids
            },
        },
        'size': 500
    }

    # each optional filter is only added when it has values
    if entity_ids:
        params['where']['entities'] = {
            'id': {'any': entity_ids},
            'salient-only': True
        }

    if topic_ids:
        params['where']['topics'] = {
            'id': {'any': topic_ids}
        }

    if source_ids:
        params['where']['source'] = {
            'id': {'any': source_ids}
        }

    response = (item['document'] for item in signal_api.search(params))

    # put the results in the requested story order; sorted() is stable, so
    # documents of the same story keep the order the API returned them in
    ordered = sorted(response, key=lambda document: rank[document['story-id']])

    # deduplicate: keep the first document of each story, for the first n stories
    grouped = islice(groupby(ordered, lambda x: x['story-id']), n)
    return [next(group) for _, group in grouped]

### Get the documents from the Search API

In [9]:
# The entity/topic/source filters are identical for every event, so build them
# once here and vary only the story ids per call.
document_filters = {
    'entity_ids': list(entities.values()),
    'topic_ids': list(topics.values()),
}
if sources:
    document_filters['source_ids'] = list(sources.values())

# Store the documents found for each event on the event itself, under 'documents'.
for event in tqdm(events):
    event['documents'] = search_by_story_id(story_ids=event['story-ids'], **document_filters)
    

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:47<00:00,  4.72s/it]


### Display the Results

In [10]:
def _event_summary(event):
    """Flatten one event, with its attached documents, into a single table row."""
    entity_names = [entity['name'] for entity in event['entities']]
    topic_names = [topic['name'] for topic in event['topics']]
    headlines = [doc['title'] for doc in event['documents']]
    return {
        'hash': event['hash'],
        'date': event['date'],
        'story-count-global': event['story-count-global'],
        'source-count-global': event['source-count-global'],
        'entities': entity_names,
        'topics': topic_names[:3],  # just use the first 3 topics
        'labels': event['labels'],
        'headlines': headlines,
    }


# one row per event, in the same order as `events`
events_df = pd.DataFrame([_event_summary(event) for event in events])
events_df

Unnamed: 0,hash,date,story-count-global,source-count-global,entities,topics,labels,headlines
0,02e5-8f8b9366-932bc9df-b1aa3367-f6f9b7c7,2023-01-12,192,479,"[COVID 19, Long COVID]","[Health Technology, Health Systems, Healthcare]","[mild, long covid, symptoms, long covid symptoms, infection, year, study]","[Study found that symptoms of long COVID disappear within a year, in mild cases, Long COVID symptoms go away within a year, in mild cases, Majority of long covid symptoms in mild cases may disappear after one year | coronavirus]"
1,023b-1b1bd948-b7b7c757-99ef6a79-e4c1f6a3,2022-07-26,189,420,"[COVID 19, Long COVID]","[Health Technology, Health Systems, Medical Research]","[long covid, symptoms, hair loss, dysfunction]","[News Hair Loss and Low Sex Drive Added to List of Long Covid Symptoms Research from the University of Birmingham has revealed that symptoms of long COVID are broader than expected and include hair loss and sexual dysfunction., Hair loss and sexual dysfunction are new long COVID symptoms, Hair loss, low sex drive among long Covid symptoms: Study]"
2,02c8-53c99b2e-8f21c57f-9d403e16-fea17fc3,2022-12-14,173,815,"[COVID 19, Long COVID, Centers for Disease Control and Prevention]","[Healthcare Crisis, Health Systems, Health Technology]","[long covid, deaths, cdc, 500, centers for disease control and prevention]","[CDC: Long COVID behind more than 3,500 deaths, CDC: Long COVID behind more than 3,500 deaths, CDC: Long COVID behind more than 3,500 deaths]"
3,023a-1b1bd148-97b36756-99ef6e65-e6e1f7cb,2022-07-25,103,604,"[COVID 19, Long COVID]","[Health Technology, Health Systems, Healthcare]","[hair loss, symptoms, long covid symptoms, long covid, study]","[Hair and libido loss join fatigue and brain fog among wider list of Long COVID symptoms, Hair loss and decreased sex drive on broader list of long Covid symptoms - study, Hair and libido loss join fatigue and brain fog among wider list of Long Covid symptoms]"
4,026c-9bcaf816-133dc9ee-9856e927-b5f0f7d6,2022-09-13,98,603,"[COVID 19, Long COVID, Europe]","[Health Technology, Health Systems, Healthcare]","[long covid, 17 million people in europe, symptoms, years of the pandemic, 17 million, world health organization]","[17 million people in Europe got long Covid in the first two years of the pandemic, WHO: 17 million Europeans have long covid symptoms, WHO says 17 million people in Europe have long Covid symptoms]"
5,0323-5fcffbaa-1733c957-916abbbb-ada17fc7,2023-03-15,67,348,"[Long COVID, COVID 19]","[Health Technology, Healthcare, Health Systems]",[long covid],"[Legislation would improve research and boost access to resources and education regarding the condition, Wednesday March 15, 2023 Markey, Kaine, Duckworth Reintroduce Bill to Help Millions of Americans Living With Long COVID, Legislation would improve research and boost access to resources and education regarding the condition]"
6,022e-1203195a-8237d8f7-106faea1-be80baae,2022-07-13,57,92,"[COVID 19, Long COVID]","[Health Technology, Drug Trials , Healthcare]","[long covid, blood washing, unproven, patients, private clinics, abroad, germany and switzerland, blood, treatment]","[Long covid patients are seeking experimental “blood washing” treatment abroad, Long Covid patients 'travelling abroad for experimental blood-washing treatment', Long Covid sufferers resorting to buying 'costly unproven blood treatment abroad']"
7,022d-5307384a-9337c8bf-9d69ae25-ae90ffaf,2022-07-12,45,107,"[Long COVID, COVID 19]","[Health Technology, Drug Trials , Healthcare]","[long covid, blood, patients, unproven, treatments, washing, long covid symptoms]","[Long Covid Patients Are Seeking Experimental ""Blood Washing"" Treatment Abroad, Investigation Finds, Long COVID patients are seeking experimental 'blood washing' treatment abroad, investigation finds, Long-term COVID patients seek “blood lavage” treatment abroad.]"
8,0267-575bb92a-b72f4ddf-9d6abe27-eca42bc1,2022-09-08,43,347,"[Long COVID, COVID 19]","[Mental Health, Health Technology, Medical Research]","[long covid, stress, psychological distress, depression anxiety, harvard chan school, infection, study, covid, risk, loneliness, researchers]","[Psychological distress increases long COVID-19 risk: Harvard study, Stress Before COVID Infection Could Raise Odds for Long COVID, Stress Before COVID Infection Could Raise Odds for Long COVID]"
9,0275-5b42d302-862be1b7-9dad330f-cec8efd3,2022-09-22,42,262,"[COVID 19, Long COVID]","[Health Technology, Immunology, Medical Research]","[long covid, canadian study, signs of autoimmune, long covid patients, autoimmune diseases, autoimmune disease, lupus, antibodies]","[Canadian study builds on link between long COVID and autoimmune diseases, Canadian study builds on link between long COVID and autoimmune diseases like lupus, Canadian study builds on link between long COVID and autoimmune diseases like lupus]"


## Retrieve an event by hash

In [11]:
# You can retrieve an event by hash to get additional metadata about the event such as source ids 
# and additional story ids

# Use the hash of the first event in the table.
event_hash = events_df['hash'].iloc[0]
event = signal_api.get_event(event_hash)

# show the first 10 sources that mentioned this event
# (each source name is fetched with its own get_source call)
first_source_ids = event['source-ids'][:10]
[signal_api.get_source(source_id)['source']['name'] for source_id in tqdm(first_source_ids)]

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.66it/s]


['Advertiser (Australia)',
 'Yahoo News UK',
 'Headlines Today',
 'Isle of Wight Radio',
 'Head Topics',
 'Yahoo! Noticias',
 'T13.cl',
 'Radio Essex',
 'Knowledia News (IL)',
 'The Windsor Star']