In [1]:
# import statements
import utils
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [2]:
# Loading data
roberta = SentenceTransformer('stsb-roberta-base')
approval = np.asarray([
    ['Pfizer Applies for Emergency F.D.A. Approval for Covid-19 Vaccine',
     'The drug maker Pfizer said on Friday that it had submitted an application to the Food and Drug Administration to authorize its coronavirus vaccine for emergency use, setting in motion an accelerated regulatory process that could allow the first Americans to get a vaccine by the middle of December.',
    '2020-11-20', 'https://www.nytimes.com/2020/11/20/health/pfizer-covid-vaccine.html'],
    ['Pfizer Vaccine Cleared in U.S., a Landmark in Covid-19 Fight', 
     'Pfizer Inc. gained emergency U.S. authorization for its Covid-19 vaccine on Friday, completing an unprecedented scientific sprint that could eventually help bring an end to a pandemic that has killed nearly 300,000 Americans.',
     '2020-12-11', 'https://www.bloomberg.com/news/articles/2020-12-12/pfizer-covid-vaccine-wins-u-s-fda-emergency-use-authorization'],
    ['FDA Says Pfizer-BioNTech Vaccine Is Safe, Effective',
     'The Food and Drug Administration said the first Covid-19 vaccine being considered for U.S. distribution “met the prescribed success criteria” in a clinical study, paving the way for the agency to green-light distribution as early as this weekend.',
     '2020-12-8', 'https://www.wsj.com/articles/fda-set-to-release-analyses-of-the-pfizer-biontech-covid-19-vaccine-11607423403'],
    ['F.D.A. Clears Pfizer Vaccine, and Millions of Doses Will Be Shipped Right Away', 
     'The Food and Drug Administration authorized Pfizer’s Covid-19 vaccine for emergency use on Friday, clearing the way for millions of highly vulnerable people to begin receiving the vaccine within days.',
     '2020-12-11', 'https://www.nytimes.com/2020/12/11/health/pfizer-vaccine-authorized.html'], 
    ['Covid: FDA approves Pfizer vaccine for emergency use in US', 
     'The US Food and Drug Administration hasa authorised the Pfizer-BioNTech coronavirus vaccine for emergency use.',
     '2020-12-12', 'https://www.bbc.com/news/world-us-canada-55265477']
])

phase3 = np.asarray([
    ['Pfizer, BioNTech’s Covid-19 vaccine shows high efficacy in Phase III study',
    'Pfizer and BioNTech have reported that updated topline results from Phase III study of their Covid-19 vaccine, BNT162b2, demonstrated an efficacy of 91.3% against the disease, measured seven days through up to six months after the second dose.',
    '2021-04-05', 'https://www.pharmaceutical-technology.com/news/pfizer-biontech-vaccine-efficacy/'],
    ['Pfizer/BioNTech says its Covid-19 vaccine is 100% effective and well tolerated in adolescents',
     'Clinical trial results of Pfizer/BioNTech\'s Covid-19 vaccine showed its efficacy is 100% and it is well tolerated in youths ages 12 to 15, the companies said Wednesday.',
     '2021-03-31', 'https://www.cnn.com/2021/03/31/health/pfizer-vaccine-adolescent-trial-results/index.html'], 
    [' Pfizer-BioNTech Announce Positive Topline Results of Pivotal COVID-19 Vaccine Study in Adolescents',
    'In participants aged 12-15 years old, BNT162b2 demonstrated 100% efficacy and robust antibody responses, exceeding those reported in trial of vaccinated 16-25 year old participants in an earlier analysis, and was well tolerated',
    '2021-03-31', 'https://www.businesswire.com/news/home/20210331005503/en/Pfizer-BioNTech-Announce-Positive-Topline-Results-of-Pivotal-COVID-19-Vaccine-Study-in-Adolescents'],
    ['Pfizer/BioNTech vaccine effective after 6 months; Moderna starts variant trial',
     'Pfizer (PFE) and BioNTech\'s (BNTX) COVID-19 vaccine is proving effective six months after the second dose, according to an ongoing analysis of the Phase 3 trial participants.',
     '2020-04-01', 'https://finance.yahoo.com/news/pfizer-bio-n-tech-vaccine-effective-after-6-months-moderna-starts-variant-trial-110722228.html'], 
    ['Pfizer and BioNTech say vaccine prevents Covid-19 in adolescents',
     'Pfizer and BioNTech said Wednesday that their Covid-19 vaccine prevented symptomatic disease and was well-tolerated in a Phase 3 study of adolescents ages 12 to 15.',
     '2020-03-31','https://www.statnews.com/2021/03/31/pfizer-covid19-vaccine-adolescents-data/']
])

phase1 = np.asarray([
    ['U.S. Government Engages Pfizer to Produce Millions of Doses of COVID-19 Vaccine',
     'The U.S. Department of Health and Human Services and the Department of Defense (DoD) today announced an agreement with U.S.-based Pfizer Inc. for large-scale production and nationwide delivery of 100 million doses of a COVID-19 vaccine in the United States following the vaccine’s successful manufacture and approval. The agreement also allows the U.S. government to acquire an additional 500 million doses.',
     '2020-06-22', 'https://www.hhs.gov/about/news/2020/07/22/us-government-engages-pfizer-produce-millions-doses-covid-19-vaccine.html'],
    ['Pfizer and BioNTech Announce Early Positive Update from German Phase 1/2 COVID-19 Vaccine Study, Including First T Cell Response Data',
     'MAINZ, Germany and NEW YORK, July 20, 2020 (GLOBE NEWSWIRE) -- BioNTech SE (Nasdaq: BNTX, “BioNTech” or “the Company”) and Pfizer Inc. (NYSE: PFE) today announced initial data from their ongoing German Phase 1/2, open-label, non-randomized, non-placebo-controlled, dose-escalation trial, that is part of the global mRNA-based vaccine program against SARS-CoV-2.',
     '2020-06-20', 'https://www.globenewswire.com/news-release/2020/07/20/2064351/0/en/Pfizer-and-BioNTech-Announce-Early-Positive-Update-from-German-Phase-1-2-COVID-19-Vaccine-Study-Including-First-T-Cell-Response-Data.html'],
    ['Pfizer Gets $1.95 Billion to Produce Coronavirus Vaccine by Year’s End',
     'WASHINGTON — As nations around the world race to lock up coronavirus vaccines evenbefore they are ready, the Trump administration on Wednesday made one of the largestinvestments yet, announcing a nearly $2 billion contract with Pfizer and a Germanbiotechnology company for 100 million doses by December.',
     '2020-06-22', 'https://www.nytimes.com/2020/07/22/us/politics/pfizer-coronavirus-vaccine.html'],
    ['Covid-19 vaccine from Pfizer and BioNTech shows positive results',
     'An experimental Covid-19 vaccine being developed by the drug giant Pfizer and the biotech firm BioNTech spurred immune responses in healthy patients, but also caused fever and other side effects, especially at higher doses.',
     '2020-06-01', 'https://www.statnews.com/2020/07/01/covid-19-vaccine-from-pfizer-and-biontech-shows-positive-results/'],
    ['Pfizer and BioNTech begin large-scale trial of coronavirus vaccine in the United States',
     'Drug giant Pfizer and its partner BioNTech have begun an advanced trial of one of their experimental coronavirus vaccines in volunteers in the United States.',
     '2020-06-28', 'https://www.cnn.com/2020/07/28/health/pfizer-coronavirus-vaccine-trial-begins-biontech/index.html'],
])

In [89]:
data

Unnamed: 0,title,abstract,time_stamp,url
0,U.S. Government Engages Pfizer to Produce Mill...,The U.S. Department of Health and Human Servic...,2020-06-22,https://www.hhs.gov/about/news/2020/07/22/us-g...
1,Pfizer and BioNTech Announce Early Positive Up...,"MAINZ, Germany and NEW YORK, July 20, 2020 (GL...",2020-06-20,https://www.globenewswire.com/news-release/202...
2,Pfizer Gets $1.95 Billion to Produce Coronavir...,WASHINGTON — As nations around the world race ...,2020-06-22,https://www.nytimes.com/2020/07/22/us/politics...
3,Covid-19 vaccine from Pfizer and BioNTech show...,An experimental Covid-19 vaccine being develop...,2020-06-01,https://www.statnews.com/2020/07/01/covid-19-v...
4,Pfizer and BioNTech begin large-scale trial of...,Drug giant Pfizer and its partner BioNTech hav...,2020-06-28,https://www.cnn.com/2020/07/28/health/pfizer-c...
5,Pfizer Applies for Emergency F.D.A. Approval f...,The drug maker Pfizer said on Friday that it h...,2020-11-20,https://www.nytimes.com/2020/11/20/health/pfiz...
6,"Pfizer Vaccine Cleared in U.S., a Landmark in ...",Pfizer Inc. gained emergency U.S. authorizatio...,2020-12-11,https://www.bloomberg.com/news/articles/2020-1...
7,"FDA Says Pfizer-BioNTech Vaccine Is Safe, Effe...",The Food and Drug Administration said the firs...,2020-12-8,https://www.wsj.com/articles/fda-set-to-releas...
8,"F.D.A. Clears Pfizer Vaccine, and Millions of ...",The Food and Drug Administration authorized Pf...,2020-12-11,https://www.nytimes.com/2020/12/11/health/pfiz...
9,Covid: FDA approves Pfizer vaccine for emergen...,The US Food and Drug Administration has author...,2020-12-12,https://www.bbc.com/news/world-us-canada-55265477


In [3]:
# Creating dataframes
df1 = pd.DataFrame(phase1, columns = ['title', 'abstract', 'time_stamp', 'url'], index = [0, 1, 2, 3 , 4])
df2 = pd.DataFrame(approval, columns = ['title', 'abstract', 'time_stamp', 'url'], index = [5,6,7,8,9])
df3 = pd.DataFrame(phase3, columns = ['title', 'abstract', 'time_stamp', 'url'], index=[10, 11, 12, 13, 14])

In [4]:
v1 = utils.encode([x + ': ' + y for x,y in zip(df1['title'], df1['abstract'])], roberta)
v2 = utils.encode([x + ': ' + y for x,y in zip(df2['title'], df2['abstract'])], roberta)
v3 = utils.encode([x + ': ' + y for x,y in zip(df3['title'], df3['abstract'])], roberta)

In [5]:
data = pd.concat([df1, df2, df3])
v = np.concatenate([v1,v2, v3])

In [7]:
# Vectorizing 

In [None]:
# Retrieving related articles 

In [6]:
# Creating the graph
import networkx as nx

G = nx.Graph()

In [71]:
G.clear()
for root, v0 in zip(data['title'], v):
    for comp, v1 in zip(data['title'], v):
        if utils.doc_sim(v0,v1) > .75:
            G.add_edge(root, comp, weight=utils.doc_sim(v0,v1))

In [72]:
%matplotlib auto
nx.draw(G, with_labels=False)

Using matplotlib backend: MacOSX


In [73]:
from networkx.algorithms import community
from networkx import edge_betweenness_centrality as betweenness

def most_central_edge(G):

    centrality = betweenness(G, weight="weight")

    return max(centrality, key=centrality.get)

comp = community.girvan_newman(G, most_valuable_edge=most_central_edge)

communities =tuple(sorted(c) for c in next(comp))

In [74]:
# Extracting full articles from communities
cdf = []
for com in communities:
    df = pd.DataFrame()
    for art in com:
        x = data.loc[data['title'] == art]
        df = df.append(x)
    cdf.append(df)

In [75]:
# Reducing each community down to an article 
min_coms = [] 
for com in cdf:
    min_time = min(com['time_stamp'])
    df = pd.DataFrame()
    df = df.append(com.loc[com['time_stamp'] == min_time])
    min_coms.append(df)

In [78]:
el = nx.DiGraph()
el.clear()

# Generating Lv and Huang graph 
lv = sorted([(x,list(x['time_stamp'])) for x in min_coms], key = lambda x: x[1])
lv = [x[0] for x in lv]
for i in range(1, len(lv)):
    el.add_edge(list(lv[i-1]['title'])[0], (list(lv[i]['title'])[0]))

In [79]:
# Drawing the graph 
%matplotlib auto
nx.draw_kamada_kawai(el, with_labels=True)

Using matplotlib backend: MacOSX


In [85]:
# Generating NewsChain event line 

# Defining similarity between sub-events 
# Similarity = avg_pairwise_eos 
# Rules for drawing edge, if sim > theta, draw directed edge based on min time 
el = nx.DiGraph()
el.clear()
def pairwise_sim(df1, df2, model):
    t1 = [x + ': ' + y for x in df1['title'] for y in df1['abstract']]
    t2 = [x + ': ' + y for x in df2['title'] for y in df2['abstract']]
    v1 = utils.encode(t1, model)
    v2 = utils.encode(t2, model)
    tot = 0
    for x in v1:
        for y in v2:
            tot += utils.doc_sim(x, y) 
            
    return tot / (len(v1)  + len(v2))

# def pairwise_sim(v1, v2):
#     tot = 0
#     for x in v1:
#         for y in v2:
#             tot += utils.doc_sim(x, y) 
#     return tot / (len(v1) *len(v2))
            
for com1, rep1 in zip(cdf, min_coms):
    for com2, rep2 in zip(cdf, min_coms):
        bruh = pairwise_sim(com1, com2, roberta)
        if bruh > 0.2: 
            if list(rep1['time_stamp'])[0] > list(rep2['time_stamp'])[0]:
                el.add_weighted_edges_from([(str(rep1['title']), str(rep2['title']), bruh)])
            else:
                el.add_weighted_edges_from([(str(rep2['title']), str(rep1['title']), bruh)])

In [86]:
nx.draw(el, with_labels = True)

In [96]:
DG = nx.DiGraph()

DG.add_weighted_edges_from([(1, 2, 0.5), (3, 1, 0.75)])

DG.out_degree(1, weight='weight')


DG.degree(1, weight='weight')
list(min_coms[1]['time_stamp'])[0] > list(min_coms[2]['time_stamp'])[0]

False