# NSF Computer and Information Science and Engineering (CISE) 

- `0505****`: Division of Computer and Network Systems (CNS)
- `0501****`: Division of Computing and Communication Foundations (CCF)
- `0502****`: Division of Information and Intelligent Systems (IIS)
- `0509****`: Office of Advanced Cyberinfrastructure (OAC)

In [55]:
import os,sys,json
import core.coworknet, core.nsf_utils
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import networkx as nx
import seaborn as sns
import pandas as pd
from datetime import datetime
from collections import Counter

In [56]:
# Award types kept in the analysis; grants of any other type are filtered out
# when the data is loaded.
grant_type = ["Standard Grant", "Continuing grant"]

# Four-character organization-code prefix -> CISE division name.
div_type = dict([
    ("0505", "Division of Computer and Network Systems (CNS)"),
    ("0501", "Division of Computing and Communication Foundations (CCF)"),
    ("0502", "Division of Information and Intelligent Systems (IIS)"),
    ("0509", "Office of Advanced Cyberinfrastructure (OAC)"),
])

In [57]:
# Merge the records returned by load_numpub_data for every year of interest
# into a single dict, then keep only the award types listed in `grant_type`.
years = [2004]
data = {}
for year in years:
    data.update(core.nsf_utils.load_numpub_data(year))
data = {gid: rec for gid, rec in data.items() if rec["type"] in grant_type}

# Per year: overall number of grants, then the number whose organization code
# starts with each CISE division prefix.
for year in years:
    print("[{}]".format(year))
    print("Total number of grants", sum(1 for rec in data.values() if rec["year"] == year))
    for code, desc in div_type.items():
        matching = [rec for rec in data.values() if rec["org"][:4] == code and rec["year"] == year]
        print(desc, len(matching))

[2004]
Total number of grants 10212
Division of Computer and Network Systems (CNS) 413
Division of Computing and Communication Foundations (CCF) 349
Division of Information and Intelligent Systems (IIS) 365
Office of Advanced Cyberinfrastructure (OAC) 53


In [58]:
# IDs of the grants that belong to one of the CISE divisions in `div_type`
# and list exactly two PIs.
grant_list = [
    gid
    for gid, rec in data.items()
    if rec["org"][:4] in div_type and rec["num_pis"] == 2
]
print(grant_list)

['0401049', '0403020', '0403130', '0403590', '0404001', '0404011', '0404193', '0404297', '0404440', '0406305', '0406345', '0406384', '0410203', '0410246', '0410409', '0410524', '0410553', '0410771', '0410790', '0410918', '0410937', '0411127', '0411152', '0411307', '0412029', '0412858', '0412884', '0412894', '0412930', '0412965', '0413012', '0413078', '0413085', '0413138', '0413196', '0413214', '0413227', '0413251', '0413284', '0413321', '0413328', '0413709', '0414380', '0414409', '0414482', '0414557', '0414567', '0414630', '0414710', '0414762', '0414763', '0414791', '0414815', '0414976', '0414981', '0415016', '0415021', '0415101', '0415175', '0415195', '0415257', '0415865', '0415923', '0416364', '0417607', '0418165', '0420337', '0420434', '0420505', '0420866', '0420873', '0420906', '0420985', '0423014', '0423039', '0423070', '0423253', '0423305', '0423546', '0423584', '0424601', '0426021', '0426557', '0426627', '0426674', '0426879', '0426904', '0427260', '0427385', '0427695', '0428427'

In [59]:
# Same selection as above (CISE division, exactly two PIs); report its size.
grant_list = []
for gid, rec in data.items():
    if rec["org"][:4] in div_type and rec["num_pis"] == 2:
        grant_list.append(gid)
print("{} grants with 2 PIs".format(len(grant_list)))

246 grants with 2 PIs


In [67]:
# Classify every grant in `grant_list` by which PIs appear as authors on the
# publications whose "type" flag is set.
no_publications = []  # publication table is empty
no_edges = []         # never appended to in this cell
other = []            # fits none of the buckets below (e.g. no PI found on any counted paper)
only_one_pi = []      # exactly one distinct PI across the counted publications
two_separate = []     # two distinct PIs, but never together on one publication
two_cowork = []       # two distinct PIs with at least one shared publication
for pid in grant_list:
    award, table, ts, te, G_all = core.coworknet.get_grant_coworknet_pis(pid)
    if len(table) == 0:
        no_publications.append(pid)
        continue
    pis = [a for a in G_all.nodes() if "pi" in G_all.nodes[a] and G_all.nodes[a]["pi"]]
    G = G_all.subgraph(pis)

    # One list of PI author names per publication whose "type" is truthy
    # (per the original note: only in-grant publications are counted).
    working_pis = [
        [a for a, av in v["authors"].items() if av["pi"]]
        for v in table.values()
        if v["type"]
    ]
    reported_pub_number = len(working_pis)

    # Key each PI by first character + last space-separated token of the name;
    # presumably this merges spelling variants of one person -- TODO confirm.
    pi_counter = [set(n[0] + n.split(" ")[-1] for n in group) for group in working_pis]

    # set().union(...) and max(..., default=0) also handle an empty pi_counter
    # (table not empty, but no publication passed the "type" filter), where
    # set.union(*[]) and max([]) would raise. Such grants end up in `other`.
    pi_union = set().union(*pi_counter)
    pi_max_group = max((len(pg) for pg in pi_counter), default=0)

    print("{} union={}, max_group={}, numpub={} {} {}".format(pid, len(pi_union), pi_max_group, reported_pub_number, len(table), len(G.edges)))
    if len(pi_union) == 2 and pi_max_group == 2:
        two_cowork.append(pid)
    elif len(pi_union) == 2:
        two_separate.append(pid)
    elif len(pi_union) == 1:
        only_one_pi.append(pid)
    else:
        other.append(pid)

# Share of each bucket relative to all grants considered; the max() guard keeps
# the summary printable when grant_list is empty.
num_grants = max(len(grant_list), 1)
for bucket, label in (
    (no_publications, "grants have no publication"),
    (only_one_pi, "grants have only one working PI"),
    (two_separate, "grants have two PIs working separately"),
    (two_cowork, "grants have two PIs collaborated"),
    (other, "grants are unknown"),
):
    print("{} ({:.2f}%) {}".format(len(bucket), 100*len(bucket)/num_grants, label))

0404297 union=2, max_group=2, numpub=3 6 5
0410203 union=2, max_group=2, numpub=6 6 1
0410246 union=2, max_group=2, numpub=13 16 4
0410409 union=1, max_group=1, numpub=2 2 0
0410553 union=2, max_group=2, numpub=7 8 3
0410771 union=2, max_group=2, numpub=9 15 9
0410918 union=2, max_group=2, numpub=8 18 12
0410937 union=2, max_group=2, numpub=19 40 27
0411127 union=2, max_group=2, numpub=1 41 41
0411152 union=0, max_group=0, numpub=1 1 0
0411307 union=2, max_group=2, numpub=6 33 29
0412029 union=1, max_group=1, numpub=1 1 0
0412884 union=1, max_group=1, numpub=8 8 0
0412894 union=1, max_group=1, numpub=7 7 0
0412930 union=2, max_group=2, numpub=12 12 1
0413012 union=2, max_group=2, numpub=2 5 4
0413078 union=1, max_group=1, numpub=3 3 0
0413085 union=2, max_group=2, numpub=4 16 15
0413138 union=2, max_group=2, numpub=10 10 1
0413196 union=1, max_group=1, numpub=23 23 0
0413214 union=2, max_group=1, numpub=16 16 0
0413227 union=2, max_group=2, numpub=4 13 12
0413251 union=2, max_group=2, 

In [76]:
# Per-grant results, all keyed by grant id. Only grants whose PI-only subgraph
# has at least one edge are recorded.
collab_type = {}        # "pre/grant/post" flag string, e.g. "011"
dict_period = {}        # (te - ts) in days
dict_amount = {}        # award["awardAmount"]
dict_teams = {}         # summed num_coauthor per phase
dict_publications = {}  # number of PI-PI edges per phase
dict_citations = {}     # summed edge "citation" values per phase
dict_affiliations = {}  # "same" / "intersection" / "diff" / "unknown"
dict_timestamps = {}    # sorted datetimes of the PI-PI edges

phases = ("pre-grant", "grant", "post-grant")


def convert_t(t):
    """Parse a ``YYYY-MM-DDTHH:MM:SS`` timestamp string into a datetime."""
    # The time part is spelled out instead of using %X, whose meaning depends
    # on the active locale.
    return datetime.strptime(t, "%Y-%m-%dT%H:%M:%S")


for grant_id in grant_list:
    award, ptable, ts, te, G_all = core.coworknet.get_grant_coworknet_pis(grant_id)
    pis = [a for a in G_all.nodes() if "pi" in G_all.nodes[a] and G_all.nodes[a]["pi"]]
    G = G_all.subgraph(pis)
    if len(G.edges) == 0:
        continue

    print(grant_id)

    # For every paper: its date and each PI's affiliation on it (None when
    # that PI is not among the paper's authors).
    paper_affiliations = dict()
    for k, p in ptable.items():
        paper_affiliations[k] = {"date": p["date"]}
        for pi in pis:
            paper_affiliations[k][pi] = p["authors"][pi]["affiliation"] if pi in p["authors"] else None

    # Each PI's affiliations in order of paper date.
    papers_by_date = sorted(paper_affiliations.values(), key=lambda x: x["date"])
    pi_affiliations = {pi: [p[pi] for p in papers_by_date] for pi in pis}

    # Merge PI entries by last name token and collapse consecutive repeats, so
    # each list records the sequence of affiliation changes.
    reduced_pi_affiliations = dict()
    for pi in pis:
        sname = pi.split()[-1]
        for aff in pi_affiliations[pi]:
            if aff is None:
                continue
            history = reduced_pi_affiliations.setdefault(sname, [])
            if not history or history[-1] != aff:
                history.append(aff)
    print(reduced_pi_affiliations)

    # Compare the two PIs' affiliation sets.
    affiliation_type = None
    if len(reduced_pi_affiliations) != 2:
        print("*** num of PIs != 2", grant_id)
        affiliation_type = "unknown"
    else:
        a_aff, b_aff = reduced_pi_affiliations.values()
        if set(a_aff) == set(b_aff):
            affiliation_type = "same"
        elif set(a_aff) & set(b_aff):
            affiliation_type = "intersection"
        else:
            affiliation_type = "diff"
        print("***", affiliation_type)
    dict_affiliations[grant_id] = affiliation_type

    publications = dict.fromkeys(phases, 0)
    citations = dict.fromkeys(phases, 0)
    teams = dict.fromkeys(phases, 0)

    # Number of G_all edges per paper id, counted once per grant instead of
    # rescanning every G_all edge for every PI-PI edge.
    paper_edge_counts = Counter(attrs["paper"] for _, _, attrs in G_all.edges.data())

    dates = []
    # The attribute dict is named `edge`, not `data`, so this loop does not
    # overwrite the notebook-level `data` dict of grants that the grant_list
    # cells read.
    for u, v, edge in G.edges.data():
        published = convert_t(edge["date"])
        dates.append(published)
        # NOTE(review): half the number of G_all edges carrying this paper id;
        # presumably a proxy for team size -- confirm against core.coworknet.
        num_coauthor = paper_edge_counts[edge["paper"]] / 2
        if edge["grant"] != "other" or ts <= published <= te:
            # reported publications, or unreported ones inside the grant period
            phase = "grant"
        elif published < ts:
            # unreported publications before the grant period
            phase = "pre-grant"
        elif te < published:
            # unreported publications after the grant period
            phase = "post-grant"
        else:
            continue
        teams[phase] += num_coauthor
        publications[phase] += 1
        citations[phase] += edge["citation"]

    dict_period[grant_id] = (te - ts).days
    dict_amount[grant_id] = award["awardAmount"]
    dict_teams[grant_id] = teams
    dict_publications[grant_id] = publications
    dict_citations[grant_id] = citations
    dict_timestamps[grant_id] = sorted(dates)

# Encode each grant's collaboration pattern as three 0/1 flags
# (pre-grant, grant, post-grant): 1 when that phase has at least one PI-PI edge.
ctypes = []
for gid, v in dict_publications.items():
    collab_type[gid] = "".join("1" if v[phase] > 0 else "0" for phase in phases)
    ctypes.append(collab_type[gid])

print(len([k for k in dict_timestamps.keys() if collab_type[k] != "000"]), "grants - two PIs collaborated")
print(Counter(ctypes))

0404297
{'Hoye': [130238516], 'Kiehl': [130238516]}
*** same
0410203
{'Freeh': [137902535], 'Mueller': [137902535]}
*** same
0410246
{'Tadmor': [12912129], 'Leeser': [12912129]}
*** same
0410553
{'Freudenberg': [27837315], 'Gillespie': [27837315]}
*** same
0410771
{'Lemmon': [107639228], 'Hu': [107639228, 94509681, 107639228]}
*** intersection
0410918
{'Andrews': [138006243], 'Debray': [138006243]}
*** same
0410937
{'Clark': [20089843, 51556381, 20089843], 'Martonosi': [20089843, 51556381, 20089843]}
*** same
0411127
{'Scott': [5388228, 155173764, 5388228], 'Dwarkadas': [5388228, 155173764, 5388228]}
*** same
0411307
{'Vahdat': [36258959, 95457486, 36258959, 95457486, 36258959, 95457486, 36258959], 'Snoeren': [36258959, 95457486, 36258959, 95457486, 36258959, 95457486, 36258959]}
*** same
0412930
{'Caruana': [205783295], 'Joachims': [205783295]}
*** same
0413012
{'Hicks': [72816309], 'Kurzweg': [72816309]}
*** same
0413085
{'Klatzky': [74973139], 'Hollis': [74973139]}
*** same
0413138


In [77]:
# Affiliation relationship of the PI pair for grants with pattern "010":
# PI-PI edges during the grant phase only (none pre-grant, none post-grant).
print(Counter(
    dict_affiliations[gid]
    for gid, pattern in collab_type.items()
    if pattern == "010"
))

Counter({'same': 9, 'unknown': 4, 'intersection': 4, 'diff': 3})


In [78]:
# Affiliation relationship of the PI pair for grants with pattern "011":
# PI-PI edges in the grant and post-grant phases, none pre-grant.
print(Counter(
    dict_affiliations[gid]
    for gid, pattern in collab_type.items()
    if pattern == "011"
))

Counter({'same': 9, 'intersection': 3, 'diff': 1})


In [79]:
# Affiliation relationship of the PI pair for grants with pattern "110":
# PI-PI edges in the pre-grant and grant phases, none post-grant.
print(Counter(
    dict_affiliations[gid]
    for gid, pattern in collab_type.items()
    if pattern == "110"
))

Counter({'same': 8, 'intersection': 7, 'unknown': 1})


In [80]:
# Affiliation relationship of the PI pair for grants with pattern "111":
# PI-PI edges in all three phases (pre-grant, grant and post-grant).
print(Counter(
    dict_affiliations[gid]
    for gid, pattern in collab_type.items()
    if pattern == "111"
))

Counter({'same': 23, 'intersection': 11, 'diff': 2, 'unknown': 1})
