# How are University of Washington (UW) researchers collaborating with people around the globe?



In [1]:
uw_id = "https://ror.org/00cvxb145"

uwfaculty:

```sql

select    p.Paper_ID, a.Normalized_name, a.Display_name, a.Author_ID, paa.Affiliation_ID, a.Last_known_affiliation_ID,

p.year, p.date, p.Journal_ID, j.Display_name

from PaperAuthorAffiliations_2 paa

left join Papers p on paa.Paper_ID = p.Paper_ID

left join Authors a on paa.Author_ID = a.Author_ID

left join Journals j on p.Journal_ID = j.Journal_ID

where (p.Paper_ID is not NULL and (paa.Affiliation_ID = 201448701 or a.Last_known_affiliation_ID = 201448701))

```

non-uwcollaborators:

```sql


select paa.Paper_ID, a.Normalized_name, a.Display_name, paa.Affiliation_ID, a.Last_known_affiliation_ID

from PaperAuthorAffiliations_2 paa

left join Authors a on paa.Author_ID = a.Author_ID

where (paa.Paper_ID in (select tp.Paper_ID from tmp_uwpaperid tp))  and

             (paa.Affiliation_ID not in (0, 201448701)) and a.Last_known_affiliation_ID != 201448701

```

In [2]:
# specify endpoint
endpoint = 'works'

# build the 'filter' parameter
filters = ",".join((
    f'institutions.ror:{uw_id}',
    'from_publication_date:2003-01-01',
    # 'is_paratext:false',
    # 'type:journal-article', 
))

# put the URL together
filtered_works_url = f'https://api.openalex.org/{endpoint}?filter={filters}'
print(f'complete URL with filters:\n{filtered_works_url}')

complete URL with filters:
https://api.openalex.org/works?filter=institutions.ror:https://ror.org/00cvxb145,from_publication_date:2003-01-01


In [3]:
import requests

In [4]:
r = requests.get(filtered_works_url)
r.json()

{'meta': {'count': 246540,
  'db_response_time_ms': 124,
  'page': 1,
  'per_page': 25},
 'results': [{'id': 'https://openalex.org/W2142225512',
   'doi': 'https://doi.org/10.1177/1049732305276687',
   'title': 'Three Approaches to Qualitative Content Analysis',
   'display_name': 'Three Approaches to Qualitative Content Analysis',
   'publication_year': 2005,
   'publication_date': '2005-11-01',
   'ids': {'openalex': 'https://openalex.org/W2142225512',
    'doi': 'https://doi.org/10.1177/1049732305276687',
    'mag': '2142225512',
    'pmid': 'https://pubmed.ncbi.nlm.nih.gov/16204405'},
   'primary_location': {'is_oa': False,
    'landing_page_url': 'https://doi.org/10.1177/1049732305276687',
    'pdf_url': None,
    'source': {'id': 'https://openalex.org/S32648145',
     'display_name': 'Qualitative Health Research',
     'issn_l': '1049-7323',
     'issn': ['1049-7323', '1552-7557'],
     'host_organization': 'https://openalex.org/P4310320017',
     'type': 'journal'},
    'license

In [5]:
cursor = '*'

select = ",".join((
    'id',
    'ids',
    'title',
    'display_name',
    'publication_year',
    'publication_date',
    'primary_location',
    'open_access',
    'authorships',
    'cited_by_count',
    'is_retracted',
    'is_paratext',
    'updated_date',
    'created_date',
))

# loop through pages
works = []
loop_idx = 0
while cursor:
    
    # set cursor value and request page from OpenAlex
    url = f'{filtered_works_url}&select={select}&cursor={cursor}'
    page_with_results = requests.get(url).json()
    
    results = page_with_results['results']
    works.extend(results)

    # update cursor to meta.next_cursor
    cursor = page_with_results['meta']['next_cursor']
    loop_idx += 1
    if loop_idx in [5, 10, 20, 50] or loop_idx % 100 == 0:
        print(f'{loop_idx} api requests made so far')
print(f'done. made {loop_idx} api requests. collected {len(works)} works')

5 api requests made so far
10 api requests made so far
20 api requests made so far
50 api requests made so far
100 api requests made so far
200 api requests made so far
300 api requests made so far
400 api requests made so far
500 api requests made so far
600 api requests made so far
700 api requests made so far
800 api requests made so far
900 api requests made so far
1000 api requests made so far
1100 api requests made so far
1200 api requests made so far
1300 api requests made so far
1400 api requests made so far
1500 api requests made so far
1600 api requests made so far
1700 api requests made so far
1800 api requests made so far
1900 api requests made so far
2000 api requests made so far
2100 api requests made so far
2200 api requests made so far
2300 api requests made so far
2400 api requests made so far
2500 api requests made so far
2600 api requests made so far
2700 api requests made so far
2800 api requests made so far
2900 api requests made so far
3000 api requests made so fa

In [6]:
import pickle
with open('uw_works_since_2003.pickle', 'wb') as outf:
    pickle.dump(works, outf, protocol=pickle.HIGHEST_PROTOCOL)

In [14]:
sum([work['is_paratext'] is True for work in works])

52

In [15]:
works[0]

{'id': 'https://openalex.org/W2142225512',
 'ids': {'openalex': 'https://openalex.org/W2142225512',
  'doi': 'https://doi.org/10.1177/1049732305276687',
  'mag': '2142225512',
  'pmid': 'https://pubmed.ncbi.nlm.nih.gov/16204405'},
 'title': 'Three Approaches to Qualitative Content Analysis',
 'display_name': 'Three Approaches to Qualitative Content Analysis',
 'publication_year': 2005,
 'publication_date': '2005-11-01',
 'primary_location': {'is_oa': False,
  'landing_page_url': 'https://doi.org/10.1177/1049732305276687',
  'pdf_url': None,
  'source': {'id': 'https://openalex.org/S32648145',
   'display_name': 'Qualitative Health Research',
   'issn_l': '1049-7323',
   'issn': ['1049-7323', '1552-7557'],
   'host_organization': 'https://openalex.org/P4310320017',
   'type': 'journal'},
  'license': None,
  'version': None},
 'open_access': {'is_oa': False, 'oa_status': 'closed', 'oa_url': None},
 'authorships': [{'author_position': 'first',
   'author': {'id': 'https://openalex.org/A2

In [20]:
import pandas as pd
data = []
for work in works:
    for authorship in work['authorships']:
        if authorship:
            author = authorship['author']
            author_id = author['id'] if author else None
            author_name = author['display_name'] if author else None
            author_position = authorship['author_position']
            for institution in authorship['institutions']:
                if institution:
                    institution_id = institution['id']
                    institution_name = institution['display_name']
                    institution_country_code = institution['country_code']
                    data.append({
                        'work_id': work['id'],
                        'work_title': work['title'],
                        'work_display_name': work['display_name'],
                        'work_publication_year': work['publication_year'],
                        'work_publication_date': work['publication_date'],
                        'author_id': author_id,
                        'author_name': author_name,
                        'author_position': author_position,
                        'institution_id': institution_id,
                        'institution_name': institution_name,
                        'institution_country_code': institution_country_code,
                    })
df = pd.DataFrame(data)

In [21]:
df

Unnamed: 0,work_id,work_title,work_display_name,work_publication_year,work_publication_date,author_id,author_name,author_position,institution_id,institution_name,institution_country_code
0,https://openalex.org/W2142225512,Three Approaches to Qualitative Content Analysis,Three Approaches to Qualitative Content Analysis,2005,2005-11-01,https://openalex.org/A2642964564,Hsiu Ching Laura Hsieh,first,https://openalex.org/I64045040,Fooyin University,TW
1,https://openalex.org/W2142225512,Three Approaches to Qualitative Content Analysis,Three Approaches to Qualitative Content Analysis,2005,2005-11-01,https://openalex.org/A2111315299,Sarah E. Shannon,last,https://openalex.org/I201448701,University of Washington,US
2,https://openalex.org/W2963037989,"You Only Look Once: Unified, Real-Time Object ...","You Only Look Once: Unified, Real-Time Object ...",2016,2016-06-27,https://openalex.org/A2392241600,Joseph Redmon,first,https://openalex.org/I201448701,University of Washington,US
3,https://openalex.org/W2963037989,"You Only Look Once: Unified, Real-Time Object ...","You Only Look Once: Unified, Real-Time Object ...",2016,2016-06-27,https://openalex.org/A2310010008,Santosh K. Divvala,middle,https://openalex.org/I2945602774,Allen Institute for Artificial Intelligence,US
4,https://openalex.org/W2963037989,"You Only Look Once: Unified, Real-Time Object ...","You Only Look Once: Unified, Real-Time Object ...",2016,2016-06-27,https://openalex.org/A2473549963,Ross Girshick,middle,https://openalex.org/I2252078561,Facebook,IL
...,...,...,...,...,...,...,...,...,...,...,...
1632158,https://openalex.org/W99980466,LASIK: Early Postoperative Complications,LASIK: Early Postoperative Complications,2008,2008-12-01,https://openalex.org/A2180837181,José L. Güell,middle,https://openalex.org/I4210131277,Instituto de Microcirugía Ocular,ES
1632159,https://openalex.org/W99980466,LASIK: Early Postoperative Complications,LASIK: Early Postoperative Complications,2008,2008-12-01,https://openalex.org/A2111176233,Merce Morral,middle,https://openalex.org/I4210131277,Instituto de Microcirugía Ocular,ES
1632160,https://openalex.org/W99980466,LASIK: Early Postoperative Complications,LASIK: Early Postoperative Complications,2008,2008-12-01,https://openalex.org/A2077428892,Oscar Gris,middle,https://openalex.org/I4210131277,Instituto de Microcirugía Ocular,ES
1632161,https://openalex.org/W99980466,LASIK: Early Postoperative Complications,LASIK: Early Postoperative Complications,2008,2008-12-01,https://openalex.org/A2127986247,Javier Gaytan,middle,https://openalex.org/I4210131277,Instituto de Microcirugía Ocular,ES


In [26]:
def international_collab(gdf):
    if all(gdf['institution_country_code'] == 'US'):
        return False
    else:
        return True
df_international_collab = df.groupby('work_id').apply(international_collab)

In [34]:
df_international_collab.value_counts().sort_index()

False    152031
True      94506
dtype: int64

In [28]:
def outside_uw_collab(gdf):
    if all(gdf['institution_id'] == 'https://openalex.org/I201448701'):
        return False
    else:
        return True
df_outside_uw_collab = df.groupby('work_id').apply(outside_uw_collab)

In [35]:
df_outside_uw_collab.value_counts().sort_index()

False     96886
True     149651
dtype: int64