In [None]:
from google.colab import auth
import gspread
from google.auth import default
import pandas
import random
from numpy import int64

# Addresses that get_or_create_spreadsheet shares newly created spreadsheets with.
owner_emails = [
  'nickreid@gmail.com',
  'pushkala.jayaraman@icahn.mssm.edu',
  'gpratishtha17@gmail.com',
  'teya.dragovic@gmail.com',
]

# Authenticate the Colab user, then build the gspread client (`gc`) from the
# default Google credentials; every later cell talks to Sheets through `gc`.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

def get_or_create_spreadsheet(worksheet_name):
  """Open the spreadsheet called `worksheet_name`, creating it if it is missing.

  Despite the parameter name, the value is a spreadsheet title and the
  returned object is whatever `gc.open` / `gc.create` return.

  A spreadsheet that had to be created is shared with every address in
  `owner_emails` using the 'writer' role before it is returned.
  """
  try:
    return gc.open(worksheet_name)
  except gspread.exceptions.SpreadsheetNotFound:
    created = gc.create(worksheet_name)
    for address in owner_emails:
      created.share(address, perm_type='user', role='writer')
    return created

def _column_letters(position):
  """Return the A1-notation column label for a 1-based column position.

  1 -> 'A', 26 -> 'Z', 27 -> 'AA', 52 -> 'AZ', 53 -> 'BA', ...
  """
  letters = ''
  while position > 0:
    # Column labels are bijective base-26: there is no zero digit, hence the -1.
    position, remainder = divmod(position - 1, 26)
    letters = chr(ord('A') + remainder) + letters
  return letters

def write_A1_query(df, offset=1):
  """Build the A1-notation range that holds `df` plus one header row.

  Args:
    df: DataFrame about to be written; only its column count and row count
      are used.
    offset: 1-based sheet row on which the header row is placed.

  Returns:
    A string such as 'A1:C3': from column A on row `offset` to the column
    matching the last DataFrame column on row `offset + len(df)` (one header
    row followed by `len(df)` data rows).

  The end column is computed rather than looked up in a fixed alphabet, so
  frames wider than 26 columns are supported. A frame with no columns is
  treated as one column wide and yields e.g. 'A1:A1' instead of raising.
  """
  last_column = _column_letters(max(len(df.columns), 1))
  return 'A{}:{}{}'.format(
    offset,
    last_column,
    len(df)+offset
  )

def write_worksheet(df, spreadsheet, worksheet_name):
  """Write `df` (header row followed by data rows) to a tab of `spreadsheet`.

  Args:
    df: DataFrame to write; it is passed through `remove_int64` first.
    spreadsheet: object whose `.worksheet(name)` returns the target tab.
    worksheet_name: name of the tab to update.

  Null cells (per `pandas.isnull`) are written as empty strings. The target
  range comes from `write_A1_query(df)`.
  """
  df = remove_int64(df)
  target = spreadsheet.worksheet(worksheet_name)
  header_row = df.columns.values.tolist()
  data_rows = []
  for record in df.values.tolist():
    data_rows.append(['' if pandas.isnull(cell) else cell for cell in record])
  target.update(
      values = [header_row] + data_rows,
      range_name = write_A1_query(df)
  )

def remove_int64(df):
  """Return a copy of `df` rebuilt with numpy `int64` cells converted to `int`.

  Only cells whose type is exactly `int64` are converted; every other cell is
  carried over unchanged. The index and column labels of `df` are reused.
  """
  converted_rows = []
  for record in df.values:
    converted = []
    for cell in record:
      if type(cell) is int64:
        cell = int(cell)
      converted.append(cell)
    converted_rows.append(converted)
  return pandas.DataFrame(
    converted_rows,
    index = df.index,
    columns = df.columns
  )

In [None]:
# Open (or create) the master spreadsheet, then load its 'Articles' and
# 'Reviewers' tabs as DataFrames, in that order.
yir_spreadsheet = get_or_create_spreadsheet('2026 TBI YIR Spreadsheet')
articles, reviewers = (
    pandas.DataFrame(yir_spreadsheet.worksheet(_tab_name).get_all_records())
    for _tab_name in ('Articles', 'Reviewers')
)

In [None]:
# Update review assignments from each reviewer spreadsheet
#
# For every reviewer whose 'Articles to review' value is positive (`reviewers`
# is loaded in an earlier cell), read the 'Articles to review' tab of their
# personal spreadsheet and merge it into the master 'Review Assignments' tab:
#   * a (PMID, Initials) pair already recorded has its scores/comments updated;
#   * an unrecorded pair is appended as a new row.
yir_spreadsheet = gc.open('2026 TBI YIR Spreadsheet')
review_assignments = pandas.DataFrame(
    yir_spreadsheet.worksheet('Review Assignments').get_all_records()
)

# Columns copied verbatim from the reviewer's sheet.
_article_fields = ['First Author', 'Title', 'Journal']
_review_fields = ['Informatics Novelty', 'Application Importance', 'Presentability', 'Comments']

articles_to_add = []

reviewer_initials = reviewers[reviewers['Articles to review']>0]['Initials'].tolist()
for _initials in reviewer_initials:
  reviewer_spreadsheet_name = '2026 AMIA TBI YIR - {}'.format(_initials)
  # Best effort: a reviewer whose sheet cannot be read is reported and skipped.
  # `Exception` (not a bare except) so that interrupting the kernel still works.
  try:
    reviewer_spreadsheet = gc.open(reviewer_spreadsheet_name)
  except Exception as _error:
    print('Could not open spreadsheet for',_initials, repr(_error))
    continue
  try:
    reviewer_articles_to_screen = pandas.DataFrame(
      reviewer_spreadsheet.worksheet('Articles to review').get_all_records()
    )
  except Exception as _error:
    print('No articles to review worksheet for', _initials, repr(_error))
    continue
  for _, _article in reviewer_articles_to_screen.iterrows():
    if _article['PMID'] == "":
      print('empty PMID for ', _initials)
      continue
    # Boolean masks instead of a string-formatted query: the values are
    # compared as data, so non-numeric PMIDs or quotes in initials cannot
    # break the expression.
    recorded_article = review_assignments[
        (review_assignments['PMID'] == _article['PMID'])
        & (review_assignments['Initials'] == _initials)
    ]
    if len(recorded_article) == 0:
      _new_row = {'PMID': _article['PMID'], 'Initials': _initials}
      for _field in _article_fields + _review_fields:
        _new_row[_field] = _article[_field]
      articles_to_add.append(_new_row)
    elif len(recorded_article) == 1:
      _existing_index = recorded_article.index[0]
      for _field in _review_fields:
        review_assignments.loc[_existing_index, _field] = _article[_field]
    else:
      print('WTF Multiple Records',_initials, _article['PMID'])

updated_assignments = pandas.concat(
    [
    review_assignments,
    pandas.DataFrame(articles_to_add)
    ],
    ignore_index=True
)
# sort_values returns a new frame; the result has to be kept for the sheet to
# actually be written in (Initials, PMID) order.
updated_assignments = updated_assignments.sort_values(['Initials', 'PMID'])
write_worksheet(updated_assignments, yir_spreadsheet, 'Review Assignments')

In [None]:
# Update articles reviewed from review assignments
#
# For each reviewer, count the 'Review Assignments' rows carrying their
# initials in which none of the three score columns is an empty string, and
# store that count in the 'Articles reviewed' column of the 'Reviewers' tab.
yir_spreadsheet = gc.open('2026 TBI YIR Spreadsheet')
review_assignments = pandas.DataFrame(
    yir_spreadsheet.worksheet('Review Assignments').get_all_records()
)
reviewers = pandas.DataFrame(yir_spreadsheet.worksheet('Reviewers').get_all_records())

_score_keys = ['Informatics Novelty', 'Application Importance','Presentability']

for _index, reviewer in reviewers.iterrows():
  _own_reviews = review_assignments[review_assignments['Initials']==reviewer['Initials']]
  # Summing booleans counts the rows whose scores are all filled in.
  reviewed_count = sum(
      all(review[_key]!="" for _key in _score_keys)
      for _, review in _own_reviews.iterrows()
  )
  reviewers.loc[_index, "Articles reviewed"] = reviewed_count

write_worksheet(reviewers, yir_spreadsheet, 'Reviewers')

In [None]:
# Update reviewed articles from screened articles
#
# Append every screened article marked Included == 'Yes' that is not yet on
# the 'Reviewed Articles' tab, then (re)fill the 'PubMed Link' column for all
# rows from `articles` (loaded in an earlier cell).
screened_articles = pandas.DataFrame(
  yir_spreadsheet.worksheet('Screened Articles').get_all_records()
)
reviewed_articles = pandas.DataFrame(
    yir_spreadsheet.worksheet('Reviewed Articles').get_all_records()
)

# A frame built from zero records has no columns at all, so only consult the
# PMID column when it exists; otherwise nothing has been reviewed yet.
if 'PMID' in reviewed_articles:
  _already_reviewed = screened_articles['PMID'].isin(reviewed_articles['PMID'])
else:
  _already_reviewed = pandas.Series(False, index=screened_articles.index)

screened_articles_to_add = screened_articles[
    (screened_articles['Included'] == 'Yes')
    & ~_already_reviewed
]

updated_reviewed_articles = pandas.concat(
    [
      reviewed_articles,
      screened_articles_to_add[[
          'PMID', 'First Author', 'Title', 'Journal'
      ]]
    ],
    ignore_index=True
)

# Build the PMID -> link lookup once instead of rescanning `articles` for
# every row. setdefault keeps the first link seen for a repeated PMID.
_pubmed_links = {}
for _pmid, _link in zip(articles['PMID'], articles['PubMed Link']):
  _pubmed_links.setdefault(_pmid, _link)

updated_reviewed_articles['PubMed Link'] = [
  _pubmed_links.get(pmid, "")
  for pmid in updated_reviewed_articles['PMID']
]
write_worksheet(updated_reviewed_articles, yir_spreadsheet, 'Reviewed Articles')

In [None]:
# Add scores to reviewed articles
#
# For each row of 'Reviewed Articles', gather the 'Review Assignments' rows
# with the same PMID in which none of the three scores is an empty string.
# Articles with at least one such review get: the reviewers' initials, their
# non-empty comments (one per line, prefixed with the initials) and the mean
# of each score. Articles without one are left untouched.
review_assignments = pandas.DataFrame(
    yir_spreadsheet.worksheet('Review Assignments').get_all_records()
)
reviewed_articles = pandas.DataFrame(
    yir_spreadsheet.worksheet('Reviewed Articles').get_all_records()
)

_score_keys = ['Informatics Novelty', 'Application Importance','Presentability']

for index, article in reviewed_articles.iterrows():
  _matching_reviews = review_assignments[review_assignments['PMID']==article['PMID']]
  complete_reviews = [
      {
          'Initials': _review['Initials'],
          'Informatics Novelty': _review['Informatics Novelty'],
          'Application Importance': _review['Application Importance'],
          'Presentability': _review['Presentability'],
          'Comments':_review['Comments']
      }
      for _, _review in _matching_reviews.iterrows()
      if all(_review[_key] != "" for _key in _score_keys)
  ]
  if not complete_reviews:
    continue

  reviewed_articles.loc[index, 'Reviewed By'] = ", ".join(
      r['Initials'] for r in complete_reviews
  )
  reviewed_articles.loc[index, 'Comments'] = "\n".join(
      "({}) {}".format(r['Initials'], r['Comments'])
      for r in complete_reviews
      if r['Comments'] != ""
  )
  for _key in _score_keys:
    _scores = [r[_key] for r in complete_reviews if r[_key] != ""]
    reviewed_articles.loc[index, _key] = pandas.Series(_scores).mean()

write_worksheet(reviewed_articles, yir_spreadsheet, 'Reviewed Articles')

In [None]:
# Assign articles to review
#
# Top each reviewer up to their 'Articles to review' value with randomly
# chosen articles from 'Reviewed Articles' that nobody has been assigned yet,
# then write the combined assignments back to 'Review Assignments'.
reviewed_articles = pandas.DataFrame(
    yir_spreadsheet.worksheet('Reviewed Articles').get_all_records()
)
reviewer_assignments = pandas.DataFrame(
    yir_spreadsheet.worksheet('Review Assignments').get_all_records()
)
reviewers = pandas.DataFrame(yir_spreadsheet.worksheet('Reviewers').get_all_records())
# Uncomment to restrict the run to specific reviewers:
# reviewers = reviewers[reviewers['Initials'].isin(['PJ'])]


for _, reviewer in reviewers.iterrows():
  articles_assigned_to_review = reviewer_assignments[
      reviewer_assignments['Initials'] == reviewer['Initials']
  ]
  articles_to_assign = reviewer['Articles to review'] - len(articles_assigned_to_review)
  if articles_to_assign <= 0:
    continue
  print('Assign {} to {}'.format(articles_to_assign, reviewer['Initials']))

  # An article is eligible only if it appears in no assignment at all; that
  # also rules out the ones this reviewer already holds. The set is rebuilt
  # per reviewer so that assignments made earlier in this loop are seen.
  _assigned_pmids = set(reviewer_assignments['PMID'])
  possible_articles_to_assign = [
      article_id
      for article_id in reviewed_articles['PMID']
      if article_id not in _assigned_pmids
  ]
  if len(possible_articles_to_assign) == 0:
    print('No articles to assign to {}'.format(reviewer['Initials']))
    continue
  if len(possible_articles_to_assign) < articles_to_assign:
    articles_to_assign = len(possible_articles_to_assign)

  # Collect this reviewer's new rows and append them in a single concat
  # rather than growing the frame one row at a time.
  _new_rows = []
  for article_id in random.sample(possible_articles_to_assign, articles_to_assign):
    # Boolean mask instead of a string-built query: works for any PMID value.
    article = reviewed_articles[reviewed_articles['PMID'] == article_id].iloc[0]
    _new_rows.append({
        'PMID': article_id,
        'Title': article['Title'],
        'First Author': article['First Author'],
        'Journal': article['Journal'],
        'PubMed Link': article['PubMed Link'],
        'Initials': reviewer['Initials']
    })
  reviewer_assignments = pandas.concat(
      [reviewer_assignments, pandas.DataFrame(_new_rows)],
      ignore_index=True
  )

reviewer_assignments = reviewer_assignments.sort_values(['Initials', 'PMID'])
write_worksheet(reviewer_assignments, yir_spreadsheet, 'Review Assignments')

# Write screening assignments to individual spreadsheets if missing
#
# For each reviewer appearing in `reviewer_assignments`, make sure their
# personal spreadsheet has an 'Articles to review' tab and append every
# assignment whose PMID is not listed there yet.
for _initials in reviewer_assignments['Initials'].unique():
  reviewer_spreadsheet_name = '2026 AMIA TBI YIR - {}'.format(_initials)
  # Best effort: report and skip reviewers whose spreadsheet cannot be opened.
  # `Exception` (not a bare except) so that interrupting the kernel still works.
  try:
    reviewer_spreadsheet = gc.open(reviewer_spreadsheet_name)
  except Exception as _error:
    print(_initials, 'could not open spreadsheet', repr(_error))
    continue
  # Create the tab only when it is genuinely missing, and keep the handle in
  # both cases so the tab does not have to be looked up a second time.
  try:
    reviewer_worksheet = reviewer_spreadsheet.worksheet('Articles to review')
  except gspread.exceptions.WorksheetNotFound:
    reviewer_worksheet = reviewer_spreadsheet.add_worksheet("Articles to review", 10, 10)
  try:
    existing_review_articles = pandas.DataFrame(
        reviewer_worksheet.get_all_records()
    )
  except Exception as _error:
    print(_initials, 'no existing review articles', repr(_error))
    existing_review_articles = pandas.DataFrame()

  # With no 'PMID' column nothing is listed yet, so every assignment is new.
  if 'PMID' in existing_review_articles:
    _existing_pmids = existing_review_articles['PMID'].tolist()
  else:
    _existing_pmids = []
  _own_assignments = reviewer_assignments[reviewer_assignments['Initials'] == _initials]
  articles_to_add = [
      _article
      for _, _article in _own_assignments.iterrows()
      if _article['PMID'] not in _existing_pmids
  ]
  if articles_to_add:
    reviewer_screening_articles = pandas.concat(
        [
            existing_review_articles,
            pandas.DataFrame(articles_to_add)[[col for col in reviewer_assignments.columns if col != 'Initials']]
        ]
    )
    write_worksheet(reviewer_screening_articles, reviewer_spreadsheet, 'Articles to review')