In [None]:
# Dependencies
import requests
from requests.exceptions import HTTPError
import json
import pandas as pd
import numpy as np
import re
import io
from config import api_key_prism
from collections import OrderedDict
from pandas.io.json import json_normalize  
import time 
import urllib.request
from pprint import pprint
import pickle

### Get Collections Data from DigitalHub

In [None]:
## Load the migration-plan CSV listing the DigitalHub Collections that will become Prism Communities

digitalhub_community_path = "data/2022_08-01 DigitalHub Collection Migration Plan_Communities Only.csv"

## Read the CSV file into a Pandas DataFrame.
## The file is decoded as ISO-8859-1, and the literal strings 'NULL' and '<NA>' are read as missing values.
digitalhub_community_df = pd.read_csv(
    digitalhub_community_path,
    encoding="ISO-8859-1",
    na_values=['NULL', '<NA>'],
)

## Normalise the column names: trim whitespace, lower-case, spaces -> underscores, drop parentheses.
## regex=False makes every replacement a literal one. "(" and ")" are not valid regular
## expressions on their own, and the default value of `regex` differs between pandas versions.
digitalhub_community_df.columns = (
    digitalhub_community_df.columns
    .str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
)
digitalhub_community_df.head()

## Checked: No problems

In [None]:
## Pull the "dh_id" column out of the migration-plan frame and turn it into a plain Python list

digitalhub_community_series = digitalhub_community_df.loc[:, "dh_id"]
digitalhub_community_list = digitalhub_community_series.tolist()

## Show how many IDs were found, followed by the IDs themselves
print(len(digitalhub_community_list))
print(digitalhub_community_list)

## Checked: No problems

In [None]:
## Loop through the list of DigitalHub Collection IDs (i.e. Communities) and use urllib.request to get each collection's JSON
## test_list = ['2cc92425-b656-47ea-a3b4-825405ee6088', 'a86e1412-d72c-4cae-b8ca-16fd834cb128','fc389d13-2430-409b-82fd-a4b26613d350']

multi_digitalhub_community_list = []      # parsed JSON dict for every collection fetched successfully
digitalhub_community_problem_list = []    # IDs whose request or JSON parsing failed
api_response_list = []                    # one {'dh_id', 'Response'} record per ID, success or failure

for item in digitalhub_community_list:
    print(item)

    ## Create the response record BEFORE the request, so each failure handler below
    ## writes to this ID's own record rather than a record left over from the previous iteration.
    api_response_dict = {'dh_id': item}

    try:
        with urllib.request.urlopen(f"https://digitalhub.northwestern.edu/collections/{item}.json" ) as url:
            single_digitalhub_community_dict = json.loads(url.read().decode())

    ## HTTPError is a subclass of URLError, so it has to be caught first.
    except urllib.error.HTTPError as http_err:
        digitalhub_community_problem_list.append(item)
        print(f'HTTP error occurred: {http_err}')  # Python 3.6

        ## Store the error as text; the log is later turned into a DataFrame and exported to Excel
        api_response_dict['Response'] = str(http_err)

    except urllib.error.URLError as url_err:
        digitalhub_community_problem_list.append(item)
        ## NOTE(review): the message says the loop exits, but the loop carries on with the next ID - confirm which is intended.
        print(f'URL error occurred: {url_err}. ', 'Exiting the loop!')  # Python 3.6

        api_response_dict['Response'] = str(url_err)

    except json.JSONDecodeError as json_err:
        digitalhub_community_problem_list.append(item)
        print(f'JSON Decode error occurred: {json_err}. ', 'Poorly formed JSON.')  # Python 3.6

        api_response_dict['Response'] = str(json_err)

    except Exception as err:
        ## Deliberate best-effort: record anything unexpected and keep going with the remaining IDs
        digitalhub_community_problem_list.append(item)
        print(f'Other error occurred: {err}. ')  # Python 3.6

        api_response_dict['Response'] = str(err)

    else:
        multi_digitalhub_community_list.append(single_digitalhub_community_dict)

        success_message = "Success"
        print(success_message)

        ## Add success message to api response dict
        api_response_dict['Response'] = success_message

        ## Pause between successful requests so the server is not hammered
        time.sleep(1)

    ## Exactly one record is logged per ID, whatever the outcome
    api_response_list.append(api_response_dict)


## Resources
## https://docs.python.org/3/library/urllib.request.html

## Checked: No problems

In [None]:
## Turn the per-ID response log from the DigitalHub collection requests into a DataFrame for inspection

api_response_df = pd.DataFrame(data=api_response_list)
# api_response_df.head(50)

## Checked: No problems

In [None]:
## Write the response log to an Excel workbook, keeping both the Pandas index and the header row

api_response_df.to_excel(
    "outputs/digitalhub_api_response_df.xlsx",
    header=True,
    index=True,
)

## Checked: No problems

In [None]:
## Inspect the results of the URL query for the DigitalHub Collections that will become communities in Prism.
## Only the IDs that failed are printed; the full list of fetched JSON is left commented out.

# print(multi_digitalhub_community_list)
print(digitalhub_community_problem_list)

## Checked: No problems

In [None]:
## Create a dataframe from the fetched DigitalHub JSON for the Collections that will become communities in Prism.
## Nested objects are flattened one level deep (max_level=1).
## pd.json_normalize is the supported entry point since pandas 1.0; it already returns a DataFrame,
## so the extra DataFrame.from_dict wrapper is not needed.
## NOTE: this rebinds digitalhub_community_df, which until now held the migration-plan CSV.

digitalhub_community_df = pd.json_normalize(multi_digitalhub_community_list, max_level=1)
# digitalhub_community_df.head(10)

## Checked: No problems

In [None]:
## Load the locally saved .txt files holding the JSON metadata of the problem Communities.
## Forward slashes are used so the paths resolve on Windows, macOS and Linux alike.

## Problem with: cfda59b0-7d3c-4aa7-9f5e-53665849d624
northwesternelements_path = "data/community/northwesternelements.txt"

## Problem with: ce39f2be-9a64-4717-973b-ff531c2a93ee
communicationbridge_path = "data/community/communicationbridge.txt"

## Problem with: f2bf6e1d-0e32-4ce2-a52e-bb0522d5708d
nucatsgrantsrepository_path = "data/community/nucatsgrantsrepository.txt"

## Problem with: 97a2913d-45b8-458c-82eb-5111a94b6c9f
preventionmethodology_path = "data/community/preventionmethodology.txt"

problem_dict_list = []

path_list = [northwesternelements_path, communicationbridge_path, nucatsgrantsrepository_path, preventionmethodology_path]
for path in path_list:
#     print(path)
    ## Decode explicitly as UTF-8 (the standard JSON encoding) instead of the platform default
    with open(path, encoding="utf-8") as f:
        problem_dict = json.load(f)
        problem_dict_list.append(problem_dict)

# print(problem_dict_list)

## Checked: No problems

In [None]:
## Flatten the manually loaded problem-community JSON one level deep (max_level=1).
## pd.json_normalize is the supported entry point since pandas 1.0 and already returns a DataFrame.
problem_df = pd.json_normalize(problem_dict_list, max_level=1)
# problem_df.head()

## Checked: No problems

In [None]:
## Stack the rows of problem_df underneath the DigitalHub Community DataFrame,
## then renumber the rows 0..n-1 (the old index is discarded).

digitalhub_community_df = pd.concat(
    [digitalhub_community_df, problem_df],
    axis=0,
).reset_index(drop=True)
# digitalhub_community_df.head(10)

## Checked: No problems

In [None]:
#### NOT NEEDED FOR RUN THROUGH DIGITALHUB, just for adding to "desired" organization for migrating to Prism

## Create some communities

def _blank_community_record(title, record_id, members):
    """Build one hand-made community row.

    Every descriptive field is an empty list; only 'Title', 'Id' and 'members'
    carry data. Keys are inserted in the same order as the DigitalHub-style
    records used elsewhere in this notebook, so the resulting DataFrame columns
    keep that order. A new empty list is created for each blank field on every
    call, so records never share list objects.

    Parameters:
        title: display title of the community.
        record_id: value stored under 'Id'.
        members: list of member dicts (each with an 'Id' and optionally a 'DOI').

    Returns:
        dict with 24 keys, ready to be passed to pd.DataFrame in a list.
    """
    blank_fields = (
        'Keyword', 'Resource type(s)', 'Rights', 'Creator', 'Contributor',
        'Description', 'Abstract', 'Original Bibliographic Citation',
        'Related URL', 'Publisher', 'Date Created', 'Original Identifier',
        'Language', 'Subject: MESH', 'Subject: LCSH',
        'Subject: Geographic Name', 'Subject: Name', 'Location',
        'Digital Origin',
    )
    record = {'Multi-page?': [], 'Title': title}
    for field in blank_fields:
        record[field] = []
    record['Id'] = record_id
    record['uri'] = []
    record['members'] = members
    return record


added_communities_df = pd.DataFrame([
    _blank_community_record(
        'Galter Library Audio-Video Archives',
        'galter-library-audio-video-archives',
        [{'Id': 'cece380d-4dee-4e4e-aa97-28cb1d4f6b19'},
         {'Id': 'student-life'},
         {'Id': 'paul-de-kruif-interviews'},
         {'Id': 'b1546649-c60e-441b-9042-ec7a27adaf66', 'DOI': '10.18131/G3P44X'}],
    ),
    _blank_community_record(
        'History of Feinberg School of Medicine',
        'history-of-feinberg-school-of-medicine',
        # NOTE(review): DOI values mix the 'doi:10...' and bare '10...' forms, and
        # Id 3f4ac3b8-6c05-4b4d-978e-f5ab7388743b is listed twice - confirm both are intended.
        [{'Id': '2f75r807r'},  # Special Collections: art
         {'Id': 'areybook', 'DOI': '10.18131/G39735'},  # Northwestern University Medical School 1859-1979
         {'Id': '5712m6524'},  # Special Collections: photos from the vault
         {'Id': '6d56zw644', 'DOI': 'doi:10.18131/G3S01B'},
         {'Id': '01d4023f-da66-45c4-8987-af7badda959a', 'DOI': 'doi:10.18131/G3BG7D'},
         {'Id': 'h702q639b', 'DOI': 'doi:10.18131/G3P88T'},
         {'Id': '47429913s', 'DOI': '10.18131/G39G6Q'},
         {'Id': '6m311p28w', 'DOI': '10.18131/G3K01G'},
         {'Id': 'pr76f340k', 'DOI': '10.18131/G3F590'},
         {'Id': 'f670ed27-d344-4cb1-aa34-e3339f2992d5', 'DOI': '10.18131/G3889G'},
         {'Id': 'aa85a365-c493-42cf-922c-21ab24407a1e', 'DOI': '10.18131/G3K605'},
         {'Id': 'c85b38a8-e367-4ef4-8c0a-35a94c479dc6', 'DOI': '10.18131/G3FG8Q'},
         {'Id': 'e13451f1-01ff-4462-acbf-2a87eb2312e2', 'DOI': '10.18131/G3WP5P'},
         {'Id': '3f4ac3b8-6c05-4b4d-978e-f5ab7388743b', 'DOI': '10.18131/G3461H'},
         {'Id': '7d8b5c55-9d80-40a5-992f-7bbadd466d5b', 'DOI': '10.18131/G38W3D'},
         {'Id': '506cbdca-cd9d-43fd-9be7-4b1b912782e5', 'DOI': '10.18131/G3XW4P'},
         {'Id': '695a2e0f-428f-41ea-acd8-66486a1e292b', 'DOI': '10.18131/G38P6G'},
         {'Id': 'ae887507-5e30-4129-91c2-dda6f8e5f944', 'DOI': '10.18131/G3DK72'},
         {'Id': '9cc06089-1a82-4378-ad19-b93dbbd402cb', 'DOI': '10.18131/G3P726'},
         {'Id': '9f47bff1-33b4-4d63-88b2-75aab10b84bb', 'DOI': '10.18131/G3CP6D'},
         {'Id': '0a5c7f42-7b10-443e-985a-b773a04e15ba', 'DOI': '10.18131/G3T31P'},
         {'Id': 'd520d291-a3ab-431c-a992-6a931779ff31', 'DOI': '10.18131/G37W33'},
         {'Id': 'b6971153-c8bd-4f03-9917-d68659a89784', 'DOI': '10.18131/G3390V'},
         {'Id': '752b7c7f-ec96-4635-af07-34d4b36dac78', 'DOI': '10.18131/G3VC9S'},
         {'Id': '3f4ac3b8-6c05-4b4d-978e-f5ab7388743b', 'DOI': '10.18131/G3461H'},
         {'Id': '9k41zd48h', 'DOI': '10.18131/G32P4V'}],
    ),
    _blank_community_record(
        'Researchers Collections',
        'researchers-collections',
        # NOTE(review): Id 96fc0e70-98e7-4e49-9784-8aa6942fc2a6 is listed twice - confirm this is intended.
        [{'Id': 'e5e1683f-5075-4afd-9eba-2a36fc981414'},  ## Previous: e1683f-5075-4afd-9eba-2a36fc981414
         {'Id': '7badb7c9-d4ec-4ca9-b58e-6e01f224fcf7'},
         {'Id': 'd0798568-47c3-453e-ae39-242d8a96b1dc'},
         {'Id': '913f8fa2-06c9-49e1-9cdf-0f88118b18da'},
         {'Id': '8s45q876k'},
         {'Id': '3ca02e5e-83be-4ea7-b51f-11aee3497e6c'},
         {'Id': 'afec3d3f-5ee6-468a-b8b4-80ab6d0402ac'},
         {'Id': 'ea926798-0e47-4441-b159-8af916499af3'},
         {'Id': '6b2bc47e-a3da-4222-8b8b-39e3b2832648'},
         {'Id': 'fj236212d'},
         {'Id': 'ed5f344a-8a48-4b50-9843-895634e5cd6a'},
         {'Id': 'kw52j804p'},
         {'Id': '96fc0e70-98e7-4e49-9784-8aa6942fc2a6'},
         {'Id': 'ae47e062-d7f8-49c0-8ffa-ce86fb2855ca'},
         {'Id': '91a4a3c2-e9f8-4540-85bb-5b69923106c0'},
         {'Id': '09e7110d-7677-4b82-985a-a7c26ac46b57'},
         {'Id': '55da1441-ddee-4f57-9c17-d371f78f2ed4'},
         {'Id': '9aa727f0-29d0-44af-ae9f-1018208cec89'},
         {'Id': 'd50c6f56-2600-4e67-ba4f-ee681eeae64c'},
         {'Id': '19673087-b6a5-4108-b285-9614aa8b6b95'},
         {'Id': 'rb68xb84x'},
         {'Id': '0d944080-d8ec-4386-abb0-c5ca34d2a3f5'},
         {'Id': 'a0deab15-7c16-4c52-86f8-80c96a2fb888'},
         {'Id': 'e0338411-7829-49ac-8fdc-cd17b7307474'},
         {'Id': '2dd287f0-9748-41f7-8fab-222db450d196'},
         {'Id': '5f1b1739-512f-4015-98bc-22f37f42af7b'},
         {'Id': '96fc0e70-98e7-4e49-9784-8aa6942fc2a6'},
         {'Id': '2d8b503f-203c-48ce-ac34-d0c976997761'},
         {'Id': 'aa31fe0c-41ec-46fd-82c0-405f168a5606'},
         {'Id': '1d4cede9-d8d6-4576-994d-91d36bd15b0b'},
         {'Id': '74d22173-8b26-4d34-b0a0-8b7b15bca6f8'}],
    ),
])

added_communities_df.head()
## Checked: No problems  

In [None]:
#### NOT NEEDED FOR RUN THROUGH DIGITALHUB, just for adding to "desired" organization for migrating to Prism

## Stack the hand-made rows of added_communities_df underneath the DigitalHub Community DataFrame,
## then renumber the rows 0..n-1 (the old index is discarded).

digitalhub_community_df = pd.concat(
    [digitalhub_community_df, added_communities_df],
    axis=0,
).reset_index(drop=True)
# digitalhub_community_df.head(10)

## Checked: No problems

In [None]:
## Mark every row of digitalhub_community_df as a community

digitalhub_community_df["Level Type"] = "Community"

## The level number is written as text first and then converted to an integer, value by value
digitalhub_community_df["Level Number"] = "1"
digitalhub_community_df["Level Number"] = digitalhub_community_df["Level Number"].apply(int)

## Keep the current row position (the index) as an ordinary column

digitalhub_community_df["community_rowid"] = digitalhub_community_df.index

## Copy the "Id" column into a new column called Community_ID

digitalhub_community_df["Community_ID"] = digitalhub_community_df.loc[:, "Id"]

## Checked: No problems

In [None]:
# ## Inspect the "members" column and create a list of member IDs

# row_member_list = []
# column_member ={}
# count_member = {}

# for k, v in digitalhub_community_df["members"].items():
#     for value in v: 
#         member = value["Id"]
#         row_member_list.append(member)
#     column_member[k] = row_member_list
#     count_member[k] = len(row_member_list)
#     row_member_list =[]
#     row_member_count = []

# ## Append the column member dictionary to the DigitalHub Community DF dataframe

# digitalhub_community_df['Member_List'] = digitalhub_community_df.index.map(column_member)
# digitalhub_community_df['Member_List_Count'] = digitalhub_community_df.index.map(count_member)
# digitalhub_community_df.head()

# ## Checked: No problems

In [None]:
#### NOT NEEDED FOR RUN THROUGH DIGITALHUB, just for adding to "desired" organization for migrating to Prism

###########################################################################
##### Add private records to communities || Add sub_collections to communities#####
###########################################################################


######################################
### Add new subcollections to GHSL###
#####################################

## Add subcollections to GHSL: 
# 8c1d851c-a5e6-4790-a867-fa889e66630e - med subject headings
# 91294b2e-34e4-46ac-9086-be17c40d0d01 - operation saving lives
# 8a281ff6-dd0e-4a02-8486-97c3bc7058c4 - pursuit of a grand cause
# k0698748f - daniel hale
# d526f63d-f10a-423d-bb81-bc32cc70b427 - notable women
# ec202d45-992f-4f21-b3ea-d02703ca7621 - men behind

def _blank_subcollection_record(title, record_id, members, doi=None):
    """Build one hand-made sub-collection record to append to a community's members.

    Every descriptive field is an empty list; only 'Title', 'Id', 'members'
    and (optionally) 'DOI' carry data. Keys are inserted in a fixed order,
    with 'DOI' last when present. A new empty list is created for each blank
    field on every call, so records never share list objects.

    Parameters:
        title: display title of the sub-collection.
        record_id: value stored under 'Id'.
        members: list of member dicts, each with an 'Id'.
        doi: optional list of DOI strings; when None, no 'DOI' key is added.

    Returns:
        dict describing the sub-collection.
    """
    blank_fields = (
        'Keyword', 'Resource type(s)', 'Rights', 'Creator', 'Contributor',
        'Description', 'Abstract', 'Original Bibliographic Citation',
        'Related URL', 'Publisher', 'Date Created', 'Original Identifier',
        'Language', 'Subject: MESH', 'Subject: LCSH',
        'Subject: Geographic Name', 'Subject: Name', 'Location',
        'Digital Origin',
    )
    record = {'Multi-page?': [], 'Title': title}
    for field in blank_fields:
        record[field] = []
    record['Id'] = record_id
    record['uri'] = []
    record['members'] = members
    if doi is not None:
        record['DOI'] = doi
    return record


add_to_series = [
    _blank_subcollection_record(
        'Operation Saving Lives: Northwestern as the 12th General Hospital During WWI',
        '91294b2e-34e4-46ac-9086-be17c40d0d01',
        [{'Id': 'ec5cbb80-61dd-47a4-87eb-c5bacd211e90'},
         {'Id': '5185bcc8-bc82-4e9b-aeca-a6aec040a8eb'},
         {'Id': '710ff4c3-8f1e-480d-baf9-a425ea936534'},
         {'Id': 'c1cacdba-9705-4420-b58b-02e9975cc02b'},
         {'Id': '4f0d22bb-84f9-41aa-a834-a9fa1609d1ac'}],
    ),
    _blank_subcollection_record(
        'Medical Subject Headings-Library of Congress Subject Headings Mapping Data',
        '8c1d851c-a5e6-4790-a867-fa889e66630e',
        # previously: 'Id':'91294b2e-34e4-46ac-9086-be17c40d0d01s',
        [{'Id': '3e59c5b9-bbbf-4f49-946f-e08ef9b10d9f'},
         {'Id': '4b7f6a77-9cf4-4deb-b9aa-f3f49391bcc8'}],
    ),
    _blank_subcollection_record(
        'In Pursuit of a Grand Cause',
        '8a281ff6-dd0e-4a02-8486-97c3bc7058c4',
        [{'Id': 'f9da4c66-fab8-4ecf-9862-8b016ed0a124'},
         {'Id': 'f4a757f6-a617-496b-b040-df0b27ff8cf2'},
         {'Id': '20dcde30-f1f6-4386-9620-6fa5b1cb3467'},
         {'Id': '66708bc0-e8d0-4b44-8057-c0b8dbaa0566'},
         {'Id': '2320576d-a6c8-422f-a074-b34180d15442'},
         {'Id': '33e474af-6e46-4bac-9e42-96dbf4bcb51e'}],
    ),
    # The three records below have no members of their own; they carry a DOI instead.
    # NOTE(review): the DOI strings are formatted inconsistently ('doi: 10...' with a space
    # versus 'doi:10...') - confirm which form is expected downstream.
    _blank_subcollection_record(
        'Daniel Hale Williams, Surgeon, Educator & Medical Advocate',
        'k0698748f',
        [],
        doi=['doi: 10.18131/G3HS3J'],
    ),
    _blank_subcollection_record(
        'Notable Women of the Womans Medical School',
        'd526f63d-f10a-423d-bb81-bc32cc70b427',
        [],
        doi=['doi: 10.18131/G3BF1S'],
    ),
    _blank_subcollection_record(
        'Men Behind the Women at the Womans Medical School at Northwestern University Medical School',
        'ec202d45-992f-4f21-b3ea-d02703ca7621',
        [],
        doi=['doi:10.18131/g3-px5e-7r70'],
    ),
]



def _merge_members_by_id(existing_members, new_members):
    """Return existing_members plus every new member whose 'Id' is not present yet.

    Members are compared by their 'Id' value rather than by whole-dict equality,
    so a sub-collection that is already listed is not appended a second time and
    re-running the cell does not keep growing the list. The inputs are not
    modified; a new list is returned.

    Parameters:
        existing_members: list of member dicts currently stored on the community row.
        new_members: list of member dicts to append.

    Returns:
        list with the existing members first, followed by the new ones in order.
    """
    merged = list(existing_members)
    known_ids = [member.get('Id') for member in merged if isinstance(member, dict)]
    for candidate in new_members:
        candidate_id = candidate.get('Id')
        if candidate_id in known_ids:
            continue
        merged.append(candidate)
        known_ids.append(candidate_id)
    return merged


## Identify the series that this list needs to be added to (GHSL is 'fj2362114')
current_series = digitalhub_community_df.loc[digitalhub_community_df['Id'] == 'fj2362114','members']
# print(current_series.item()[0])

current_list = current_series.tolist()

## Add items from add_to_series list to current_series.
## .item() raises ValueError unless exactly one row matched the Id.
ghsl_result = _merge_members_by_id(current_series.item(), add_to_series)
print(len(ghsl_result))
# print(ghsl_result)

## Add GHSL result back to member.
## The column must be object dtype so that a single cell can hold a list.
## The row is addressed by the label found through the Id lookup, not by a hard-coded position.
digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
digitalhub_community_df.at[current_series.index[0],'members'] = ghsl_result

#################################################################
### Add new subcollections to Biostatistics Collaboration Core###
################################################################

## Add subcollections to BCC: 
## 2735afc8-70e9-43db-a85f-3ccac4d18e61 - Statistically Speaking Lecture Series 2021-2022
## Give new name: 2021-2022
## BCC is: 2cc92425-b656-47ea-a3b4-825405ee6088

add_to_series_2 = [{'Multi-page?':[], 
                  'Title':'2021-2022',
                  'Keyword':[], 
                  'Resource type(s)':[], 
                  'Rights':[],
                  'Creator':[], 
                  'Contributor':[], 
                  'Description':[], 
                  'Abstract':[],
                  'Original Bibliographic Citation':[], 
                  'Related URL':[], 
                  'Publisher':[],
                  'Date Created':[], 
                  'Original Identifier':[], 
                  'Language':[], 
                  'Subject: MESH':[],
                  'Subject: LCSH':[], 
                  'Subject: Geographic Name':[], 
                  'Subject: Name':[],
                  'Location':[],
                  'Digital Origin':[], 
                  'Id':'2735afc8-70e9-43db-a85f-3ccac4d18e61',                                           
                  'uri':[],
                  'members':[{'Id':'2788f1e6-90b5-4aca-aef8-4a600dbc786b'},
                              {'Id':'baad30fa-7f5d-47e3-b810-9252794d76f1'},
                              {'Id':'c8255e80-6d6c-4780-83e5-dd8b81ece87f'},
                              {'Id':'a21f49f2-2236-49d1-8a66-6193a7837ef5'}] 
                
                 }]


## Identify the series that this list needs to be added to
## "DOI":["doi:10.18131/g3-g89w-rg50"]
current_series_2 = digitalhub_community_df.loc[digitalhub_community_df['Id'] == '2cc92425-b656-47ea-a3b4-825405ee6088','members']
# print(current_series_2.item()[0])

current_list_2 = current_series_2.tolist()

## Add items from add_to_series_2 list to current_series_2
bcc_result = _merge_members_by_id(current_series_2.item(), add_to_series_2)
print(len(bcc_result))
# print(bcc_result)

## Add BCC result back to member, again on the row found through the Id lookup

digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
digitalhub_community_df.at[current_series_2.index[0],'members'] = bcc_result
                              

######################################
### Add new subcollections to CBITs###
#####################################

# add_to_series_3 = {"Multi-page?":false,
#                    "Title":"CBITs IRB Materials",
#                    "Keyword":["Digital Mental Health","IRB"],
#                    "Resource type(s)":[],
#                    "Rights":[],
#                    "Creator":[],
#                    "Contributor":[],
#                    "Description":"This collection will serve as a repository of submitted and approved IRB materials for research projects within the Center for Behavioral Intervention Technologies (CBITs). ",
#                    "Abstract":[],
#                    "Original Bibliographic Citation":[],
#                    "Related URL":["https://digitalhub.northwestern.edu/collections/2e61510b-939f-4637-9fd9-c31f1013c661"," http://cbits.northwestern.edu/"],
#                    "Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],
#                    "Date Created":["12/2/2020"],
#                    "Original Identifier":[],
#                    "Language":[],
#                    "Subject: MESH":[],
#                    "Subject: LCSH":[],
#                    "Subject: Geographic Name":[],
#                    "Subject: Name":[],
#                    "Location":[],
#                    "Digital Origin":[],
#                    "Id":"27114af7-6444-4f3c-8de1-3e9c5dc73e95",
#                    "uri":"https://digitalhub.northwestern.edu/collections/27114af7-6444-4f3c-8de1-3e9c5dc73e95",
#                    "members":[{"Title":["\"ACTS Process Evaluation Project\" Human Research Determination Form"],"Resource type(s)":["Study Design"],"Keyword":["Technology Enabled Services","Digital Mental Health","Human Research Determination Form"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":null,"Original Identifier":null,"Language":null,"Subject: MESH":null,"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-g89w-rg50"],"ARK":null,"Id":"e92db21d-e121-4b32-b6c8-353727e2675f","File Size":54220,"File Format":["msword (Microsoft Word Document, OpenDocument Text, Office Open XML Document)"],"uri":"https://digitalhub.northwestern.edu/files/e92db21d-e121-4b32-b6c8-353727e2675f","download":"https://digitalhub.northwestern.edu/downloads/e92db21d-e121-4b32-b6c8-353727e2675f"},
#                               {"Title":["\"Examining the switch to remote-delivered mental health services among college counseling center clinicians\" Consent.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Student Health Services","Mental Health Services","Telemedicine"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-3qga-6c80"],"ARK":null,"Id":"9ae9f025-f11d-4d7b-b721-e6f957ce78f4","File Size":100919,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/9ae9f025-f11d-4d7b-b721-e6f957ce78f4","download":"https://digitalhub.northwestern.edu/downloads/9ae9f025-f11d-4d7b-b721-e6f957ce78f4"},
#                               {"Title":["\"Examining the switch to remote-delivered mental health services among college counseling center clinicians\" Protocol.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","IRB Protocol"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020-06-26"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Student Health Services","Mental Health Services","Counseling--methods","Telemedicine"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-zbz3-6989"],"ARK":null,"Id":"aaf1a825-6927-4df7-be64-4b0cb0ec6157","File Size":102338,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/aaf1a825-6927-4df7-be64-4b0cb0ec6157","download":"https://digitalhub.northwestern.edu/downloads/aaf1a825-6927-4df7-be64-4b0cb0ec6157"},{"Title":["\"Examining the switch to remote-delivered mental health services among college counseling center clinicians\" Recruitment Email.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Recruitment"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health Services","Telemedicine","Student Health Services","Counseling--methods"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-90n7-rn16"],"ARK":null,"Id":"b7681fcb-d62b-471e-a4c1-e1846962a008","File Size":79594,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/b7681fcb-d62b-471e-a4c1-e1846962a008","download":"https://digitalhub.northwestern.edu/downloads/b7681fcb-d62b-471e-a4c1-e1846962a008"},{"Title":["\"Examining the switch to remote-delivered mental health services among college counseling center clinicians\" Survey.pdf"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Survey"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health Services","Student Health Services","Telemedicine"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-p4mj-c759"],"ARK":null,"Id":"599b5800-878b-43ee-a322-a2c448139786","File Size":103118,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/599b5800-878b-43ee-a322-a2c448139786","download":"https://digitalhub.northwestern.edu/downloads/599b5800-878b-43ee-a322-a2c448139786"},
#                               {"Title":["\"Investigation of Care Managed Patient Experience and Interest in Technology Enabled Mental Health Care Delivery\" Consent.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Technology Enabled Services","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":null,"Original Identifier":null,"Language":null,"Subject: MESH":null,"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-qk7e-wv85"],"ARK":null,"Id":"39a0c5c0-55c8-4c65-b62a-10234b8fbf5a","File Size":77731,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/39a0c5c0-55c8-4c65-b62a-10234b8fbf5a","download":"https://digitalhub.northwestern.edu/downloads/39a0c5c0-55c8-4c65-b62a-10234b8fbf5a"},{"Title":["\"Investigation of Care Managed Patient Experience and Interest in Technology Enabled Mental Health Care Delivery\" Email Script.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Technology Enabled Services","Recruitment"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health Services","Telemedicine"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-snd8-kj17"],"ARK":null,"Id":"47e976ca-79db-441e-8e20-d9848a9b6b6e","File Size":37445,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/47e976ca-79db-441e-8e20-d9848a9b6b6e","download":"https://digitalhub.northwestern.edu/downloads/47e976ca-79db-441e-8e20-d9848a9b6b6e"},{"Title":["\"Investigation of Care Managed Patient Experience and Interest in Technology Enabled Mental Health Care Delivery\" Recruitment Script.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Technology Enabled Services","Recruitment"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health Services","Telemedicine"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-xcpm-sm06"],"ARK":null,"Id":"1164b65b-7dac-48b4-9c31-2936a4da9b91","File Size":17740,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/1164b65b-7dac-48b4-9c31-2936a4da9b91","download":"https://digitalhub.northwestern.edu/downloads/1164b65b-7dac-48b4-9c31-2936a4da9b91"},{"Title":["\"Investigation of Care Managed Patient Experience and Interest in Technology Enabled Mental Health Care Delivery\" Screening Measure.pdf"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Technology Enabled Services","Screening"],"Rights":null,"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health Services","Telemedicine"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-s639-q225"],"ARK":null,"Id":"a9e3ab90-1a67-438b-8a37-8208c909d851","File Size":144509,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/a9e3ab90-1a67-438b-8a37-8208c909d851","download":"https://digitalhub.northwestern.edu/downloads/a9e3ab90-1a67-438b-8a37-8208c909d851"},{"Title":["\"Investigation of Care Managed Patient Experience and Interest in Technology Enabled Mental Health Care Delivery\" Study Protocol.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","IRB Protocol","Technology Enabled Services"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health Services","Telemedicine"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-prvq-mf27"],"ARK":null,"Id":"6cc12e96-e189-40c7-b473-7592e85b33c0","File Size":129937,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/6cc12e96-e189-40c7-b473-7592e85b33c0","download":"https://digitalhub.northwestern.edu/downloads/6cc12e96-e189-40c7-b473-7592e85b33c0"},{"Title":["\"Self-Management and Care Collaboration for Perinatal Depression\" Client Consent"],"Resource type(s)":["Forms"],"Keyword":["Perinatal Depression","Perinatal","Depression","Technology Enabled Services","Digital Mental Health","Design","RP2","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression, Postpartum","Depressive Disorder--therapy","Self Care--methods","Patient Participation"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-r5h0-ge74"],"ARK":null,"Id":"37d6264d-9e10-4967-9fe6-7dff6f0b6690","File Size":113390,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/37d6264d-9e10-4967-9fe6-7dff6f0b6690","download":"https://digitalhub.northwestern.edu/downloads/37d6264d-9e10-4967-9fe6-7dff6f0b6690"},{"Title":["\"Self-Management and Care Collaboration for Perinatal Depression\" Client Interview Protocol"],"Resource type(s)":["Other"],"Keyword":["Interview","Perinatal Depression","Perinatal","Depression","Technology Enabled Services","Digital Mental Health","Design","RP2"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression, Postpartum","Depressive Disorder","Patient Participation"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-4hqk-xb13"],"ARK":null,"Id":"1f5e9faf-f18b-46f6-8964-156650d33c21","File Size":15831,"File Format":["msword (Microsoft Word Document, OpenDocument Text, Office Open XML Document)"],"uri":"https://digitalhub.northwestern.edu/files/1f5e9faf-f18b-46f6-8964-156650d33c21","download":"https://digitalhub.northwestern.edu/downloads/1f5e9faf-f18b-46f6-8964-156650d33c21"},{"Title":["\"Self-Management and Care Collaboration for Perinatal Depression\" Design Study Protocol"],"Resource type(s)":["Study Design"],"Keyword":["IRB Protocol","Perinatal","Depression","Technology Enabled Services","Digital Mental Health","Design","Perinatal Depression","RP2"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression, Postpartum","Depressive Disorder--therapy","Self Care--methods","Patient Participation"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-km8y-a478"],"ARK":null,"Id":"02e5eb3b-46ac-41fc-ae58-017803460cb8","File Size":257093,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/02e5eb3b-46ac-41fc-ae58-017803460cb8","download":"https://digitalhub.northwestern.edu/downloads/02e5eb3b-46ac-41fc-ae58-017803460cb8"},{"Title":["\"Self-Management and Care Collaboration for Perinatal Depression\" Recruitment Script"],"Resource type(s)":["Other"],"Keyword":["Recruitment","Perinatal Depression","Perinatal","Depression","Technology Enabled Services","Digital Mental Health","RP2","Design"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression, Postpartum","Patient Participation","Depressive Disorder"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-rbv4-3275"],"ARK":null,"Id":"8c78bf3e-8afd-4782-a71c-ccf7c0917ef3","File Size":14893,"File Format":["msword (Microsoft Word Document, OpenDocument Text, Office Open XML Document)"],"uri":"https://digitalhub.northwestern.edu/files/8c78bf3e-8afd-4782-a71c-ccf7c0917ef3","download":"https://digitalhub.northwestern.edu/downloads/8c78bf3e-8afd-4782-a71c-ccf7c0917ef3"},{"Title":["\"Self-Management and Care Collaboration for Perinatal Depression\" Stakeholder Consent"],"Resource type(s)":["Forms"],"Keyword":["Perinatal Depression","Perinatal","Depression","Technology Enabled Services","Digital Mental Health","Design","RP2","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression, Postpartum","Depressive Disorder--therapy","Patient Participation"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-dgcx-vn70"],"ARK":null,"Id":"b5fc4ef9-c3d2-4ba5-9902-280294f68447","File Size":111287,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/b5fc4ef9-c3d2-4ba5-9902-280294f68447","download":"https://digitalhub.northwestern.edu/downloads/b5fc4ef9-c3d2-4ba5-9902-280294f68447"},{"Title":["\"Self-Management and Care Collaboration for Perinatal Depression\" Stakeholder Interview Protocol"],"Resource type(s)":["Other"],"Keyword":["Interview","Perinatal Depression","Perinatal","Depression","Technology Enabled Services","Digital Mental Health","Design","RP2"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression, Postpartum","Depressive Disorder--therapy"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-8m3q-qj71"],"ARK":null,"Id":"1951c9b0-f350-4198-a15d-84dff985485b","File Size":15246,"File Format":["msword (Microsoft Word Document, OpenDocument Text, Office Open XML Document)"],"uri":"https://digitalhub.northwestern.edu/files/1951c9b0-f350-4198-a15d-84dff985485b","download":"https://digitalhub.northwestern.edu/downloads/1951c9b0-f350-4198-a15d-84dff985485b"},{"Title":["\"Usability testing of an oncology patient-facing symptom management website\" Human Subjects Determinaton Form.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Oncology","Human Research Determination Form","Design"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019-09-09"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Neoplasms","Online Systems"],"Subject: LCSH":["Web site development"],"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-mve9-eg63"],"ARK":null,"Id":"19006705-18d6-4e58-ae93-4a310a808ee1","File Size":99361,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/19006705-18d6-4e58-ae93-4a310a808ee1","download":"https://digitalhub.northwestern.edu/downloads/19006705-18d6-4e58-ae93-4a310a808ee1"},{"Title":["\"Usability testing of an oncology patient-facing symptom management website\" Usability Session Guide.pdf"],"Resource type(s)":["Study Design"],"Keyword":["Usability Session","Oncology","Design"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Neoplasms","Online Systems"],"Subject: LCSH":["Web site development"],"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-fs0d-bc32"],"ARK":null,"Id":"6f98547f-6fbf-4a88-afaa-7f06d58e2d3b","File Size":114549,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/6f98547f-6fbf-4a88-afaa-7f06d58e2d3b","download":"https://digitalhub.northwestern.edu/downloads/6f98547f-6fbf-4a88-afaa-7f06d58e2d3b"},{"Title":["Design Opportunities for Mental Health Technologies for Youth. 
Adolescent Assent"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Children","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020-06"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Telemedicine","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["Northwestern University, Jacobs Foundation, and the Delaney Foundation"],"DOI":["doi:10.18131/g3-vqbv-9r77"],"ARK":null,"Id":"5d6c9b9c-e976-4447-b681-d4b5f0310a8c","File Size":112954,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/5d6c9b9c-e976-4447-b681-d4b5f0310a8c","download":"https://digitalhub.northwestern.edu/downloads/5d6c9b9c-e976-4447-b681-d4b5f0310a8c"},{"Title":["Design Opportunities for Mental Health Technologies for Youth. Parent Consent"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Children","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020-06"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Telemedicine","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["This research is supported by the Jacobs Foundation, Delaney Foundation, and Northwestern University."],"DOI":["doi:10.18131/g3-fact-kz56"],"ARK":null,"Id":"c9ff3527-5051-4841-837b-831583d24671","File Size":120446,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/c9ff3527-5051-4841-837b-831583d24671","download":"https://digitalhub.northwestern.edu/downloads/c9ff3527-5051-4841-837b-831583d24671"},{"Title":["Design Opportunities for Mental Health Technologies for Youth. Protocol"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Children","IRB Protocol"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020-06-08"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Telemedicine","Adolescent","Digital Technology"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-kd36-dy61"],"ARK":null,"Id":"1fa62981-70e7-4633-968c-8a1cd447292d","File Size":179836,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/1fa62981-70e7-4633-968c-8a1cd447292d","download":"https://digitalhub.northwestern.edu/downloads/1fa62981-70e7-4633-968c-8a1cd447292d"},{"Title":["Design Opportunities for Mental Health Technologies for Youth. Staff Interview Consent"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Children","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Telemedicine","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["Northwestern University, Jacobs Foundation, and Delaney Foundation"],"DOI":["doi:10.18131/g3-t6v7-z889"],"ARK":null,"Id":"d83918e6-d356-40b4-b4fa-4ccdf19f2dc3","File Size":122202,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/d83918e6-d356-40b4-b4fa-4ccdf19f2dc3","download":"https://digitalhub.northwestern.edu/downloads/d83918e6-d356-40b4-b4fa-4ccdf19f2dc3"},{"Title":["Design Opportunities for Mental Health Technologies for Youth. Staff Workshop Consent"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Children","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Telemedicine ","Digital Technology","Adolescent "],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["Northwestern University, Jacobs Foundation, Delaney Foundation"],"DOI":["doi:10.18131/g3-df79-mm47"],"ARK":null,"Id":"9509fce9-5147-4639-ad6a-ed5b2f3244d4","File Size":124596,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/9509fce9-5147-4639-ad6a-ed5b2f3244d4","download":"https://digitalhub.northwestern.edu/downloads/9509fce9-5147-4639-ad6a-ed5b2f3244d4"},{"Title":["Examining Millennial and Gen Z Preferences for Non-Traditional Mental Healthcare. Online Consent (13-17 years old)"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Consent","Children"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Health Surveys","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-j90p-8k20"],"ARK":null,"Id":"c025062b-415a-4d18-b601-72feae58b472","File Size":118967,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/c025062b-415a-4d18-b601-72feae58b472","download":"https://digitalhub.northwestern.edu/downloads/c025062b-415a-4d18-b601-72feae58b472"},{"Title":["Examining Millennial and Gen Z Preferences for Non-Traditional Mental Healthcare. Online Consent (18 years or older)"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Health Surveys"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-8nfd-9s55"],"ARK":null,"Id":"a001b48d-63ef-42df-a7b0-3066ab70ac02","File Size":134120,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/a001b48d-63ef-42df-a7b0-3066ab70ac02","download":"https://digitalhub.northwestern.edu/downloads/a001b48d-63ef-42df-a7b0-3066ab70ac02"},{"Title":["Examining Millennial and Gen Z Preferences for Non-Traditional Mental Healthcare. 
Print Ad 1"],"Resource type(s)":["Advertisements"],"Keyword":["Digital Mental Health","Recruitment"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health ","Health Surveys"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-3rfb-7q08"],"ARK":null,"Id":"3ac2ec9f-f558-4fa4-beae-12aa2c29e9a9","File Size":111087,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/3ac2ec9f-f558-4fa4-beae-12aa2c29e9a9","download":"https://digitalhub.northwestern.edu/downloads/3ac2ec9f-f558-4fa4-beae-12aa2c29e9a9"},{"Title":["Examining Millennial and Gen Z Preferences for Non-Traditional Mental Healthcare. Print Ad 2"],"Resource type(s)":["Advertisements"],"Keyword":["Digital Mental Health","Recruitment"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Health Surveys"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-mcc1-ha87"],"ARK":null,"Id":"4408aa75-6aad-4b89-b7df-b1e4895a1a0c","File Size":84618,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/4408aa75-6aad-4b89-b7df-b1e4895a1a0c","download":"https://digitalhub.northwestern.edu/downloads/4408aa75-6aad-4b89-b7df-b1e4895a1a0c"},{"Title":["Examining Millennial and Gen Z Preferences for Non-Traditional Mental Healthcare. Study Protocol"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","IRB Protocol","Children"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Health Surveys"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-97k2-gs87"],"ARK":null,"Id":"1c28693f-8920-4af9-9367-f94079ce3d48","File Size":164582,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/1c28693f-8920-4af9-9367-f94079ce3d48","download":"https://digitalhub.northwestern.edu/downloads/1c28693f-8920-4af9-9367-f94079ce3d48"},{"Title":["Examining Millennial and Gen Z Preferences for Non-Traditional Mental Healthcare. 
Survey"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Survey"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2019"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Mental Health","Health Surveys"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-sf41-b528"],"ARK":null,"Id":"c9f5faae-2930-47a9-86ea-96c3f8c55656","File Size":84023,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/c9f5faae-2930-47a9-86ea-96c3f8c55656","download":"https://digitalhub.northwestern.edu/downloads/c9f5faae-2930-47a9-86ea-96c3f8c55656"},{"Title":["Implementing mobile apps for depression and anxiety in a community services agency. Interview Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Depression","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":null,"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression","Anxiety","Mobile Applications","Mental Health Services"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-2m9d-7b44"],"ARK":null,"Id":"62211181-1379-4979-95bc-7b9e16acd8a9","File Size":44396,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/62211181-1379-4979-95bc-7b9e16acd8a9","download":"https://digitalhub.northwestern.edu/downloads/62211181-1379-4979-95bc-7b9e16acd8a9"},{"Title":["Implementing mobile apps for depression and anxiety in a community services agency. Protocol"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Depression","IRB Protocol"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2017"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression","Anxiety","Mobile Applications","Community Health Services"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-grf2-rr06"],"ARK":null,"Id":"6ed9c44c-db1f-42d7-95ab-0560ca6bb382","File Size":60575,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/6ed9c44c-db1f-42d7-95ab-0560ca6bb382","download":"https://digitalhub.northwestern.edu/downloads/6ed9c44c-db1f-42d7-95ab-0560ca6bb382"},{"Title":["Implementing mobile apps for depression and anxiety in a community services agency. Recruitment Script"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Depression","Recruitment"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2018"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression","Anxiety","Mobile Applications","Community Health Services"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-kzpq-qb16"],"ARK":null,"Id":"00441c78-a2cc-46d4-88db-7973142ef9ed","File Size":16541,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/00441c78-a2cc-46d4-88db-7973142ef9ed","download":"https://digitalhub.northwestern.edu/downloads/00441c78-a2cc-46d4-88db-7973142ef9ed"},{"Title":["Implementing mobile apps for depression and anxiety in a community services agency. Survey"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Depression","Survey"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2018"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression","Anxiety","Mobile Applications","Community Health Services"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-gktk-qp75"],"ARK":null,"Id":"0a0578fa-8818-4efe-857c-95dd9d8de58a","File Size":181860,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/0a0578fa-8818-4efe-857c-95dd9d8de58a","download":"https://digitalhub.northwestern.edu/downloads/0a0578fa-8818-4efe-857c-95dd9d8de58a"},{"Title":["Implementing mobile apps for depression and anxiety in a community services agency. Survey Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Depression","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":null,"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression","Anxiety","Mobile Applications","Community Health Services"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-q57n-bk83"],"ARK":null,"Id":"a9b2c9ee-0950-44b8-bd93-5571c9fbc581","File Size":51954,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/a9b2c9ee-0950-44b8-bd93-5571c9fbc581","download":"https://digitalhub.northwestern.edu/downloads/a9b2c9ee-0950-44b8-bd93-5571c9fbc581"},{"Title":["IntelliCare Study. 
Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2018"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Depression","Anxiety","Mobile Applications","Community Health Services"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-45by-0b98"],"ARK":null,"Id":"06f74810-a80b-43e9-b5b2-0c89e70eb184","File Size":287256,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/06f74810-a80b-43e9-b5b2-0c89e70eb184","download":"https://digitalhub.northwestern.edu/downloads/06f74810-a80b-43e9-b5b2-0c89e70eb184"},{"Title":["Technology-Enabled Prevention Services for At-Risk Youth. Design Session Adult Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Children","Technology Enabled Services","Design","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Anxiety Disorders--prevention \u0026 control","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["This research is supported by Northwestern University and the National Institute of Mental Health. "],"DOI":["doi:10.18131/g3-7wfq-4a89"],"ARK":null,"Id":"c45d0ab0-3fe2-404f-bc37-04de53ccf095","File Size":135197,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/c45d0ab0-3fe2-404f-bc37-04de53ccf095","download":"https://digitalhub.northwestern.edu/downloads/c45d0ab0-3fe2-404f-bc37-04de53ccf095"},{"Title":["Technology-Enabled Prevention Services for At-Risk Youth. Design Session Child Assent Parent Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Children","Technology Enabled Services","Consent","Design"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Anxiety Disorders--prevention \u0026 control","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["This research is supported by Northwestern University and the National Institute of Mental Health."],"DOI":["doi:10.18131/g3-eqnc-sg02"],"ARK":null,"Id":"8095347b-5b41-4bdb-82dc-031301e8f7a6","File Size":125876,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/8095347b-5b41-4bdb-82dc-031301e8f7a6","download":"https://digitalhub.northwestern.edu/downloads/8095347b-5b41-4bdb-82dc-031301e8f7a6"},{"Title":["Technology-Enabled Prevention Services for At-Risk Youth. Design Session Staff Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Children","Consent","Technology Enabled Services","Design"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Anxiety Disorders--prevention \u0026 control","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["This research is supported by Northwestern University and the National Institute of Mental Health. 
"],"DOI":["doi:10.18131/g3-sqds-5x90"],"ARK":null,"Id":"d1899d92-00ed-4018-8f53-a6bab4e64d68","File Size":124389,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/d1899d92-00ed-4018-8f53-a6bab4e64d68","download":"https://digitalhub.northwestern.edu/downloads/d1899d92-00ed-4018-8f53-a6bab4e64d68"},{"Title":["Technology-Enabled Prevention Services for At-Risk Youth. Protocol"],"Resource type(s)":["Study Design"],"Keyword":["Digital Mental Health","Children","Technology Enabled Services","IRB Protocol"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020-12-03"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Anxiety Disorders--prevention \u0026 control","Digital Technology ","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":null,"DOI":["doi:10.18131/g3-t82p-5q82"],"ARK":null,"Id":"552ea67e-b9ca-4f5e-b72c-d5ae77a579c0","File Size":388651,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/552ea67e-b9ca-4f5e-b72c-d5ae77a579c0","download":"https://digitalhub.northwestern.edu/downloads/552ea67e-b9ca-4f5e-b72c-d5ae77a579c0"},{"Title":["Technology-Enabled Prevention Services for At-Risk Youth. 
Usability Lab Adult Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Children","Technology Enabled Services","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Anxiety Disorders--prevention \u0026 control","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["This research is supported by Northwestern University and the National Institute of Mental Health. "],"DOI":["doi:10.18131/g3-sefc-7421"],"ARK":null,"Id":"e7624d02-804c-4a12-a0a6-353fb10da1f0","File Size":124721,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/e7624d02-804c-4a12-a0a6-353fb10da1f0","download":"https://digitalhub.northwestern.edu/downloads/e7624d02-804c-4a12-a0a6-353fb10da1f0"},{"Title":["Technology-Enabled Prevention Services for At-Risk Youth. Usability Lab Child Assent Parent Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Children","Technology Enabled Services","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Anxiety Disorders--prevention \u0026 control","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["This research is supported by Northwestern University and the National Institute of Mental Health."],"DOI":["doi:10.18131/g3-t3rb-4688"],"ARK":null,"Id":"ea60cd4d-1db0-4eb1-a5a4-068fedc70798","File Size":126850,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/ea60cd4d-1db0-4eb1-a5a4-068fedc70798","download":"https://digitalhub.northwestern.edu/downloads/ea60cd4d-1db0-4eb1-a5a4-068fedc70798"},{"Title":["Technology-Enabled Prevention Services for At-Risk Youth. Usability Lab Staff Consent"],"Resource type(s)":["Forms"],"Keyword":["Digital Mental Health","Children","Technology Enabled Services","Consent"],"Rights":["http://creativecommons.org/licenses/by/3.0/us/"],"Creator":["Center for Behavioral Intervention Technologies, Department"],"Contributor":null,"Description":null,"Abstract":null,"Original Bibliographic Citation":null,"Related URL":null,"Publisher":["DigitalHub. 
Galter Health Sciences Library \u0026 Learning Center"],"Date Created":["2020"],"Original Identifier":null,"Language":["English"],"Subject: MESH":["Anxiety Disorders--prevention \u0026 control","Digital Technology","Adolescent"],"Subject: LCSH":null,"Subject: Geographic Name":null,"Subject: Name":null,"Location":null,"Digital Origin":null,"Page Number":null,"Acknowledgments":null,"Grants And Funding":["This research is supported by Northwestern University and the National Institute of Mental Health."],"DOI":["doi:10.18131/g3-f7qb-4t30"],"ARK":null,"Id":"0043bb90-7350-423c-9894-e1d60f40b7dd","File Size":124009,"File Format":["pdf (Portable Document Format)"],"uri":"https://digitalhub.northwestern.edu/files/0043bb90-7350-423c-9894-e1d60f40b7dd","download":"https://digitalhub.northwestern.edu/downloads/0043bb90-7350-423c-9894-e1d60f40b7dd"}]}

                              
# current_list_3=[]
# current_series_3 = digitalhub_community_df.loc[digitalhub_community_df['Id'] == '27114af7-6444-4f3c-8de1-3e9c5dc73e95','members']
# # print(current_series_3.item()[0])
# current_list_3 = current_series_3.tolist()
# cbits_result = []
# cbits_result = current_series_3.item() + [x for x in add_to_series_3 if x not in current_series_3.items()]
# print(len(cbits_result))
# # print(cbits_result)
                            
## Add CBITS result back to member

# digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
# digitalhub_community_df.at[8,'members'] = cbits_result  




######################################
### Remove subcollections from GHSL###
#####################################

## Drop the old GV Black sub-collection (Id x633f100h) from ghsl_result; it is not migrated.
## The list is edited in place and only the first matching entry is removed.

print(len(ghsl_result))
for i, member in enumerate(ghsl_result):
    if member['Id'] == 'x633f100h':
        print("True")
        ghsl_result.pop(i)
        # Stop right away: the list was just mutated while being iterated
        break


#### Remove Special Collections (include in various communities)
## v405s9425

    ##Subcollection: Notables 
    ## ed35d953-d035-44a7-bacb-0edf1c8006a6
    ## Becomes Community:  Endowed Professorship Biographies

    ##Subcollection: Oral Histories
    ## oral-histories
    ## Becomes Community: AV Archives

    ##Subcollection: Paul de Kruif Interviews
    ## paul-de-kruif-interviews
    ## Becomes Community: AV Archives

    ##Subcollection: Special Collections - art
    ## 2f75r807r
    ## Becomes Community: History of FSM

    ##Subcollection: Special Collections - photos from the vault
    ## 5712m6524
    ## Becomes Community: History of FSM

    ##Subcollection: Student Life
    ## student-life
    ## Becomes Community: AV Archives

## Drop the Special Collections entry (Id v405s9425) from ghsl_result, in place.
print(len(ghsl_result))
for i, member in enumerate(ghsl_result):
    if member['Id'] == 'v405s9425':
        print("True")
        ghsl_result.pop(i)
        # Stop right away: the list was just mutated while being iterated
        break
    
#### Drop the 12th General Hospital collection from ghsl_result (it becomes its own community)
## Id: 07b25bee-4a47-466a-b9b8-70d7a392fab0

print(len(ghsl_result))
for i, member in enumerate(ghsl_result):
    if member['Id'] == '07b25bee-4a47-466a-b9b8-70d7a392fab0':
        print("True")
        ghsl_result.pop(i)
        # Stop right away: the list was just mutated while being iterated
        break

#### Drop "Northwestern University Medical School 1859-1979" from ghsl_result
## Id: areybook
## Becomes Community: History of FSM

print(len(ghsl_result))
for i, member in enumerate(ghsl_result):
    if member['Id'] == 'areybook':
        print("True")
        ghsl_result.pop(i)
        # Stop right away: the list was just mutated while being iterated
        break

        
#################################################
## Add private files to COVID-19 Collection ####
################################################

## Forward slashes resolve on every OS (Windows accepts them too), matching the other paths in this notebook
add_to_series_path_1 = "data/community/private_covid_19_design of.txt"
add_to_series_path_2 = "data/community/private_covid_19_illustrated_state_of.txt"
add_to_series_path_3 = "data/community/private_covid_19_real_time_UV.txt"

covid_private_path_list = [add_to_series_path_1,
                           add_to_series_path_2,
                           add_to_series_path_3]

## Parse each file as a single JSON document and collect the parsed objects
covid_private_list = []

for path in covid_private_path_list:
    with open(path, 'r', encoding='utf8') as f:
        result = json.loads(f.read())
        covid_private_list.append(result)


## Select the 'members' cell of the COVID-19 collection row
current_list_4 = []
current_series_4 = digitalhub_community_df.loc[digitalhub_community_df['Id'] == '3635f61e-e67e-41f2-b4de-982a9f81dcc8', 'members']

## .item() raises unless exactly one row matched; it returns that row's existing member list
current_members_4 = current_series_4.item()

## Append only the private entries that are not already members.
## The comparison must be against the member list itself: Series.items() yields
## (label, value) pairs, which never equal a member entry, so it would filter nothing.
covid_result = current_members_4 + [x for x in covid_private_list if x not in current_members_4]
print(len(covid_result))

## Write the combined list back into the matched row (by its own label, not a hard-coded row number)
digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
digitalhub_community_df.at[current_series_4.index[0], 'members'] = covid_result


#################################################################
## Add private files to Masters in Public Health CE Products ####
################################################################

## Forward slashes resolve on every OS (Windows accepts them too), matching the other paths in this notebook
add_to_series_path_5_0 = "data/community/private_masters_trends_in.txt"

masters_private_path_list = [add_to_series_path_5_0]

## Parse each file as a single JSON document and collect the parsed objects
masters_private_list = []

for path in masters_private_path_list:
    with open(path, 'r', encoding='utf8') as f:
        result = json.loads(f.read())
        masters_private_list.append(result)


## Select the 'members' cell of the Masters in Public Health CE Products row
current_series_5_0 = digitalhub_community_df.loc[digitalhub_community_df['Id'] == '40acd700-b850-4e7b-a650-0535de84ab6b', 'members']

## .item() raises unless exactly one row matched; it returns that row's existing member list
current_members_5_0 = current_series_5_0.item()

## Append only the private entries that are not already members.
## The comparison must be against the member list itself: Series.items() yields
## (label, value) pairs, which never equal a member entry, so it would filter nothing.
masters_result = current_members_5_0 + [x for x in masters_private_list if x not in current_members_5_0]
print(len(masters_result))

## Write the combined list back into the matched row (by its own label, not a hard-coded row number)
digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
digitalhub_community_df.at[current_series_5_0.index[0], 'members'] = masters_result

#################################################################
## Add private files to CPIM ####
################################################################

## Forward slashes resolve on every OS (Windows accepts them too), matching the other paths in this notebook
add_to_series_path_6_0 = "data/community/private_centerforprevention_smithegan.txt"

cpim_private_path_list = [add_to_series_path_6_0]

## Parse each file as a single JSON document and collect the parsed objects
cpim_private_list = []

for path in cpim_private_path_list:
    with open(path, 'r', encoding='utf8') as f:
        result = json.loads(f.read())
        cpim_private_list.append(result)


## Select the 'members' cell of the CPIM row
current_series_6_0 = digitalhub_community_df.loc[digitalhub_community_df['Id'] == '6682x398c', 'members']

## .item() raises unless exactly one row matched; it returns that row's existing member list
current_members_6_0 = current_series_6_0.item()

## Append only the private entries that are not already members.
## The comparison must be against the member list itself: Series.items() yields
## (label, value) pairs, which never equal a member entry, so it would filter nothing.
cpim_result = current_members_6_0 + [x for x in cpim_private_list if x not in current_members_6_0]
print(len(cpim_result))

## Write the combined list back into the matched row (by its own label, not a hard-coded row number)
digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
digitalhub_community_df.at[current_series_6_0.index[0], 'members'] = cpim_result

#################################################################
## Add private files to National Center for Data to Health ####
################################################################

## Forward slashes resolve on every OS (Windows accepts them too), matching the other paths in this notebook
add_to_series_path_7_0 = "data/community/private_centerfordatatohealth_invenio.txt"

centerdata_private_path_list = [add_to_series_path_7_0]

## Parse each file as a single JSON document and collect the parsed objects
centerdata_private_list = []

for path in centerdata_private_path_list:
    with open(path, 'r', encoding='utf8') as f:
        result = json.loads(f.read())
        centerdata_private_list.append(result)


## Select the 'members' cell of the National Center for Data to Health row
current_series_7_0 = digitalhub_community_df.loc[digitalhub_community_df['Id'] == 'b0375b45-0b95-4bf8-9ee7-7df4d6fb47e4', 'members']

## .item() raises unless exactly one row matched; it returns that row's existing member list
current_members_7_0 = current_series_7_0.item()

## Append only the private entries that are not already members.
## The comparison must be against the member list itself: Series.items() yields
## (label, value) pairs, which never equal a member entry, so it would filter nothing.
centerdata_result = current_members_7_0 + [x for x in centerdata_private_list if x not in current_members_7_0]
print(len(centerdata_result))

## Write the combined list back into the matched row (by its own label, not a hard-coded row number)
digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
digitalhub_community_df.at[current_series_7_0.index[0], 'members'] = centerdata_result

#################################################################
## Add private file to History of Feinberg School of Medicine ####
################################################################

## Forward slashes resolve on every OS (Windows accepts them too), matching the other paths in this notebook
add_to_series_path_11_0 = "data/community/private_feinberghistory_mcclintock.txt"

feinberg_private_path_list = [add_to_series_path_11_0]

## Parse each file as a single JSON document and collect the parsed objects
feinberg_private_list = []

for path in feinberg_private_path_list:
    with open(path, 'r', encoding='utf8') as f:
        result = json.loads(f.read())
        feinberg_private_list.append(result)


## Select the 'members' cell of the History of Feinberg School of Medicine row
current_series_11_0 = digitalhub_community_df.loc[digitalhub_community_df['Id'] == 'history-of-feinberg-school-of-medicine', 'members']

## .item() raises unless exactly one row matched; it returns that row's existing member list
current_members_11_0 = current_series_11_0.item()

## Append the Feinberg private entries (feinberg_private_list, loaded just above) that
## are not already members. The comparison must be against the member list itself:
## Series.items() yields (label, value) pairs, which never equal a member entry.
feinberg_result = current_members_11_0 + [x for x in feinberg_private_list if x not in current_members_11_0]
print(len(feinberg_result))

## Write the combined list back into the matched row (by its own label, not a hard-coded row number)
digitalhub_community_df['members'] = digitalhub_community_df['members'].astype('object')
digitalhub_community_df.at[current_series_11_0.index[0], 'members'] = feinberg_result

## Resources
## https://stackoverflow.com/questions/26483254/python-pandas-insert-list-into-a-cell
## https://www.geeksforgeeks.org/python-removing-dictionary-from-list-of-dictionaries/

In [None]:
## Inspect the "members" column and create a list of member IDs

## Map each community row label to the list of "Id" values found in its members,
## and to the number of members in that list
column_member = {
    row_label: [member["Id"] for member in members]
    for row_label, members in digitalhub_community_df["members"].items()
}
count_member = {row_label: len(member_ids) for row_label, member_ids in column_member.items()}

## Attach both lookups to the DigitalHub Community dataframe, keyed by its index

digitalhub_community_df['Member_List'] = digitalhub_community_df.index.map(column_member)
digitalhub_community_df['Member_List_Count'] = digitalhub_community_df.index.map(count_member)
digitalhub_community_df.head()

## Checked: No problems

In [None]:
## Write the community dataframe to Excel, keeping both the Pandas index and the header row

digitalhub_community_df.to_excel(
    excel_writer="outputs/digitalhub_community_df.xlsx",
    index=True,
    header=True,
)

## Checked: No problems

In [None]:
## Build one dataframe per community from the records in its "members" cell, tag every
## row with the community's row label, then stack them into digitalhub_sub_collection_df

members_dfs_list = []

for row_label, members in digitalhub_community_df["members"].items():
    member_df = pd.json_normalize(members)
    member_df['community_rowid'] = row_label
    members_dfs_list.append(member_df)

## drop=True discards the per-community indexes so the combined frame gets a fresh 0..n-1 index
digitalhub_sub_collection_df = pd.concat(members_dfs_list, sort=False).reset_index(drop=True)
digitalhub_sub_collection_df.head()

## Resource
## https://stackoverflow.com/questions/62816027/convert-pandas-json-column-to-multiple-rows

## Checked: No problems

In [None]:
## Remove lists (i.e. brackets) and convert to strings for data in each column

# def list2Str(lst):
#     if type(lst) is list: # apply conversion to list columns
#         return";".join(lst)
#     else:
#         return str

# digitalhub_sub_collection_df.apply(lambda x: [list2Str(i) for i in x])

# digitalhub_sub_collection_df.head()

## Resources
## https://stackoverflow.com/questions/38147447/how-to-remove-square-bracket-from-pandas-dataframe

In [None]:
## Classify each row of digitalhub_sub_collection_df by whether it has a DOI:
## no DOI -> "Sub_Collection" (level 2), otherwise -> "Record" (level 6)

missing_doi = digitalhub_sub_collection_df['DOI'].isnull()

digitalhub_sub_collection_df['Level Type'] = np.where(missing_doi, "Sub_Collection", "Record")
digitalhub_sub_collection_df['Level Number'] = missing_doi.map({True: 2, False: 6})

# digitalhub_sub_collection_df.dtypes

## Checked: No problems

In [None]:
## Write the sub-collection dataframe to Excel: header row kept, Pandas index omitted
digitalhub_sub_collection_df.to_excel(
    excel_writer="outputs/digitalhub_sub_collection_df.xlsx",
    header=True,
    index=False,
)

## Checked: No problems

In [None]:
## Stack the community rows on top of the sub-collection rows, order the result by
## 'community_rowid', and renumber the index from 0
digitalhub_comm_col_df = (
    pd.concat([digitalhub_community_df, digitalhub_sub_collection_df], axis=0)
    .sort_values(by=['community_rowid'])
    .reset_index(drop=True)
)
# digitalhub_comm_col_df.head()

## Checked: No problems

In [None]:
## Order rows by community_rowid, then by Level Number (both ascending), in place
digitalhub_comm_col_df.sort_values(
    by=['community_rowid', 'Level Number'],
    ascending=True,
    inplace=True,
)

## Checked: No problems

In [None]:
## Within each community_rowid group, fill the Community_ID forward into the rows that follow it
community_id_by_rowid = digitalhub_comm_col_df.groupby(['community_rowid'])['Community_ID']
digitalhub_comm_col_df['Community_ID'] = community_id_by_rowid.ffill()

## Resources
## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas

## Checked: No problems

In [None]:
## Sub_Collection_ID holds the row's own Id for Sub_Collection rows and NaN for every other row
is_sub_collection_row = digitalhub_comm_col_df['Level Type'] == 'Sub_Collection'
digitalhub_comm_col_df['Sub_Collection_ID'] = np.where(
    is_sub_collection_row,
    digitalhub_comm_col_df['Id'],
    np.nan,
)

## Resources: 
## https://stackoverflow.com/questions/67043249/how-to-use-np-where-in-creating-new-column-using-previous-rows

## Checked: No problems

In [None]:
## Write digitalhub_comm_col_df to an Excel workbook: header row included, Pandas index omitted
digitalhub_comm_col_df.to_excel(
    excel_writer="outputs/digitalhub_comm_col_df.xlsx",
    header=True,
    index=False,
)

## Checked: No problems

##### Recall Sub_Collection 

In [None]:
## Select the 'Id' of every Sub_Collection row and turn that Series into a plain list

sub_collection_rows = digitalhub_comm_col_df["Level Type"] == "Sub_Collection"
digitalhub_sub_collection_series = digitalhub_comm_col_df.loc[sub_collection_rows, 'Id']
digitalhub_sub_collection_list = digitalhub_sub_collection_series.tolist()

print(digitalhub_sub_collection_list)

## Checked: No problems

In [None]:
## Request the JSON description of every DigitalHub Sub_Collection with urllib.request.
## Parsed responses go into multi_digitalhub_sub_collection_list; ids that fail go into
## digitalhub_sub_collection_problem_list.
## test_list = ['2cc92425-b656-47ea-a3b4-825405ee6088', 'a86e1412-d72c-4cae-b8ca-16fd834cb128','fc389d13-2430-409b-82fd-a4b26613d350']

multi_digitalhub_sub_collection_list = []
digitalhub_sub_collection_problem_list = []


def _report_sub_collection_failure(sub_collection_id, *message_parts):
    """Print the failing id, add it to the problem list, then print the error message."""
    print(sub_collection_id)
    digitalhub_sub_collection_problem_list.append(sub_collection_id)
    print(*message_parts)


for item in digitalhub_sub_collection_list:
    collection_url = f"https://digitalhub.northwestern.edu/collections/{item}.json"
    try:
        with urllib.request.urlopen(collection_url) as response:
            response_text = response.read().decode()
            single_digitalhub_sub_collection_dict = json.loads(response_text)
            multi_digitalhub_sub_collection_list.append(single_digitalhub_sub_collection_dict)
            print(item)
        # Only reached when the request and parsing succeeded: wait one second before the next request
        time.sleep(1)

    except urllib.error.HTTPError as http_err:
        _report_sub_collection_failure(item, f'HTTP error occurred: {http_err}')  # Python 3.6

    except urllib.error.URLError as url_err:
        # The message says "Exiting the loop!" but nothing here breaks out; the loop moves on to the next id
        _report_sub_collection_failure(item, f'URL error occurred: {url_err}. ', 'Exiting the loop!')  # Python 3.6

    except json.JSONDecodeError as json_err:
        _report_sub_collection_failure(item, f'JSON Decode error occurred: {json_err}. ', 'Poorly formed JSON.')  # Python 3.6

    except Exception as err:
        _report_sub_collection_failure(item, f'Other error occurred: {err}. ')  # Python 3.6

    else:
        print('Success!')



## Resources
## https://docs.python.org/3/library/urllib.request.html

## Checked: No problems

In [None]:
## Show how many Sub_Collection ids failed during the URL query, followed by the ids themselves

# print(multi_digitalhub_sub_collection_list)
print(
    len(digitalhub_sub_collection_problem_list),
    digitalhub_sub_collection_problem_list,
    sep="\n",
)


## Checked: No problems

In [None]:
## Put the failed Sub_Collection ids into a dataframe
problem_df_2 = pd.DataFrame(data=digitalhub_sub_collection_problem_list)

## Write it to an Excel workbook: header row included, Pandas index omitted
problem_df_2.to_excel(
    excel_writer="outputs/problem_df_2.xlsx",
    header=True,
    index=False,
)

## Checked: No problems

In [None]:
## Create a dataframe called digitalhub_sub_collection_recall_df from DigitalHub json for DigitalHub Sub_Collections.
## pd.json_normalize already returns a DataFrame, so no DataFrame.from_dict wrapper is needed;
## max_level=1 flattens nested dictionaries one level deep only.

digitalhub_sub_collection_recall_df = pd.json_normalize(multi_digitalhub_sub_collection_list, max_level=1)
digitalhub_sub_collection_recall_df.head(10)

## Checked: No problems

In [None]:
## Load the locally saved .txt copies of the Sub_Collection json metadata and parse each one.
## Each path comment names the Sub_Collection id the file stands in for.


## 1. Problem with: 91294b2e-34e4-46ac-9086-be17c40d0d01
operation_path = r"data\sub_collection\operation.txt"

## 2. Problem with: 4dde1545-ad4e-4801-9db8-02d5aadc38c6
# template_7 = r"data\sub_collection\template_7.txt"

## 3. Problem with: 1724d997-42e2-493d-88fd-22f836403628
# template_6 = r"data\sub_collection\template_6.txt"

## 4. Problem with: a76c9405-2fb7-47c0-bd6e-cd8cf9ea69a8
# template_5 = r"data\sub_collection\template_5.txt"

## 5. Problem with: ef9651c9-c810-42e0-8e2d-58993586473d
# template_4 = r"data\sub_collection\template_4.txt"

## 6. Problem with: 779a5d4d-23b4-4a28-8c38-48faa43de46d
# template_3 = r"data\sub_collection\template_3.txt"

## 6. Problem with: 8c462e3c-4666-4997-9a29-4c17f7846c96
# template_2 = r"data\sub_collection\template_2.txt"

## 7. Problem with: 22e29a25-6e89-4322-b772-15442ca5b713
t_series_path = r"data\sub_collection\t_series.txt"

## 8. Problem with: 2abe2c1b-932e-4bb0-8455-a97d828f6924
sample_path = r"data\sub_collection\sample_.txt"

## 9. Problem with: 4a8457cb-7a65-4774-9ab6-057702e521f5
r_series_path = r"data\sub_collection\r_series.txt"

## 10. Problem with: 90c63af3-adb9-48dc-95d9-7b4bafe13a7a
k_series_path = r"data\sub_collection\k_series.txt"

## 11. Problem with: 01b2ba97-8603-477f-bde3-b36b6c79bdea
industry_path = r"data\sub_collection\industry.txt"

## 12. Problem with: 691ddbbd-785c-4a6a-85d1-7569d8fcbcdb
grant_resources_path = r"data\sub_collection\grant_resources.txt"

## 13. Problem with: 9aa9dbb6-7971-4e5c-9b9c-81216d9325d3
federal_non_nih_path = r"data\sub_collection\federal_non_nih.txt"

## 14. Problem with: 1b2b7d0f-d7f3-41a4-a2a4-8435d719ecec
f_series_path = r"data\sub_collection\f_series.txt"

## 15. Problem with: 46374bec-fe29-4a06-a8b8-f1f93668699c
archive_path = r"data\sub_collection\archive.txt"

## 16. Problem with: eaa0acc4-64b5-4c34-8e90-49537ffa9e0b
u_series_path = r"data\sub_collection\u_series.txt"

## 17. Problem with: 6ce6bf89-1c23-4949-9ae3-1c6bd874e593
# template_9 = r"data\sub_collection\template_9.txt"

## 18. Problem with: 113b8ab8-8f40-454b-9932-abd83e392f78
# template_8 = r"data\sub_collection\template_8.txt"

## 19. Problem with: student-life
student_life_path = r"data\sub_collection\student_life.txt"

## 20. Problem with: paul-de-kruif-interviews
paul_interview_path = r"data\sub_collection\paul_interview.txt"


def _loads_with_quote_repair(text):
    """Parse `text` as JSON, escaping unescaped double quotes where parsing fails.

    On each json.JSONDecodeError the double quote closest before the reported
    error position is escaped, together with the next double quote after it,
    and parsing is retried.

    Parameters
    ----------
    text : str
        The JSON document, possibly containing unescaped double quotes inside strings.

    Returns
    -------
    The parsed JSON value.

    Raises
    ------
    json.JSONDecodeError
        If no quote is available to repair, or if the text still does not
        parse after the bounded number of repair attempts.
    """
    # Bound the repair attempts by the number of quotes in the input, so text that
    # is broken in some other way cannot keep the loop running forever.
    max_repairs = text.count('"')
    for _ in range(max_repairs):
        try:
            return json.loads(text)
        except json.JSONDecodeError as json_err:
            # json_err.pos is the "(char N)" offset shown in the error message
            unexp = json_err.pos
            # position of the unescaped '"' before the unexpected character
            unesc = text.rfind('"', 0, unexp)
            if unesc == -1:
                raise
            text = text[:unesc] + r'\"' + text[unesc + 1:]
            # position of the corresponding closing '"' (+2 for the inserted '\')
            closg = text.find('"', unesc + 2)
            if closg == -1:
                raise
            text = text[:closg] + r'\"' + text[closg + 1:]
    # Final attempt after the last repair; raises json.JSONDecodeError if still invalid
    return json.loads(text)


problem_sub_dict_list = []

path_sub_list = [operation_path,
#                  template_7,
#                  template_6,
#                  template_5,
#                  template_4,
#                  template_3,
#                  template_2,
                 t_series_path,
                 sample_path,
                 r_series_path,
                 k_series_path,
                 industry_path,
                 grant_resources_path,
                 federal_non_nih_path,
                 f_series_path,
                 archive_path,
                 u_series_path,
#                  template_8,
#                  template_9,
                 student_life_path,
                 paul_interview_path]

for path in path_sub_list:
#     print(path)
    with open(path, 'r', encoding= 'utf8') as f:
        s = f.read()
    result = _loads_with_quote_repair(s)
#     print(result)
    problem_sub_dict_list.append(result)


## Resources
## https://stackoverflow.com/questions/16573332/jsondecodeerror-expecting-value-line-1-column-1-char-0
## https://stackoverflow.com/questions/18514910/how-do-i-automatically-fix-an-invalid-json-string

## Checked: No problems

In [None]:
## Create a problem_sub_df to hold the problem_sub_dict_list.
## pd.json_normalize already returns a DataFrame, so no DataFrame.from_dict wrapper is needed;
## max_level=1 flattens nested dictionaries one level deep only.
problem_sub_df = pd.json_normalize(problem_sub_dict_list, max_level=1)
problem_sub_df.head()

## Checked: No problems

In [None]:
## Append the rows of problem_sub_df to digitalhub_sub_collection_recall_df and renumber the index from 0

digitalhub_sub_collection_recall_df = pd.concat(
    [digitalhub_sub_collection_recall_df, problem_sub_df],
    axis=0,
).reset_index(drop=True)
digitalhub_sub_collection_recall_df.head(10)

## Checked: No problems

In [None]:
## Mark every row of digitalhub_sub_collection_recall_df as a Sub_Collection at level 2
recall_df = digitalhub_sub_collection_recall_df  # short alias for the same dataframe object
recall_df["Level Type"] = "Sub_Collection"
recall_df["Level Number"] = "2"
## The level number is written as text above, so convert it to an integer here
recall_df['Level Number'] = recall_df['Level Number'].map(int)

## Keep the current index as an explicit 'sub_collection_rowid' column
recall_df['sub_collection_rowid'] = recall_df.index

## Sub_Collection_ID is a copy of the row's own Id
recall_df['Sub_Collection_ID'] = recall_df['Id']

## Checked: No problems

In [None]:
####################################################################################
##### Add records to sub-collection || Add sub_sub_collections to sub_collection ####
#####################################################################################

#################################################################################
## Add private files to NUCATS Grants Repository --> K-Series Sub_Collection ####
################################################################################

# add_to_series_path_5 = r"data\sub_collection\private_nucats_grants_k-series_tunc_ozcan.txt"

# k_series_private_path_list = [add_to_series_path_5]

# k_series_private_list = [] 

# for path in k_series_private_path_list: 
#     with open(path, 'r', encoding= 'utf8') as f:
#         s = f.read()    
#         result = json.loads(s)   # try to parse...
#         k_series_private_list.append(result)


# ## Create a list from the existing members column for the collection
# current_list_5=[]
# current_series_5 = digitalhub_sub_collection_recall_df.loc[digitalhub_sub_collection_recall_df['Id'] == '90c63af3-adb9-48dc-95d9-7b4bafe13a7a','members']
# #print(current_series_5.item()[0])

# ##Add items from k_series_private_list list to current_series
# k_series_result = []
# k_series_result = current_series_5.item() + [x for x in k_series_private_list if x not in current_series_5.items()]
# print(len(k_series_result))
# # print(k_series_result)
                            
# # Add k_series_result back to member

# digitalhub_sub_collection_recall_df['members'] = digitalhub_sub_collection_recall_df['members'].astype('object')
# digitalhub_sub_collection_recall_df.at[92,'members'] = k_series_result  


#################################################################################
## Helpers shared by every "add private files" section below
#################################################################################

def _load_json_files(paths):
    """Parse every file in `paths` as JSON (UTF-8) and return the parsed objects in the same order."""
    parsed_objects = []
    for json_path in paths:
        with open(json_path, 'r', encoding='utf8') as json_file:
            parsed_objects.append(json.load(json_file))
    return parsed_objects


def _merge_private_members(recall_df, collection_id, private_members):
    """Append `private_members` to the 'members' list of the row whose 'Id' equals `collection_id`.

    Parameters
    ----------
    recall_df : pandas.DataFrame
        Dataframe with an 'Id' column and a 'members' column; modified in place.
    collection_id : str
        Value of 'Id' identifying the row to extend.
    private_members : list
        Parsed member objects to add. A member that is already present in the
        row's current 'members' list is not added a second time.

    Returns
    -------
    list
        The merged members list that was written back to the dataframe.

    Raises
    ------
    ValueError
        If `collection_id` does not match exactly one row.
    """
    current_members = recall_df.loc[recall_df['Id'] == collection_id, 'members']
    if len(current_members) != 1:
        raise ValueError(
            f"Expected exactly one row with Id {collection_id!r}, found {len(current_members)}"
        )
    # Write back to the row that was matched by Id rather than to a hard-coded row label,
    # so the merge does not depend on the order in which rows were collected.
    row_label = current_members.index[0]
    existing_members = current_members.iloc[0]
    merged_members = existing_members + [
        member for member in private_members if member not in existing_members
    ]
    # The column must be of object dtype for a single cell to hold a list
    recall_df['members'] = recall_df['members'].astype('object')
    recall_df.at[row_label, 'members'] = merged_members
    return merged_members


#################################################################################
## Add private files to GHSL --> DIAW ####
################################################################################

add_to_series_path_6 = r"data\sub_collection\private_ghsl_diaw_scientists_in_media.txt"

diaw_private_path_list = [add_to_series_path_6]
diaw_private_list = _load_json_files(diaw_private_path_list)

diaw_result = _merge_private_members(digitalhub_sub_collection_recall_df, 'diaw2017', diaw_private_list)
print(len(diaw_result))
# print(diaw_result)

#################################################################################
### Add private files to Science in Society --> 2018 ####
################################################################################

add_to_series_path_7 = r"data\sub_collection\private_science_in_society_2018_mouse_egg.txt"

sis_private_path_list = [add_to_series_path_7]
sis_private_list = _load_json_files(sis_private_path_list)

# The merged list is stored in sis_result, so diaw_result keeps the DIAW members
sis_result = _merge_private_members(
    digitalhub_sub_collection_recall_df,
    '9e27fbd0-c6cb-47c7-8770-8ffeb135009d',
    sis_private_list,
)
print(len(sis_result))
# print(sis_result)


####################################################################################################################
### Add private files and one sub_collection to Center for Biomedical Informatics --> Biomedical_data_science_day###
####################################################################################################################

add_to_series_path_0 = r"data\sub_collection\private_centerforbiomedical_biomedicalday_heartrate.txt"
add_to_series_path_0_0 = r"data\sub_collection\private_centerforbiomedical_biomedicalday_overviewofcausal.txt"
add_to_series_path_0_1 = r"data\sub_collection\private_centerforbiomedical_biomedicalday_predictingsevere.txt"
add_to_series_path_0_2 = r"data\sub_collection\private_centerforbiomedical_biomedicalday_feinberg2016.txt"
## subcollection; defined here but not part of biomed_private_path_list below
add_to_series_path_0_3 = r"data\sub_collection\private_centerforbiomedical_chicagobiomedicaljam_chicagobiomedicaljam2016.txt"

biomed_private_path_list = [add_to_series_path_0,
                            add_to_series_path_0_0,
                            add_to_series_path_0_1,
                            add_to_series_path_0_2
                           ]
biomed_private_list = _load_json_files(biomed_private_path_list)

biomed_result = _merge_private_members(
    digitalhub_sub_collection_recall_df,
    'ab570598-e497-4c81-9351-1aa316252682',
    biomed_private_list,
)
print(len(biomed_result))
# print(biomed_result)


##########################################################################################################################################
### Add private files to Galter Library Audio-Visual Archive --> Northwestern University Medical School, Class of 1952: an oral history###
##########################################################################################################################################

add_to_series_path_1_0_0 = r"data\sub_collection\private_av_archive_oral_histories_interviews_with_Alfred_F_Anderegg.txt"
add_to_series_path_1_0_1 = r"data\sub_collection\private_av_archive_oral_histories_interviews_with_Maurice_Gore.txt"
add_to_series_path_1_0_2 = r"data\sub_collection\private_av_archive_oral_histories_sample_of.txt"

oral_private_path_list = [add_to_series_path_1_0_0,
                          add_to_series_path_1_0_1,
                          add_to_series_path_1_0_2
                         ]
oral_private_list = _load_json_files(oral_private_path_list)

oral_result = _merge_private_members(
    digitalhub_sub_collection_recall_df,
    'cece380d-4dee-4e4e-aa97-28cb1d4f6b19',
    oral_private_list,
)
print(len(oral_result))
# print(oral_result)

#############################################################################################################################
### Add private files to History of Feinberg School of Medicine --> Special Collections- Art (known as Portraits in Prism ###
#############################################################################################################################

add_to_series_path_2_0_0 = r"data\sub_collection\private_feinberghistory_portraits_louiskeith.txt"

portrait_private_path_list = [add_to_series_path_2_0_0]
portrait_private_list = _load_json_files(portrait_private_path_list)

portrait_result = _merge_private_members(digitalhub_sub_collection_recall_df, '2f75r807r', portrait_private_list)
print(len(portrait_result))
# print(portrait_result)

#############################################################################################
### Add private files to History of Feinberg School of Medicine --> Photos from the vault ###
#############################################################################################

add_to_series_path_3_0_0 = r"data\sub_collection\private_feinberghistory_vault_alumnilibrary.txt"

vault_private_path_list = [add_to_series_path_3_0_0]
vault_private_list = _load_json_files(vault_private_path_list)

vault_result = _merge_private_members(digitalhub_sub_collection_recall_df, '5712m6524', vault_private_list)
print(len(vault_result))
# print(vault_result)


###########################################################################################################
### Add private files to Researchers' Collections --> Expression of receptors for plasminogen activators###
###########################################################################################################

add_to_series_path_4_0_0 = r"data\sub_collection\private_researcher_expressionofreceptors_changesinblood.txt"
add_to_series_path_4_1_0 = r"data\sub_collection\private_researcher_expressionofreceptors_particulate.txt"
add_to_series_path_4_2_0 = r"data\sub_collection\private_researcher_expressionofreceptors_theapparent.txt"
add_to_series_path_4_3_0 = r"data\sub_collection\private_researcher_expressionofreceptors_theroleof.txt"

express_private_path_list = [add_to_series_path_4_0_0,
                             add_to_series_path_4_1_0,
                             add_to_series_path_4_2_0,
                             add_to_series_path_4_3_0]
express_private_list = _load_json_files(express_private_path_list)

express_result = _merge_private_members(digitalhub_sub_collection_recall_df, 'fj236212d', express_private_list)
print("this is len",len(express_result))
print(express_result)



In [None]:
## For every row of the "members" column, gather the "Id" of each member and count them

row_sub_collection_member_list = []
column_sub_collection_member_dict = {}
count_sub_collection_member_dict = {}

for row_label, row_members in digitalhub_sub_collection_recall_df["members"].items():
    member_ids = [row_member["Id"] for row_member in row_members]
    column_sub_collection_member_dict[row_label] = member_ids
    # A count is recorded only for rows that have at least one member;
    # rows with an empty members list get no entry in the count dictionary
    if member_ids:
        count_sub_collection_member_dict[row_label] = len(member_ids)

## Look each row label up in the two dictionaries to build the new columns

recall_row_labels = digitalhub_sub_collection_recall_df.index
digitalhub_sub_collection_recall_df['Member_List'] = recall_row_labels.map(column_sub_collection_member_dict)
digitalhub_sub_collection_recall_df['Member_List_Count'] = recall_row_labels.map(count_sub_collection_member_dict)
# digitalhub_sub_collection_recall_df.head()

## Checked: No problems

In [None]:
## Write digitalhub_sub_collection_recall_df to an Excel workbook with the header row;
## index=True means the Pandas index is written as well

digitalhub_sub_collection_recall_df.to_excel(
    excel_writer="outputs/digitalhub_sub_collection_recall_df.xlsx",
    header=True,
    index=True,
)

##### Split Recall Sub_Collection into Members

In [None]:
## Normalize the members of each row into its own dataframe, tag it with the row label it
## came from ('sub_collection_rowid'), and stack all of them into one members dataframe

sub_collection_members_dfs_list = [
    pd.json_normalize(row_members).assign(sub_collection_rowid=row_label)
    for row_label, row_members in digitalhub_sub_collection_recall_df["members"].items()
]

digitalhub_sub_collection_members_df = (
    pd.concat(sub_collection_members_dfs_list, sort=False)
    .reset_index(drop=True)
)
# digitalhub_sub_collection_members_df.head()

## Resource
## https://stackoverflow.com/questions/62816027/convert-pandas-json-column-to-multiple-rows

## Checked: No problems

In [None]:
## Members without a DOI are labelled Sub_Sub_Collection (level 3); members with a DOI are labelled Record (level 6)

member_doi_is_missing = digitalhub_sub_collection_members_df['DOI'].isna()
digitalhub_sub_collection_members_df['Level Type'] = np.where(member_doi_is_missing, "Sub_Sub_Collection", "Record")
digitalhub_sub_collection_members_df['Level Number'] = np.where(member_doi_is_missing, "3", "6")
## The level numbers are written as text above, so convert them to integers here
digitalhub_sub_collection_members_df['Level Number'] = digitalhub_sub_collection_members_df['Level Number'].map(int)
# digitalhub_sub_collection_members_df.head()

## Checked: No problems

In [None]:
## Write digitalhub_sub_collection_members_df to an Excel workbook with the header row

digitalhub_sub_collection_members_df.to_excel(
    excel_writer="outputs/digitalhub_sub_collection_members_df.xlsx",
    header=True,
)

##### Concatenate Recall Sub_Collection with Members

In [None]:
## Stack the recalled Sub_Collection rows on top of their member rows and order by 'sub_collection_rowid'

digitalhub_sub_col_members_df = (
    pd.concat(
        [digitalhub_sub_collection_recall_df, digitalhub_sub_collection_members_df],
        axis=0,
    )
    .sort_values(by=['sub_collection_rowid'])
)
# digitalhub_sub_col_members_df.head()

## Checked: No problems

In [None]:
## Order rows by sub_collection_rowid, then Level Number (both ascending), in place,
## ahead of the group-wise forward fill of Sub_Collection_ID

digitalhub_sub_col_members_df.sort_values(
    by=['sub_collection_rowid', 'Level Number'],
    ascending=True,
    inplace=True,
)

## Checked: No problems

In [None]:
## Within each sub_collection_rowid group, fill the Sub_Collection_ID forward into the member rows
## (i.e. sub_sub_collections and records), then write the filled values back with update()

filled_sub_collection_ids = (
    digitalhub_sub_col_members_df
    .groupby(['sub_collection_rowid'])['Sub_Collection_ID']
    .ffill()
)
digitalhub_sub_col_members_df.update(filled_sub_collection_ids)

## Resources
## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas
## https://stackoverflow.com/questions/27012151/forward-fill-specific-columns-in-pandas-dataframe

## Checked: No problems

In [None]:
## Sub_Sub_Collection_ID holds the row's own Id for Sub_Sub_Collection rows and NaN for every other row

is_sub_sub_collection_row = digitalhub_sub_col_members_df['Level Type'] == 'Sub_Sub_Collection'
digitalhub_sub_col_members_df['Sub_Sub_Collection_ID'] = np.where(
    is_sub_sub_collection_row,
    digitalhub_sub_col_members_df['Id'],
    np.nan,
)

## Resources
## https://stackoverflow.com/questions/67043249/how-to-use-np-where-in-creating-new-column-using-previous-rows

## Checked: No problems

In [None]:
## Write digitalhub_sub_col_members_df to an Excel workbook: header row included, Pandas index omitted

digitalhub_sub_col_members_df.to_excel(
    excel_writer="outputs/digitalhub_sub_col_members_df.xlsx",
    header=True,
    index=False,
)

## Checked: No problems

##### Concatenate Sub_Collection with Community

In [None]:
## Stack digitalhub_comm_col_df on top of digitalhub_sub_col_members_df and order the result
## by Community_ID, then Sub_Collection_ID

#digitalhub_comm_col_df
#digitalhub_sub_col_members_df

digitalhub_sub_col_sub_col_df = (
    pd.concat([digitalhub_comm_col_df, digitalhub_sub_col_members_df], axis=0)
    .sort_values(by=['Community_ID', 'Sub_Collection_ID'])
)
# digitalhub_sub_col_sub_col_df.head()

## Checked: No problems
## Note: Creates duplicate rows for "Sub_Collection_ID"


In [None]:
## Order rows in place by Id, then Level Number (both ascending), so the duplicate
## Sub_Collection rows produced by the concat can be collapsed per Id

digitalhub_sub_col_sub_col_df.sort_values(
    by=['Id', 'Level Number'],
    ascending=True,
    inplace=True,
)

## Checked: No problems

In [None]:
# Collapse rows that share an Id into one row, keeping the first non-null value of every column.
# NOTE(review): groupby(..., as_index=False) already returns a fresh 0..n-1 index, so the trailing
# reset_index() stores that index as an extra column ('index', or 'level_0' if 'index' exists) -- confirm it is wanted.

digitalhub_sub_col_sub_col_df = (
    digitalhub_sub_col_sub_col_df
    .groupby(['Id'], as_index=False)
    .first()
    .reset_index()
)

## Resources: 
## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas
## https://www.pauldesalvo.com/how-to-apply-a-forward-fill-ffill-to-groups-in-pandas/

## Checked: No problems

In [None]:
## Order rows in place by Sub_Collection_ID, then Level Number (both ascending),
## ahead of the grouped forward fill of Community_ID

digitalhub_sub_col_sub_col_df.sort_values(
    by=['Sub_Collection_ID', 'Level Number'],
    ascending=True,
    inplace=True,
)

## Checked: No problems

In [None]:
## Forward fill Community_ID within each Sub_Collection_ID group, then write the filled values
## back with DataFrame.update, which only overwrites cells with non-NA values

community_id_filled_by_sub_collection = (
    digitalhub_sub_col_sub_col_df
    .groupby(['Sub_Collection_ID'])['Community_ID']
    .ffill()
)
digitalhub_sub_col_sub_col_df.update(community_id_filled_by_sub_collection)

## Other options
##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].fillna(method='ffill')
##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].transform(lambda x: x.ffill())
##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].fillna(method='ffill')

## Resources
## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas
## https://stackoverflow.com/questions/58181262/groupby-with-ffill-deletes-group-and-does-not-put-group-in-index

## Checked: No problems

In [None]:
## Write the frame to an Excel workbook with the header row (the Pandas index is written too)

digitalhub_sub_col_sub_col_df.to_excel(
    "outputs/digitalhub_sub_col_sub_col_df.xlsx",
    header=True,
)

## Checked: No problems

In [None]:
## DO NOT NEED
## Because of groupby issue from above, re-add the community id for communities that lost theirs

## Sort the dataframe by rowid and Level Number
# digitalhub_sub_col_sub_col_df.sort_values(by = ['community_rowid','Level Number'], ascending = [True, True], inplace=True)

# ## Create a column for Sub_Collection ID
# digitalhub_sub_col_sub_col_df['Community_ID'] = np.where(digitalhub_sub_col_sub_col_df['Level Type'] == 'Community',digitalhub_sub_col_sub_col_df['Id'], np.nan)

# ## https://stackoverflow.com/questions/67043249/how-to-use-np-where-in-creating-new-column-using-previous-rows

# # ## Fill forward the Community ID into subcollections and records
# digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['community_rowid'])['Community_ID'].ffill()

# ## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas


In [None]:
## DO NOT NEED
## Export file to excel, without the Pandas index, but with the header

# digitalhub_sub_col_sub_col_df.to_excel("outputs/digitalhub_sub_col_sub_col_df.xlsx",  header=True)

##### Recall Sub_Sub_Collection

In [None]:
## Collect the 'Id' of every row whose Level Type is "Sub_Sub_Collection" and convert it to a list

digitalhub_sub_sub_collection_series = digitalhub_sub_col_sub_col_df.loc[
    digitalhub_sub_col_sub_col_df["Level Type"] == "Sub_Sub_Collection",
    'Id',
]
digitalhub_sub_sub_collection_list = digitalhub_sub_sub_collection_series.tolist()

print(digitalhub_sub_sub_collection_list)

## Checked: No problems

In [None]:
## Fetch the JSON metadata for every DigitalHub Sub_Sub_Collection Id with urllib.request.
## Parsed responses are collected in multi_digitalhub_sub_sub_collection_list; any Id whose request
## or parse fails is recorded in digitalhub_sub_sub_collection_problem_list and the loop moves on.
## test_list = ['2cc92425-b656-47ea-a3b4-825405ee6088', 'a86e1412-d72c-4cae-b8ca-16fd834cb128','fc389d13-2430-409b-82fd-a4b26613d350']

multi_digitalhub_sub_sub_collection_list = []
digitalhub_sub_sub_collection_problem_list = []

for item in digitalhub_sub_sub_collection_list:
    try:

        # The timeout (seconds) keeps a single unresponsive request from stalling the whole run;
        # a request that times out is recorded as a problem by the handlers below.
        with urllib.request.urlopen(f"https://digitalhub.northwestern.edu/collections/{item}.json", timeout=30) as url:
            single_digitalhub_sub_sub_collection_dict = json.loads(url.read().decode())
            multi_digitalhub_sub_sub_collection_list.append(single_digitalhub_sub_sub_collection_dict)
            print(item)
        # Pause for one second after each successful request
        time.sleep(1)


    except urllib.error.HTTPError as http_err:
        # Raised by urlopen for HTTP error responses
        print(item)
        digitalhub_sub_sub_collection_problem_list.append(item)
        print(f'HTTP error occurred: {http_err}')  # Python 3.6


    except urllib.error.URLError as url_err:
        # The loop continues with the next Id, so the message no longer claims to exit
        print(item)
        digitalhub_sub_sub_collection_problem_list.append(item)
        print(f'URL error occurred: {url_err}. ')  # Python 3.6


    except json.JSONDecodeError as json_err:
        # The response body was not valid JSON
        print(item)
        digitalhub_sub_sub_collection_problem_list.append(item)
        print(f'JSON Decode error occurred: {json_err}. ', 'Poorly formed JSON.')  # Python 3.6


    except Exception as err:
        # Anything else (for example a read timeout): record the Id and keep going
        print(item)
        digitalhub_sub_sub_collection_problem_list.append(item)
        print(f'Other error occurred: {err}. ')  # Python 3.6



    else:
        print('Success!')



## Resources
## https://docs.python.org/3/library/urllib.request.html

## Checked: No problems

In [None]:
## Report how many Sub_Sub_Collection Ids failed during the URL query, followed by the failed Ids

# print(multi_digitalhub_sub_sub_collection_list)
print(
    len(digitalhub_sub_sub_collection_problem_list),
    digitalhub_sub_sub_collection_problem_list,
    sep="\n",
)

In [None]:
## Build a dataframe from the fetched Sub_Sub_Collection JSON, flattening nested objects one level deep.
## pd.json_normalize already returns a DataFrame, so no DataFrame.from_dict wrapper is needed, and the
## top-level function is used instead of the deprecated pandas.io.json alias.

digitalhub_sub_sub_collection_recall_df = pd.json_normalize(multi_digitalhub_sub_sub_collection_list, max_level=1)
digitalhub_sub_sub_collection_recall_df.head(10)

In [None]:
## Load the .txt files holding the JSON metadata of the problem Sub_Sub_Collections.
## Files are parsed as JSON; unescaped double quotes inside string values are repaired when needed.
## Paths use forward slashes so they resolve on every platform, like the other data/ and outputs/ paths.

## 1. Problem with: 8b34974a-1cef-402d-8a57-c04c1f015fa9
## child-1-of-template-0 (10-04-19)

## 2. Problem with: af7512ff-addc-457c-b2c2-22f49b3aaab0
## child-0-of-template-0 (10-04-19)

## 3. Problem with: eaf6729a-34d6-4dec-ae66-c339f14fe1ea
## child-2-of-template-0 (10-04-19)

## 4. Problem with: 906b9f6e-a70e-4821-aee0-9a038ec909d5
## child-1-of-template-1 (10-04-19)

## 5. Problem with: a4ba4155-9cd8-43eb-b4a7-865e50b61dc0
## child-0-of-template-1 (10-04-19)

## 6. Problem with: c2b21560-15fb-4c7d-a7ca-2ecee215a647
## child-2-of-template-1 (10-04-19)

## 7. Problem with: 49945521-3b35-4efd-9e77-e909900f1604
R03_path = "data/sub_sub_collection/R03.txt"

## 8. Problem with: 6a9690f5-664a-4654-a5ca-c1c1ca99db9b
R01_path = "data/sub_sub_collection/R01.txt"

## 9. Problem with: af85d02b-4b6c-48db-8cac-b82f4075e1f0
R21_path = "data/sub_sub_collection/R21.txt"

## 10. Problem with: 7d40546b-cad0-4838-b289-947b5187c02a
biosketches_path = "data/sub_sub_collection/biosketches.txt"

## 11. Problem with: cb52ca3c-f967-41b1-886d-60f5611706ac
summary_statements_path = "data/sub_sub_collection/summary_statements.txt"

## 12. Problem with: c6f1d0a2-9bc2-424f-aebe-1ea6beed176d
## is a file, but was never given a DOI. Tunc-Ozcan_K99R00_7.22.2021.pdf

## 13. Problem with:
health_and_welfare_path = "data/sub_sub_collection/health_and_welfare.txt"

## 14. Problem with:
fight_for_life_path = "data/sub_sub_collection/fight_for_life.txt"

## 15. Problem with:
unlabeled_1_path = "data/sub_sub_collection/unlabeled_1.txt"

## 16. Problem with:
unlabeled_2_path = "data/sub_sub_collection/unlabeled_2.txt"

## 17. Problem with:
a_little_culture_path = "data/sub_sub_collection/a_little_culture.txt"

## 18. Problem with:
bachelors_degree_1_path = "data/sub_sub_collection/bachelors_degree_1.txt"

## 19. Problem with:
bachelors_degree_2_path = "data/sub_sub_collection/bachelors_degree_2.txt"


def _load_json_with_quote_repair(text):
    """Parse `text` as JSON, escaping unescaped double quotes inside string values when needed.

    On each json.JSONDecodeError the double quote immediately before the reported error
    position is escaped, together with the next double quote after it, and parsing is retried.

    Returns the parsed JSON value.
    Raises json.JSONDecodeError when no quote is available to repair, and ValueError when the
    text still does not parse after as many repair passes as it has double quotes.
    """
    # Every pass escapes two quotes, so the original quote count bounds the useful passes.
    max_repairs = text.count('"')
    for _ in range(max_repairs + 1):
        try:
            return json.loads(text)
        except json.JSONDecodeError as err:
            # err.pos is the "(char N)" offset shown in the error message
            unesc = text.rfind('"', 0, err.pos)
            if unesc == -1:
                raise  # no quote before the error position: not a quoting problem
            text = text[:unesc] + r'\"' + text[unesc + 1:]
            # matching closing quote (+2 skips the inserted backslash and the quote itself)
            closg = text.find('"', unesc + 2)
            if closg == -1:
                raise  # no closing quote to escape: give up instead of corrupting the text
            text = text[:closg] + r'\"' + text[closg + 1:]
    raise ValueError("JSON text could not be repaired by escaping double quotes")


problem_sub_sub_dict_list = []

path_sub_sub_list = [biosketches_path,
                     R01_path,
                     R03_path,
                     R21_path,
                     summary_statements_path,
                     health_and_welfare_path,
                     fight_for_life_path,
                     unlabeled_1_path,
                     unlabeled_2_path,
                     a_little_culture_path,
                     bachelors_degree_1_path,
                     bachelors_degree_2_path]

for path in path_sub_sub_list:
    with open(path, 'r', encoding= 'utf8') as f:
        problem_sub_sub_dict_list.append(_load_json_with_quote_repair(f.read()))



## https://stackoverflow.com/questions/16573332/jsondecodeerror-expecting-value-line-1-column-1-char-0
## https://stackoverflow.com/questions/18514910/how-do-i-automatically-fix-an-invalid-json-string

## Checked: No problems

In [None]:
## Build a dataframe from the manually loaded problem Sub_Sub_Collection JSON, flattening nested
## objects one level deep. pd.json_normalize already returns a DataFrame, so no DataFrame.from_dict
## wrapper is needed, and the top-level function replaces the deprecated pandas.io.json alias.
problem_sub_sub_df = pd.json_normalize(problem_sub_sub_dict_list, max_level=1)
problem_sub_sub_df.head()

## Checked: No problems

In [None]:
## Append the manually loaded problem rows to the Sub_Sub_Collection recall frame
## and renumber the index from 0 (the old index is discarded)

digitalhub_sub_sub_collection_recall_df = pd.concat(
    [digitalhub_sub_sub_collection_recall_df, problem_sub_sub_df],
    axis=0,
).reset_index(drop=True)
digitalhub_sub_sub_collection_recall_df.head(10)

## Checked: No problems

In [None]:
## Label every recalled row as a "Sub_Sub_Collection" at level 3 (stored as an integer)
digitalhub_sub_sub_collection_recall_df["Level Type"] = "Sub_Sub_Collection"
digitalhub_sub_sub_collection_recall_df["Level Number"] = "3"
digitalhub_sub_sub_collection_recall_df["Level Number"] = (
    digitalhub_sub_sub_collection_recall_df["Level Number"].apply(int)
)

## Keep the current index as a row identifier column
digitalhub_sub_sub_collection_recall_df["sub_sub_collection_rowid"] = digitalhub_sub_sub_collection_recall_df.index

## Copy each row's Id into a new Sub_Sub_Collection_ID column
digitalhub_sub_sub_collection_recall_df["Sub_Sub_Collection_ID"] = digitalhub_sub_sub_collection_recall_df["Id"]

## Checked: No problems


In [None]:
##################################################################################################
##### Add records to sub_sub_collection || Add sub-sub_sub_collections to sub_sub_collection ####
##################################################################################################

def _load_json_files(paths):
    """Parse each UTF-8 text file in `paths` as JSON and return the parsed objects in the same order."""
    parsed = []
    for json_path in paths:
        with open(json_path, 'r', encoding='utf8') as f:
            parsed.append(json.loads(f.read()))
    return parsed


def _append_private_members(recall_df, collection_id, private_members):
    """Append `private_members` to the 'members' list of the row whose 'Id' is `collection_id`.

    Parameters:
        recall_df: dataframe with an 'Id' column and a 'members' column holding lists.
        collection_id: Id of the collection whose members list is extended.
        private_members: parsed member records to add; records already present are skipped.

    Returns the merged members list, which is also written back into `recall_df` in place.
    Raises ValueError (from Series.item) unless exactly one row matches `collection_id`.
    """
    current = recall_df.loc[recall_df['Id'] == collection_id, 'members']
    existing_members = current.item()
    # Compare against the existing member records themselves. Series.items() yields
    # (index, value) tuples, so a membership test against it never matches a record.
    merged = existing_members + [m for m in private_members if m not in existing_members]
    recall_df['members'] = recall_df['members'].astype('object')
    # Write back to the row that was matched by Id rather than to a hard-coded row label,
    # so the update stays correct if the row order of the recall frame changes.
    recall_df.at[current.index[0], 'members'] = merged
    return merged


#################################################################################
## Add private files to 12th General Hospital --> Mason --> General Reports ####
################################################################################

add_to_series_path_8 = "data/sub_sub_collection/private_12_general_Mason_12_Gen_Reports_Final_Report.txt"

gen_reports_private_path_list = [add_to_series_path_8]

gen_reports_private_list = _load_json_files(gen_reports_private_path_list)

gen_reports_result = _append_private_members(
    digitalhub_sub_sub_collection_recall_df,
    '1a3c738a-ba69-4275-98b0-fc77939d5b93',
    gen_reports_private_list,
)
print(len(gen_reports_result))
# print(gen_reports_result)


############################################################################
## Add private files to 12th General Hospital --> Mason --> Photographs ####
############################################################################

add_to_series_path_9 = "data/sub_sub_collection/private_12_general_Mason_photographs_miscellaneous_14.txt"
add_to_series_path_10 = "data/sub_sub_collection/private_12_general_Mason_photographs_rome_034.txt"

photographs_private_path_list = [add_to_series_path_9,
                                 add_to_series_path_10]

photographs_private_list = _load_json_files(photographs_private_path_list)

photographs_result = _append_private_members(
    digitalhub_sub_sub_collection_recall_df,
    '19e0660b-7dd3-480b-b3ec-b57a12f34014',
    photographs_private_list,
)
print(len(photographs_result))
# print(photographs_result)

#############################################################################################################################################################################################################
### Add private files to Medical Subject Headings-Library of Congress Subject Headings Mapping Data --> Library of Congress Subject Headings Authority Records with Medical Subject Headings mapping data###
############################################################################################################################################################################################################

add_to_series_path_8_0 = "data/sub_sub_collection/private_mesh_lcsh_lcsh2018.txt"
add_to_series_path_8_1 = "data/sub_sub_collection/private_mesh_lcsh_lcsh2018_2.txt"

lcsh_private_path_list = [add_to_series_path_8_0,
                          add_to_series_path_8_1]

lcsh_private_list = _load_json_files(lcsh_private_path_list)

lcsh_result = _append_private_members(
    digitalhub_sub_sub_collection_recall_df,
    '3e59c5b9-bbbf-4f49-946f-e08ef9b10d9f',
    lcsh_private_list,
)
print(len(lcsh_result))
# print(lcsh_result)

#############################################################################################################################################################################################################
### Add private files to Medical Subject Headings-Library of Congress Subject Headings Mapping Data --> Medical Subject Headings Authority Records with Library of Congress Subject Headings mapping data###
############################################################################################################################################################################################################

add_to_series_path_9_0 = "data/sub_sub_collection/private_mesh_mesh_mesh2018.txt"
add_to_series_path_9_1 = "data/sub_sub_collection/private_mesh_mesh_mesh2018_2.txt"

mesh_private_path_list = [add_to_series_path_9_0,
                          add_to_series_path_9_1]

mesh_private_list = _load_json_files(mesh_private_path_list)

mesh_result = _append_private_members(
    digitalhub_sub_sub_collection_recall_df,
    '4b7f6a77-9cf4-4deb-b9aa-f3f49391bcc8',
    mesh_private_list,
)
print(len(mesh_result))
# print(mesh_result)



In [None]:
## Walk the "members" column and, for every row, collect the Ids of its member records

column_sub_sub_collection_member_dict = {}
count_sub_sub_collection_member_dict = {}

for k, v in digitalhub_sub_sub_collection_recall_df["members"].items():
    member_ids = [value["Id"] for value in v]
    column_sub_sub_collection_member_dict[k] = member_ids
    # A count is only recorded for rows that have at least one member, so rows with an
    # empty members list end up with NaN in Member_List_Count.
    # NOTE(review): confirm NaN (rather than 0) is what downstream steps expect.
    if member_ids:
        count_sub_sub_collection_member_dict[k] = len(member_ids)

## Map the per-row member Id lists and counts onto the Sub_Sub_Collection recall dataframe by index

digitalhub_sub_sub_collection_recall_df['Member_List'] = digitalhub_sub_sub_collection_recall_df.index.map(column_sub_sub_collection_member_dict)
digitalhub_sub_sub_collection_recall_df['Member_List_Count'] = digitalhub_sub_sub_collection_recall_df.index.map(count_sub_sub_collection_member_dict)
# Preview the frame that was just modified (the Sub_Sub_Collection one, not the Sub_Collection one)
digitalhub_sub_sub_collection_recall_df.head()

## Checked: No problems

In [None]:
## Write the frame to an Excel workbook, including both the Pandas index and the header row

digitalhub_sub_sub_collection_recall_df.to_excel(
    "outputs/digitalhub_sub_sub_collection_recall_df.xlsx",
    header=True,
    index=True,
)

## Checked: No problems

###### Split Recall Sub_Sub_Collection into Members

In [None]:
## Turn each row's "members" list into its own dataframe, tag it with the parent row's index as
## 'sub_sub_collection_rowid', and concatenate everything into one members dataframe

sub_sub_collection_members_dfs_list = [
    pd.json_normalize(v).assign(sub_sub_collection_rowid=k)
    for k, v in digitalhub_sub_sub_collection_recall_df["members"].items()
]

# drop=True discards the per-member-frame index; reset_index expects a bool here, not a column name
digitalhub_sub_sub_collection_members_df = pd.concat(sub_sub_collection_members_dfs_list, sort=False).reset_index(drop=True)
digitalhub_sub_sub_collection_members_df.head()

## Resource
## https://stackoverflow.com/questions/62816027/convert-pandas-json-column-to-multiple-rows

## Checked: No problems

In [None]:
## Classify each member of a Sub_Sub_Collection by whether it has a DOI:
## no DOI -> "Sub_Sub_Sub_Collection" (level 4); DOI present -> "Record" (level 6)

member_lacks_doi = digitalhub_sub_sub_collection_members_df['DOI'].isnull()

digitalhub_sub_sub_collection_members_df['Level Type'] = np.where(
    member_lacks_doi, "Sub_Sub_Sub_Collection", "Record"
)
digitalhub_sub_sub_collection_members_df['Level Number'] = np.where(member_lacks_doi, "4", "6")
# The level is assigned as text above and converted to an integer here
digitalhub_sub_sub_collection_members_df['Level Number'] = (
    digitalhub_sub_sub_collection_members_df['Level Number'].apply(int)
)
digitalhub_sub_sub_collection_members_df.head()

## Checked: No problems

##### Concatenate Recall Sub_Sub_Collection with Members

In [None]:
## Stack the Sub_Sub_Collection recall rows and the Sub_Sub_Collection member rows into one frame
## (row-wise concat), then order the combined rows by 'sub_sub_collection_rowid'

digitalhub_sub_sub_col_members_df = pd.concat(
    [digitalhub_sub_sub_collection_recall_df, digitalhub_sub_sub_collection_members_df],
    axis=0,
)
digitalhub_sub_sub_col_members_df = digitalhub_sub_sub_col_members_df.sort_values(by='sub_sub_collection_rowid')
digitalhub_sub_sub_col_members_df.head()

## Checked: No problems

In [None]:
## Write the frame to an Excel workbook: header row included, Pandas index omitted
digitalhub_sub_sub_col_members_df.to_excel(
    "outputs/digitalhub_sub_sub_col_members_df.xlsx",
    header=True,
    index=False,
)

## Checked: No problems

In [None]:
## Order rows in place by sub_sub_collection_rowid, then Level Number (both ascending),
## ahead of the grouped forward fill of Sub_Sub_Collection_ID

digitalhub_sub_sub_col_members_df.sort_values(
    by=['sub_sub_collection_rowid', 'Level Number'],
    ascending=True,
    inplace=True,
)

## Checked: No problems

In [None]:
## Forward fill Sub_Sub_Collection_ID within each sub_sub_collection_rowid group, then write the
## filled values back with DataFrame.update, which only overwrites cells with non-NA values

sub_sub_collection_id_filled = (
    digitalhub_sub_sub_col_members_df
    .groupby(['sub_sub_collection_rowid'])['Sub_Sub_Collection_ID']
    .ffill()
)
digitalhub_sub_sub_col_members_df.update(sub_sub_collection_id_filled)

## Resources
## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas
## https://stackoverflow.com/questions/27012151/forward-fill-specific-columns-in-pandas-dataframe

In [None]:
## Add Sub_Sub_Sub_Collection_ID: rows whose Level Type is "Sub_Sub_Sub_Collection" receive their own Id,
## every other row receives NaN

is_sub_sub_sub_collection_row = digitalhub_sub_sub_col_members_df['Level Type'] == 'Sub_Sub_Sub_Collection'
digitalhub_sub_sub_col_members_df['Sub_Sub_Sub_Collection_ID'] = np.where(
    is_sub_sub_sub_collection_row,
    digitalhub_sub_sub_col_members_df['Id'],
    np.nan,
)

## Resources
## https://stackoverflow.com/questions/67043249/how-to-use-np-where-in-creating-new-column-using-previous-rows

## Checked: No problems

In [None]:
## Write the frame to an Excel workbook with the header row (the Pandas index is written too);
## this overwrites the workbook of the same name exported earlier

digitalhub_sub_sub_col_members_df.to_excel(
    "outputs/digitalhub_sub_sub_col_members_df.xlsx",
    header=True,
)

## Checked: No problems

##### Concatenate Sub_Sub_Collection with Sub_Collection and Community

In [None]:
## Stack digitalhub_sub_col_sub_col_df on top of digitalhub_sub_sub_col_members_df (row-wise concat),
## then order the combined rows by Community_ID and Sub_Collection_ID

digitalhub_sub_col_sub_col_sub_col_df = pd.concat(
    [digitalhub_sub_col_sub_col_df, digitalhub_sub_sub_col_members_df],
    axis=0,
)
digitalhub_sub_col_sub_col_sub_col_df = digitalhub_sub_col_sub_col_sub_col_df.sort_values(
    by=['Community_ID', 'Sub_Collection_ID']
)
digitalhub_sub_col_sub_col_sub_col_df.head()

## Note: Creates duplicate rows for "Sub_Sub_Collection_ID"
## Checked: No problems

In [None]:
## Write the frame to an Excel workbook with the header row (the Pandas index is written too)

digitalhub_sub_col_sub_col_sub_col_df.to_excel(
    "outputs/digitalhub_sub_col_sub_col_sub_col_df.xlsx",
    header=True,
)

## Checked: No problems

In [None]:
## Order rows in place by Id, then Level Number (both ascending), so the duplicate
## Sub_Sub_Collection rows produced by the concat can be collapsed per Id
digitalhub_sub_col_sub_col_sub_col_df.sort_values(
    by=['Id', 'Level Number'],
    ascending=True,
    inplace=True,
)

## Checked: No problems

In [None]:
## Collapse rows that share an Id into one row, keeping the first non-null value of every column.
## NOTE(review): groupby(..., as_index=False) already returns a fresh 0..n-1 index, so the trailing
## reset_index() stores that index as an extra column ('index', or 'level_0' if 'index' exists) -- confirm it is wanted.
digitalhub_sub_col_sub_col_sub_col_df = (
    digitalhub_sub_col_sub_col_sub_col_df
    .groupby(['Id'], as_index=False)
    .first()
    .reset_index()
)

## Checked: No problems

In [None]:
## Order rows in place by Sub_Sub_Collection_ID, then Level Number (both ascending),
## ahead of the grouped forward fill of Community_ID

digitalhub_sub_col_sub_col_sub_col_df.sort_values(
    by=['Sub_Sub_Collection_ID', 'Level Number'],
    ascending=True,
    inplace=True,
)

## Checked: No problems

In [None]:
## Forward fill Community_ID within each Sub_Sub_Collection_ID group, then write the filled values
## back with DataFrame.update, which only overwrites cells with non-NA values

community_id_filled_by_sub_sub_collection = (
    digitalhub_sub_col_sub_col_sub_col_df
    .groupby(['Sub_Sub_Collection_ID'])['Community_ID']
    .ffill()
)
digitalhub_sub_col_sub_col_sub_col_df.update(community_id_filled_by_sub_sub_collection)

## Other options
##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].fillna(method='ffill')
##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].transform(lambda x: x.ffill())
##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].fillna(method='ffill')

## Resources
## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas
## https://stackoverflow.com/questions/58181262/groupby-with-ffill-deletes-group-and-does-not-put-group-in-index

## Checked: No problems

In [None]:
## Write the frame to an Excel workbook with the header row (the Pandas index is written too);
## this overwrites the workbook of the same name exported earlier

digitalhub_sub_col_sub_col_sub_col_df.to_excel(
    "outputs/digitalhub_sub_col_sub_col_sub_col_df.xlsx",
    header=True,
)

## Checked: No problems

#### Recall Sub_Sub_Sub_Collections
##### Do not need if no sub_sub_sub_collections

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections

## Collect the 'Id' of every row whose Level Type is "Sub_Sub_Sub_Collection" and convert it to a list

digitalhub_sub_sub_sub_collection_series = digitalhub_sub_col_sub_col_sub_col_df.loc[
    digitalhub_sub_col_sub_col_sub_col_df["Level Type"] == "Sub_Sub_Sub_Collection",
    'Id',
]
digitalhub_sub_sub_sub_collection_list = digitalhub_sub_sub_sub_collection_series.tolist()

print(digitalhub_sub_sub_sub_collection_list)

## Checked: No problems 

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections

## Loop through list of DigitalHub Sub_Sub_Sub_Collections and use the urllib.request to get the json data
## test_list = ['2cc92425-b656-47ea-a3b4-825405ee6088', 'a86e1412-d72c-4cae-b8ca-16fd834cb128','fc389d13-2430-409b-82fd-a4b26613d350']

# multi_digitalhub_sub_sub_sub_collection_list = []
# digitalhub_sub_sub_sub_collection_problem_list = []

# for item in digitalhub_sub_sub_sub_collection_list:
#     try:

#         with urllib.request.urlopen(f"https://digitalhub.northwestern.edu/collections/{item}.json" ) as url:
#             single_digitalhub_sub_sub_sub_collection_dict = json.loads(url.read().decode())
#             multi_digitalhub_sub_sub_sub_collection_list.append(single_digitalhub_sub_sub_sub_collection_dict)
#             print(item)
#         time.sleep(1)
       

#     except urllib.error.HTTPError as http_err:
#         print(item)
#         digitalhub_sub_sub_sub_collection_problem_list.append(item)
#         print(f'HTTP error occurred: {http_err}')  # Python 3.6
        

#     except urllib.error.URLError as url_err:
#         print(item)
#         digitalhub_sub_sub_sub_collection_problem_list.append(item)
#         print(f'URL error occurred: {url_err}. ')  # Python 3.6
       

#     except json.JSONDecodeError as json_err:
#         print(item)
#         digitalhub_sub_sub_sub_collection_problem_list.append(item)
#         print(f'JSON Decode error occurred: {json_err}. ', 'Poorly formed JSON.')  # Python 3.6
       
        
#     except Exception as err:
#         print(item)
#         digitalhub_sub_sub_sub_collection_problem_list.append(item)
#         print(f'Other error occurred: {err}. ')  # Python 3.6
        
              
#     else:
#         print('Success!')



## Resources
## https://docs.python.org/3/library/urllib.request.html

## Checked: No problems

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections

## Inspect the results of the URL Query for DigitalHub Collections that will become communities in Prism

# print(multi_digitalhub_sub_sub_sub_collection_list)
# print(digitalhub_sub_sub_sub_collection_problem_list)

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections
## Create a dataframe from DigitalHub json for DigitalHub Sub_Sub_Sub_Collections

# digitalhub_sub_sub_sub_collection_recall_df = pd.DataFrame.from_dict(json_normalize(multi_digitalhub_sub_sub_sub_collection_list, max_level=1))
# digitalhub_sub_sub_sub_collection_recall_df.head(10)

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections
### DO NOT NEED if there are no problems with sub_sub_sub_collections

## Upload .txt files of the problem Sub_Sub_Sub_Collection json metadata

# name_here_path = r"data\sub_sub_sub_collection\NAME HERE.txt"
# problem_sub_sub_sub_dict_list = []
# path_sub_sub_sub_list = [name_here_path]

# for path in path_sub_sub_sub_list: 
#     with open(path, 'r', encoding= 'utf8') as f:
#         s = f.read()
#         while True:
#             try:
#                 result = json.loads(s)   # try to parse...
#                 break                    # parsing worked -> exit loop
#             except Exception as e:
#                 # "Expecting , delimiter: line 34 column 54 (char 1158)"
#                 # position of unexpected character after '"'
#                 unexp = int(re.findall(r'\(char (\d+)\)', str(e))[0])
#                 # position of unescaped '"' before that
#                 unesc = s.rfind(r'"', 0, unexp)
#                 s = s[:unesc] + r'\"' + s[unesc+1:]
#                 # position of correspondig closing '"' (+2 for inserted '\')
#                 closg = s.find(r'"', unesc + 2)
#                 s = s[:closg] + r'\"' + s[closg+1:]
# #         print(result)
#         problem_sub_sub_sub_dict_list.append(result)



## https://stackoverflow.com/questions/16573332/jsondecodeerror-expecting-value-line-1-column-1-char-0
## https://stackoverflow.com/questions/18514910/how-do-i-automatically-fix-an-invalid-json-string

## Checked: No problems

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections
### DO NOT NEED if there are no problems with sub_sub_sub_collections

# problem_sub_sub_sub_df = pd.DataFrame.from_dict(json_normalize(problem_sub_sub_sub_dict_list, max_level=1))
# problem_sub_sub_sub_df.head()

## Checked: No problems

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections
### DO NOT NEED if there are no problems with sub_sub_sub_collections

## Concatenate the DigitalHub Community Dataframe to the problem_df

# digitalhub_sub_sub_sub_collection_recall_df= pd.concat([digitalhub_sub_sub_sub_collection_recall_df, problem_sub_sub_sub_df], axis=0)
# digitalhub_sub_sub_sub_collection_recall_df.reset_index(inplace=True, drop=True) 
# digitalhub_sub_sub_sub_collection_recall_df.head(10)

## Checked: No problems


In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections

# ## Add Column to digitalhub_sub_sub_sub_collection_recall_df to indicate that these results are a sub_sub_sub_collection
# digitalhub_sub_sub_sub_collection_recall_df["Level Type"] = "Sub_Sub_Sub_Collection"
# digitalhub_sub_sub_sub_collection_recall_df["Level Number"] = "4"
# digitalhub_sub_sub_sub_collection_recall_df['Level Number'] = digitalhub_sub_sub_sub_collection_recall_df['Level Number'].apply(int)

# ## Create a column from the index
# digitalhub_sub_sub_sub_collection_recall_df['sub_sub_sub_collection_rowid'] = digitalhub_sub_sub_sub_collection_recall_df.index

# ## Create a new column called Sub_Sub_Sub_Collection_ID
# digitalhub_sub_sub_sub_collection_recall_df['Sub_Sub_Sub_Collection_ID'] = digitalhub_sub_sub_sub_collection_recall_df['Id']

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Inspect the "members" column and create a list of member IDs

# row_sub_sub_sub_collection_member_list = []
# column_sub_sub_sub_collection_member_dict ={}
# count_sub_sub_sub_collection_member_dict = {}

# for k, v in digitalhub_sub_sub_sub_collection_recall_df["members"].items():
#     for value in v: 
#         member = value["Id"]
#         row_sub_sub_sub_collection_member_list.append(member)
#         count_sub_sub_sub_collection_member_dict[k] = len(row_sub_sub_sub_collection_member_list)
#     column_sub_sub_sub_collection_member_dict[k] = row_sub_sub_sub_collection_member_list
#     row_sub_sub_sub_collection_member_list =[]

# ## Append the column member dictionary to the DigitalHub Community DF dataframe

# digitalhub_sub_sub_sub_collection_recall_df['Member_List'] =digitalhub_sub_sub_sub_collection_recall_df.index.map(column_sub_sub_sub_collection_member_dict)
# digitalhub_sub_sub_sub_collection_recall_df['Member_List_Count'] =digitalhub_sub_sub_sub_collection_recall_df.index.map(count_sub_sub_sub_collection_member_dict)
# digitalhub_sub_sub_collection_recall_df.head()

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Export file to excel, without the Pandas index with the header

# digitalhub_sub_sub_sub_collection_recall_df.to_excel("outputs/digitalhub_sub_sub_sub_collection_recall_df.xlsx", header=True)

# ## Checked: No problems

##### Split the Recall Sub_Sub_Sub_Collection into Members

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections
## Create a dataframe for each members row and concatenate all of these into one member_df

# sub_sub_sub_collection_members_dfs_list = []

# for k, v in digitalhub_sub_sub_sub_collection_recall_df["members"].items():
# #     print("this is k: ", k)
# #     print("this is v: ", v)
#     sub_sub_sub_collection_member_df = pd.json_normalize(v)
#     sub_sub_sub_collection_member_df['sub_sub_sub_collection_rowid'] = k
#     sub_sub_sub_collection_members_dfs_list.append(sub_sub_sub_collection_member_df)

# digitalhub_sub_sub_sub_collection_members_df = pd.concat(sub_sub_sub_collection_members_dfs_list, sort=False).reset_index(drop='index')
# digitalhub_sub_sub_sub_collection_members_df.head()

# ## Resource
# ## https://stackoverflow.com/questions/62816027/convert-pandas-json-column-to-multiple-rows

# ## Checked: No problems

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections
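## Add "Level Type" and "Level Number" columns to digitalhub_sub_sub_sub_collection_members_df: members without a DOI are Sub_Sub_Sub_Sub_Collections (level 5), members with a DOI are Records (level 6)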

# digitalhub_sub_collection_df["Level"] =  "Collection"

# digitalhub_sub_sub_sub_collection_members_df['Level Type'] = np.where(digitalhub_sub_sub_sub_collection_members_df['DOI'].isnull(),"Sub_Sub_Sub_Sub_Collection", "Record" )

# digitalhub_sub_sub_sub_collection_members_df['Level Number'] = np.where(digitalhub_sub_sub_sub_collection_members_df['DOI'].isnull(),"5", "6" )
# digitalhub_sub_sub_sub_collection_members_df['Level Number'] = digitalhub_sub_sub_sub_collection_members_df['Level Number'].apply(int)
# digitalhub_sub_sub_sub_collection_members_df.head()

## Checked: No problems

##### Concatenate Recall Sub_Sub_Sub_Collection with Members

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Concatenate the DigitalHub Sub_Sub_Sub_Collection_Recall Dataframe using the 'rowid' to the DigitalHub Sub_Sub_Sub_Collection Members Dataframe using the 'rowid'

# digitalhub_sub_sub_sub_col_members_df = pd.concat([digitalhub_sub_sub_sub_collection_recall_df, digitalhub_sub_sub_sub_collection_members_df], axis=0).sort_values(by=['sub_sub_sub_collection_rowid'])
# digitalhub_sub_sub_sub_col_members_df.head()

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Export file to excel, without the Pandas index, but with the header
# digitalhub_sub_sub_sub_col_members_df.to_excel("outputs/digitalhub_sub_sub_sub_col_members_df.xlsx", index=False, header=True)

# ## Checked: No problems

In [None]:
## DO NOT NEED if there are no sub_sub_sub_collections
# ## Sort the dataframe by sub_sub_sub_collection_rowid and Level Number to prepare for group by and forward fill of Sub_Sub_Sub_Collection_ID
# ## sub_sub_sub_collection_rowid then Level Number

# digitalhub_sub_sub_sub_col_members_df.sort_values(by = ['sub_sub_sub_collection_rowid','Level Number'], ascending = [True, True], inplace=True)

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Groupby Sub_Sub_Sub_Collection_rowid and fill forward the Sub_Sub_Sub_Collection_ID into members of sub_sub_sub_collections (i.e. sub_sub_sub_sub_collections and records)

# digitalhub_sub_sub_sub_col_members_df.update(digitalhub_sub_sub_sub_col_members_df.groupby(['sub_sub_sub_collection_rowid'])['Sub_Sub_Sub_Collection_ID'].ffill())

# ## Resources
# ## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas
# ## https://stackoverflow.com/questions/27012151/forward-fill-specific-columns-in-pandas-dataframe


In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Create a column for Sub_Sub_Sub_Sub_Collection_ID to hold the Id from columns with Level Type as "Sub_Sub_Sub_Sub_Collection"

# digitalhub_sub_sub_sub_col_members_df['Sub_Sub_Sub_Sub_Collection_ID'] = np.where(digitalhub_sub_sub_sub_col_members_df['Level Type'] == 'Sub_Sub_Sub_Sub_Collection',digitalhub_sub_sub_sub_col_members_df['Id'], np.nan)

# ## Resources
# ## https://stackoverflow.com/questions/67043249/how-to-use-np-where-in-creating-new-column-using-previous-rows

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Export file to excel, without the Pandas index with the header

# digitalhub_sub_sub_sub_col_members_df.to_excel("outputs/digitalhub_sub_sub_sub_col_members_df.xlsx", header=True)

# ## Checked: No problems

##### Concatenate Sub_Sub_Sub_Sub_Collection with Sub_Sub_Collection, Sub_Collection, and Community

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Concatenate the digitalhub_sub_col_sub_col_sub_col_df Dataframe with the digitalhub_sub_sub_sub_col_members_df Dataframe (rows stacked, then sorted by 'Community_ID' and 'Sub_Collection_ID')
# #digitalhub_sub_col_sub_col_sub_col_df
# #digitalhub_sub_sub_sub_col_members_df

# digitalhub_sub_col_sub_col_sub_col_sub_col_df = pd.concat([digitalhub_sub_col_sub_col_sub_col_df, digitalhub_sub_sub_sub_col_members_df], axis=0).sort_values(by=['Community_ID', 'Sub_Collection_ID'])
# digitalhub_sub_col_sub_col_sub_col_sub_col_df.head()

# ## Note: Creates duplicate rows for "Sub_Sub_Collection_ID"
# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Sort the dataframe by Id and Level Number to get ready to groupby ID and remove duplicate Sub_Sub_Sub_Collections from the concatenate
# digitalhub_sub_col_sub_col_sub_col_sub_col_df.sort_values(by = ['Id','Level Number'], ascending = [True, True], inplace=True)

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# digitalhub_sub_col_sub_col_sub_col_sub_col_df.drop(columns=['level_0'], inplace=True)

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Remove duplicates for Sub_Sub_Sub_Collection ID between concatenated dataframes
# digitalhub_sub_col_sub_col_sub_col_sub_col_df = digitalhub_sub_col_sub_col_sub_col_sub_col_df.groupby(['Id'], as_index=False).first().reset_index()

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Sort the dataframe by Sub_Sub_Sub_Collection_ID and then Level Number to get ready to fill Community IDs into connected Sub_Sub_Sub_Collections

# digitalhub_sub_col_sub_col_sub_col_sub_col_df.sort_values(by = ['Sub_Sub_Sub_Collection_ID','Level Number'], ascending = [True, True], inplace=True)

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Fill forward the Community_ID into Records based on their Sub_Sub_Sub_Collection_IDs

# digitalhub_sub_col_sub_col_sub_col_sub_col_df.update(digitalhub_sub_col_sub_col_sub_col_sub_col_df.groupby(['Sub_Sub_Sub_Collection_ID'])['Community_ID'].ffill())

# ## Other options
# ##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].fillna(method='ffill')
# ##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].transform(lambda x: x.ffill())
# ##digitalhub_sub_col_sub_col_df['Community_ID'] = digitalhub_sub_col_sub_col_df.groupby(['Sub_Collection_ID'])['Community_ID'].fillna(method='ffill')

# ## Resources
# ## https://stackoverflow.com/questions/64795941/how-do-i-forward-fill-nas-with-condition-of-2-other-cells-being-equal-in-pandas
# ## https://stackoverflow.com/questions/58181262/groupby-with-ffill-deletes-group-and-does-not-put-group-in-index

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# ## Export file to excel with the header

# digitalhub_sub_col_sub_col_sub_col_sub_col_df.to_excel("outputs/digitalhub_sub_col_sub_col_sub_col_sub_col_df.xlsx",  header=True)

# ## Checked: No problems

In [None]:
# ## DO NOT NEED if there are no sub_sub_sub_collections
# digitalhub_sub_col_sub_col_sub_col_sub_col_df.columns

In [None]:
## Drop the 'level_0' and 'index' columns (presumably left behind by earlier reset_index() calls)
## before the final column re-ordering.
## If there are sub_sub_sub_collections, use: digitalhub_sub_col_sub_col_sub_col_sub_col_df
## If there are no sub_sub_sub_collections, use: digitalhub_sub_col_sub_col_sub_col_df
## errors="ignore" keeps this cell re-runnable: a plain in-place drop raises KeyError the
## second time the cell is executed because the columns are already gone.

digitalhub_sub_col_sub_col_sub_col_df.drop(columns=['level_0', 'index'], inplace=True, errors="ignore")

In [None]:
## Build re_ordered_df: put the columns in their reporting order, then sort the rows
## by the collection hierarchy (every sort key ascending).
## If there are sub_sub_sub_collections, use: digitalhub_sub_col_sub_col_sub_col_sub_col_df
## If there are no sub_sub_sub_collections, use: digitalhub_sub_col_sub_col_sub_col_df
## Note: reindex() adds any listed column that is missing from the source frame as an all-NaN column.

re_ordered_df = (
    digitalhub_sub_col_sub_col_sub_col_df
    .reindex(columns=[
        # hierarchy / level information
        'Level Type',
        'Level Number',
        'Community_ID',
        'Sub_Collection_ID',
        'Sub_Sub_Collection_ID',
        'Sub_Sub_Sub_Collection_ID',
        'Sub_Sub_Sub_Sub_Collection_ID',
        'Record',
        'Member_List',
        'Member_List_Count',
        # descriptive metadata
        'Title',
        'uri',
        'Id',
        'Keyword',
        'Resource type(s)',
        'Rights',
        'Creator',
        'Contributor',
        'Description',
        'Abstract',
        'Original Bibliographic Citation',
        'Related ULR',
        'Publisher',
        'Date Created',
        'Original Identifier',
        'Language',
        'Subject: MESH',
        'Subject: LCSH',
        'Subject: Geographic Name',
        'Subject: Name',
        'Location',
        'Digital Origin',
        'URI',
        'Acknowledgements',
        'Grants And Funding',
        'DOI',
        'ARK',
        'File Size',
        'File Format',
        'download',
        'Multi-page?',
        'members',
        # row ids carried over from each level's dataframe
        'community_rowid',
        'sub_collection_rowid',
        'sub_sub_collection_rowid',
        'sub_sub_sub_collection_rowid',
    ])
    .sort_values(
        by=[
            'Community_ID',
            'community_rowid',
            'Sub_Collection_ID',
            'sub_collection_rowid',
            'Sub_Sub_Collection_ID',
            'sub_sub_collection_rowid',
            'Sub_Sub_Sub_Collection_ID',
            'sub_sub_sub_collection_rowid',
            'Sub_Sub_Sub_Sub_Collection_ID',
        ],
        ascending=True,
    )
)

In [None]:
## Preview the first rows of the re-ordered, sorted dataframe
re_ordered_df.head()

In [None]:
## Write re_ordered_df to an Excel workbook: header row kept, Pandas index omitted

re_ordered_df.to_excel(
    "outputs/re_ordered_df.xlsx",
    header=True,
    index=False,
)

## Checked: No problems

In [None]:
## Load the .csv of DigitalHub Collections that will become Prism Communities

digitalhub_community_path = "data/2022_08-01 DigitalHub Collection Migration Plan_Communities Only.csv"

## Read the CSV file and store into Pandas DataFrame; 'NULL' and '<NA>' cells are read as missing values
## NOTE(review): a later cell drops a column named 'ï»¿description', which looks like a UTF-8 BOM
## decoded as ISO-8859-1 -- confirm whether the file should be read with encoding="utf-8-sig" instead.
digitalhub_community_shape_df = pd.read_csv(digitalhub_community_path, encoding="ISO-8859-1", na_values=['NULL', '<NA>'])

## Normalise the column names: strip whitespace, lower-case, spaces -> underscores, remove parentheses.
## regex=False makes every replacement a literal substring match, so "(" and ")" are never
## interpreted as regular-expression syntax, whatever the installed pandas default for `regex` is.
digitalhub_community_shape_df.columns = (
    digitalhub_community_shape_df.columns
    .str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
)
digitalhub_community_shape_df.head()

## Checked: No problems

In [None]:
## Left-join the DigitalHub Communities spreadsheet onto the re-ordered dataframe:
## every row of re_ordered_df is kept, and spreadsheet fields are attached where
## re_ordered_df['Id'] equals digitalhub_community_shape_df['dh_id'].

merged_df = pd.merge(
    re_ordered_df,
    digitalhub_community_shape_df,
    how='left',
    left_on='Id',
    right_on='dh_id',
)
merged_df.head()

In [None]:
## List the column labels of merged_df right after the merge
merged_df.columns

In [None]:
## Drop the spreadsheet columns that are not carried into the final output.
## errors="ignore" skips any label that is not present, so the cell can be re-run and does not
## fail if the CSV layout changes (e.g. the 'unnamed:_19' filler column or the
## 'ï»¿description' label are absent).

drop_columns = ['ï»¿description','community','metadata:_title', 'metadata:_description', 'dh_id',
           'number_of_collections', 'collection_name',       'child-collection_name_not_present_in_prism', 
           'number_of_items','item-level_id', 'item-level_doi', 'unnamed:_19']

merged_df = merged_df.drop(columns=drop_columns, errors="ignore")
merged_df.head()

In [None]:
## List the column labels of merged_df after the extra columns were dropped
merged_df.columns

In [None]:
## Final re-order of columns, rename of the Prism fields, and sort by the collection hierarchy.
## If there are sub_sub_sub_collections, use: digitalhub_sub_col_sub_col_sub_col_sub_col_df
## If there are no sub_sub_sub_collections, use: digitalhub_sub_col_sub_col_sub_col_df
##
## Fixes compared with the previous version of this cell:
##  * a comma was missing after 'metadata:_website', so Python joined it with 'Keyword' into the
##    single label 'metadata:_websiteKeyword'; reindex() then produced one all-NaN column and
##    silently lost both real columns.
##  * rename() returns a new dataframe; its result was discarded, so the Prism column names
##    were never applied. The result is now kept as part of the chain.

final_re_ordered_df = (
    merged_df
    .reindex(columns=[
        # hierarchy / level information
        'Level Type',
        'Level Number',
        'Community_ID',
        'Sub_Collection_ID',
        'Sub_Sub_Collection_ID',
        'Sub_Sub_Sub_Collection_ID',
        'Sub_Sub_Sub_Sub_Collection_ID',
        'Record',
        'Member_List',
        'Member_List_Count',
        'Title',
        'uri',
        'Id',
        # fields merged in from the communities spreadsheet (renamed below)
        'access:_visibility',
        'access:_member_policy',
        'access:_record_policy',
        'access:_owned_by',
        'access:_reader',
        'id',
        'metadata:_type',
        'metadata:_website',
        # descriptive metadata
        'Keyword',
        'Resource type(s)',
        'Rights',
        'Creator',
        'Contributor',
        'Description',
        'Abstract',
        'Original Bibliographic Citation',
        'Related ULR',
        'Publisher',
        'Date Created',
        'Original Identifier',
        'Language',
        'Subject: MESH',
        'Subject: LCSH',
        'Subject: Geographic Name',
        'Subject: Name',
        'Location',
        'Digital Origin',
        'URI',
        'Acknowledgements',
        'Grants And Funding',
        'DOI',
        'ARK',
        'File Size',
        'File Format',
        'download',
        'Multi-page?',
        'members',
        # row ids carried over from each level's dataframe
        'community_rowid',
        'sub_collection_rowid',
        'sub_sub_collection_rowid',
        'sub_sub_sub_collection_rowid',
    ])
    .rename(columns={
        'access:_visibility': 'Prism Visibility',
        'access:_member_policy': 'Prism Member Policy',
        'access:_record_policy': 'Prism Record Policy',
        'access:_owned_by': 'Prism Owners',
        'access:_reader': 'Prism Readers',
        'id': 'Prism_Link',
        'metadata:_type': 'Prism Community Type',
        'metadata:_website': 'Prism Community Website',
    })
    # none of the sort keys is renamed above, so sorting after the rename is safe
    .sort_values(
        by=[
            'Community_ID',
            'community_rowid',
            'Sub_Collection_ID',
            'sub_collection_rowid',
            'Sub_Sub_Collection_ID',
            'sub_sub_collection_rowid',
            'Sub_Sub_Sub_Collection_ID',
            'sub_sub_sub_collection_rowid',
            'Sub_Sub_Sub_Sub_Collection_ID',
        ],
        ascending=True,
    )
)
final_re_ordered_df.head()

In [None]:
## Write final_re_ordered_df to an Excel workbook: header row kept, Pandas index omitted

final_re_ordered_df.to_excel(
    "outputs/final_re_ordered_df.xlsx",
    header=True,
    index=False,
)

## Checked: No problems

In [None]:
## Persist final_re_ordered_df as a pickle file, then read it straight back
## into final_re_ordered_df_pickle.

# Pickling
with open("outputs/final_re_ordered_df_pickle", mode="wb") as fp:
    pickle.dump(final_re_ordered_df, fp)

# Unpickling
with open("outputs/final_re_ordered_df_pickle", mode="rb") as fp:
    final_re_ordered_df_pickle = pickle.load(fp)