# This notebook excludes non-papers from database search exports and merges the results from PubMed and Web of Science

Required Python version and packages:
- Python == 3.12.0
- pandas == 2.1.2
- jupyter == 1.0.0
- xlrd == 2.0.1 (used by pandas to read the `.xls` Web of Science export)

In [32]:
import pandas as pd
import numpy as np
# Both raw exports live in the same folder, so the folder is spelled out once.
RAW_EXPORT_DIR = "Z:/Literature Review/Queries exports/raw/27-10-2023"

# PubMed results have to be exported in CSV format;
# Web of Science results must be exported in Excel (.xls) format.
PM_path = RAW_EXPORT_DIR + "/PM_1_271023.csv"
WoS_path = RAW_EXPORT_DIR + "/WoS_1_271023.xls"

In [30]:
# Load the PubMed CSV export into a dataframe.
PM_1 = pd.read_csv(filepath_or_buffer=PM_path)

# Last expression of the cell: renders the PubMed dataframe for inspection.
PM_1

In [33]:
# Load the Web of Science Excel export into a dataframe.
WoS_1 = pd.read_excel(io=WoS_path)

# Last expression of the cell: renders the Web of Science dataframe for inspection.
WoS_1

Unnamed: 0,Publication Type,Authors,Book Authors,Group Authors,Book Group Authors,Researcher Ids,ORCIDs,Book Editors,Author - Arabic,Article Title,...,Copyright,Degree Name,Institution Address,Institution,Dissertation and Thesis Subjects,Author Keywords,Indexed Date,UT (Unique ID),Pubmed Id,Unnamed: 73
0,J,"Lankinen, Kaisu; Saari, Jukka; Hlushchuk, Yevh...",,,,"Parkkonen, Lauri/G-6755-2012; Lankinen, Kaisu ...","Parkkonen, Lauri/0000-0002-0130-0801; Lankinen...",,,Consistency and similarity of MEG- and fMRI-si...,...,,,,,,,2018-12-28,WOS:000430366000030,29486325.0,
1,J,"Liu, Xingyu; Dai, Yuxuan; Xie, Hailun; Zhen, Z...",,,,"Zhen, Zonglei/GPG-1239-2022","Liu, Xingyu/0000-0002-4386-2140",,,"A studyforrest extension, MEG recordings while...",...,,,,,,,2022-05-25,WOS:000795513500002,35562378.0,
2,J,"Bonmassar, G; Schwartz, DP; Liu, AK; Kwong, KK...",,,,"Liu, Alan King Lun/A-2210-2015; anand, amit/A-...","Liu, Alan King Lun/0000-0001-6109-1338; Kwong,...",,,Spatiotemporal brain imaging of visual-evoked ...,...,,,,,,,2001-06-01,WOS:000169056500009,11352609.0,
3,J,"Eickhoff, Simon B.; Milham, Michael; Vanderwal...",,,,"Vanderwal, Tamara/AAS-4214-2021; Eickhoff, Sim...","Eickhoff, Simon B./0000-0001-6363-2759; Vander...",,,Towards clinical applications of movie fMRI,...,,,,,,,2020-08-15,WOS:000542369500007,32376301.0,
5,J,"Stroman, Patrick W.; Coe, Brian C.; Munoz, Dou...",,,,,"Coe, Brian/0000-0002-3985-0163; Stroman, Patri...",,,Influence of attention focus on neural activit...,...,,,,,,,2011-01-01,WOS:000285570100002,20850240.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313,J,"Tan, Chenhao; Liu, Xin; Zhang, Gaoyan",,,,,"Zhang, Gaoyan/0000-0001-5189-9229",,,Inferring Brain State Dynamics Underlying Natu...,...,,,,,,,2022-03-21,WOS:000764450400001,35244856.0,
314,J,"Symonds, Renee M.; Zhou, Juin W.; Cole, Sally ...",,,,,"Cole, Sally/0000-0002-7660-2732; Sussman, Elys...",,,Cognitive resources are distributed among the ...,...,,,,,,,2020-02-03,WOS:000509324600002,31578762.0,
315,J,"Franssen, Sieske; Jansen, Anita; van den Hurk,...",,,,,"Roefs, Anne/0000-0002-9935-1075; Jansen, Anita...",,,"Effects of mindset on hormonal responding, neu...",...,,,,,,,2022-07-15,WOS:000820529200006,35182553.0,
316,J,"Alain, C; Schuler, BM; McDonald, KL",,,,,,,,Neural activity associated with distinguishing...,...,,,,,,,2002-02-01,WOS:000173784900036,11863201.0,


In [34]:
# Split the Web of Science records into kept papers and excluded non-papers,
# based on the "Document Type" column.
#
# A record is kept when its "Document Type" contains "article" or "review":
#   - the pattern is a regular expression (regex=True), where "|" means OR;
#   - case=False makes the match case-insensitive;
#   - na=False makes rows with a missing "Document Type" count as non-matching,
#     so they end up in the excluded dataframe.
# NOTE(review): this is a substring match, so a type such as "Book Review"
# would also be kept -- confirm that this is intended.
DOCUMENT_TYPE_PATTERN = "article|review"

# Build the boolean mask once, so the kept and excluded dataframes are always
# exact complements of each other (the filter cannot drift between two copies).
is_paper = WoS_1["Document Type"].str.contains(
    DOCUMENT_TYPE_PATTERN, case=False, na=False, regex=True
)

# .copy() makes both results independent dataframes, so assigning columns on
# them later does not raise SettingWithCopyWarning.
WoS_1_exclude = WoS_1[~is_paper].copy()  # excluded papers ("~" negates the mask)
WoS_1_include = WoS_1[is_paper].copy()   # kept papers

# Sanity check: every record lands in exactly one of the two dataframes.
assert len(WoS_1_include) + len(WoS_1_exclude) == len(WoS_1)

WoS_1_exclude   # Visualizing the excluded dataframe


Unnamed: 0,Title,Authors,DOI,PMID
0,Prediction of individual brain age using movie...,"Bi S, Guan Y, Tian L.",10.1093/cercor/bhad407,37885127.0
1,Neural envelope tracking predicts speech intel...,"Van Hirtum T, Somers B, Dieudonné B, Verschuer...",10.1016/j.heares.2023.108893,37806102.0
2,Theta EEG neurofeedback promotes early consoli...,"Rozengurt R, Kuznietsov I, Kachynska T, Kozach...",10.3758/s13415-023-01125-0,37752389.0
3,Arousal modulates the amygdala-insula reciproc...,"Wang L, Hu X, Ren Y, Lv J, Zhao S, Guo L, Liu ...",10.1016/j.neuroimage.2023.120316,37562718.0
4,Individual differences in time-varying and sta...,"Di X, Xu T, Uddin LQ, Biswal BB.",10.1016/j.dcn.2023.101280,37480715.0
...,...,...,...,...
434,Spontaneous eye movements during eyes-open res...,"Koba, Cemal; Notaro, Giuseppe; Tamm, Sandra; N...",10.1162/netn_a_00186,34189373.0
437,Inferring Brain State Dynamics Underlying Natu...,"Tan, Chenhao; Liu, Xin; Zhang, Gaoyan",10.1007/s12021-022-09568-5,35244856.0
438,Cognitive resources are distributed among the ...,"Symonds, Renee M.; Zhou, Juin W.; Cole, Sally ...",10.1111/psyp.13487,31578762.0
440,Neural activity associated with distinguishing...,"Alain, C; Schuler, BM; McDonald, KL",10.1121/1.1434942,11863201.0


Next: merge dataframes from both databases