In [1]:
from google.colab import drive

def mount():
  drive.mount('/content/drive', force_remount=True)

mount()

Mounted at /content/drive


In [2]:
!ls

drive  sample_data


In [3]:
# Set used to keep track of any skipped decisions (placed here to not be reset without purpose)
skipped = set()

# **1 Load results from databases**

## 1.1 Load results from Web of Science

In [4]:
# Path to file generated through the following process:
#   1. Making a search on web of science
#   2. Exporting results as a "Tab delimited file" including author, title, source and abstract (make sure to include all records)
#   3. Renaming the file to "wos" (without any extension to prevent Google from converting) and uploading it to Google Drive
WoS_file_path = "drive/MyDrive/Master/Screening/Raw data/wos.csv"

In [5]:
import pandas as pd
import io

# Assuming saved_recs is a string containing the TSV data
wos_df = pd.read_csv(WoS_file_path, sep='\t')

# Display the dataframe
wos_df


Unnamed: 0,PT,AU,BA,BE,GP,AF,BF,CA,TI,SO,...,WC,WE,SC,GA,PM,OA,HC,HP,DA,UT
0,J,"Nguyen, N; Nguyen, D",,,,"Nguyen, Nguyet; Nguyen, Dung",,,Global Stock Selection with Hidden Markov Model,RISKS,...,,,,,,,,,2024-10-15,WOS:000610728400001
1,J,"Zhang, MQ; Jiang, X; Fang, ZH; Zeng, Y; Xu, K",,,,"Zhang, Mengqi; Jiang, Xin; Fang, Zehua; Zeng, ...",,,High-order Hidden Markov Model for trend predi...,PHYSICA A-STATISTICAL MECHANICS AND ITS APPLIC...,...,,,,,,,,,2024-10-15,WOS:000456228800001
2,J,"Shah, YS; Liu, YM; Shah, FZ; Shah, F; Satti, M...",,,,"Shah, Yasir; Liu, Yumin; Shah, Faiza; Shah, Fa...",,,COVID-19 and commodity effects monitoring usin...,SCIENTIFIC AFRICAN,...,,,,,,,,,2024-10-15,WOS:001061639000001
3,J,"Fereydooni, A; Mahootchi, M",,,,"Fereydooni, Ali; Mahootchi, Masoud",,,An algorithmic trading system based on a stack...,GLOBAL FINANCE JOURNAL,...,,,,,,,,,2024-10-15,WOS:000980522800001
4,J,"Su, Z; Yi, B",,,,"Su, Zhi; Yi, Bo",,,Research on HMM-Based Efficient Stock Price Pr...,MOBILE INFORMATION SYSTEMS,...,,,,,,,,,2024-10-15,WOS:000773704100006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,J,"Papaioannou, PG; Talmon, R; Kevrekidis, IG; Si...",,,,"Papaioannou, Panagiotis G.; Talmon, Ronen; Kev...",,,Time-series forecasting using manifold learnin...,CHAOS,...,,,,,,,,,2024-10-15,WOS:000863319300002
154,J,"Tian, R; Lu, M; Wang, HP; Wang, B; Tang, QX",,,,"Tian, Ran; Lu, Meng; Wang, Haopeng; Wang, Bo; ...",,,IACPPO: A deep reinforcement learning-based mo...,COMPUTERS & INDUSTRIAL ENGINEERING,...,,,,,,,,,2024-10-15,WOS:001139673000001
155,J,"Ahmed, T; Srivastava, A",,,,"Ahmed, Tanveer; Srivastava, Abhishek",,,Combining humans and machines for the future: ...,FUTURE GENERATION COMPUTER SYSTEMS-THE INTERNA...,...,,,,,,,,,2024-10-15,WOS:000466254600058
156,J,"Marvin, HJP; Bouzembrak, Y",,,,"Marvin, Hans J. P.; Bouzembrak, Yamine",,,A system approach towards prediction of food s...,AGRICULTURAL SYSTEMS,...,,,,,,,,,2024-10-15,WOS:000524974800004


In [6]:
# Get a list of columns that are not all NaN
columns_with_data = [col for col in wos_df.columns if not wos_df[col].isnull().all()]

print(columns_with_data)


['PT', 'AU', 'AF', 'TI', 'SO', 'DT', 'CT', 'CY', 'CL', 'DE', 'ID', 'AB', 'C1', 'RP', 'EM', 'RI', 'OI', 'TC', 'Z9', 'PD', 'PY', 'VL', 'IS', 'PN', 'SI', 'BP', 'EP', 'AR', 'DI', 'DL', 'EA', 'DA', 'UT']


In [7]:
wos_df["DE"]

Unnamed: 0,DE
0,global stocks; trading; machine learning; hidd...
1,High-order HMM; Trend prediction; Trading algo...
2,VAR model; Machine learning; Commodity effect;...
3,Foreign exchange market; Stacked generalizatio...
4,
...,...
153,
154,Inventory cost management; Replenishment strat...
155,Human machine systems; Data analytics; Interes...
156,Food supply chain; Bayesian Networks; Dairy an...


In [8]:
clean_wos_df = wos_df[["UT", "TI", "AB", "DI", "DE"]].set_index("UT")
clean_wos_df.columns = ["Title", "Abstract", "DOI", "Keywords"]
clean_wos_df["Origin"] = "WOS"
clean_wos_df.index.name = "ID"
clean_wos_df

Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WOS:000610728400001,Global Stock Selection with Hidden Markov Model,Hidden Markov model (HMM) is a powerful machin...,10.3390/risks9010009,global stocks; trading; machine learning; hidd...,WOS
WOS:000456228800001,High-order Hidden Markov Model for trend predi...,Financial price series trend prediction is an ...,10.1016/j.physa.2018.10.053,High-order HMM; Trend prediction; Trading algo...,WOS
WOS:001061639000001,COVID-19 and commodity effects monitoring usin...,This article focuses on examining the effects ...,10.1016/j.sciaf.2023.e01856,VAR model; Machine learning; Commodity effect;...,WOS
WOS:000980522800001,An algorithmic trading system based on a stack...,The Forex market has been one of the most attr...,10.1016/j.gfj.2023.100825,Foreign exchange market; Stacked generalizatio...,WOS
WOS:000773704100006,Research on HMM-Based Efficient Stock Price Pr...,Stock market is one of the most important part...,10.1155/2022/8124149,,WOS
...,...,...,...,...,...
WOS:000863319300002,Time-series forecasting using manifold learnin...,We address a three-tier numerical framework ba...,10.1063/5.0094887,,WOS
WOS:001139673000001,IACPPO: A deep reinforcement learning-based mo...,Inventory cost is a significant factor in Supp...,10.1016/j.cie.2023.109829,Inventory cost management; Replenishment strat...,WOS
WOS:000466254600058,Combining humans and machines for the future: ...,This paper proposes a method to quantify inter...,10.1016/j.future.2018.01.043,Human machine systems; Data analytics; Interes...,WOS
WOS:000524974800004,A system approach towards prediction of food s...,"In this study, we aimed to demonstrate the apt...",10.1016/j.agsy.2019.102760,Food supply chain; Bayesian Networks; Dairy an...,WOS


## 1.2 Load results from Scopus


In [9]:
scopus_file_path = "drive/MyDrive/Master/Screening/Raw data/scopus.csv"

In [10]:
import pandas as pd
import io

# Assuming saved_recs is a string containing the TSV data
scopus_df = pd.read_csv(scopus_file_path)

# Display the dataframe
scopus_df


Unnamed: 0,Authors,Author full names,Author(s) ID,Title,Year,Source title,Volume,Issue,Art. No.,Page start,...,DOI,Link,Abstract,Author Keywords,Index Keywords,Document Type,Publication Stage,Open Access,Source,EID
0,Caprioli S.; Cagliero E.; Crupi R.,"Caprioli, Sergio (58632917500); Cagliero, Eman...",58632917500; 58631756500; 57222250441,Quantifying credit portfolio sensitivity to as...,2024,Journal of Risk Model Validation,18.0,1,,1.0,...,10.21314/JRMV.2024.002,https://www.scopus.com/inward/record.uri?eid=2...,We propose a novel approach for quantifying th...,concentration risk; credit portfolio model; ex...,,Article,Final,All Open Access; Green Open Access,Scopus,2-s2.0-85194178240
1,Clintworth M.; Lyridis D.; Boulougouris E.,"Clintworth, Mark (57194013072); Lyridis, Dimit...",57194013072; 6506983549; 7801393511,Financial risk assessment in shipping: a holis...,2023,Maritime Economics and Logistics,25.0,1,,90.0,...,10.1057/s41278-020-00183-2,https://www.scopus.com/inward/record.uri?eid=2...,Corporate financial distress (FD) prediction m...,Conditional value-at-risk; Extreme gradient bo...,,Article,Final,,Scopus,2-s2.0-85098792497
2,Kim J.-M.; Han H.H.; Kim S.,"Kim, Jong-Min (55720212100); Han, Hope H. (572...",55720212100; 57216825024; 57190955712,Forecasting Crude Oil Prices with Major S&P 50...,2022,Axioms,11.0,8,375,,...,10.3390/axioms11080375,https://www.scopus.com/inward/record.uri?eid=2...,This paper introduces methodologies in forecas...,Bayesian variable selection; functional princi...,,Article,Final,All Open Access; Gold Open Access,Scopus,2-s2.0-85137349001
3,Jin B.; Xu X.,"Jin, Bingzi (58914834300); Xu, Xiaojie (571920...",58914834300; 57192066072,Forecasting wholesale prices of yellow corn th...,2024,Neural Computing and Applications,36.0,15,,8693.0,...,10.1007/s00521-024-09531-2,https://www.scopus.com/inward/record.uri?eid=2...,"For market players and policy officials, commo...",Commodity price; Corn; Forecasting; Gaussian p...,Commerce; Decision making; Errors; Gaussian di...,Article,Final,,Scopus,2-s2.0-85186458437
4,Li Z.; Li C.; Min L.; Lin D.,"Li, Zhen (58514566900); Li, Changfei (58558014...",58514566900; 58558014600; 57220547786; 5872449...,Black-Litterman Portfolio Optimization Using G...,2023,IAENG International Journal of Applied Mathema...,53.0,4,IJAM_53_4_34,,...,,https://www.scopus.com/inward/record.uri?eid=2...,The Black-Litterman portfolios based on the pr...,Black-Litterman; Gaussian Process; Machine lea...,Financial data processing; Financial markets; ...,Article,Final,,Scopus,2-s2.0-85177977642
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208,Lux T.,"Lux, Thomas (7003769381)",7003769381,The Markov-switching multifractal model of ass...,2008,Journal of Business and Economic Statistics,26.0,2,,194.0,...,10.1198/073500107000000403,https://www.scopus.com/inward/record.uri?eid=2...,Multifractal processes have recently been prop...,Generalized method of moments; Levinson-Durbin...,,Article,Final,,Scopus,2-s2.0-41649118014
209,Wong F.S.; Wang P.Z.; Teh H.H.,"Wong, F.S. (16431776200); Wang, P.Z. (74054602...",16431776200; 7405460280; 7006085846,A stock selection strategy using fuzzy neural ...,1991,Computer Science in Economics and Management,4.0,2,,77.0,...,10.1007/BF00436283,https://www.scopus.com/inward/record.uri?eid=2...,"This paper describes, from a general system-de...",artificial intelligence; financial analysis; f...,,Article,Final,,Scopus,2-s2.0-34249919736
210,Cheng C.-H.; Chen T.-L.; Wei L.-Y.,"Cheng, Ching-Hsue (7404797459); Chen, Tai-Lian...",7404797459; 56143516900; 23062367300,A hybrid model based on rough sets theory and ...,2010,Information Sciences,180.0,9,,1610.0,...,10.1016/j.ins.2010.01.014,https://www.scopus.com/inward/record.uri?eid=2...,"In the stock market, technical analysis is a u...",Cumulative probability distribution approach; ...,Communication channels (information theory); C...,Article,Final,,Scopus,2-s2.0-76349121290
211,Kim S.H.; Chun S.H.,"Kim, Steven H. (7601578179); Chun, Se Hak (720...",7601578179; 7202148435,Graded forecasting using an array of bipolar p...,1998,International Journal of Forecasting,14.0,3,,323.0,...,10.1016/S0169-2070(98)00003-X,https://www.scopus.com/inward/record.uri?eid=2...,"To an increasing extent over the past decade, ...",Artificial intelligence; Financial market fore...,,Article,Final,,Scopus,2-s2.0-0000558764


In [11]:
# Get a list of columns that are not all NaN
columns_with_data_scopus = [col for col in scopus_df.columns if not scopus_df[col].isnull().all()]

print(columns_with_data_scopus)


['Authors', 'Author full names', 'Author(s) ID', 'Title', 'Year', 'Source title', 'Volume', 'Issue', 'Art. No.', 'Page start', 'Page end', 'Page count', 'Cited by', 'DOI', 'Link', 'Abstract', 'Author Keywords', 'Index Keywords', 'Document Type', 'Publication Stage', 'Open Access', 'Source', 'EID']


In [12]:
import numpy as np

clean_scopus_df = scopus_df[["DOI", "Title", "Abstract", "Author Keywords"]].copy()
clean_scopus_df.columns = ["DOI", "Title", "Abstract", "Keywords"]
clean_scopus_df["ID"] = np.where(pd.isna(clean_scopus_df["DOI"]), scopus_df["EID"], clean_scopus_df["DOI"])
clean_scopus_df = clean_scopus_df.set_index("ID")
clean_scopus_df["Origin"] = "SCOPUS"
clean_scopus_df.index.name = "ID"
clean_scopus_df

Unnamed: 0_level_0,DOI,Title,Abstract,Keywords,Origin
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10.21314/JRMV.2024.002,10.21314/JRMV.2024.002,Quantifying credit portfolio sensitivity to as...,We propose a novel approach for quantifying th...,concentration risk; credit portfolio model; ex...,SCOPUS
10.1057/s41278-020-00183-2,10.1057/s41278-020-00183-2,Financial risk assessment in shipping: a holis...,Corporate financial distress (FD) prediction m...,Conditional value-at-risk; Extreme gradient bo...,SCOPUS
10.3390/axioms11080375,10.3390/axioms11080375,Forecasting Crude Oil Prices with Major S&P 50...,This paper introduces methodologies in forecas...,Bayesian variable selection; functional princi...,SCOPUS
10.1007/s00521-024-09531-2,10.1007/s00521-024-09531-2,Forecasting wholesale prices of yellow corn th...,"For market players and policy officials, commo...",Commodity price; Corn; Forecasting; Gaussian p...,SCOPUS
2-s2.0-85177977642,,Black-Litterman Portfolio Optimization Using G...,The Black-Litterman portfolios based on the pr...,Black-Litterman; Gaussian Process; Machine lea...,SCOPUS
...,...,...,...,...,...
10.1198/073500107000000403,10.1198/073500107000000403,The Markov-switching multifractal model of ass...,Multifractal processes have recently been prop...,Generalized method of moments; Levinson-Durbin...,SCOPUS
10.1007/BF00436283,10.1007/BF00436283,A stock selection strategy using fuzzy neural ...,"This paper describes, from a general system-de...",artificial intelligence; financial analysis; f...,SCOPUS
10.1016/j.ins.2010.01.014,10.1016/j.ins.2010.01.014,A hybrid model based on rough sets theory and ...,"In the stock market, technical analysis is a u...",Cumulative probability distribution approach; ...,SCOPUS
10.1016/S0169-2070(98)00003-X,10.1016/S0169-2070(98)00003-X,Graded forecasting using an array of bipolar p...,"To an increasing extent over the past decade, ...",Artificial intelligence; Financial market fore...,SCOPUS


## 1.3 Load results from IEE Xplore


In [13]:
ieee_file_path = "drive/MyDrive/Master/Screening/Raw data/ieeexplore.csv"

In [14]:
import pandas as pd
import io

# Assuming saved_recs is a string containing the TSV data
ieee_df = pd.read_csv(ieee_file_path, quotechar='"', sep=",")

# Display the dataframe
ieee_df


Unnamed: 0,Document Title,Authors,Author Affiliations,Publication Title,Date Added To Xplore,Publication Year,Volume,Issue,Start Page,End Page,...,Mesh_Terms,Article Citation Count,Patent Citation Count,Reference Count,License,Online Date,Issue Date,Meeting Date,Publisher,Document Identifier
0,Stock Market Trend Prediction Using High-Order...,M. Wen; P. Li; L. Zhang; Y. Chen,"School of Computer Science, Southwest Petroleu...",IEEE Access,15 Mar 2019,2019,7,,28299,28308,...,,110.0,,40,OAPA,26 Feb 2019,,,IEEE,IEEE Journals
1,Combining Deep Learning and Multiresolution An...,K. A. Althelaya; S. A. Mohammed; E. -S. M. El-...,"Information and Computer Science Department, K...",IEEE Access,22 Jan 2021,2021,9,,13099,13111,...,,26.0,,54,CCBY,14 Jan 2021,,,IEEE,IEEE Journals
2,Accurate Stock Price Forecasting Based on Deep...,Y. Li; L. Chen; C. Sun; G. Liu; C. Chen; Y. Zhang,"School of Economics and Management, Weifang Un...",IEEE Access,10 Apr 2024,2024,12,,49878,49894,...,,2.0,,42,CCBY,3 Apr 2024,,,IEEE,IEEE Journals
3,Ensemble Technique With Optimal Feature Select...,S. S. Alotaibi,"Department of Information Systems, College of ...",IEEE Access,3 May 2021,2021,9,,64929,64944,...,,28.0,,39,CCBY,15 Apr 2021,,,IEEE,IEEE Journals
4,A Bayesian Learning Method for Financial Time-...,F. Zhu; W. Quan; Z. Zheng; S. Wan,"College of Economics, Shenzhen University, She...",IEEE Access,31 Jul 2018,2018,6,,38959,38966,...,,13.0,,46,OAPA,9 Jul 2018,,,IEEE,IEEE Journals
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,CrowdFAB: Intelligent Crowd-Forecasting Using ...,T. Salman; A. Ghubaish; R. D. Pietro; M. Baza;...,"Department of Computer Science, Texas Tech Uni...",IEEE Transactions on Dependable and Secure Com...,10 Jul 2024,2024,21,4.0,3030,3047,...,,,,80,IEEE,4 Oct 2023,,,IEEE,IEEE Journals
72,Crop Prediction Based on Characteristics of th...,S. P. Raja; B. Sawicka; Z. Stamenkovic; G. Mar...,"School of Computer Science and Engineering, Ve...",IEEE Access,7 Mar 2022,2022,10,,23625,23641,...,,57.0,,40,CCBY,24 Feb 2022,,,IEEE,IEEE Journals
73,Learning the Information Divergence,O. Dikmen; Z. Yang; E. Oja,Department of Information and Computer Science...,IEEE Transactions on Pattern Analysis and Mach...,2 Jun 2015,2015,37,7.0,1442,1454,...,,18.0,,47,IEEE,31 Oct 2014,,,IEEE,IEEE Journals
74,Predicting IoT Distributed Ledger Fraud Transa...,C. Rawlins; J. Sarangapani,Department of Electrical and Computer Engineer...,IEEE Transactions on Mobile Computing,5 Jun 2024,2024,23,7.0,7818,7829,...,,,,44,IEEE,5 Dec 2023,,,IEEE,IEEE Journals


In [15]:
# Get a list of columns that are not all NaN
print([col for col in ieee_df.columns if not ieee_df[col].isnull().all()])


['Document Title', 'Authors', 'Author Affiliations', 'Publication Title', 'Date Added To Xplore', 'Publication Year', 'Volume', 'Issue', 'Start Page', 'End Page', 'Abstract', 'ISSN', 'DOI', 'Funding Information', 'PDF Link', 'Author Keywords', 'IEEE Terms', 'Mesh_Terms', 'Article Citation Count', 'Reference Count', 'License', 'Online Date', 'Publisher', 'Document Identifier']


In [16]:
clean_ieee_df = ieee_df[["DOI", "Document Title", "Abstract"]].copy()
clean_ieee_df.columns = ["DOI", "Title", "Abstract"]
clean_ieee_df["Keywords"] = ieee_df["IEEE Terms"].str.cat(ieee_df["Author Keywords"], sep=";")
clean_ieee_df["ID"] = ieee_df["PDF Link"]
clean_ieee_df = clean_ieee_df.set_index("ID")
clean_ieee_df["Origin"] = "IEEE"
clean_ieee_df.index.name = "ID"
clean_ieee_df

Unnamed: 0_level_0,DOI,Title,Abstract,Keywords,Origin
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8653278,10.1109/ACCESS.2019.2901842,Stock Market Trend Prediction Using High-Order...,Given a financial time series such as $S\&P~5...,Time series analysis;Market research;Hidden Ma...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9324831,10.1109/ACCESS.2021.3051872,Combining Deep Learning and Multiresolution An...,"Due to its complexity, financial time-series f...",Predictive models;Forecasting;Multiresolution ...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10488422,10.1109/ACCESS.2024.3384430,Accurate Stock Price Forecasting Based on Deep...,The stock market is playing an increasingly im...,Predictive models;Hidden Markov models;Forecas...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9405660,10.1109/ACCESS.2021.3073507,Ensemble Technique With Optimal Feature Select...,The forecast of the stock price attempts to as...,Hidden Markov models;Forecasting;Stock markets...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8408721,10.1109/ACCESS.2018.2853998,A Bayesian Learning Method for Financial Time-...,This article develops a sequential Bayesian le...,Bayes methods;Biological system modeling;Numer...,IEEE
...,...,...,...,...,...
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10271718,10.1109/TDSC.2023.3322038,CrowdFAB: Intelligent Crowd-Forecasting Using ...,"Crowdsourcing applications, such as Uber for r...",Blockchains;Security;Knowledge based systems;F...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9721191,10.1109/ACCESS.2022.3154350,Crop Prediction Based on Characteristics of th...,Agriculture is a growing field of research. In...,Crops;Zigbee;Monitoring;Soil;Temperature senso...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6942194,10.1109/TPAMI.2014.2366144,Learning the Information Divergence,Information divergence that measures the diffe...,Medals;Approximation methods;Tensile stress;Ma...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10342719,10.1109/TMC.2023.3339384,Predicting IoT Distributed Ledger Fraud Transa...,Decision-making and consensus in traditional b...,Blockchains;Training;Generative adversarial ne...,IEEE


## 1.4 Load results from Proquest


In [17]:
#setting the file path from proquest data
proQuest_file_path = "drive/MyDrive/Master/Screening/Raw data/proquest.csv"

In [18]:
#loading the data as a pandas dataframe
proquest_df = pd.read_csv(proQuest_file_path, sep=',')

# Display the dataframes
proquest_df

Unnamed: 0,Title,Subtitle,Author,Publication,SourceType,Publisher,Volume,Issue,PubDate,AlphaDate,StartPage,EndPage,PageRange,ISSN,EISSN,ISBN,Language,Abstract,DocumentUrl,DOI
0,Gaussian Process-Mixture Conditional Heterosce...,,"Platanios, Emmanouil A; Chatzis, Sotirios P",IEEE transactions on pattern analysis and mach...,Undefined,,36,5,2014-05-01,May 2014,888,900,888-900,,1939-3539,,ENG,Generalized autoregressive conditional heteros...,https://www.proquest.com/docview/1711536889?ac...,https://doi.org/10.1109/TPAMI.2013.183
1,Conducting Causal Analysis by Means of Approxi...,,Bo Pieter Johannes Andrée,Entropy,Scholarly Journals,,24,1,2022-01-01,2022,92,,,10994300,,,ENG,Simple SummaryThe current paper develops a pro...,https://www.proquest.com/docview/2621295030?ac...,https://doi.org/10.3390/e24010092
2,A New Framework for Fraud Detection in Bitcoin...,,"Nayyer, Noor; Javaid, Nadeem; Akbar, Mariam; A...",IEEE Access,Scholarly Journals,,11,,2023-01-01,2023,90916,,90916-90938,21693536,,,ENG,Bitcoin has a reputation of being used for unl...,https://www.proquest.com/docview/2859716063?ac...,https://doi.org/10.1109/ACCESS.2023.3308298
3,RETRACTED ARTICLE: A Bayesian analysis based o...,,"Ma, Ming; Zhang, Jing",Journal of Combinatorial Optimization,Scholarly Journals,,45,1,2023-01-01,2023,,,,13826905,,,ENG,Green stocks are companies environmental prote...,https://www.proquest.com/docview/2740530868?ac...,https://doi.org/10.1007/s10878-022-00936-0
4,Forecasting Crude Oil Prices with Major S&P 50...,,"Jong-Min, Kim; Jong-Min, Kim; Han, Hope H; Kim...",Axioms,Scholarly Journals,,11,8,2022-01-01,2022,375,,,20751680,,,ENG,This paper introduces methodologies in forecas...,https://www.proquest.com/docview/2706102072?ac...,https://doi.org/10.3390/axioms11080375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Classical and Bayesian Analysis of Univariate ...,,"Liesenfeld, Roman; Jean-François, Richard",Econometric Reviews,Scholarly Journals,,25,2/3,2006-01-01,2006,335,,335,07474938,,,ENG,"In this paper, efficient importance sampling (...",https://www.proquest.com/docview/212083404?acc...,
104,Implementation of a Commitment Machine for an ...,,"Bagnato, Marco; Bottasso, Anna; Giribone, Pier...",Frontiers in artificial intelligence,Undefined,,4,,2021-01-01,2021,732805,732805,732805,,2624-8212,,ENG,This study proposes a metaheuristic for the se...,https://www.proquest.com/docview/2574382753?ac...,https://doi.org/10.3389/frai.2021.732805
105,The impact of the global stock and energy mark...,,"Wang, Zi-Jie; Zhao, Lu-Tao",Journal of cleaner production,Undefined,Elsevier Ltd,289 p.125140-,,2021-03-20,"Mar 20, 2021",,,,0959-6526,0959-6526,,ENG,The industrial revolution has brought about gr...,https://www.proquest.com/docview/2511185497?ac...,https://doi.org/10.1016/j.jclepro.2020.125140
106,Bayesian analysis of a change-point in exponen...,,"Lee, Ch-B",COMPUT STAT DATA ANAL,Undefined,"ELSEVIER SCI B.V , AMSTERDAM, (NETHERLANDS)",27,2,1998-04-03,3 Apr. 1998,195,208,195-208,0167-9473,0167-9473,,ENG,A Bayesian analysis is used to detect a change...,https://www.proquest.com/docview/27449074?acco...,


In [19]:
# Get a list of columns that are not all NaN
columns_with_data = [col for col in proquest_df.columns if not proquest_df[col].isnull().all()]

print(columns_with_data)

['Title', 'Author', 'Publication', 'SourceType', 'Publisher', 'Volume', 'Issue', 'PubDate', 'AlphaDate', 'StartPage', 'EndPage', 'PageRange', 'ISSN', 'EISSN', 'ISBN', 'Language', 'Abstract', 'DocumentUrl', 'DOI']


In [20]:
import numpy as np

clean_proquest_df = proquest_df[["DOI", "Title", "Abstract", "ISSN", "EISSN", "ISBN"]]

clean_proquest_df["ID"] = np.where(
    pd.notna(clean_proquest_df["DOI"]), clean_proquest_df["DOI"],
    np.where(
        pd.notna(clean_proquest_df["ISSN"]), clean_proquest_df["ISSN"],
        np.where(
            pd.notna(clean_proquest_df["EISSN"]), clean_proquest_df["EISSN"],
            clean_proquest_df["ISBN"]
        )
    )
)

clean_proquest_df= clean_proquest_df.drop(columns=["ISSN", "EISSN", "ISBN"])
clean_proquest_df = clean_proquest_df.set_index("ID")
clean_proquest_df["Origin"] = "Proquest"
clean_proquest_df.index.name = "ID"
clean_proquest_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_proquest_df["ID"] = np.where(


Unnamed: 0_level_0,DOI,Title,Abstract,Origin
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
https://doi.org/10.1109/TPAMI.2013.183,https://doi.org/10.1109/TPAMI.2013.183,Gaussian Process-Mixture Conditional Heterosce...,Generalized autoregressive conditional heteros...,Proquest
https://doi.org/10.3390/e24010092,https://doi.org/10.3390/e24010092,Conducting Causal Analysis by Means of Approxi...,Simple SummaryThe current paper develops a pro...,Proquest
https://doi.org/10.1109/ACCESS.2023.3308298,https://doi.org/10.1109/ACCESS.2023.3308298,A New Framework for Fraud Detection in Bitcoin...,Bitcoin has a reputation of being used for unl...,Proquest
https://doi.org/10.1007/s10878-022-00936-0,https://doi.org/10.1007/s10878-022-00936-0,RETRACTED ARTICLE: A Bayesian analysis based o...,Green stocks are companies environmental prote...,Proquest
https://doi.org/10.3390/axioms11080375,https://doi.org/10.3390/axioms11080375,Forecasting Crude Oil Prices with Major S&P 50...,This paper introduces methodologies in forecas...,Proquest
...,...,...,...,...
07474938,,Classical and Bayesian Analysis of Univariate ...,"In this paper, efficient importance sampling (...",Proquest
https://doi.org/10.3389/frai.2021.732805,https://doi.org/10.3389/frai.2021.732805,Implementation of a Commitment Machine for an ...,This study proposes a metaheuristic for the se...,Proquest
https://doi.org/10.1016/j.jclepro.2020.125140,https://doi.org/10.1016/j.jclepro.2020.125140,The impact of the global stock and energy mark...,The industrial revolution has brought about gr...,Proquest
0167-9473,,Bayesian analysis of a change-point in exponen...,A Bayesian analysis is used to detect a change...,Proquest


# **2 Merge and clean dataframes**

In [21]:
clean_df = pd.concat([clean_wos_df, clean_scopus_df, clean_ieee_df, clean_proquest_df])
clean_df

Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WOS:000610728400001,Global Stock Selection with Hidden Markov Model,Hidden Markov model (HMM) is a powerful machin...,10.3390/risks9010009,global stocks; trading; machine learning; hidd...,WOS
WOS:000456228800001,High-order Hidden Markov Model for trend predi...,Financial price series trend prediction is an ...,10.1016/j.physa.2018.10.053,High-order HMM; Trend prediction; Trading algo...,WOS
WOS:001061639000001,COVID-19 and commodity effects monitoring usin...,This article focuses on examining the effects ...,10.1016/j.sciaf.2023.e01856,VAR model; Machine learning; Commodity effect;...,WOS
WOS:000980522800001,An algorithmic trading system based on a stack...,The Forex market has been one of the most attr...,10.1016/j.gfj.2023.100825,Foreign exchange market; Stacked generalizatio...,WOS
WOS:000773704100006,Research on HMM-Based Efficient Stock Price Pr...,Stock market is one of the most important part...,10.1155/2022/8124149,,WOS
...,...,...,...,...,...
07474938,Classical and Bayesian Analysis of Univariate ...,"In this paper, efficient importance sampling (...",,,Proquest
https://doi.org/10.3389/frai.2021.732805,Implementation of a Commitment Machine for an ...,This study proposes a metaheuristic for the se...,https://doi.org/10.3389/frai.2021.732805,,Proquest
https://doi.org/10.1016/j.jclepro.2020.125140,The impact of the global stock and energy mark...,The industrial revolution has brought about gr...,https://doi.org/10.1016/j.jclepro.2020.125140,,Proquest
0167-9473,Bayesian analysis of a change-point in exponen...,A Bayesian analysis is used to detect a change...,,,Proquest


In [22]:
# Temporarily reset index
clean_df = clean_df.reset_index()

# Separate rows with NaN in 'DOI'
nan_rows = clean_df[clean_df['DOI'].isna()]

# Process rows where 'DOI' is not NaN
non_nan_rows = clean_df[clean_df['DOI'].notna()].copy()

# Normalize DOI
non_nan_rows["DOI"] = non_nan_rows["DOI"].str.lower()
non_nan_rows["DOI"] = non_nan_rows["DOI"].str.replace("https://doi.org/", "", regex=False)

# Drop duplicates based on DOI
non_nan_rows = non_nan_rows.drop_duplicates(subset=["DOI"], keep='first')

# Concatenate the untouched NaN rows back into the DataFrame
clean_df = pd.concat([non_nan_rows, nan_rows], ignore_index=True)

# Display the result
clean_df


Unnamed: 0,ID,Title,Abstract,DOI,Keywords,Origin
0,WOS:000610728400001,Global Stock Selection with Hidden Markov Model,Hidden Markov model (HMM) is a powerful machin...,10.3390/risks9010009,global stocks; trading; machine learning; hidd...,WOS
1,WOS:000456228800001,High-order Hidden Markov Model for trend predi...,Financial price series trend prediction is an ...,10.1016/j.physa.2018.10.053,High-order HMM; Trend prediction; Trading algo...,WOS
2,WOS:001061639000001,COVID-19 and commodity effects monitoring usin...,This article focuses on examining the effects ...,10.1016/j.sciaf.2023.e01856,VAR model; Machine learning; Commodity effect;...,WOS
3,WOS:000980522800001,An algorithmic trading system based on a stack...,The Forex market has been one of the most attr...,10.1016/j.gfj.2023.100825,Foreign exchange market; Stacked generalizatio...,WOS
4,WOS:000773704100006,Research on HMM-Based Efficient Stock Price Pr...,Stock market is one of the most important part...,10.1155/2022/8124149,,WOS
...,...,...,...,...,...,...
330,978-1-109-52503-8,Variance-based clustering methods and higher o...,Two approaches have been proposed in statistic...,,,Proquest
331,02522667,Forecasting classification of operating perfor...,Classification of operating performance of the...,,,Proquest
332,0038092X,Probabilistic forecasting of the solar irradia...,Forecasting of the solar irradiance is a key f...,,,Proquest
333,07474938,Classical and Bayesian Analysis of Univariate ...,"In this paper, efficient importance sampling (...",,,Proquest


In [23]:
# Show title dupes
clean_df[clean_df["Title"].duplicated(keep=False)].sort_values("Title")

Unnamed: 0,ID,Title,Abstract,DOI,Keywords,Origin
318,0957-4174,A stacked generalization system for automated ...,Multiple FOREX time series forecasting is a ho...,,,Proquest
109,WOS:000412252000021,A stacked generalization system for automated ...,Multiple FOREX time series forecasting is a ho...,10.1016/j.eswa.2017.08.011,Forex forecasting; Algorithmic trading; Portfo...,WOS
235,10.1007/BF00436283,A stock selection strategy using fuzzy neural ...,"This paper describes, from a general system-de...",10.1007/bf00436283,artificial intelligence; financial analysis; f...,SCOPUS
234,10.1016/0925-2312(91)90026-8,A stock selection strategy using fuzzy neural ...,"This paper describes, from a general system-de...",10.1016/0925-2312(91)90026-8,artificial intelligence; financial analysis; f...,SCOPUS
11,WOS:000734789000001,Adoption of deep learning Markov model combine...,In order to accurately describe the risk depen...,10.2478/amns.2021.2.00065,mixed Copula model; HMM; financial index; risk...,WOS
13,WOS:000756268700001,Adoption of deep learning Markov model combine...,In order to accurately describe the risk depen...,10.2478/amns.2021.1.00085,mixed copula model; HMM; financial index; risk...,WOS
221,10.2478/amns.2021.2.00112,Adoption of deep learning Markov model combine...,In order to accurately describe the risk depen...,10.2478/amns.2021.2.00112,financial index; HMM; investment failure rate;...,SCOPUS
334,0167-9473,Bayesian analysis of a change-point in exponen...,A Bayesian analysis is used to detect a change...,,,Proquest
54,WOS:000073291400005,Bayesian analysis of a change-point in exponen...,A Bayesian analysis is used to detect a change...,10.1016/s0167-9473(98)00009-7,change-point; conjugate prior; ML-II approach;...,WOS
316,01679236,Negation scope detection in sentiment analysis...,Decision support for financial news using natu...,,,Proquest


In [24]:
# Drop duplicates based on title
clean_df = clean_df.groupby(['Title'], as_index=False).first()
clean_df = clean_df.copy()
clean_df

Unnamed: 0,Title,ID,Abstract,DOI,Keywords,Origin
0,<i>DL-GuesS</i>: Deep Learning and Sentiment A...,WOS:000779594700001,Cryptocurrencies are peer-to-peer-based transa...,10.1109/access.2022.3163305,Cryptography; Predictive models; Bitcoin; Hidd...,WOS
1,A Bayesian Learning Method for Financial Time-...,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,This article develops a sequential Bayesian le...,10.1109/access.2018.2853998,Bayes methods;Biological system modeling;Numer...,IEEE
2,A Bayesian Regularized Neural Network for Anal...,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,Bitcoin is a decentralized digital currency wi...,10.1109/access.2021.3063243,Bitcoin;Predictive models;Blockchain;Market re...,IEEE
3,A Bayesian analysis based on multivariate stoc...,WOS:000889065800003,Green stocks are companies environmental prote...,10.1007/s10878-022-00936-0,Green stock; Spillover Effect; Machine learnin...,WOS
4,A Bayesian-based classification framework for ...,WOS:000863229400001,Financial time series have been extensively st...,10.1007/s11227-022-04834-4,Trend prediction; Machine learning; Deep learn...,WOS
...,...,...,...,...,...,...
321,Volatility index prediction based on a hybrid ...,WOS:000891788100011,Advances in volatility index prediction based ...,10.1016/j.eswa.2022.119184,Volatility index; Mode decomposition; Deep lea...,WOS
322,Volumetric occupancy mapping with probabilisti...,10.1109/LRA.2021.3070308,"In robotic applications, a key requirement for...",10.1109/lra.2021.3070308,Computer vision; Machine learning; Mobile robo...,SCOPUS
323,Welcome to SIGNALS: A New Open-Access Scientif...,https://doi.org/10.3390/signals1010001,The sheer exposure to vast amounts of signals ...,10.3390/signals1010001,,Proquest
324,When CVaR Meets With Bluetooth PAN: A Physical...,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,"In this work, we propose a risk-aware physical...",10.1109/jsen.2021.3068782,COVID-19;Safety;Bluetooth;Sensors;Trajectory;P...,IEEE


In [25]:
# Set ID as index again
clean_df = clean_df.set_index("ID")
clean_df

Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WOS:000779594700001,<i>DL-GuesS</i>: Deep Learning and Sentiment A...,Cryptocurrencies are peer-to-peer-based transa...,10.1109/access.2022.3163305,Cryptography; Predictive models; Bitcoin; Hidd...,WOS
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8408721,A Bayesian Learning Method for Financial Time-...,This article develops a sequential Bayesian le...,10.1109/access.2018.2853998,Bayes methods;Biological system modeling;Numer...,IEEE
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9366736,A Bayesian Regularized Neural Network for Anal...,Bitcoin is a decentralized digital currency wi...,10.1109/access.2021.3063243,Bitcoin;Predictive models;Blockchain;Market re...,IEEE
WOS:000889065800003,A Bayesian analysis based on multivariate stoc...,Green stocks are companies environmental prote...,10.1007/s10878-022-00936-0,Green stock; Spillover Effect; Machine learnin...,WOS
WOS:000863229400001,A Bayesian-based classification framework for ...,Financial time series have been extensively st...,10.1007/s11227-022-04834-4,Trend prediction; Machine learning; Deep learn...,WOS
...,...,...,...,...,...
WOS:000891788100011,Volatility index prediction based on a hybrid ...,Advances in volatility index prediction based ...,10.1016/j.eswa.2022.119184,Volatility index; Mode decomposition; Deep lea...,WOS
10.1109/LRA.2021.3070308,Volumetric occupancy mapping with probabilisti...,"In robotic applications, a key requirement for...",10.1109/lra.2021.3070308,Computer vision; Machine learning; Mobile robo...,SCOPUS
https://doi.org/10.3390/signals1010001,Welcome to SIGNALS: A New Open-Access Scientif...,The sheer exposure to vast amounts of signals ...,10.3390/signals1010001,,Proquest
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9386066,When CVaR Meets With Bluetooth PAN: A Physical...,"In this work, we propose a risk-aware physical...",10.1109/jsen.2021.3068782,COVID-19;Safety;Bluetooth;Sensors;Trajectory;P...,IEEE


# **3 Enrich with GPT assessments**

In [26]:
# Load previous assessments
gpt_file_path = "drive/MyDrive/Master/Screening/gpt"
gpt_response_df = pd.read_csv(gpt_file_path, index_col=0)
gpt_response_df

Unnamed: 0_level_0,Summary,Financial instrument?,Instrument,AI?,Probabilistic?
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WOS:001258447000001,Proposes a novel deep learning hybrid model co...,✔️,Stock Indices,✔️,❌
WOS:001096941900002,Combines machine learning with rough set theor...,✔️,Stock Prices,✔️,❌
WOS:001281386400001,Develops an integrated spotted hyena optimizat...,✔️,Stock Prices,✔️,✔️
WOS:001009666300001,Applies Long Short-Term Memory (LSTM) neural n...,✔️,Stock Market Indices,✔️,❌
WOS:000870513100007,Implements a multi-model GAN-based hybrid pred...,✔️,Stock Prices,✔️,✔️
...,...,...,...,...,...
2-s2.0-85111270345,The study employs LASSO-based machine learning...,❌,?,✔️,❌
9798380382243,The thesis introduces novel principles for pre...,❌,?,❌,❌
1350486X,The paper presents a Bayesian approach using D...,✔️,Value at Risk (VaR) for financial assets,❌,✔️
2-s2.0-84874513449,This survey reviews recent machine learning te...,✔️,Stock,✔️,❌


### Instructions

The block below generates several prompts to be input to Chat GPT.

Go to Chat GPT, choose the `o1-mini` model, and input each prompt, and store the results in a local CSV file named `gpt` (without extension).

Once through all the batches and all the prompts, upload the CSV file to Google Drive, refresh files in google colab, and run the next code block.

In [27]:
# Generate prompts to Chat GPT
import os
from IPython.display import clear_output

batch_size = 50
i = 0

# Filter clean df on whether a gpt assessment has already been done
input_df = clean_df.drop(gpt_response_df.index, errors='ignore')[["Title", "Abstract"]]

for batch in range(0, len(input_df), batch_size):
    batch_df = input_df.iloc[batch:batch+batch_size]
    print("""
For the following CSV list of articles, please return a CSV with the following columns:
- `ID`: The ID of the article, corresponding to the ID in the provided CSV
- `Summary`: A one-to-two sentence summary of the article
- `Financial instrument?`: A column with the value ✔️ if the article contains a model that predicts the price of a financial instrument and ❌ otherwise
- `Instrument`: A column with the type of the financial instrument that is predicted
- `AI?`: A column with the value ✔️ if the model in the article is an AI/ML model (i.e. more advanced than traditional econometric models) and ❌ otherwise
- `Probabilistic?`: A column with the value ✔️ if the model is a **probablistic** AI model (i.e. the prediction includes either variance or a distribution or some other financial risk measure such as VaR) and ❌ otherwise
You can also answer "?" to any question if it cannot be answered based on the title or abstract.
Please ensure that the CSV is valid by applying quotation marks wherever necessary.
""")
    print(batch_df.to_csv(index=True))
    input("Press enter to get next prompt")
    clear_output(wait=True)


## Load assessments

In [28]:
# Load all assessments

gpt_file_path = "drive/MyDrive/Master/Screening/gpt"
gpt_response_df = pd.read_csv(gpt_file_path, index_col="ID")
gpt_response_df

Unnamed: 0_level_0,Summary,Financial instrument?,Instrument,AI?,Probabilistic?
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WOS:001258447000001,Proposes a novel deep learning hybrid model co...,✔️,Stock Indices,✔️,❌
WOS:001096941900002,Combines machine learning with rough set theor...,✔️,Stock Prices,✔️,❌
WOS:001281386400001,Develops an integrated spotted hyena optimizat...,✔️,Stock Prices,✔️,✔️
WOS:001009666300001,Applies Long Short-Term Memory (LSTM) neural n...,✔️,Stock Market Indices,✔️,❌
WOS:000870513100007,Implements a multi-model GAN-based hybrid pred...,✔️,Stock Prices,✔️,✔️
...,...,...,...,...,...
2-s2.0-85111270345,The study employs LASSO-based machine learning...,❌,?,✔️,❌
9798380382243,The thesis introduces novel principles for pre...,❌,?,❌,❌
1350486X,The paper presents a Bayesian approach using D...,✔️,Value at Risk (VaR) for financial assets,❌,✔️
2-s2.0-84874513449,This survey reviews recent machine learning te...,✔️,Stock,✔️,❌


In [29]:
# Remove duplicate indices in gpt_response_df
gpt_response_df = gpt_response_df[~gpt_response_df.index.duplicated(keep='last')]
gpt_response_df

Unnamed: 0_level_0,Summary,Financial instrument?,Instrument,AI?,Probabilistic?
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
WOS:001258447000001,Proposes a novel deep learning hybrid model co...,✔️,Stock Indices,✔️,❌
WOS:001096941900002,Combines machine learning with rough set theor...,✔️,Stock Prices,✔️,❌
WOS:001281386400001,Develops an integrated spotted hyena optimizat...,✔️,Stock Prices,✔️,✔️
WOS:001009666300001,Applies Long Short-Term Memory (LSTM) neural n...,✔️,Stock Market Indices,✔️,❌
WOS:000870513100007,Implements a multi-model GAN-based hybrid pred...,✔️,Stock Prices,✔️,✔️
...,...,...,...,...,...
2-s2.0-85111270345,The study employs LASSO-based machine learning...,❌,?,✔️,❌
9798380382243,The thesis introduces novel principles for pre...,❌,?,❌,❌
1350486X,The paper presents a Bayesian approach using D...,✔️,Value at Risk (VaR) for financial assets,❌,✔️
2-s2.0-84874513449,This survey reviews recent machine learning te...,✔️,Stock,✔️,❌


In [30]:
processed_df = clean_df.join(gpt_response_df)
processed_df

Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin,Summary,Financial instrument?,Instrument,AI?,Probabilistic?
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
WOS:000779594700001,<i>DL-GuesS</i>: Deep Learning and Sentiment A...,Cryptocurrencies are peer-to-peer-based transa...,10.1109/access.2022.3163305,Cryptography; Predictive models; Bitcoin; Hidd...,WOS,"DL-GuesS, a hybrid deep learning and sentiment...",✔️,Cryptocurrency,✔️,❌
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8408721,A Bayesian Learning Method for Financial Time-...,This article develops a sequential Bayesian le...,10.1109/access.2018.2853998,Bayes methods;Biological system modeling;Numer...,IEEE,The paper introduces a sequential Bayesian lea...,✔️,"Financial time series (e.g., S&P 500)",❌,✔️
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9366736,A Bayesian Regularized Neural Network for Anal...,Bitcoin is a decentralized digital currency wi...,10.1109/access.2021.3063243,Bitcoin;Predictive models;Blockchain;Market re...,IEEE,A Bayesian Regularized Neural Network model is...,✔️,Bitcoin,✔️,✔️
WOS:000889065800003,A Bayesian analysis based on multivariate stoc...,Green stocks are companies environmental prote...,10.1007/s10878-022-00936-0,Green stock; Spillover Effect; Machine learnin...,WOS,Employs Bayesian multivariate stochastic volat...,✔️,Green Stock Indices,✔️,✔️
WOS:000863229400001,A Bayesian-based classification framework for ...,Financial time series have been extensively st...,10.1007/s11227-022-04834-4,Trend prediction; Machine learning; Deep learn...,WOS,The paper presents a Bayesian-optimized classi...,✔️,Financial time series,✔️,✔️
...,...,...,...,...,...,...,...,...,...,...
WOS:000891788100011,Volatility index prediction based on a hybrid ...,Advances in volatility index prediction based ...,10.1016/j.eswa.2022.119184,Volatility index; Mode decomposition; Deep lea...,WOS,This paper develops a hybrid deep learning sys...,✔️,Volatility indices,✔️,✔️
10.1109/LRA.2021.3070308,Volumetric occupancy mapping with probabilisti...,"In robotic applications, a key requirement for...",10.1109/lra.2021.3070308,Computer vision; Machine learning; Mobile robo...,SCOPUS,Develops a probabilistic depth completion fram...,❌,?,✔️,✔️
https://doi.org/10.3390/signals1010001,Welcome to SIGNALS: A New Open-Access Scientif...,The sheer exposure to vast amounts of signals ...,10.3390/signals1010001,,Proquest,Introduction to the open-access journal SIGNAL...,❌,,❌,❌
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9386066,When CVaR Meets With Bluetooth PAN: A Physical...,"In this work, we propose a risk-aware physical...",10.1109/jsen.2021.3068782,COVID-19;Safety;Bluetooth;Sensors;Trajectory;P...,IEEE,A risk-aware physical distancing system using ...,❌,?,✔️,✔️


In [31]:
# prompt: export clean_df as csv to Master/Analysis/Processed data from screening
processed_df.to_csv("drive/MyDrive/Master/Analysis/Processed data from screening/clean_with_gpt_df.csv")

# **4 Make decisions on whether to include articles**

In [32]:
# Load previous decisions
mount()
decisions_file_path = "drive/MyDrive/Master/Screening/decisions"
decisions_df = pd.read_csv(decisions_file_path, index_col=0)
decisions_df.sort_values("Date", ascending=True)

Mounted at /content/drive


Unnamed: 0_level_0,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
WOS:001168597500001,n,2024-09-24T13:25:58.924587,Tord
WOS:000380081200006,n,2024-09-24T13:30:26.953245,Tord
WOS:000793470500003,n,2024-09-24T13:32:18.959640,Tord
WOS:000596207300001,n,2024-09-24T13:32:25.758788,Tord
WOS:000993396100001,n,2024-09-24T13:32:30.940790,Tord
...,...,...,...
2-s2.0-85177977642,y,2024-11-01T14:32:32.181051,Tord
WOS:000390831000014,n,2024-11-01T14:32:51.699112,Tord
2-s2.0-85073325015,y,2024-11-01T14:40:29.120978,Tord
9798382067292,n,2024-11-01T14:48:54.971261,Tord


In [33]:
# Keep own version to mitigate overwriting each other
my_decisions_df = decisions_df.copy()

In [34]:
from datetime import datetime
from IPython.display import display, HTML, clear_output

reviewer = input("What is your name?")

# Shuffle the indices of the DataFrame to limit chance of two people reviewing the same article at the same time
np.random.seed(hash(reviewer) % 2**32)
shuffled_indices = np.random.permutation(processed_df.index)

def get_progress():
    decided = set(decisions_df.index).intersection(processed_df.index)
    return len(decided) / len(processed_df)

# Loop through the randomized indices
for index in shuffled_indices:
    row = processed_df.loc[index]
    if index in my_decisions_df.index:
        continue
    if index in skipped:
        continue
    if len(row.shape) > 1:
        row = row.iloc[0]
    if pd.isna(row["Summary"]):
        continue
    clear_output(wait=True)
    print(f"Screening progress: {get_progress() * 100:.2f}%")
    display(HTML(f'<h1>{row["Title"]}</h1>'))
    print("ID:", index)
    display(HTML(f'<a href="https://doi.org/{row["DOI"]}" target="_blank">Link</a>'))
    display(HTML(f'<h2>GPT assessment</h2>'))
    display(HTML(f'<p style="font-size:20px;">Financial instrument?: {row["Financial instrument?"]}</p>'))
    display(HTML(f'<p style="font-size:20px;">AI?: {row["AI?"]}</p>'))
    display(HTML(f'<p style="font-size:20px;">Probabilistic?: {row["Probabilistic?"]}</p>'))
    display(HTML(f'<p style="font-size:20px;">Instrument: {row["Instrument"]}</p>'))
    display(HTML(f'<h2>Summary</h2>'))
    display(HTML(f'<p style="font-size:18px;line-height:30px">{row["Summary"]}</p>'))
    display(HTML(f'<h2>Keywords</h2>'))
    display(HTML(f'<p style="font-size:18px;line-height:30px">{row["Keywords"]}</p>'))
    display(HTML(f'<h2>Abstract</h2>'))
    display(HTML(f'<p style="font-size:16px;line-height:24px">{row["Abstract"]}</p>'))
    decision = None
    while decision not in ["y", "n", "survey", "tja", "skip"]:
        decision = input("\nInclude this article? (y/n/survey/tja/skip):\n").lower()
    if decision == "skip":
        skipped.add(index)
        continue
    # Add decision
    my_decisions_df.loc[index] = [decision, datetime.now().isoformat(), reviewer]
    # Ensure latest version of file
    # mount()
    decisions_df = pd.read_csv(decisions_file_path, index_col=0)
    # Add any entries in my_decisions_df that are not in decisions_df
    decisions_df = pd.concat([decisions_df, my_decisions_df])
    # Remove rows that are strictly equal
    decisions_df = decisions_df.drop_duplicates(keep="last")
    # Save
    decisions_df.to_csv(decisions_file_path)

KeyboardInterrupt: Interrupted by user

# 5 Overview of accepted articles

In [35]:
# Check for duplicates in decisions_df
decisions_df[decisions_df.index.duplicated(keep=False)].sort_index()

Unnamed: 0_level_0,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10.1007/s13042-019-01041-1,n,2024-09-25T14:41:00.539555,Grude
10.1007/s13042-019-01041-1,n,2024-09-25T14:27:49.258474,Tord
10.1016/0925-2312(91)90026-8,n,2024-09-25T14:36:34.866043,Sivert
10.1016/0925-2312(91)90026-8,n,2024-09-25T14:33:41.949907,Tord
10.1080/00949655.2021.1899179,n,2024-09-25T14:48:08.895193,Tord
10.1080/00949655.2021.1899179,tja,2024-10-02T11:39:06.827506,Grude
10.1080/00949655.2021.1899179,tja,2024-10-02T14:03:31.140956,Grude
10.21003/EA.V187-17,n,2024-09-25T14:27:49.840122,Grude
10.21003/EA.V187-17,n,2024-09-25T14:47:23.218442,Tord
WOS:000604846000002,n,2024-09-25T14:44:54.322686,Grude


In [37]:
# Remove duplicates
decisions_df = decisions_df[~decisions_df.index.duplicated(keep='last')]
decisions_df

Unnamed: 0_level_0,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
WOS:001302798700001,y,2024-10-15T11:20:12.186703,Sivert
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10684197,n,2024-10-15T11:20:30.036330,Sivert
https://doi.org/10.5120/ijca2015905413,survey,2024-10-15T11:20:47.668472,Sivert
WOS:000864527500006,y,2024-10-15T11:26:43.493810,Sivert
WOS:000980522800001,y,2024-10-15T11:29:11.772374,Sivert
...,...,...,...
2-s2.0-85177977642,y,2024-11-01T14:32:32.181051,Tord
WOS:000390831000014,n,2024-11-01T14:32:51.699112,Tord
2-s2.0-85073325015,y,2024-11-01T14:40:29.120978,Tord
9798382067292,n,2024-11-01T14:48:54.971261,Tord


In [38]:
df_with_decisions = processed_df.join(decisions_df)
df_with_decisions.groupby("Decision").count()["Title"]

Unnamed: 0_level_0,Title
Decision,Unnamed: 1_level_1
n,185
survey,8
tja,29
y,104


In [39]:
df_with_decisions.groupby("Reviewer").count()["Title"]

Unnamed: 0_level_0,Title
Reviewer,Unnamed: 1_level_1
Grude,56
Sivert,111
Tord,159


In [40]:
accepted_df = df_with_decisions[df_with_decisions["Decision"] == "y"].copy()
print(accepted_df.shape[0], "accepted articles")
accepted_df

104 accepted articles


Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin,Summary,Financial instrument?,Instrument,AI?,Probabilistic?,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
WOS:000889065800003,A Bayesian analysis based on multivariate stoc...,Green stocks are companies environmental prote...,10.1007/s10878-022-00936-0,Green stock; Spillover Effect; Machine learnin...,WOS,Employs Bayesian multivariate stochastic volat...,✔️,Green Stock Indices,✔️,✔️,y,2024-09-24T14:36:28.478841,Tord
10.48048/tis.2022.3045,A Gaussian Process Regression Model for Foreca...,A stock price index measures the change in sev...,10.48048/tis.2022.3045,Artificial neural network; Gaussian process re...,SCOPUS,Applies Gaussian Process Regression to forecas...,✔️,Stock Exchange of Thailand,✔️,✔️,y,2024-09-25T11:38:00.820053,Grude
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9448324,A Hybrid Approach of Bayesian Structural Time ...,"In the financial sector, the stock market and ...",10.1109/tcss.2021.3073964,Predictive models;Mathematical model;Data mode...,IEEE,The paper presents a hybrid Bayesian Structura...,✔️,Stocks,✔️,✔️,y,2024-09-25T14:27:01.998244,Tord
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9745535,A Hybrid Prediction Model Integrating GARCH Mo...,Accurate prediction of volatility is one of th...,10.1109/access.2022.3163723,Predictive models;Data models;Stock markets;St...,IEEE,The study develops a hybrid model integrating ...,✔️,Stock market volatility,✔️,✔️,y,2024-09-25T12:33:25.499308,Tord
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10138772,A Latent Factor-Based Bayesian Neural Networks...,The selling price of a used car can be predict...,10.1109/tem.2023.3270301,Automobiles;Predictive models;Bayes methods;Ar...,IEEE,Implements a Commitment Machine metaheuristic ...,✔️,Portfolio Expected Shortfall,✔️,✔️,y,2024-09-24T15:46:39.419325,Tord
...,...,...,...,...,...,...,...,...,...,...,...,...,...
WOS:000294432100009,Trend forecasting of financial time series usi...,Many machine learning methods in Artificial in...,10.3233/ida-2011-0495,Trend forecasting; financial time series; PIPs...,WOS,A trend forecasting model for financial time s...,✔️,Financial Time Series,✔️,❌,y,2024-10-15T11:53:18.726820,Sivert
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9779871,Uncertainty-Aware Portfolio Management With Ri...,As deep neural networks (DNNs) have gained con...,10.1109/tnnls.2022.3174642,Uncertainty;Portfolios;Investment;Uncertain sy...,IEEE,Proposes the RSMAN framework using AI models t...,✔️,Portfolio,✔️,✔️,y,2024-09-25T14:43:50.480812,Sivert
10.3390/su16051789,Using Probabilistic Machine Learning Methods t...,Accurate agricultural commodity price models e...,10.3390/su16051789,Adaboost; ARIMA; Canadian Cattle Price Modelin...,SCOPUS,Implements machine learning models with Bayesi...,✔️,Flat steel price index,✔️,✔️,y,2024-09-25T10:47:14.971616,Tord
978-1-109-52503-8,Variance-based clustering methods and higher o...,Two approaches have been proposed in statistic...,,,Proquest,The dissertation develops novel variance-based...,❌,?,✔️,❌,y,2024-11-01T14:19:13.708903,Tord


In [41]:
tja_df = df_with_decisions[df_with_decisions["Decision"] == "tja"]
print(tja_df.shape[0], "in tja category")
tja_df

29 in tja category


Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin,Summary,Financial instrument?,Instrument,AI?,Probabilistic?,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9366736,A Bayesian Regularized Neural Network for Anal...,Bitcoin is a decentralized digital currency wi...,10.1109/access.2021.3063243,Bitcoin;Predictive models;Blockchain;Market re...,IEEE,A Bayesian Regularized Neural Network model is...,✔️,Bitcoin,✔️,✔️,tja,2024-09-25T13:28:55.481571,Sivert
https://doi.org/10.1088/1742-6596/2287/1/012018,A Machine Learning Model for Healthcare Stocks...,This paper study the nowcasting and forecastin...,10.1088/1742-6596/2287/1/012018,,Proquest,"The paper develops machine learning models, in...",✔️,Healthcare stocks in the US stock market,✔️,✔️,tja,2024-09-25T12:43:26.830482,Sivert
10.11591/eei.v12i6.4987,Adaptation of stochasticity into activation fu...,Stock market is an example of a stochastic env...,10.11591/eei.v12i6.4987,Forecasting stock price; Gaussian process; Mul...,SCOPUS,"Utilizes machine learning, including stacked e...",✔️,Estimated Ultimate Recovery (EUR),✔️,✔️,tja,2024-09-25T13:07:51.519205,Tord
10.1016/j.aej.2023.08.025,Analysis of bitcoin prices using a heavy-taile...,Statistical modeling and forecasting are very ...,10.1016/j.aej.2023.08.025,Bitcoin prices; Dagum distribution; Heavy-tail...,SCOPUS,Presents a heavy-tailed probabilistic model fo...,✔️,Bitcoin,✔️,✔️,tja,2024-09-25T13:13:23.103513,Tord
WOS:001062957900001,Application of machine learning in algorithmic...,The research undertakes the subject of machine...,10.1016/j.ribaf.2023.102052,Algorithmic investment strategies; Machine lea...,WOS,Develops machine learning-based algorithmic tr...,✔️,Global Stock Indices,✔️,❌,tja,2024-09-25T14:29:27.298367,Tord
WOS:000572871900002,Assess deep learning models for Egyptian excha...,Financial analysis of the stock market using t...,10.1007/s00521-020-05374-9,Artificial neural networks; Autoregressive; Ba...,WOS,Evaluates the performance of various deep lear...,✔️,Egyptian Stock Exchange Indices,✔️,❌,tja,2024-09-25T11:31:18.278890,Sivert
10.1080/00949655.2021.1899179,Cryptocurrency direction forecasting using dee...,"Recently, the deep learning architecture has b...",10.1080/00949655.2021.1899179,BPNN; BTC/USD forecasting; Cryptocurrency fore...,SCOPUS,Utilizes Long Short-Term Memory (LSTM) deep le...,✔️,Cryptocurrency (BTC/USD),✔️,✔️,tja,2024-10-02T14:03:31.140956,Grude
https://doi.org/10.7906/indecs.18.4.7,Forecasting Stock Market Indices using Machine...,In recent years machine learning algorithms ha...,10.7906/indecs.18.4.7,,Proquest,Employs machine learning algorithms to forecas...,✔️,"Stock Market Indices (DAX, Dow Jones, NASDAQ, ...",✔️,❌,tja,2024-09-25T13:26:31.055033,Sivert
10.1007/s00521-024-09531-2,Forecasting wholesale prices of yellow corn th...,"For market players and policy officials, commo...",10.1007/s00521-024-09531-2,Commodity price; Corn; Forecasting; Gaussian p...,SCOPUS,The paper develops a Gaussian process regressi...,✔️,Commodity (Yellow Corn),✔️,✔️,tja,2024-09-25T12:12:09.391648,Tord
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10494511,HyBiLSTM: Multivariate Bitcoin Price Forecasti...,Despite their growing popularity in recent res...,10.1109/access.2024.3386029,Biological system modeling;Predictive models;L...,IEEE,"The paper introduces HyBiLSTM, a hybrid time-s...",✔️,Bitcoin,✔️,✔️,tja,2024-09-25T11:09:16.259438,Tord


In [42]:
accepted_df["DOI"].isna().sum()

7

In [43]:
accepted_df.groupby("Origin").count()["Title"]

Unnamed: 0_level_0,Title
Origin,Unnamed: 1_level_1
IEEE,17
Proquest,6
SCOPUS,22
WOS,59


In [51]:
# find all surveys in accepted sample
accepted_surveys_df = df_with_decisions[df_with_decisions["Decision"] == "survey"].copy()
accepted_surveys_df

Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin,Summary,Financial instrument?,Instrument,AI?,Probabilistic?,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
WOS:000505589700021,A Study Concerning Soft Computing Approaches f...,Financial time-series are well known for their...,10.3390/axioms8040116,machine learning; stock price prediction; Hong...,WOS,A comprehensive review of soft computing appro...,✔️,Stocks,✔️,❌,survey,2024-10-15T11:32:40.289750,Sivert
WOS:001104674200001,A Systematic Survey of AI Models in Financial ...,Artificial intelligence (AI)-based models have...,10.1109/access.2023.3330156,Forecasting; Predictive models; Artificial int...,WOS,A systematic survey reviews AI-based models in...,❌,,✔️,?,survey,2024-10-02T15:06:22.472231,Sivert
WOS:001177961600001,A survey on uncertainty quantification in deep...,Investors make decisions about buying and sell...,10.1016/j.neucom.2024.127339,Deep Learning; Time series; Uncertainty quanti...,WOS,Surveys uncertainty quantification methods in ...,❌,Financial Time Series,✔️,✔️,survey,2024-09-25T12:14:05.578216,Tord
https://doi.org/10.5120/ijca2015905413,Cloud based Financial Market Prediction throug...,This paper surveys recent literature in the ar...,10.5120/ijca2015905413,,Proquest,A review of genetic algorithms and cloud-based...,✔️,Financial Markets,✔️,❌,survey,2024-10-15T11:20:47.668472,Sivert
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9627654,Machine Learning in Financial Market Surveilla...,The use of machine learning for anomaly detect...,10.1109/access.2021.3130843,Surveillance;Machine learning;Anomaly detectio...,IEEE,The survey reviews machine learning methods fo...,✔️,Power trading prices,✔️,❌,survey,2024-09-25T14:05:11.754002,Tord
WOS:001216335500001,Prediction of realized volatility and implied ...,"In this systematic literature review, we exami...",10.1016/j.irfa.2024.103221,Volatility forecasting; Machine learning; Expl...,WOS,This systematic review examines existing AI an...,✔️,Volatility indices,✔️,?,survey,2024-10-02T13:15:09.106541,Grude
2-s2.0-84874513449,Stock market forecasting techniques: A survey,This paper surveys recent literature in the ar...,,And time series analysis; Data mining; Forecas...,SCOPUS,This survey reviews recent machine learning te...,✔️,Stock,✔️,❌,survey,2024-11-01T14:32:09.536892,Tord
WOS:000497715600001,Systematic analysis and review of stock market...,Prediction of stock market trends is considere...,10.1016/j.cosrev.2019.08.001,Stock market prediction; Bayesian model; Fuzzy...,WOS,Comprehensive review of 50 research papers on ...,✔️,Stock,✔️,?,survey,2024-09-25T12:58:20.741139,Tord


# Write results to CSV

In [44]:
to_csv = pd.concat([accepted_df, tja_df])
to_csv

Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin,Summary,Financial instrument?,Instrument,AI?,Probabilistic?,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
WOS:000889065800003,A Bayesian analysis based on multivariate stoc...,Green stocks are companies environmental prote...,10.1007/s10878-022-00936-0,Green stock; Spillover Effect; Machine learnin...,WOS,Employs Bayesian multivariate stochastic volat...,✔️,Green Stock Indices,✔️,✔️,y,2024-09-24T14:36:28.478841,Tord
10.48048/tis.2022.3045,A Gaussian Process Regression Model for Foreca...,A stock price index measures the change in sev...,10.48048/tis.2022.3045,Artificial neural network; Gaussian process re...,SCOPUS,Applies Gaussian Process Regression to forecas...,✔️,Stock Exchange of Thailand,✔️,✔️,y,2024-09-25T11:38:00.820053,Grude
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9448324,A Hybrid Approach of Bayesian Structural Time ...,"In the financial sector, the stock market and ...",10.1109/tcss.2021.3073964,Predictive models;Mathematical model;Data mode...,IEEE,The paper presents a hybrid Bayesian Structura...,✔️,Stocks,✔️,✔️,y,2024-09-25T14:27:01.998244,Tord
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9745535,A Hybrid Prediction Model Integrating GARCH Mo...,Accurate prediction of volatility is one of th...,10.1109/access.2022.3163723,Predictive models;Data models;Stock markets;St...,IEEE,The study develops a hybrid model integrating ...,✔️,Stock market volatility,✔️,✔️,y,2024-09-25T12:33:25.499308,Tord
https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10138772,A Latent Factor-Based Bayesian Neural Networks...,The selling price of a used car can be predict...,10.1109/tem.2023.3270301,Automobiles;Predictive models;Bayes methods;Ar...,IEEE,Implements a Commitment Machine metaheuristic ...,✔️,Portfolio Expected Shortfall,✔️,✔️,y,2024-09-24T15:46:39.419325,Tord
...,...,...,...,...,...,...,...,...,...,...,...,...,...
WOS:000555358600001,Selecting data adaptive learner from multiple ...,A method to predict time series using multiple...,10.1007/s00521-020-05234-6,Time-series forecasting; Deep learning; Bayesi...,WOS,This study proposes a Bayesian network-based m...,✔️,Nikkei 225 index,✔️,❌,tja,2024-09-25T13:00:50.076937,Tord
WOS:000254056900005,The Markov-switching multifractal model of ass...,Multifractal processes have recently been prop...,10.1198/073500107000000403,generalized method of moments; Levinson-Durbin...,WOS,The article introduces a generalized method of...,✔️,Asset returns volatility,❌,✔️,tja,2024-10-02T14:33:29.139854,Sivert
WOS:000188597300010,The adaptive selection of financial and econom...,It has been widely accepted that predicting st...,10.1016/j.neucom.2003.05.001,neural networks; variable relevance analysis; ...,WOS,Uses machine learning and neural networks with...,✔️,Stocks,✔️,❌,tja,2024-09-25T12:25:57.160981,Tord
WOS:000592318300001,"Time series modelling, NARX neural network and...",Purpose This study constructs time series mode...,10.1108/ajems-04-2019-0161,Forex market; Time series model; Artificial in...,WOS,"Applies time series models, NARX neural networ...",✔️,"Forex (EUR/MUR, GBP/MUR, etc.)",✔️,❌,tja,2024-09-25T14:24:23.144119,Sivert


In [45]:
to_csv.to_csv("drive/MyDrive/Master/Screening/accepted_articles.csv")

In [46]:
new_articles = to_csv.copy()
new_articles = new_articles[new_articles["Date"] > "2024-11-01"]
new_articles

Unnamed: 0_level_0,Title,Abstract,DOI,Keywords,Origin,Summary,Financial instrument?,Instrument,AI?,Probabilistic?,Decision,Date,Reviewer
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2-s2.0-85177977642,Black-Litterman Portfolio Optimization Using G...,The Black-Litterman portfolios based on the pr...,,Black-Litterman; Gaussian Process; Machine lea...,SCOPUS,The study integrates Gaussian Process regressi...,✔️,Stocks,✔️,✔️,y,2024-11-01T14:32:32.181051,Tord
978-1-392-07688-0,Dynamic Machine Learning with Least Square Obj...,"As of the writing of this thesis, machine lear...",,,Proquest,The thesis explores dynamic and online machine...,✔️,Stock,✔️,✔️,y,2024-11-01T14:12:28.718719,Tord
02776693,Foreign exchange market prediction with multip...,Foreign exchange market prediction is attracti...,,,Proquest,The article investigates the predictability of...,✔️,Foreign Exchange,✔️,❌,y,2024-11-01T14:49:06.960110,Tord
0015198X,Fuzzy Neural Systems for Stock Selection,Artificial neural systems suffer from an inabi...,,,Proquest,The paper presents an Intelligent Stock Select...,✔️,Stock,✔️,✔️,y,2024-11-01T14:04:05.074421,Tord
2-s2.0-79957931366,Neural networks and investor sentiment measure...,Soft computing methods and various sentiment i...,,Artificial intelligence; Classification; Stock...,SCOPUS,The article compares Probabilistic Neural Netw...,✔️,Stock,✔️,✔️,y,2024-11-01T14:12:43.334348,Tord
2-s2.0-85073325015,Predicting a stock portfolio with the multivar...,"In this paper, we provide methods for creative...",,Feature Selection; Sentiment Analysis; Text Mi...,SCOPUS,The paper integrates text mining from financia...,✔️,Stock,✔️,✔️,y,2024-11-01T14:40:29.120978,Tord
978-1-109-52503-8,Variance-based clustering methods and higher o...,Two approaches have been proposed in statistic...,,,Proquest,The dissertation develops novel variance-based...,❌,?,✔️,❌,y,2024-11-01T14:19:13.708903,Tord


In [47]:
print(new_articles.shape)

(7, 13)


In [48]:
new_articles.to_csv("drive/MyDrive/Master/Screening/new_articles.csv")

In [49]:
decisions_df[decisions_df["Date"] > "2024-11-01"].groupby("Decision").count()["Date"]

Unnamed: 0_level_0,Date
Decision,Unnamed: 1_level_1
n,14
survey,1
y,7
