# Notebook python criado para administrar os artigos baixados para a revisão bibliográfica.

Imports:

In [None]:
from deep_translator import GoogleTranslator
import pandas as pd
import pickle
import os
import glob

# Realizador de pesquisa:

In [44]:
def generate_search_query_woc(synonym_groups, key="TI"):

    formatted_groups = ["(" + " OR ".join(f'"{word}"' for word in group) + ")" for group in synonym_groups]
    query = " AND ".join(formatted_groups)
    
    return f"{key}=({query})"

def generate_search_query_ieee(synonym_groups, key="Document Title"):

    formatted_groups = ["(" + " OR ".join(f'"{key}":{word}' for word in group) + ")" for group in synonym_groups]
    query = " AND ".join(formatted_groups)
    
    return f"({query})"

def generate_search_query_scopus(synonym_groups, key="TITLE"):

    formatted_groups = [f"{key}(" + " OR ".join(f'"{word}"' for word in group) + ")" for group in synonym_groups]

    query = ""

    for i in range(len(formatted_groups)):
        if i == 0:
            query = formatted_groups[i]
        else:
            query = f"{query} AND {formatted_groups[i]}"
    
    return query

In [45]:
# synonym_groups = [
#     ["Semantic Segmentation", "Pixel-wise classification", "Dense prediction", "Pixel-level segmentation", "Pixel-classification"],
#     ["Deep Learning", "Neural Networks", "Deep", "Artificial Intelligence", "DNNs", "DL"],
#     ["Multiscale", "Multi scale", "Multi resolution", "Multi level", "Scale-space"]
# ]

synonym_groups = [
    ["Semantic Segmentation", "Pixel-wise classification", "Dense prediction", "Pixel-level segmentation", "Pixel-classification"],
    ["Deep Learning", "Neural Networks", "Deep", "Artificial Intelligence", "DNNs", "DL"],
    ["Multiscale", "Multi scale", "Multi resolution", "Multi level", "Scale-space"],
    ["Regularization", "Overfitting Reduction", "Model Generalization", "Regularity", "Regularized"]
]

key_web = ["AB"]
key_ieee = ["Abstract"]
key_scopus = ["ABS"]

print("Para buscar na Web of Science:")
for key in key_web:
    print(generate_search_query_woc(synonym_groups, key=key))
print("")

print("Para buscar na IEEE:")
for key in key_ieee:
    print(generate_search_query_ieee(synonym_groups, key=key))
print("")

print("Para buscar na Scopus:")
for key in key_scopus:
    print(generate_search_query_scopus(synonym_groups, key=key))

Para buscar na Web of Science:
AB=(("Semantic Segmentation" OR "Pixel-wise classification" OR "Dense prediction" OR "Pixel-level segmentation" OR "Pixel-classification") AND ("Deep Learning" OR "Neural Networks" OR "Deep" OR "Artificial Intelligence" OR "DNNs" OR "DL") AND ("Multiscale" OR "Multi scale" OR "Multi resolution" OR "Multi level" OR "Scale-space") AND ("Regularization" OR "Overfitting Reduction" OR "Model Generalization" OR "Regularity" OR "Regularized"))

Para buscar na IEEE:
(("Abstract":Semantic Segmentation OR "Abstract":Pixel-wise classification OR "Abstract":Dense prediction OR "Abstract":Pixel-level segmentation OR "Abstract":Pixel-classification) AND ("Abstract":Deep Learning OR "Abstract":Neural Networks OR "Abstract":Deep OR "Abstract":Artificial Intelligence OR "Abstract":DNNs OR "Abstract":DL) AND ("Abstract":Multiscale OR "Abstract":Multi scale OR "Abstract":Multi resolution OR "Abstract":Multi level OR "Abstract":Scale-space) AND ("Abstract":Regularization OR 

# Primeira pesquisa:

## Loading archives e preparing inital data:

Loading and transforming csv archives (WEB OF SCIENCE):

In [46]:
directory = './base/1/'

prefix = 'web'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.xls'))

dataframes = []

for file in csv_files:
    df = pd.read_excel(file)
    dataframes.append(df)

web_of_science_archives = pd.concat(dataframes, ignore_index=True)

In [47]:
web_of_science_archives['Base'] = 'Web of Science'

In [48]:
prefix = 'IEEE'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.csv'))

dataframes = []

for file in csv_files:
    df = pd.read_csv(file)
    dataframes.append(df)

IEEE_archives = pd.concat(dataframes, ignore_index=True)      

In [49]:
IEEE_archives['Base'] = 'IEEE'

In [50]:
prefix = 'scopus'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.csv'))

dataframes = []

for file in csv_files:
    df = pd.read_csv(file)
    dataframes.append(df)

scopus_archives = pd.concat(dataframes, ignore_index=True)

In [51]:
scopus_archives['Base'] = 'Scopus (Elsevier)'

Deleting Duplicates:

In [52]:
web_of_science_archives = web_of_science_archives.drop_duplicates()
IEEE_archives = IEEE_archives.drop_duplicates()
scopus_archives = scopus_archives.drop_duplicates()

In [53]:
scopus_archives.groupby("Year").size()

Year
2017     2
2018     1
2020     4
2021     3
2022    15
2023     6
2024    10
2025     2
dtype: int64

In [54]:
print(web_of_science_archives.columns)
print(IEEE_archives.columns)
print(scopus_archives.columns)

Index(['Publication Type', 'Authors', 'Book Authors', 'Book Editors',
       'Book Group Authors', 'Author Full Names', 'Book Author Full Names',
       'Group Authors', 'Article Title', 'Source Title', 'Book Series Title',
       'Book Series Subtitle', 'Language', 'Document Type', 'Conference Title',
       'Conference Date', 'Conference Location', 'Conference Sponsor',
       'Conference Host', 'Author Keywords', 'Keywords Plus', 'Abstract',
       'Addresses', 'Affiliations', 'Reprint Addresses', 'Email Addresses',
       'Researcher Ids', 'ORCIDs', 'Funding Orgs', 'Funding Name Preferred',
       'Funding Text', 'Cited References', 'Cited Reference Count',
       'Times Cited, WoS Core', 'Times Cited, All Databases',
       '180 Day Usage Count', 'Since 2013 Usage Count', 'Publisher',
       'Publisher City', 'Publisher Address', 'ISSN', 'eISSN', 'ISBN',
       'Journal Abbreviation', 'Journal ISO Abbreviation', 'Publication Date',
       'Publication Year', 'Volume', 'Issue', 'Pa

In [55]:
web_of_science_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
web_of_science_archives.rename(columns={'Article Title': 'Titulo'},inplace=True)
web_of_science_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)
web_of_science_archives.rename(columns={'Conference Date': "Year"}, inplace=True)

IEEE_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
IEEE_archives.rename(columns={'Document Title': 'Titulo'}, inplace = True)
IEEE_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)
IEEE_archives.rename(columns={'Publication Year': "Year"}, inplace=True)

scopus_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
scopus_archives.rename(columns={'Title': 'Titulo'}, inplace=True)
scopus_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)


In [56]:
colums = ['Titulo', 'Base', 'Year', 'DOI', 'Autores', 'Keywords', 'Abstract']
data_base1 = pd.DataFrame(columns=colums)
data_base2 = pd.DataFrame(columns=colums)
data_base3 = pd.DataFrame(columns=colums)

In [57]:
data_base1 = web_of_science_archives[colums]
data_base2 = IEEE_archives[colums]
data_base3 = scopus_archives[colums]

data_base = pd.concat([data_base1, data_base2, data_base3], ignore_index=True)

In [58]:
data_base.tail()

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
92,END-TO-END SEMANTIC SEGMENTATION AND BOUNDARY ...,Scopus (Elsevier),2021,10.1109/IGARSS47720.2021.9555147,Li Q.; Zorzi S.; Shi Y.; Fraundorfer F.; Zhu X.X.,Boundary regularization; Building; Generative ...,Building footprint generation is a vital task ...
93,Mixup-CAM: Weakly-supervised Semantic Segmenta...,Scopus (Elsevier),2020,,Chang Y.-T.; Wang Q.; Hung W.-C.; Piramuthu R....,,Obtaining object response maps is one importan...
94,Deep Multi-Task Learning for an Autoencoder-Re...,Scopus (Elsevier),2022,10.3390/math10244798,Jin G.; Chen X.; Ying L.,edge attention; retinal vessel segmentation; V...,Automated segmentation of retinal blood vessel...
95,Semi-Supervised Semantic Segmentation Constrai...,Scopus (Elsevier),2020,10.1109/ICME46284.2020.9102851,Li X.; He Q.; Dai S.; Wu P.; Tong W.,Consistency regularization; Semantic segmentat...,"In this paper, we propose a self-training base..."
96,Regularized fully convolutional networks for R...,Scopus (Elsevier),2017,10.1109/VCIP.2016.7805508,Su W.; Wang Z.,Depth Segmentation FCN Features Regularization,The prospect of semantic segmentation using de...


In [59]:
data_base.drop_duplicates(subset=['DOI'], inplace=True)
data_base = data_base.reset_index(drop=True)

data_base.drop_duplicates(subset=['Titulo'], inplace=True)
data_base = data_base.reset_index(drop=True)

In [60]:
data_base.tail()

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
41,Class-Aware Feature Regularization for Semanti...,Scopus (Elsevier),2023,10.1145/3633637.3633694,Deng Z.; Tang X.; Zhang Z.; Xie J.; Zhang W.,class-aware regularization; inter-class variat...,"In this paper, to address the problem of intra..."
42,Regularized Loss for Weakly Supervised Single ...,Scopus (Elsevier),2020,10.1007/978-3-030-58526-6_21,Veksler O.,,Fully supervised semantic segmentation is high...
43,CAR: Class-Aware Regularizations for Semantic ...,Scopus (Elsevier),2022,10.1007/978-3-031-19815-1_30,Huang Y.; Kang D.; Chen L.; Zhe X.; Jia W.; Ba...,Class-aware regularizations; Semantic segmenta...,"Recent segmentation methods, such as OCR and C..."
44,Cross-consistent semantic segmentation algorit...,Scopus (Elsevier),2022,10.11834/jig.210571,Liu L.; Zong J.; Xiao Z.; Lan H.; Qu H.,cross-consistency training; deep learning; man...,Objective Image semantic segmentation is a pix...
45,Regularized fully convolutional networks for R...,Scopus (Elsevier),2017,10.1109/VCIP.2016.7805508,Su W.; Wang Z.,Depth Segmentation FCN Features Regularization,The prospect of semantic segmentation using de...


Saving provisional pkl:

In [61]:
with open('./base/data_base.pkl', 'wb') as f:
    pickle.dump(data_base, f)

## Managing data:

Loading pkl:

In [62]:
with open('./base/data_base.pkl', 'rb') as f:
    data_base = pickle.load(f)

Deleting useless articles for the search:

In [63]:
data_base.tail()

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
41,Class-Aware Feature Regularization for Semanti...,Scopus (Elsevier),2023,10.1145/3633637.3633694,Deng Z.; Tang X.; Zhang Z.; Xie J.; Zhang W.,class-aware regularization; inter-class variat...,"In this paper, to address the problem of intra..."
42,Regularized Loss for Weakly Supervised Single ...,Scopus (Elsevier),2020,10.1007/978-3-030-58526-6_21,Veksler O.,,Fully supervised semantic segmentation is high...
43,CAR: Class-Aware Regularizations for Semantic ...,Scopus (Elsevier),2022,10.1007/978-3-031-19815-1_30,Huang Y.; Kang D.; Chen L.; Zhe X.; Jia W.; Ba...,Class-aware regularizations; Semantic segmenta...,"Recent segmentation methods, such as OCR and C..."
44,Cross-consistent semantic segmentation algorit...,Scopus (Elsevier),2022,10.11834/jig.210571,Liu L.; Zong J.; Xiao Z.; Lan H.; Qu H.,cross-consistency training; deep learning; man...,Objective Image semantic segmentation is a pix...
45,Regularized fully convolutional networks for R...,Scopus (Elsevier),2017,10.1109/VCIP.2016.7805508,Su W.; Wang Z.,Depth Segmentation FCN Features Regularization,The prospect of semantic segmentation using de...


In [64]:
data_base.head(57)

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
0,Topological Regularization for Dense Prediction,Web of Science,"DEC 12-14, 2022",10.1109/ICMLA55696.2022.00014,"Fu, DQ; Nelson, BJ",,Dense prediction tasks such as depth perceptio...
1,Unbiased Subclass Regularization for Semi-Supe...,Web of Science,"JUN 18-24, 2022",10.1109/CVPR52688.2022.00973,"Guan, DY; Huang, JX; Xiao, AR; Lu, SJ",,Semi-supervised semantic segmentation learns f...
2,SEMI-SUPERVISED SEMANTIC SEGMENTATION CONSTRAI...,Web of Science,"JUL 06-10, 2020",10.1109/icme46284.2020.9102851,"Li, XQ; He, Q; Dai, SM; Wu, P; Tong, WQ",,"In this paper, we propose a self-training base..."
3,CARD: Semantic Segmentation With Efficient Cla...,Web of Science,,10.1109/TCSVT.2024.3395132,"Huang, Y; Kang, D; Chen, L; Jia, WJ; He, XJ; D...",,Semantic segmentation has recently achieved no...
4,Regularized Fully Convolutional Networks for R...,Web of Science,"NOV 27-30, 2016",,"Su, W; Wang, ZF",,The prospect of semantic segmentation using de...
5,Self-regularized prototypical network for few-...,Web of Science,,10.1016/j.patcog.2022.109018,"Ding, HH; Zhang, H; Jiang, XD",,The deep CNNs in image semantic segmentation t...
6,Class Probability Space Regularization for sem...,Web of Science,,10.1016/j.cviu.2024.104146,"Yin, JJ; Yan, S; Chen, T; Chen, Y; Yao, YZ",,Semantic segmentation achieves fine-grained sc...
7,Uncertainty-aware consistency regularization f...,Web of Science,,10.1016/j.cviu.2022.103448,"Zhou, QY; Feng, ZY; Gu, QQ; Cheng, GL; Lu, XQ;...",,Unsupervised domain adaptation (UDA) aims to a...
8,Noise-robust consistency regularization for se...,Web of Science,,10.1016/j.neunet.2024.107041,"Zhang, HK; Li, HT; Zhang, XF; Yang, GY; Li, AT...",,The essential of semi-supervised semantic segm...
9,Consistency Regularization for Unsupervised Do...,Web of Science,"MAY 23-27, 2022",10.1007/978-3-031-06427-2_42,"Scherer, S; Brehm, S; Lienhart, R",,Unsupervised domain adaptation is a promising ...


Descartados:

31 e 45 - repetidos \
40 e 44 - está em Chinês \
41 - não consegui acesso (ACM)

## 41 artigos no final explorados

# Segunda pesquisa

## Loading archives e preparing inital data:

Loading and transforming csv archives (WEB OF SCIENCE):

In [65]:
directory = './base/2/'

prefix = 'web'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.xls'))

dataframes = []

for file in csv_files:
    df = pd.read_excel(file)
    dataframes.append(df)

web_of_science_archives = pd.concat(dataframes, ignore_index=True)

In [66]:
web_of_science_archives['Base'] = 'Web of Science'

In [67]:
prefix = 'IEEE'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.csv'))

dataframes = []

for file in csv_files:
    df = pd.read_csv(file)
    dataframes.append(df)

IEEE_archives = pd.concat(dataframes, ignore_index=True)      

In [68]:
IEEE_archives['Base'] = 'IEEE'

In [69]:
prefix = 'scopus'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.csv'))

dataframes = []

for file in csv_files:
    df = pd.read_csv(file)
    dataframes.append(df)

scopus_archives = pd.concat(dataframes, ignore_index=True)

In [70]:
scopus_archives['Base'] = 'Scopus (Elsevier)'

Deleting Duplicates:

In [71]:
web_of_science_archives = web_of_science_archives.drop_duplicates()
IEEE_archives = IEEE_archives.drop_duplicates()
scopus_archives = scopus_archives.drop_duplicates()

In [72]:
scopus_archives.groupby("Year").size()

Year
2017    1
2018    2
2019    2
2020    3
2021    3
2022    2
2023    4
2024    1
2025    1
dtype: int64

In [73]:
web_of_science_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
web_of_science_archives.rename(columns={'Article Title': 'Titulo'},inplace=True)
web_of_science_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)
web_of_science_archives.rename(columns={'Conference Date': "Year"}, inplace=True)

IEEE_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
IEEE_archives.rename(columns={'Document Title': 'Titulo'}, inplace = True)
IEEE_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)
IEEE_archives.rename(columns={'Publication Year': "Year"}, inplace=True)

scopus_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
scopus_archives.rename(columns={'Title': 'Titulo'}, inplace=True)
scopus_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)


In [74]:
colums = ['Titulo', 'Base', 'Year', 'DOI', 'Autores', 'Keywords', 'Abstract']
data_base1 = pd.DataFrame(columns=colums)
data_base2 = pd.DataFrame(columns=colums)
data_base3 = pd.DataFrame(columns=colums)

In [75]:
data_base1 = web_of_science_archives[colums]
data_base2 = IEEE_archives[colums]
data_base3 = scopus_archives[colums]

second_data_base = pd.concat([data_base1, data_base2, data_base3], ignore_index=True)

In [76]:
second_data_base.tail()

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
33,Semantic Segmentation of Road Scene Based on M...,Scopus (Elsevier),2022,10.1117/12.2644695,Wang L.; Yan C.,ASPP; Deep supervision; Dual attention module;...,Aiming at the problems of inaccurate segmentat...
34,Convolutional Neural Network-Based Multi-scale...,Scopus (Elsevier),2025,10.1007/978-3-031-72396-4_1,Wang Q.; Zhao Y.; Zhang Z.,MICCAI challenge; Panoramic x-ray; Tooth segme...,"In this study, we propose a semantic segmentat..."
35,Deep Multi - Resolution Network for Real- Time...,Scopus (Elsevier),2023,10.1109/IJCNN54540.2023.10191758,Wang Y.; Chen S.; Bian H.; Li W.; Lu Q.,CNN; Lightweight Network; Real-time; Semantic ...,Information at different resolutions plays dis...
36,Multi-scale structural analysis of proteins by...,Scopus (Elsevier),2020,10.1093/bioinformatics/btz650,Eguchi R.R.; Huang P.-S.,,Motivation: Recent advances in computational m...
37,HookNet: Multi-resolution convolutional neural...,Scopus (Elsevier),2021,10.1016/j.media.2020.101890,van Rijthoven M.; Balkenhol M.; Siliņa K.; van...,Computational pathology; Deep learning; Multi-...,"We propose HookNet, a semantic segmentation mo..."


In [77]:
second_data_base.drop_duplicates(subset=['DOI'], inplace=True)
second_data_base = second_data_base.reset_index(drop=True)

second_data_base.drop_duplicates(subset=['Titulo'], inplace=True)
second_data_base = second_data_base.reset_index(drop=True)

Saving provisional pkl:

In [78]:
with open('./base/second_data_base.pkl', 'wb') as f:
    pickle.dump(second_data_base, f)

## Managing data:

Loading pkl:

In [79]:
with open('./base/second_data_base.pkl', 'rb') as f:
    second_data_base = pickle.load(f)

Deleting useless articles for the search:

In [80]:
second_data_base.tail()

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
17,Scale-aware neural network for semantic segmen...,Scopus (Elsevier),2021,10.3390/rs13245015,Wang L.; Zhang C.; Li R.; Duan C.; Meng X.; At...,Deep convolutional neural network; Multiple sp...,Assigning geospatial objects with specific cat...
18,Butterfly network: a convolutional neural netw...,Scopus (Elsevier),2023,10.1007/s11554-023-01273-z,Alavianmehr M.A.; Helfroush M.S.; Danyali H.; ...,Butterfly network (BF-Net); Convolutional neur...,The detection of multi-scale pedestrians is on...
19,Multi-scale residual deep network for semantic...,Scopus (Elsevier),2020,10.3390/RS12182932,Wang C.; Li L.,Multiple scales; Regularizer; Residual deep en...,It is challenging for semantic segmentation of...
20,Semantic Segmentation of Road Scene Based on M...,Scopus (Elsevier),2022,10.1117/12.2644695,Wang L.; Yan C.,ASPP; Deep supervision; Dual attention module;...,Aiming at the problems of inaccurate segmentat...
21,Convolutional Neural Network-Based Multi-scale...,Scopus (Elsevier),2025,10.1007/978-3-031-72396-4_1,Wang Q.; Zhao Y.; Zhang Z.,MICCAI challenge; Panoramic x-ray; Tooth segme...,"In this study, we propose a semantic segmentat..."


In [111]:
second_data_base.iloc[9]

Titulo      Semantic Segmentation of Earth Observation Dat...
Base                                           Web of Science
Year                                          NOV 20-24, 2016
DOI                              10.1007/978-3-319-54181-5_12
Autores                   Audebert, N; Le Saux, B; Lefèvre, S
Keywords                                                  NaN
Abstract    This work investigates the use of deep fully c...
Name: 9, dtype: object

9 é livro

# Terceira pesquisa

## Loading archives e preparing inital data:

Loading and transforming csv archives (WEB OF SCIENCE):

In [82]:
directory = './base/3/'

prefix = 'web'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.xls'))

dataframes = []

for file in csv_files:
    df = pd.read_excel(file)
    dataframes.append(df)

web_of_science_archives = pd.concat(dataframes, ignore_index=True)

In [83]:
web_of_science_archives['Base'] = 'Web of Science'

In [84]:
prefix = 'IEEE'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.csv'))

dataframes = []

for file in csv_files:
    df = pd.read_csv(file)
    dataframes.append(df)

IEEE_archives = pd.concat(dataframes, ignore_index=True)      

In [85]:
IEEE_archives['Base'] = 'IEEE'

In [86]:
prefix = 'scopus'

csv_files = glob.glob(os.path.join(directory, f'{prefix}*.csv'))

dataframes = []

for file in csv_files:
    df = pd.read_csv(file)
    dataframes.append(df)

scopus_archives = pd.concat(dataframes, ignore_index=True)

In [87]:
scopus_archives['Base'] = 'Scopus (Elsevier)'

Deleting Duplicates:

In [88]:
web_of_science_archives = web_of_science_archives.drop_duplicates()
IEEE_archives = IEEE_archives.drop_duplicates()
scopus_archives = scopus_archives.drop_duplicates()

In [89]:
scopus_archives.groupby("Year").size()

Year
2017     3
2019     2
2020     5
2021    10
2022    13
2023    10
2024     6
dtype: int64

In [90]:
web_of_science_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
web_of_science_archives.rename(columns={'Article Title': 'Titulo'},inplace=True)
web_of_science_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)
web_of_science_archives.rename(columns={'Conference Date': "Year"}, inplace=True)

IEEE_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
IEEE_archives.rename(columns={'Document Title': 'Titulo'}, inplace = True)
IEEE_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)
IEEE_archives.rename(columns={'Publication Year': "Year"}, inplace=True)

scopus_archives.rename(columns={'Authors': 'Autores'}, inplace=True)
scopus_archives.rename(columns={'Title': 'Titulo'}, inplace=True)
scopus_archives.rename(columns={'Author Keywords': "Keywords"}, inplace=True)


In [91]:
colums = ['Titulo', 'Base', 'Year', 'DOI', 'Autores', 'Keywords', 'Abstract']
data_base1 = pd.DataFrame(columns=colums)
data_base2 = pd.DataFrame(columns=colums)
data_base3 = pd.DataFrame(columns=colums)

In [92]:
data_base1 = web_of_science_archives[colums]
data_base2 = IEEE_archives[colums]
data_base3 = scopus_archives[colums]

third_data_base = pd.concat([data_base1, data_base2, data_base3], ignore_index=True)

In [93]:
third_data_base.tail()

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
132,Omni-Seg: A Scale-Aware Dynamic Network for Re...,Scopus (Elsevier),2023,10.1109/TBME.2023.3260739,Deng R.; Liu Q.; Cui C.; Yao T.; Long J.; Asad...,image segmentation; multi-label; multi-scale; ...,Comprehensive semantic segmentation on renal p...
133,9th International Conference on Image and Grap...,Scopus (Elsevier),2017,,,,The proceedings contain 172 papers. The specia...
134,UAVid: A semantic segmentation dataset for UAV...,Scopus (Elsevier),2020,10.1016/j.isprsjprs.2020.05.009,Lyu Y.; Vosselman G.; Xia G.-S.; Yilmaz A.; Ya...,Dataset; Deep learning; Semantic segmentation;...,Semantic segmentation has been one of the lead...
135,Methodology of data fusion using deep learning...,Scopus (Elsevier),2020,10.1109/ACCESS.2020.3031533,De Oliveira J.P.; Costa M.G.F.; Costa Filho C....,Convolutional neural networks; Deep learning; ...,This study proposes a methodology using deep l...
136,"Generalizable multi-task, multi-domain deep se...",Scopus (Elsevier),2022,10.1016/j.media.2022.102556,Boutillon A.; Conze P.-H.; Pons C.; Burdin V.;...,Attention models; Contrastive learning; Domain...,Clinical diagnosis of the pediatric musculoske...


In [94]:
third_data_base.drop_duplicates(subset=['DOI'], inplace=True)
third_data_base = third_data_base.reset_index(drop=True)

third_data_base.drop_duplicates(subset=['Titulo'], inplace=True)
third_data_base = third_data_base.reset_index(drop=True)

Saving provisional pkl:

In [95]:
with open('./base/third_data_base.pkl', 'wb') as f:
    pickle.dump(third_data_base, f)

## Managing data:

Loading pkl:

In [96]:
with open('./base/third_data_base.pkl', 'rb') as f:
    third_data_base = pickle.load(f)

Deleting useless articles for the search:

In [97]:
third_data_base.tail()

Unnamed: 0,Titulo,Base,Year,DOI,Autores,Keywords,Abstract
84,Method for image segmentation of cucumber dise...,Scopus (Elsevier),2020,10.11975/j.issn.1002-6819.2020.16.019,Zhang S.; Wang Z.; Wang Z.,Convolutional neural networks; Disease; Image ...,Cucumber disease leaf image segmentation is an...
85,Conditional Generative Adversarial Networks wi...,Scopus (Elsevier),2020,10.1007/s11063-020-10303-x,He J.; Li X.; Liu N.; Zhan S.,Feature matching loss; Generative adversarial ...,Accurate prostate MR image segmentation is a n...
86,Identification of sweetpotato virus disease-in...,Scopus (Elsevier),2024,10.3389/fpls.2024.1456713,Ding Z.; Zeng F.; Li H.; Zheng J.; Chen J.; Ch...,deep learning; RGB image; semantic segmentatio...,Introduction: Sweetpotato virus disease (SPVD)...
87,Multi-scale Multi-task Distillation for Increm...,Scopus (Elsevier),2023,10.1007/978-3-031-25066-8_20,Tian M.; Yang Q.; Gao Y.,3D medical image segmentation; Distillation; I...,Automatic medical image segmentation is the co...
88,Generalized Semantic Segmentation by Self-Supe...,Scopus (Elsevier),2023,10.1609/aaai.v37i9.26280,Yang L.; Gu X.; Sun J.,,Deep networks trained on the source domain sho...


In [98]:
third_data_base.iloc[9]

Titulo      From image-level to pixel-level labeling: A we...
Base                                           Web of Science
Year                                                      NaN
DOI                                 10.1016/j.jag.2024.104023
Autores                   Li, BY; Gong, AD; Zhang, JM; Fu, ZX
Keywords                                                  NaN
Abstract    Aquaculture mapping is essential for monitorin...
Name: 9, dtype: object