In [1]:
#!conda install -n impacta_env ipykernel --update-deps --force-reinstall

In [2]:
import bibtexparser
import pandas as pd
import os
import glob

import warnings
# Suppress every FutureWarning for the rest of the session.
warnings.simplefilter('ignore', FutureWarning)

# Raise the pandas display limits so wide bibliographic frames stay readable.
for option_name, option_value in (
    ('display.max_rows', 100),
    ('display.max_columns', 100),
    ('display.width', 300),
):
    pd.set_option(option_name, option_value)



In [3]:
def read_bib(file_path: str, encoding: str = 'utf-8') -> pd.DataFrame:
    '''
    Read one bib file and return its parsed entries as a dataframe object.

    file_path: path of the bib file to parse
    encoding: text encoding used to open the file (default: 'utf-8')

    Returns a DataFrame built from the parsed entries of the file.
    '''
    # Open with an explicit encoding so decoding does not depend on the
    # platform/locale default (which is not UTF-8 everywhere).
    with open(file_path, encoding=encoding) as bibtex_file:
        bib_file = bibtexparser.load(bibtex_file)

    return pd.DataFrame(bib_file.entries)

In [4]:
def load_bib(folder_path: str) -> pd.DataFrame:

    '''
    Function to:
    1) list the bib files located directly inside one directory (folder);
    2) read and parse each of them with read_bib, printing one progress line per file;
    3) concatenate the resulting dataframes into a single one.

    folder_path: path of the directory where the bib files are located

    Returns the concatenated DataFrame, or an empty DataFrame when the
    directory contains no bib file.
    '''

    # glob gives no ordering guarantee; sorting keeps the row order reproducible
    list_files = sorted(glob.glob(f'{folder_path}/*.bib'))

    # loading each bib file listed
    list_df = []
    for c, file in enumerate(list_files, start=1):
        list_df.append(read_bib(file))
        print(f'{c} de {len(list_files)}: {file}')

    # pd.concat raises ValueError on an empty list, so fall back to an empty frame
    df = pd.concat(list_df) if list_df else pd.DataFrame()

    # label the shape with the folder received as argument, not with a
    # module-level variable that may be undefined or point to another folder
    print(f'Shape df_{folder_path}: ', df.shape)

    return df

In [5]:
# Each directory directly under ../01_Datasets is treated as one source of bib files.
# Non-directory entries are skipped, and the list is sorted so the
# concatenation order does not depend on the filesystem listing order.
list_folders = sorted(
    path for path in glob.glob('../01_Datasets/*') if os.path.isdir(path)
)

# Load one dataframe per source folder, echoing the folder name before
# the per-file progress lines printed by load_bib.
list_df = []
for folder in list_folders:
    print('\n', folder)
    list_df.append(load_bib(folder))

# Stack all sources into a single dataframe.
df_all = pd.concat(list_df)
print('\nShape df_all: ', df_all.shape)


 ../01_Datasets/acm
1 de 15: ../01_Datasets/acm/acm (1).bib
2 de 15: ../01_Datasets/acm/acm (10).bib
3 de 15: ../01_Datasets/acm/acm (11).bib
4 de 15: ../01_Datasets/acm/acm (12).bib
5 de 15: ../01_Datasets/acm/acm (13).bib
6 de 15: ../01_Datasets/acm/acm (14).bib
7 de 15: ../01_Datasets/acm/acm (2).bib
8 de 15: ../01_Datasets/acm/acm (3).bib
9 de 15: ../01_Datasets/acm/acm (4).bib
10 de 15: ../01_Datasets/acm/acm (5).bib
11 de 15: ../01_Datasets/acm/acm (6).bib
12 de 15: ../01_Datasets/acm/acm (7).bib
13 de 15: ../01_Datasets/acm/acm (8).bib
14 de 15: ../01_Datasets/acm/acm (9).bib
15 de 15: ../01_Datasets/acm/acm.bib
Shape df_../01_Datasets/sciencedirect:  (1451, 27)

 ../01_Datasets/ieee
1 de 5: ../01_Datasets/ieee/ieee01.bib
2 de 5: ../01_Datasets/ieee/ieee02.bib
3 de 5: ../01_Datasets/ieee/ieee03.bib
4 de 5: ../01_Datasets/ieee/ieee04.bib
5 de 5: ../01_Datasets/ieee/ieee05.bib
Shape df_../01_Datasets/sciencedirect:  (466, 18)

 ../01_Datasets/sciencedirect
1 de 51: ../01_Datasets

In [6]:
# Show the combined dataframe as the cell output.
df_all

Unnamed: 0,series,location,keywords,numpages,articleno,booktitle,abstract,doi,url,address,publisher,isbn,year,title,author,ENTRYTYPE,ID,pages,month,journal,issn,number,volume,issue_date,note,edition,editor
0,CSAE 2019,"Sanya, China","Crop germplasm resources, Data analysis, Big d...",7,27,Proceedings of the 3rd International Conferenc...,Based on understanding the application of big ...,10.1145/3331453.3361308,https://doi.org/10.1145/3331453.3361308,"New York, NY, USA",Association for Computing Machinery,9781450362948,2019,Construction and Implementation of Big Data Fr...,"Jing, Furong and Cao, Yongsheng and Fang, Wei ...",inproceedings,10.1145/3331453.3361308,,,,,,,,,,
1,ICSE '22,"Pittsburgh, Pennsylvania",,12,,Proceedings of the 44th International Conferen...,Massive data from software repositories and co...,10.1145/3510003.3510619,https://doi.org/10.1145/3510003.3510619,"New York, NY, USA",Association for Computing Machinery,9781450392211,2022,Big Data = Big Insights? Operationalising Broo...,"Gote, Christoph and Mavrodiev, Pavlin and Schw...",inproceedings,10.1145/3510003.3510619,262–273,,,,,,,,,
2,ICBIM 2017,"Bei Jing, China","Database, Business Intelligence, Institutional...",5,,Proceedings of the International Conference on...,The applications on business intelligence and ...,10.1145/3134271.3134296,https://doi.org/10.1145/3134271.3134296,"New York, NY, USA",Association for Computing Machinery,9781450352765,2017,Establishment of Business Intelligence and Big...,"Peng, Michael Yao-Ping and Tuan, Sheng-Hwa and...",inproceedings,10.1145/3134271.3134296,121–125,,,,,,,,,
3,AICS 2019,"Wuhan, Hubei, China","big data, agro-meteorological disasters, early...",5,,Proceedings of the 2019 International Conferen...,"Agricultural meteorological disasters, includi...",10.1145/3349341.3349371,https://doi.org/10.1145/3349341.3349371,"New York, NY, USA",Association for Computing Machinery,9781450371506,2019,Quality Control Framework of Big Data for Earl...,"Li, Jiale and Liao, Shunbao",inproceedings,10.1145/3349341.3349371,74–78,,,,,,,,,
4,iiWAS2019,"Munich, Germany","Intelligent smart environments, Big data analy...",3,,Proceedings of the 21st International Conferen...,This paper focuses on big data management and ...,10.1145/3366030.3366044,https://doi.org/10.1145/3366030.3366044,"New York, NY, USA",Association for Computing Machinery,9781450371797,2019,Big Data Management and Analytics in Intellige...,"Cuzzocrea, Alfredo",inproceedings,10.1145/3366030.3366044,5–7,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,,,"Vehicular networks, VANET, IoV, V2X, Trust man...",,,,"In recent years, the emergence of the Internet...",https://doi.org/10.1016/j.comnet.2021.108558,https://www.sciencedirect.com/science/article/...,,,,2022,A survey of trust management in the Internet o...,Amal Hbaieb and Samiha Ayed and Lamia Chaari,article,HBAIEB2022108558,108558,,Computer Networks,1389-1286,,203,,,,
68,,,,,,Quantitative Analysis and Modeling of Earth an...,,https://doi.org/10.1016/B978-0-12-816341-2.000...,https://www.sciencedirect.com/science/article/...,,Elsevier,978-0-12-816341-2,2022,Index,,incollection,2022485,485-492,,,,,,,,,Jiaping Wu and Junyu He and George Christakos
69,,,"Prognostics and health management (PHM), Artif...",,,,Prognostics and health management (PHM) has be...,https://doi.org/10.1016/j.engappai.2021.104552,https://www.sciencedirect.com/science/article/...,,,,2022,Artificial intelligence in prognostics and hea...,Sunday Ochella and Mahmood Shafiee and Fateme ...,article,OCHELLA2022104552,104552,,Engineering Applications of Artificial Intelli...,0952-1976,,108,,,,
70,,,"Data infrastructures, Data management, Marine ...",,,Ocean Science Data,Many marine observation and data collection in...,https://doi.org/10.1016/B978-0-12-823427-3.000...,https://www.sciencedirect.com/science/article/...,,Elsevier,978-0-12-823427-3,2022,Chapter Three - Data management infrastructure...,Dick M.A. Schaap and Antonio Novellino and Mic...,incollection,SCHAAP2022131,131-193,,,,,,,,,Giuseppe Manzella and Antonio Novellino


# Export to CSV

In [7]:
# Write the combined dataframe to df_all.csv in the working directory,
# semicolon-separated and without the index column.
df_all.to_csv(
    'df_all.csv',
    sep=';',
    index=False,
)