In [1]:
#!conda install -n impacta_env ipykernel --update-deps --force-reinstall
# !python3 -m pip install bibtexparser
# !python3 -m pip install pyyaml

In [2]:
import bibtexparser
import pandas as pd
import numpy as np
import os
import glob
import yaml

import warnings
warnings.simplefilter(action = 'ignore', category = FutureWarning)

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 300)

# Def Functions

In [3]:
#def1
def read_bib(file_path: str):
    '''
    Function to read and parse bib files to dataframe object.
    path: bib file path
    '''
    with open(file_path) as bibtex_file:
        bib_file = bibtexparser.load(bibtex_file)
    df = pd.DataFrame(bib_file.entries)
    
    return df
#########################################################################

#def2
def load_bib(folder_path: str):
    
    '''
    Function to: 
    1) read and parse bib files from a list of directories (folders); 
    2) concatenate multiple dataframes in an only one
    
    folder_path: directories path where are located the bib files
    
    '''
    
    #listing bib files path from acm directory
    list_files = []
    for file in glob.glob(f'{folder_path}/*.bib'):
        list_files.append(file)

    #loading each bib file listed
    list_df = []
    c = 1
    for file in list_files:
        df_temp = read_bib(file) #def
        list_df.append(df_temp)
        print(f'{c} de {len(list_files)}: {file}')
        c += 1

    #concatenating all files in a unique dataframe object
    df = pd.concat(list_df)
    print(f'Shape df_{folder}: ', df.shape)
    
    return df
#########################################################################

#def3
def write_yaml (data, file_name):
  with open(f'../03_OutputFiles/{file_name}.yaml', 'w') as output_file:
    yaml.dump_all(data.to_dict('records'), output_file, default_flow_style=False)
    print(f"{file_name}.yaml successfully written")
#########################################################################

#def4
def write_json (data, file_name):
  data.to_json(f'../03_OutputFiles/{file_name}.json', orient='records', indent=4)
  print(f"{file_name}.json successfully written")
#########################################################################

#def5
def write_csv (data, file_name):
  data.to_csv(f'../03_OutputFiles/{file_name}.csv',sep=';', index = False)
  print(f"{file_name}.csv successfully written")
#########################################################################

#def6
def read_yaml (file_name):
  with open(file_name, "r") as yamlfile:
      data = yaml.load(yamlfile, Loader=yaml.FullLoader)
      print(f"{file_name} read successfully")
  return data

def read_csv (file_name):
  csv_data = pd.read_csv(file_name, sep = ';')
  return csv_data

# Reading, loading and concatenating all bibtex files

In [4]:
list_folders = []
for folder in glob.glob(f'../01_Datasets/*'):
    list_folders.append(folder)
list_folders

list_df = []
for f in list_folders:
    print('\n',f)
    df_temp = load_bib(f) #def
    list_df.append(df_temp)
df_all_raw = pd.concat(list_df)
print('\nShape df_all_raw: ',df_all_raw.shape)


 ../01_Datasets/acm
1 de 15: ../01_Datasets/acm/acm (1).bib
2 de 15: ../01_Datasets/acm/acm (10).bib
3 de 15: ../01_Datasets/acm/acm (11).bib
4 de 15: ../01_Datasets/acm/acm (12).bib
5 de 15: ../01_Datasets/acm/acm (13).bib
6 de 15: ../01_Datasets/acm/acm (14).bib
7 de 15: ../01_Datasets/acm/acm (2).bib
8 de 15: ../01_Datasets/acm/acm (3).bib
9 de 15: ../01_Datasets/acm/acm (4).bib
10 de 15: ../01_Datasets/acm/acm (5).bib
11 de 15: ../01_Datasets/acm/acm (6).bib
12 de 15: ../01_Datasets/acm/acm (7).bib
13 de 15: ../01_Datasets/acm/acm (8).bib
14 de 15: ../01_Datasets/acm/acm (9).bib
15 de 15: ../01_Datasets/acm/acm.bib
Shape df_../01_Datasets/sciencedirect:  (1451, 27)

 ../01_Datasets/ieee
1 de 5: ../01_Datasets/ieee/ieee01.bib
2 de 5: ../01_Datasets/ieee/ieee02.bib
3 de 5: ../01_Datasets/ieee/ieee03.bib
4 de 5: ../01_Datasets/ieee/ieee04.bib
5 de 5: ../01_Datasets/ieee/ieee05.bib
Shape df_../01_Datasets/sciencedirect:  (466, 18)

 ../01_Datasets/sciencedirect
1 de 51: ../01_Datasets

# Import JCS and Scimago CSV files

In [5]:
df_jcs = pd.read_csv(f'../jcr_scimago/jcs_2020.csv', sep = ';')
df_scimago = pd.read_csv(f'../jcr_scimago/scimagojr_2020.csv', sep = ';')

# df1.isna().sum()
df_scimago.head()

  df_scimago = pd.read_csv(f'../jcr_scimago/scimagojr_2020.csv', sep = ';')


Unnamed: 0,Rank,Sourceid,Title,Type,Issn,SJR,SJR Best Quartile,H index,Total Docs. (2020),Total Docs. (3years),Total Refs.,Total Cites (3years),Citable Docs. (3years),Cites / Doc. (2years),Ref. / Doc.,Country,Region,Publisher,Coverage,Categories
0,1,28773,Ca-A Cancer Journal for Clinicians,journal,"15424863, 00079235",62937,Q1,168,47,119,3452,15499,80,12634,7345,United States,Northern America,Wiley-Blackwell,1950-2020,Hematology (Q1); Oncology (Q1)
1,2,19434,MMWR Recommendations and Reports,journal,"10575987, 15458601",40949,Q1,143,10,9,1292,492,9,5000,12920,United States,Northern America,Centers for Disease Control and Prevention (CDC),1990-2020,Epidemiology (Q1); Health Information Manageme...
2,3,20315,Nature Reviews Molecular Cell Biology,journal,"14710072, 14710080",37461,Q1,431,115,338,8439,10844,167,3283,7338,United Kingdom,Western Europe,Nature Publishing Group,2000-2020,Cell Biology (Q1); Molecular Biology (Q1)
3,4,29431,Quarterly Journal of Economics,journal,"00335533, 15314650",34573,Q1,259,40,110,2733,1945,109,1600,6833,United Kingdom,Western Europe,Oxford University Press,1886-2020,Economics and Econometrics (Q1)
4,5,21100812243,Nature Reviews Materials,journal,20588437,32011,Q1,108,92,264,10632,11188,138,3215,11557,United Kingdom,Western Europe,Nature Publishing Group,2016-2020,"Biomaterials (Q1); Electronic, Optical and Mag..."


In [6]:
df_jcs.head()

Unnamed: 0,Rank,Full Journal Title,Total Cites,Unnamed: 3,Journal Impact Factor,Eigenfactor Score,Unnamed: 6,Unnamed: 7
0,1,CA-A CANCER JOURNAL FOR CLINICIANS,55868,,508.702,0.10514,,
1,2,NATURE REVIEWS MOLECULAR CELL BIOLOGY,58477,,94.444,0.07548,,
2,3,NEW ENGLAND JOURNAL OF MEDICINE,464351,,91.245,0.63118,,
3,4,NATURE REVIEWS DRUG DISCOVERY,41989,,84.694,0.04822,,
4,5,LANCET,369601,,79.321,0.44524,,


# Rename columns and standardized column values

In [7]:
df_jcs.rename(columns={'Rank': 'rank_jcr', 'Full Journal Title': 'journal_title', 'Total Cites': 'total_cities_jcr', 'Journal Impact Factor': 'jcr_value'}, inplace = True)
df_scimago.rename(columns={'Rank': 'rank_scimago', 'Title': 'journal_title', 'Total Cites (3years)': 'total_cities_scimago', 'SJR': 'scimago_value', 'Issn': 'issn'}, inplace = True)

cols_to_keep_df1 = ['rank_jcr', 'journal_title', 'total_cities_jcr', 'jcr_value']

cols_to_keep_df2 = ['issn','rank_scimago', 'journal_title', 'total_cities_scimago', 'scimago_value']

df_jcs['journal_title'] = df_jcs['journal_title'].str.title()
df_jcs['total_cities_jcr'] = df_jcs['total_cities_jcr'].str.replace(",","")
df_jcs['jcr_value'] = df_jcs['jcr_value'].str.replace(".",",")

df_jcs_clean = df_jcs[cols_to_keep_df1]
df_scimago_clean = df_scimago[cols_to_keep_df2]

df_scimago_clean.head()

Unnamed: 0,issn,rank_scimago,journal_title,total_cities_scimago,scimago_value
0,"15424863, 00079235",1,Ca-A Cancer Journal for Clinicians,15499,62937
1,"10575987, 15458601",2,MMWR Recommendations and Reports,492,40949
2,"14710072, 14710080",3,Nature Reviews Molecular Cell Biology,10844,37461
3,"00335533, 15314650",4,Quarterly Journal of Economics,1945,34573
4,20588437,5,Nature Reviews Materials,11188,32011


In [8]:
df_jcs_clean.head()

Unnamed: 0,rank_jcr,journal_title,total_cities_jcr,jcr_value
0,1,Ca-A Cancer Journal For Clinicians,55868,508702
1,2,Nature Reviews Molecular Cell Biology,58477,94444
2,3,New England Journal Of Medicine,464351,91245
3,4,Nature Reviews Drug Discovery,41989,84694
4,5,Lancet,369601,79321


In [9]:
print(df_jcs_clean.count())
print(df_scimago_clean.count())

rank_jcr            13010
journal_title       13010
total_cities_jcr    13010
jcr_value           13010
dtype: int64
issn                    32952
rank_scimago            32952
journal_title           32952
total_cities_scimago    32952
scimago_value           32604
dtype: int64


# Merge scimago dataframe with jcs dataframe

In [10]:
df_jcs_scimago = pd.merge(df_scimago_clean, df_jcs_clean, on="journal_title")
df_jcs_scimago.head()

Unnamed: 0,issn,rank_scimago,journal_title,total_cities_scimago,scimago_value,rank_jcr,total_cities_jcr,jcr_value
0,"14710072, 14710080",3,Nature Reviews Molecular Cell Biology,10844,37461,2,58477,94444
1,20588437,5,Nature Reviews Materials,11188,32011,7,19887,66308
2,"00928674, 10974172",7,Cell,52644,26304,33,320407,41582
3,"14710056, 14710064",8,Nature Reviews Genetics,6348,26214,19,42803,53242
4,"14741741, 14741733",11,Nature Reviews Immunology,8200,20529,20,55784,53106


# Explode issn column of jcs_scimago dataframe

In [11]:
teste = df_jcs_scimago.copy()
teste.head()
teste['issn'] = teste['issn'].str.split(',', expand=False)
teste = teste.explode('issn').drop_duplicates()
display(teste)


Unnamed: 0,issn,rank_scimago,journal_title,total_cities_scimago,scimago_value,rank_jcr,total_cities_jcr,jcr_value
0,14710072,3,Nature Reviews Molecular Cell Biology,10844,37461,2,58477,94444
0,14710080,3,Nature Reviews Molecular Cell Biology,10844,37461,2,58477,94444
1,20588437,5,Nature Reviews Materials,11188,32011,7,19887,66308
2,00928674,7,Cell,52644,26304,33,320407,41582
2,10974172,7,Cell,52644,26304,33,320407,41582
...,...,...,...,...,...,...,...,...
4144,09518967,31524,Mediterranean Historical Review,2,0101,12898,133,0158
4145,17458706,31574,Northern History,1,0101,12849,80,0211
4145,0078172X,31574,Northern History,1,0101,12849,80,0211
4146,01497952,32166,German Studies Review,10,0100,12902,150,0156


# Features renaming / selection

In [12]:
df_all = df_all_raw.copy()
df_all.rename(columns={'ENTRYTYPE': 'type_publication'}, inplace = True)

cols_to_keep = ['author', 'title', 'keywords', 'abstract', 'year', 'type_publication', 'doi', 'issn']

df_all = df_all[cols_to_keep]

df_all.head()

Unnamed: 0,author,title,keywords,abstract,year,type_publication,doi,issn
0,"Jing, Furong and Cao, Yongsheng and Fang, Wei ...",Construction and Implementation of Big Data Fr...,"Crop germplasm resources, Data analysis, Big d...",Based on understanding the application of big ...,2019,inproceedings,10.1145/3331453.3361308,
1,"Gote, Christoph and Mavrodiev, Pavlin and Schw...",Big Data = Big Insights? Operationalising Broo...,,Massive data from software repositories and co...,2022,inproceedings,10.1145/3510003.3510619,
2,"Peng, Michael Yao-Ping and Tuan, Sheng-Hwa and...",Establishment of Business Intelligence and Big...,"Database, Business Intelligence, Institutional...",The applications on business intelligence and ...,2017,inproceedings,10.1145/3134271.3134296,
3,"Li, Jiale and Liao, Shunbao",Quality Control Framework of Big Data for Earl...,"big data, agro-meteorological disasters, early...","Agricultural meteorological disasters, includi...",2019,inproceedings,10.1145/3349341.3349371,
4,"Cuzzocrea, Alfredo",Big Data Management and Analytics in Intellige...,"Intelligent smart environments, Big data analy...",This paper focuses on big data management and ...,2019,inproceedings,10.1145/3366030.3366044,


# Drop 

In [13]:
df_all["issn"] = df_all["issn"].str.replace("-","")

# Merge .bibtex dataframe with jcr_scimago dataframe

In [14]:
merged_df = pd.merge(teste,df_all, on="issn")
display(merged_df)

Unnamed: 0,issn,rank_scimago,journal_title,total_cities_scimago,scimago_value,rank_jcr,total_cities_jcr,jcr_value,author,title,keywords,abstract,year,type_publication,doi
0,00928674,7,Cell,52644,26304,33,320407,41582,Yann Joly and Stephanie O.M. Dyke and Bartha M...,Are Data Sharing and Privacy Protection Mutual...,,We review emerging strategies to protect the p...,2016,article,https://doi.org/10.1016/j.cell.2016.11.004
1,00928674,7,Cell,52644,26304,33,320407,41582,Emma Laks and Andrew McPherson and Hans Zahn a...,Clonal Decomposition and DNA Replication State...,"single cell, copy number, aneuploidy, tumor ev...",Summary\nAccurate measurement of clonal genoty...,2019,article,https://doi.org/10.1016/j.cell.2019.10.026
2,00928674,7,Cell,52644,26304,33,320407,41582,Daisong Wang and Jingqiang Wang and Lanyue Bai...,Long-Term Expansion of Pancreatic Islet Organo...,"pancreatic islets, adult stem cells, organoid,...",Summary\nIt has generally proven challenging t...,2020,article,https://doi.org/10.1016/j.cell.2020.02.048
3,00928674,7,Cell,52644,26304,33,320407,41582,Jiamin Wu and Zhi Lu and Dong Jiang and Yuduo ...,Iterative tomography with digital adaptive opt...,"long-term high-speed imaging, adaptive optics,...",Summary\nLong-term subcellular intravital imag...,2021,article,https://doi.org/10.1016/j.cell.2021.04.029
4,00928674,7,Cell,52644,26304,33,320407,41582,Tianyi Wang and Wei Wang and Guangmao Xie and ...,Human population history at the crossroads of ...,"ancient DNA, 12,000-year-old humans, deeply di...",Summary\nPast human genetic diversity and migr...,2021,article,https://doi.org/10.1016/j.cell.2021.05.018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
851,00405957,12449,Therapie,338,0387,7565,1069,2070,Jean-Christophe Corvol and Sylvia Goni and Rég...,Translational research on cognitive and behavi...,"Neurology, Psychiatrics, Cognition, Behaviour,...",Summary\nThe important medical and social burd...,2016,article,https://doi.org/10.1016/j.therap.2016.01.001
852,18744907,13273,Physical Communication,969,0354,8418,1412,1810,Farshad Miramirkhani and Mehdi Karbalayghareh ...,Enabling 5G indoor services for residential en...,"Visible light communication (VLC), Ray-tracing...",Visible light communication (VLC) has emerged ...,2022,article,https://doi.org/10.1016/j.phycom.2022.101679
853,18744907,13273,Physical Communication,969,0354,8418,1412,1810,Nida Fatima and Paresh Saxena and Manik Gupta,Integration of multi access edge computing wit...,"Unmanned aerial vehicle, Multi-access edge com...","During the last decade, research and developme...",2022,article,https://doi.org/10.1016/j.phycom.2022.101641
854,03623319,13408,Social Science Journal,340,0349,6572,1817,2376,Turgut Ozkan,Criminology in the age of data explosion: New ...,"Social science, Big data, Crime, Social media,...",This review discusses practical benefits and l...,2019,article,https://doi.org/10.1016/j.soscij.2018.10.010


# Dtypes

In [15]:
merged_df.dtypes

issn                    object
rank_scimago             int64
journal_title           object
total_cities_scimago     int64
scimago_value           object
rank_jcr                 int64
total_cities_jcr        object
jcr_value               object
author                  object
title                   object
keywords                object
abstract                object
year                    object
type_publication        object
doi                     object
dtype: object

In [16]:
#Adjusting "year" feature dtype
merged_df['year'] = merged_df.year.astype('int64')
print('dtypes:\n', df_all.dtypes, '\n')

#Sorting values by "year"
merged_df = merged_df.sort_values('year')
display(merged_df.head())

dtypes:
 author              object
title               object
keywords            object
abstract            object
year                object
type_publication    object
doi                 object
issn                object
dtype: object 



Unnamed: 0,issn,rank_scimago,journal_title,total_cities_scimago,scimago_value,rank_jcr,total_cities_jcr,jcr_value,author,title,keywords,abstract,year,type_publication,doi
32,00128252,408,Earth-Science Reviews,7358,3893,311,27304,12413,A. Hubaux,A new geological tool-the data,,Today data processing technology offers new an...,1973,article,https://doi.org/10.1016/0012-8252(73)90089-5
741,01403664,8052,Computer Communications,2424,627,4546,6725,3167,C.K Yeo and S.C Hui and I.Y Soon and B.S Lee,An adaptive protocol for real-time fax communi...,"Internet faxing, Adaptive fax communication, R...",Internet faxing allows users from different lo...,2001,article,https://doi.org/10.1016/S0140-3664(00)00342-X
139,00063207,1105,Biological Conservation,7021,2227,1353,39669,5990,Brian L. Sullivan and Christopher L. Wood and ...,eBird: A citizen-based bird observation networ...,"eBird, Citizen-science, Observation network, S...",New technologies are rapidly changing the way ...,2009,article,https://doi.org/10.1016/j.biocon.2009.05.006
513,1470160X,2829,Ecological Indicators,12850,1315,2031,32205,4958,C. Zucca and R. Della Peruta and R. Salvia and...,Towards a World Desertification Atlas. Relatin...,"UNCCD, Land degradation, Indicator frameworks,...",Mapping land degradation and desertification (...,2012,article,https://doi.org/10.1016/j.ecolind.2011.09.012
171,0740624X,1194,Government Information Quarterly,1946,2121,876,5379,7279,Gwanhoo Lee and Young Hoon Kwak,An Open Government Maturity Model for social m...,"Open government, Social media, Public engageme...",Social media has opened up unprecedented new p...,2012,article,https://doi.org/10.1016/j.giq.2012.06.001


# NaN analysis

In [17]:
print('Shape: ',merged_df.shape,'\n')
print(f'Nan Analysis: \n{merged_df.isna().sum()}\n')


Shape:  (856, 15) 

Nan Analysis: 
issn                     0
rank_scimago             0
journal_title            0
total_cities_scimago     0
scimago_value            0
rank_jcr                 0
total_cities_jcr         0
jcr_value                0
author                  30
title                    2
keywords                84
abstract                62
year                     0
type_publication         0
doi                      0
dtype: int64



In [18]:
print('Shape before dropna: ', merged_df.shape)
merged_df.dropna(axis = 0, inplace = True)
print('Shape after dropna: ', merged_df.shape)

Shape before dropna:  (856, 15)
Shape after dropna:  (769, 15)


# Drop duplicates

In [19]:
print('Shape before drop_duplicates: ',merged_df.shape)
merged_df.drop_duplicates(inplace = True)
print('Shape after drop_duplicates: ',merged_df.shape)

Shape before drop_duplicates:  (769, 15)
Shape after drop_duplicates:  (766, 15)


In [20]:
display(merged_df)

Unnamed: 0,issn,rank_scimago,journal_title,total_cities_scimago,scimago_value,rank_jcr,total_cities_jcr,jcr_value,author,title,keywords,abstract,year,type_publication,doi
741,01403664,8052,Computer Communications,2424,0627,4546,6725,3167,C.K Yeo and S.C Hui and I.Y Soon and B.S Lee,An adaptive protocol for real-time fax communi...,"Internet faxing, Adaptive fax communication, R...",Internet faxing allows users from different lo...,2001,article,https://doi.org/10.1016/S0140-3664(00)00342-X
139,00063207,1105,Biological Conservation,7021,2227,1353,39669,5990,Brian L. Sullivan and Christopher L. Wood and ...,eBird: A citizen-based bird observation networ...,"eBird, Citizen-science, Observation network, S...",New technologies are rapidly changing the way ...,2009,article,https://doi.org/10.1016/j.biocon.2009.05.006
513,1470160X,2829,Ecological Indicators,12850,1315,2031,32205,4958,C. Zucca and R. Della Peruta and R. Salvia and...,Towards a World Desertification Atlas. Relatin...,"UNCCD, Land degradation, Indicator frameworks,...",Mapping land degradation and desertification (...,2012,article,https://doi.org/10.1016/j.ecolind.2011.09.012
171,0740624X,1194,Government Information Quarterly,1946,2121,876,5379,7279,Gwanhoo Lee and Young Hoon Kwak,An Open Government Maturity Model for social m...,"Open government, Social media, Public engageme...",Social media has opened up unprecedented new p...,2012,article,https://doi.org/10.1016/j.giq.2012.06.001
350,01679236,2113,Decision Support Systems,2672,1564,1455,13580,5795,Haluk Demirkan and Dursun Delen,Leveraging the capabilities of service-oriente...,"Cloud computing, Service orientation, Service ...",Using service-oriented decision support system...,2013,article,https://doi.org/10.1016/j.dss.2012.05.048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,00200255,2220,Information Sciences,17554,1524,1030,44038,6795,Michal Moran and Tom Cohen and Yuval Ben-Zion ...,Curious instance selection,"Intrinsic motivation learning, Curiosity loop,...",In the process of building machine learning mo...,2022,article,https://doi.org/10.1016/j.ins.2022.07.025
122,10534822,880,Human Resource Management Review,1006,2549,830,3952,7444,Soumyadeb Chowdhury and Prasanta Dey and Sian ...,Unlocking the value of artificial intelligence...,"Artificial intelligence, Organisational resour...",Artificial Intelligence (AI) is increasingly a...,2022,article,https://doi.org/10.1016/j.hrmr.2022.100899
439,00313203,2302,Pattern Recognition,12476,1492,767,33363,7740,Chengyang Li and Liping Zhu and Gangyi Tian an...,Rethinking referring relationships from a pers...,"Referring relationship, Multimodal learning, I...",Referring relationship aims at localizing subj...,2023,article,https://doi.org/10.1016/j.patcog.2022.109044
659,01651684,5098,Signal Processing,5800,0907,2276,15960,4662,Mingchi Ju and Man Zhao and Tailin Han and Hon...,A novel subspace pursuit of residual correlati...,"Distributed compressed sensing, Sparse reconst...",Multi-signal joint reconstruction is critical ...,2023,article,https://doi.org/10.1016/j.sigpro.2022.108747


# Export

## Creating 03_OutputFiles if not exists

In [21]:
if not os.path.exists('../03_OutputFiles/'):
  os.makedirs('../03_OutputFiles/')

## Creating yaml file if not exists

In [22]:
if not os.path.exists("configuration.yaml"):
  configuration_dict = {
        'output_extensions': ['csv', 'json', 'yaml'],
        'filter_atributes': ['title', 'keywords', 'abstract', 'year', 'type_publication', 'doi', 'jcr_value', 'scimago_value']
    }
  with open('configuration.yaml', 'w') as yamlfile:
    data = yaml.dump(configuration_dict, yamlfile)

configuration_file = read_yaml("configuration.yaml")


configuration.yaml read successfully


# Filter Columns

In [23]:
filter_columns = configuration_file['filter_atributes']
print("Filters selected: ", configuration_file['filter_atributes'])

if filter_columns:
    final_df = merged_df.filter(items=filter_columns)
else:
    print("Using all dataframe columns")
    final_df = merged_df
final_df.head()

Filters selected:  ['title']


Unnamed: 0,title
741,An adaptive protocol for real-time fax communi...
139,eBird: A citizen-based bird observation networ...
513,Towards a World Desertification Atlas. Relatin...
171,An Open Government Maturity Model for social m...
350,Leveraging the capabilities of service-oriente...


## Reading yaml file and exporting

In [254]:
print("Output extensions options: ", configuration_file['output_extensions'])

if 'csv' in configuration_file['output_extensions']:
    write_csv(df_all, 'df_all')
if 'json' in configuration_file['output_extensions']:
    write_json(df_all, 'df_all')
if 'yaml' in configuration_file['output_extensions']:
    write_yaml(df_all, 'df_all')
if not configuration_file['output_extensions']:
  raise Exception("Please, select an output file extension")


    

Output extensions options:  ['csv', 'json', 'yaml']
df_all.csv successfully written
df_all.json successfully written
df_all.yaml successfully written
