# Segunda tentativa da formação da matriz para input na Ucinet (coautoria de autores).


## Exemplo simplificado da matriz de coautoria dos autores.
### Exemplo: "x escreveu com: a, b, c, y e z"

In [None]:
import pandas as pd
import numpy as np

# Toy example: each key is an author and the list holds the co-authors that
# author wrote with.
elements = {
    "x": ["a", "b", "c", "y", "z"],
    "y": ["a", "c", "d", "e", "z"],
    "z": ["a", "b", "d", "e", "f"]
}

# Every distinct name (keys and listed co-authors alike), sorted once so the
# matrix is built directly in alphabetical order.
unique_elements = sorted(set(elements).union(*elements.values()))

# Name -> matrix position, so each lookup is O(1) instead of a list scan.
position = {name: idx for idx, name in enumerate(unique_elements)}

# Square matrix of zeros, one row/column per name.
matrix = np.zeros((len(unique_elements), len(unique_elements)), dtype=int)

# Mark every declared co-authorship in both directions to keep it symmetric.
for row_element, references in elements.items():
    row_idx = position[row_element]
    for ref in references:
        col_idx = position[ref]
        matrix[row_idx, col_idx] = 1
        matrix[col_idx, row_idx] = 1

# Labelled view of the matrix.
matrix_df = pd.DataFrame(matrix, index=unique_elements, columns=unique_elements)

# Rows and columns in alphabetical order (already the case, kept so the
# sorted name stays available to later cells).
matrix_df_sorted = matrix_df.sort_index(axis=0).sort_index(axis=1)

# Show the matrix.
print(matrix_df_sorted)


   a  b  c  d  e  f  x  y  z
a  0  0  0  0  0  0  1  1  1
b  0  0  0  0  0  0  1  0  1
c  0  0  0  0  0  0  1  1  0
d  0  0  0  0  0  0  0  1  1
e  0  0  0  0  0  0  0  1  1
f  0  0  0  0  0  0  0  0  1
x  1  1  1  0  0  0  0  1  1
y  1  0  1  1  1  0  1  0  1
z  1  1  0  1  1  1  1  1  0


## Selecionar os autores da base de dados.
### Extração real

In [None]:
# Install the bibtexparser package, which the next cell imports to read the BibTeX exports.
!pip install bibtexparser


Collecting bibtexparser
  Downloading bibtexparser-1.4.2.tar.gz (55 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/55.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.7/55.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: bibtexparser
  Building wheel for bibtexparser (setup.py) ... [?25l[?25hdone
  Created wheel for bibtexparser: filename=bibtexparser-1.4.2-py3-none-any.whl size=43562 sha256=f4063de1c550c21fb16ed9f55c8a739521eed83ff5708ca15777c33b364a674f
  Stored in directory: /root/.cache/pip/wheels/3c/71/a1/ace26bfc971a86c092f8932dd34c6dcf965f8c2cc29da7a7c8
Successfully built bibtexparser
Installing collected packages: bibtexparser
Successfully installed bibtexparser-1.4.2


In [None]:
import bibtexparser

# Paths of the BibTeX exports to read.
bibtex_file_path_wos = '/content/savedrecs.bib' # WOS
bibtex_file_path_scopus = '/content/scopus.bib' # Scopus


def _load_bibtex(path):
    """Parse the UTF-8 BibTeX file at *path* and return the parsed database."""
    with open(path, 'r', encoding='utf-8') as handle:
        return bibtexparser.load(handle)


def _report(database):
    """Print the number of records and, when there is one, the first record."""
    print(f"Número de registros: {len(database.entries)}")
    if database.entries:
        print("Exemplo de registro:", database.entries[0])
    else:
        # An empty export would otherwise raise IndexError on entries[0].
        print("Exemplo de registro: (nenhum registro encontrado)")


# Read both files before printing anything, so a missing file fails early.
bib_database_wos = _load_bibtex(bibtex_file_path_wos)
bib_database_scopus = _load_bibtex(bibtex_file_path_scopus)

# Show a summary of each source (WoS first, then Scopus).
_report(bib_database_wos)
_report(bib_database_scopus)

Número de registros: 157
Exemplo de registro: {'da': '2024-06-24', 'unique-id': 'WOS:000555083800004', 'web-of-science-index': 'Social Science Citation Index (SSCI)', 'doc-delivery-number': 'MT6LG', 'journal-iso': 'Hist. Soc. Res.', 'usage-count-since-2013': '57', 'usage-count-last-180-days': '3', 'times-cited': '11', 'number-of-cited-references': '67', 'cited-references': "Alex N., 2006, SOCIAL ENTREPRENEURS.\nAmartya Sen, 1999, DEV FREEDOM.\nAminur Rahman, 1999, WOMEN MICROCREDIT RU.\nAnn Richey Lisa, 2011, BRAND AID SHOPPING W.\n{[}Anonymous], 2007, Innovations: Technology, Governance, Globalization, DOI DOI 10.1162/ITGG.2007.2.1-2.63.\n{[}Anonymous], 2008, ECONOMIST.\nBanerjee S.B., 2008, CRIT SOCIOL, V34, P51, DOI DOI 10.1177/0896920507084623.\nBanks N, 2014, THIRD WORLD Q, V35, P181, DOI 10.1080/01436597.2014.868997.\nBarbara Adams, 2013, WHOSE DEV WHOSE UN R.\nBatchelor S, 2012, IDS BULL-I DEV STUD, V43, P84, DOI 10.1111/j.1759-5436.2012.00367.x.\nBatchelor Simon, 2005, DEV MAGA

#### Web of Science

In [None]:
# Print the main fields of every Web of Science record for a quick check.
# (A print of the 'cited-references' field was disabled in this cell and is
# left out.)
divider = "-" * 40
for record in bib_database_wos.entries:
    summary = (
        f"title: {record.get('title', 'N/A')}",
        f"author: {record.get('author', 'N/A')}",
        f"year: {record.get('year', 'N/A')}",
        f"times-cited: {record.get('times-cited')}",
        f"number-of-cited-references: {record.get('number-of-cited-references')}",
        divider,
    )
    # One line per field, followed by the separator line.
    print(*summary, sep="\n")


title: Digital Finance Inclusion and the Mobile Money ``Social{''} Enterprise:
A Socio-Legal Critique of M-Pesa in Kenya
author: Natile, Serena
year: 2020
times-cited: 11
number-of-cited-references: 67
----------------------------------------
title: FinTech ecosystem practices shaping financial inclusion: the case of
mobile money in Ghana
author: Senyo, P. K. and Karanasios, Stan and Gozman, Daniel and Baba, Melissa
year: 2022
times-cited: 31
number-of-cited-references: 29
----------------------------------------
title: FinTech market development and financial inclusion in Ghana: The role of
heterogeneous actors
author: Coffie, Cephas Paa Kwasi and Zhao Hongjiang
year: 2023
times-cited: 13
number-of-cited-references: 63
----------------------------------------
title: Can Fintech development improve the financial inclusion of village and
township banks? Evidence from China
author: Chen, Xiaojie and He, Guangwen and Li, Qian
year: 2024
times-cited: 1
number-of-cited-references: 59
------

#### Scopus

In [None]:
# Print the main fields of every Scopus record for a quick check.
# (The prints for citation counts and references were disabled in this cell
# and are left out.)
divider = "-" * 40
for record in bib_database_scopus.entries:
    summary = (
        f"title: {record.get('title', 'N/A')}",
        f"author: {record.get('author', 'N/A')}",
        f"year: {record.get('year', 'N/A')}",
        divider,
    )
    # One line per field, followed by the separator line.
    print(*summary, sep="\n")

title: Digital Finance, Digital Divide and Household Financial Portfolio Effectiveness—An Analysis Based on the Perspective of Urban-rural Differences; [数字金融、数字鸿沟与家庭金融资产组合有效性———基于城乡差异视角的分析]
author: Wang, Xiaohua and Li, Xinru and Song, Meng and Ma, Xiaoke
year: 2024
----------------------------------------
title: Fintech an opportunity or opportunity missed by developing sides of the world: A special emphasize on fintech use and prospects in Pakistan
author: Sultan, Muhammad Faisal and Asim, Muhammad and Khan, Raza Ali and Shaikh, Sadia
year: 2023
----------------------------------------
title: Can renewable energy microfinance promote financial inclusion and empower the vulnerable?
author: Gatto, Andrea
year: 2023
----------------------------------------
title: Policy and Regulatory Frameworks for Financial Inclusion in South Africa, Botswana, Namibia and Zimbabwe
author: Svotwa, Tendai D. and Wealth, Eukeria and Makanyeza, Charles
year: 2023
----------------------------------------
t

In [None]:
import pandas as pd

# Turn the list of records of each parsed database into a DataFrame
# (WoS first, then Scopus).
data_wos, data_scopus = (
    pd.DataFrame(database.entries)
    for database in (bib_database_wos, bib_database_scopus)
)

# Show which columns each source provides.
for frame in (data_wos, data_scopus):
    print(frame.columns)

# A CSV export for later analysis was sketched here but is disabled.


Index(['da', 'unique-id', 'web-of-science-index', 'doc-delivery-number',
       'journal-iso', 'usage-count-since-2013', 'usage-count-last-180-days',
       'times-cited', 'number-of-cited-references', 'cited-references',
       'orcid-numbers', 'affiliations', 'author-email',
       'web-of-science-categories', 'research-areas', 'keywords-plus',
       'keywords', 'issn', 'doi', 'affiliation', 'language', 'type', 'address',
       'publisher', 'abstract', 'pages', 'number', 'volume', 'year', 'journal',
       'title', 'author', 'ENTRYTYPE', 'ID', 'oa', 'researcherid-numbers',
       'eissn', 'earlyaccessdate', 'month', 'esi-hot-paper',
       'esi-highly-cited-paper', 'article-number', 'funding-text',
       'funding-acknowledgement', 'note', 'isbn', 'series', 'booktitle',
       'book-author', 'book-group-author', 'editor'],
      dtype='object')
Index(['note', 'source', 'publication_stage', 'type', 'abbrev_source_title',
       'language', 'issn', 'publisher', 'author_keywords', 'ab

In [None]:
# Preview the WoS author column with line breaks replaced by spaces.
# The result is only displayed: it is not assigned, so data_wos is unchanged.
data_wos['author'].str.replace('\n', ' ', regex=True)

Unnamed: 0,author
0,"Natile, Serena"
1,"Senyo, P. K. and Karanasios, Stan and Gozman, ..."
2,"Coffie, Cephas Paa Kwasi and Zhao Hongjiang"
3,"Chen, Xiaojie and He, Guangwen and Li, Qian"
4,"Juwita, Ratna and Kusumah, Adam Darma and Aqil..."
...,...
152,"Urquia-Grande, Elena and Cano-Montero, Elisa I..."
153,"Mpofu, Olipha and Sibindi, Athenia Bongani"
154,"Wang, Xingqi and Mao, Zhenhua"
155,"Oelkers, Tim and Musshoff, Oliver"


#### Junta as duas bases

In [None]:
import pandas as pd # Número não bate com o parsif.al

# 1. Normalise the key column names.
# Make sure both sources use the same names for the DOI and title columns.
data_wos.rename(columns={"DOI": "doi", "Title": "title"}, inplace=True)
data_scopus.rename(columns={"DOI": "doi", "Title": "title"}, inplace=True)

# 2. Stack the two sources into a single table.
combined = pd.concat([data_wos, data_scopus], ignore_index=True)
# Replace any line breaks inside the author field with spaces.
combined["author"] = combined["author"].str.replace('\n', ' ', regex=True)

# 3. Remove duplicates.
if "doi" in combined.columns:
    # Compare DOIs ignoring letter case and surrounding whitespace, since the
    # same DOI may be written differently by each source.
    doi_key = combined["doi"].fillna("").astype(str).str.strip().str.lower()
    has_doi = doi_key != ""
    # drop_duplicates(subset="doi") treats all missing DOIs as one repeated
    # value and would keep only the first record without a DOI. Only rows
    # that actually carry a DOI are eligible for removal here.
    repeated_doi = has_doi & doi_key.duplicated(keep="first")
    combined_clean = combined.loc[~repeated_doi].copy()
else:
    # Without a DOI column, fall back to title + year as the identity.
    combined_clean = combined.drop_duplicates(subset=["title", "year"], keep="first")

# 4. Export the consolidated table.
combined_clean.to_excel("base_unificada.xlsx", index=False)
print("Base consolidada salva como 'base_unificada.xlsx'")

# 5. Download the file through the Colab helper.
from google.colab import files
files.download("base_unificada.xlsx")

Base consolidada salva como 'base_unificada.xlsx'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Arruma os nomes

In [None]:
import re

# Authors in the field are separated by the word "and". Accept any run of
# whitespace around it, not just single spaces.
_AUTHOR_SEPARATOR = re.compile(r"\s+and\s+")

# One list of author names per article.
articles_authors = []

for author_field in combined_clean.author:
    # Skip records with no usable author string (missing values are not str).
    if not isinstance(author_field, str):
        continue
    # Collapse internal whitespace so the same name always yields the same key.
    names = [" ".join(part.split()) for part in _AUTHOR_SEPARATOR.split(author_field)]
    # Drop blank names left by an empty field or a dangling separator.
    names = [name for name in names if name]
    if names:
        articles_authors.append(names)

# Show the authors of each article.
for i, authors in enumerate(articles_authors):
    print(f"Artigo {i+1}: {authors}")


Artigo 1: ['Natile, Serena']
Artigo 2: ['Senyo, P. K.', 'Karanasios, Stan', 'Gozman, Daniel', 'Baba, Melissa']
Artigo 3: ['Coffie, Cephas Paa Kwasi', 'Zhao Hongjiang']
Artigo 4: ['Chen, Xiaojie', 'He, Guangwen', 'Li, Qian']
Artigo 5: ['Juwita, Ratna', 'Kusumah, Adam Darma', 'Aqila, Tiara Syaharani', 'Tsabitah, Hanan', 'Syauqi, Muhammad Farhan']
Artigo 6: ['Lyons, Angela C.', 'Kass-Hanna, Josephine', 'Fava, Ana']
Artigo 7: ['Mohamed, Abdinur Ali']
Artigo 8: ['Ozili, Peterson K.', 'Mhlanga, David']
Artigo 9: ['Velazquez, Patricia Vilcanqui', 'Bobek, Vito', 'Vide, Romana Korez', 'Horvat, Tatjana']
Artigo 10: ['Bongomin, George Okello Candiya', 'Yourougou, Pierre', 'Munene, John C.']
Artigo 11: ['Danladi, Sagir', 'Prasad, M. S. V.', 'Modibbo, Umar Muhammad', 'Ahmadi, Seyedeh Asra', 'Ghasemi, Peiman']
Artigo 12: ['Kemal, Atika Ahmad']
Artigo 13: ['Demirguc-Kunt, Asli', 'Klapper, Leora', 'Singer, Dorothe', 'Ansar, Saniya', 'Hess, Jake']
Artigo 14: ['Okello Candiya Bongomin, George', 'Ntayi, 

## Cria a matriz de coautoria

In [None]:
import pandas as pd
from itertools import combinations

# Set of distinct authors across all articles.
unique_authors = {name for authors in articles_authors for name in authors}
author_list = sorted(unique_authors)  # Alphabetical order fixes the row/column order
author_index = {name: pos for pos, name in enumerate(author_list)}  # Name -> matrix position

# Square adjacency matrix initialised with zeros.
n_authors = len(author_list)
coauthorship_matrix = [[0] * n_authors for _ in range(n_authors)]

# Count, for every pair of authors, how many articles they share.
for authors in articles_authors:
    # A name listed twice in one article would otherwise pair with itself and
    # add a spurious self-link on the diagonal; keep the first occurrence only.
    distinct_authors = list(dict.fromkeys(authors))
    for author1, author2 in combinations(distinct_authors, 2):
        i, j = author_index[author1], author_index[author2]
        coauthorship_matrix[i][j] += 1
        coauthorship_matrix[j][i] += 1  # Keep the matrix symmetric

# Labelled DataFrame for display and export.
matrix_df = pd.DataFrame(coauthorship_matrix, index=author_list, columns=author_list)
print(matrix_df)


                                Ababio, Josephine Ofosu-Mensah  \
Ababio, Josephine Ofosu-Mensah                               0   
Abduh, Muhamad                                               0   
Abdulhamid, Nafisa A.                                        0   
Abdullah-Al-Faruk, Md                                        0   
Abor, Joshua Yindenaba                                       0   
...                                                        ...   
van Dongen, Elisa                                            0   
van Tulder, Rob                                              0   
van der Westhuizen, Morne                                    0   
Çağıl, Gülcan                                                0   
Łasak, Piotr                                                 0   

                                Abduh, Muhamad  Abdulhamid, Nafisa A.  \
Ababio, Josephine Ofosu-Mensah               0                      0   
Abduh, Muhamad                               0               

### Verificar se a matriz é simétrica.


In [None]:
import numpy as np

# Check whether a matrix is symmetric.
def verificar_simetria(matriz):
    """Return True when *matriz* is a square 2-D matrix equal to its transpose.

    The argument is converted with ``np.asarray``, so a DataFrame, a NumPy
    array or a nested list are all accepted. Only the values are compared.
    Anything that is not 2-D and square yields False.
    """
    # Use the argument itself; the previous version ignored it and always
    # inspected the global matrix_df.
    valores = np.asarray(matriz)
    if valores.ndim != 2 or valores.shape[0] != valores.shape[1]:
        return False
    return bool(np.array_equal(valores, valores.T))

# Report whether the co-authorship matrix passed the symmetry check.
mensagem = (
    "A matriz é simétrica."
    if verificar_simetria(matrix_df)
    else "A matriz não é simétrica."
)
print(mensagem)


A matriz é simétrica.


#### Exportar para o Excel (.xlsx)

In [None]:
# Write the full co-authorship matrix, with row and column labels, to an
# Excel workbook.
matrix_output_name = 'matriz_coautoria.xlsx'
matrix_df.to_excel(matrix_output_name, header=True, index=True)

# Download the workbook through the Colab helper.
from google.colab import files
files.download(matrix_output_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Fazer a base limpa (apenas os autores que têm alguma conexão)

In [None]:
import pandas as pd

# Flag the rows and the columns that contain at least one non-zero value.
non_zero_rows = matrix_df.any(axis="columns")  # one flag per row
non_zero_cols = matrix_df.any(axis="index")    # one flag per column

# Keep only the flagged rows and columns, dropping the all-zero ones.
filtered_df = matrix_df.loc[non_zero_rows, non_zero_cols]

# Show the matrix before and after filtering.
for heading, frame in (
    ("DataFrame original:", matrix_df),
    ("\nDataFrame filtrado:", filtered_df),
):
    print(heading)
    print(frame)



DataFrame original:
                                Ababio, Josephine Ofosu-Mensah  \
Ababio, Josephine Ofosu-Mensah                               0   
Abduh, Muhamad                                               0   
Abdulhamid, Nafisa A.                                        0   
Abdullah-Al-Faruk, Md                                        0   
Abor, Joshua Yindenaba                                       0   
...                                                        ...   
van Dongen, Elisa                                            0   
van Tulder, Rob                                              0   
van der Westhuizen, Morne                                    0   
Çağıl, Gülcan                                                0   
Łasak, Piotr                                                 0   

                                Abduh, Muhamad  Abdulhamid, Nafisa A.  \
Ababio, Josephine Ofosu-Mensah               0                      0   
Abduh, Muhamad                           

In [None]:
# Check whether a matrix is symmetric.
def verificar_simetria(filtered_df):
    """Return True when *filtered_df* has the same values as its transpose."""
    transposta = filtered_df.T
    return np.array_equal(filtered_df, transposta)

# Report whether the filtered matrix passed the symmetry check.
mensagem = (
    "A matriz é simétrica."
    if verificar_simetria(filtered_df)
    else "A matriz não é simétrica."
)
print(mensagem)

A matriz é simétrica.


In [None]:
# Write the filtered matrix, with row and column labels, to an Excel workbook.
filtered_output_name = 'matriz_coautoria_filtrada.xlsx'
filtered_df.to_excel(filtered_output_name, header=True, index=True)

# Download the workbook through the Colab helper.
from google.colab import files
files.download(filtered_output_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>