In [1]:
import numpy as np
import scipy.sparse as ssp
from collections import defaultdict
import pickle

# Bibliographic records: one (title, journal, publication year) tuple per paper.
# A paper's position in this list serves as its integer id below.
papers = [
    ("A review of generative adversarial networks and its application in cybersecurity", "Artificial Intelligence Review", 2020),
    ("Biometric fingerprint generation using generative adversarial networks", "Artificial Intelligence for Cyber Security", 2021),
    ("Object detection for robot coordination in robotics soccer", "Nigerian Journal of Technological Development", 2022),
    ("Conflict resolution via emerging technologies?", "Journal of Physics: Conference Series", 2019),
    ("A predictive model for automatic generation control in smart grids using artificial neural networks", "Emerging Technologies for Developing Countries", 2019),
    ("Estimating the time-lapse between medical insurance reimbursement with non-parametric regression models", "Advances in Information and Communication", 2020),
]

# Group paper positions under the journal each paper appeared in
journal_to_papers = defaultdict(list)
for paper_pos, record in enumerate(papers):
    venue = record[1]
    journal_to_papers[venue].append(paper_pos)

# Journal names in first-seen order (dicts preserve insertion order)
journals = [*journal_to_papers]



In [3]:
# Inspect the journal -> paper-index mapping and the list of journal names
# (the position of a name in `journals` is that journal's integer index).
print(journal_to_papers)
print(journals)

defaultdict(<class 'list'>, {'Artificial Intelligence Review': [0], 'Artificial Intelligence for Cyber Security': [1], 'Nigerian Journal of Technological Development': [2], 'Journal of Physics: Conference Series': [3], 'Emerging Technologies for Developing Countries': [4], 'Advances in Information and Communication': [5]})
['Artificial Intelligence Review', 'Artificial Intelligence for Cyber Security', 'Nigerian Journal of Technological Development', 'Journal of Physics: Conference Series', 'Emerging Technologies for Developing Countries', 'Advances in Information and Communication']


In [4]:
# Gather one (paper row, journal column) coordinate per paper/journal membership,
# visiting journals in `journals` order and papers in their stored order.
memberships = [
    (paper_pos, venue_pos)
    for venue_pos, venue in enumerate(journals)
    for paper_pos in journal_to_papers[venue]
]
rows = [paper_pos for paper_pos, _ in memberships]
cols = [venue_pos for _, venue_pos in memberships]

# Sparse paper-by-journal incidence matrix G: a 1 at (p, j) means paper p is in journal j
incidence_values = np.ones(len(rows), dtype=np.int8)
G = ssp.coo_matrix(
    (incidence_values, (rows, cols)),
    shape=(len(papers), len(journals)),
    dtype=np.int8,
)

# Publication year of every paper, in the same order as `papers`
paper_dates = np.array([record[2] for record in papers], dtype=np.int32)

In [6]:
# Show the stored (row, column) entries of G and the publication-year vector
print(G)
print(paper_dates)

  (0, 0)	1
  (1, 1)	1
  (2, 2)	1
  (3, 3)	1
  (4, 4)	1
  (5, 5)	1
[2020 2021 2022 2019 2019 2020]


In [7]:
# Persist the hypergraph as its COO coordinate lists (row indices, column indices);
# the matrix values and shape are not part of the pickled payload.
with open('hypergraph.pkl', 'wb') as hypergraph_file:
    coordinate_lists = (G.row.tolist(), G.col.tolist())
    pickle.dump(coordinate_lists, hypergraph_file)

# Persist the publication years as the NumPy array built above
with open('paper_dates.pkl', 'wb') as dates_file:
    pickle.dump(paper_dates, dates_file)


In [1]:
import pandas as pd

# Example corpus: titles listed in paper_id order, ids being the 0-based positions
paper_titles = [
    'A review of generative adversarial networks and its application in cybersecurity',
    'Biometric fingerprint generation using generative adversarial networks',
    'Object detection for robot coordination in robotics soccer',
    'Conflict resolution via emerging technologies?',
    'A predictive model for automatic generation control in smart grids using artificial neural networks',
    'Estimating the time-lapse between medical insurance reimbursement with non-parametric regression models',
]
papers_df = pd.DataFrame({
    'paper_id': list(range(len(paper_titles))),
    'title': paper_titles,
})

# Predictive AI term assignments, one (paper_id, term) record per row
predictive_ai_df = pd.DataFrame(
    [
        (4, 'Regression Models'),
        (5, 'Non-Parametric Regression Models'),
        (3, 'Multi Agents'),
    ],
    columns=['paper_id', 'term'],
)

# Computer Vision term assignments, one (paper_id, term) record per row
computer_vision_df = pd.DataFrame(
    [
        (0, 'gans'),
        (1, 'Object Detection'),
        (2, 'Object Detection'),
    ],
    columns=['paper_id', 'term'],
)


In [2]:
import numpy as np
import scipy.sparse as ssp
import pickle

def create_hierarchical_incidence_matrix(papers_df, terms_df, term_column):
    """Build a binary paper-by-term incidence matrix for one term category.

    Parameters
    ----------
    papers_df : pandas.DataFrame
        One row per paper. Only its length is used, to fix the number of
        matrix rows.
    terms_df : pandas.DataFrame
        Long-format assignments holding a ``'paper_id'`` column and the column
        named by ``term_column``. ``paper_id`` values are used directly as row
        indices; they are not remapped or validated here, so they are expected
        to lie in ``range(len(papers_df))``.
    term_column : str
        Name of the column in ``terms_df`` that holds the term labels.

    Returns
    -------
    incidence_matrix : scipy.sparse.coo_matrix
        int8 matrix of shape ``(len(papers_df), n_terms)`` holding a 1 for
        every distinct (paper, term) assignment.
    term_to_index : dict
        Maps each distinct term to its column index, numbered in order of
        first appearance in ``terms_df``.
    """
    num_papers = len(papers_df)

    # Keep only usable, distinct (paper, term) pairs:
    #  - rows without a term carry no assignment and are dropped;
    #  - repeated pairs are collapsed, because COO duplicates are summed when
    #    the matrix is converted or densified, which would produce entries > 1
    #    (and could overflow int8) in what is meant to be a 0/1 matrix.
    assignments = (
        terms_df[['paper_id', term_column]]
        .dropna(subset=[term_column])
        .drop_duplicates()
    )

    # Column index per term, in order of first appearance
    term_to_index = {
        term: column
        for column, term in enumerate(assignments[term_column].unique())
    }
    num_terms = len(term_to_index)

    # Coordinates taken straight from the columns (no per-row iterrows overhead)
    rows = assignments['paper_id'].tolist()
    cols = [term_to_index[term] for term in assignments[term_column]]

    data = np.ones(len(rows), dtype=np.int8)  # Binary matrix
    incidence_matrix = ssp.coo_matrix((data, (rows, cols)), shape=(num_papers, num_terms))

    return incidence_matrix, term_to_index

# Build one paper-by-term incidence matrix, plus its term -> column lookup, per category
predictive_ai_matrix, predictive_ai_to_index = create_hierarchical_incidence_matrix(
    papers_df=papers_df,
    terms_df=predictive_ai_df,
    term_column='term',
)
computer_vision_matrix, computer_vision_to_index = create_hierarchical_incidence_matrix(
    papers_df=papers_df,
    terms_df=computer_vision_df,
    term_column='term',
)



In [12]:
# For each category, print the stored entries of its incidence matrix
# followed by its term -> column-index mapping.
print(predictive_ai_matrix)
print(predictive_ai_to_index)
print(computer_vision_matrix)
print(computer_vision_to_index)

  (4, 0)	1
  (5, 1)	1
  (3, 2)	1
{'Regression Models': 0, 'Non-Parametric Regression Models': 1, 'Multi Agents': 2}
  (0, 0)	1
  (1, 1)	1
  (2, 1)	1
{'gans': 0, 'Object Detection': 1}


In [13]:
# Pickle helpers for the incidence matrices and their term mappings
def save_to_pickle(matrix, filename):
    """Pickle ``matrix`` into the file at ``filename`` (opened in binary write mode)."""
    with open(filename, mode='wb') as sink:
        pickle.dump(matrix, sink)


def save_mapping(mapping, filename):
    """Pickle ``mapping`` into the file at ``filename``.

    Written exactly the way ``save_to_pickle`` writes its payload; the separate
    name only documents what kind of object is being stored.
    """
    save_to_pickle(mapping, filename)

# Write every category artifact with its matching helper, in a fixed order:
# matrix first, then its term mapping, one category after the other.
category_artifacts = (
    (save_to_pickle, predictive_ai_matrix, 'predictive_ai.pkl'),
    (save_mapping, predictive_ai_to_index, 'predictive_ai_mapping.pkl'),
    (save_to_pickle, computer_vision_matrix, 'computer_vision.pkl'),
    (save_mapping, computer_vision_to_index, 'computer_vision_mapping.pkl'),
)
for writer, payload, destination in category_artifacts:
    writer(payload, destination)


In [3]:
# Build paper_id -> list-of-terms lookups for each category
# (these are the payloads later pickled as id2predictive_ai.pkl / id2computer_vision.pkl).
def _terms_by_paper(terms_df):
    """Return a dict mapping each paper_id in ``terms_df`` to the list of its terms."""
    grouped_terms = terms_df.groupby('paper_id')['term']
    return grouped_terms.apply(list).to_dict()


id2predictive_ai = _terms_by_paper(predictive_ai_df)
id2computer_vision = _terms_by_paper(computer_vision_df)

In [15]:
# Show the paper_id -> list-of-terms lookup for the Predictive AI category
print(id2predictive_ai)

{3: ['Multi Agents'], 4: ['Regression Models'], 5: ['Non-Parametric Regression Models']}


In [4]:
# Pickle each paper_id -> terms lookup into its own file
id_lookup_files = (
    ('id2predictive_ai.pkl', id2predictive_ai),
    ('id2computer_vision.pkl', id2computer_vision),
)
for destination, lookup in id_lookup_files:
    with open(destination, 'wb') as sink:
        pickle.dump(lookup, sink)

In [17]:
import numpy as np
import scipy.sparse as ssp
import pickle

# Number of papers
num_papers = len(papers_df)

# Placeholder citation data for demonstration only.
# In a real scenario, you would replace this with actual citation data.
# A fixed seed is used so that re-running the notebook regenerates exactly the
# same matrix (an unseeded draw would differ on every run, and so would
# anything saved from it).
CITATION_SEED = 42
citation_rng = np.random.default_rng(CITATION_SEED)

# Dense (num_papers x num_papers) array of integers drawn from {0, 1, 2, 3, 4};
# note the draw also fills the diagonal, as the original demo data did.
citation_counts_dense = citation_rng.integers(0, 5, size=(num_papers, num_papers))

# Keep the dense draw under its own name and expose the sparse CSR form as
# `citation_matrix`, so the name refers to a single kind of object.
citation_matrix = ssp.csr_matrix(citation_counts_dense)

In [18]:
# Print the stored entries of the sparse citation matrix
print(citation_matrix)

  (0, 0)	3
  (0, 1)	4
  (0, 2)	1
  (0, 3)	1
  (0, 4)	2
  (0, 5)	1
  (1, 0)	3
  (1, 1)	1
  (1, 2)	2
  (1, 3)	1
  (1, 5)	3
  (2, 0)	3
  (2, 1)	2
  (2, 2)	2
  (2, 4)	4
  (2, 5)	3
  (3, 0)	4
  (3, 1)	1
  (3, 2)	1
  (3, 3)	2
  (3, 4)	2
  (3, 5)	4
  (4, 0)	2
  (4, 1)	3
  (4, 2)	2
  (4, 3)	3
  (4, 4)	1
  (4, 5)	4
  (5, 0)	2
  (5, 1)	4
  (5, 2)	2
  (5, 3)	1
  (5, 4)	2
  (5, 5)	2


In [19]:
# Pickle the sparse citation matrix to disk
citations_path = 'citations.pkl'
with open(citations_path, 'wb') as citations_file:
    pickle.dump(citation_matrix, citations_file)