# Work relationship with authors

In [None]:
// List the relationship types known to the database
CALL db.relationshipTypes() YIELD relationshipType
RETURN relationshipType;


In [None]:
// Remove every BELONGS_TO_Domain relationship, whatever nodes it connects
MATCH ()-[staleRel:BELONGS_TO_Domain]->()
DELETE staleRel;

In [None]:
// Show a single work, looked up by its Work ID, together with the domains it belongs to
MATCH (work:Work)-[:BELONGS_TO]->(domain:Domain)
WHERE work.id = "https://openalex.org/W2559085405"
RETURN work, domain;

In [1]:
pip install requests



In [4]:
import requests
import csv

# Endpoint that serves OpenAlex work records
base_url = "https://api.openalex.org/works"

# Ten OpenAlex concept IDs, each mapped to the label the author attached to it.
# These are example IDs; replace them with the concepts you actually need.
# NOTE(review): the labels are unverified annotations - confirm each ID
# against OpenAlex before relying on them.
_concept_labels = {
    "C144133560": "Machine Learning",
    "C127313418": "Artificial Intelligence",
    "C121332964": "Deep Learning",
    "C154945302": "Data Science",
    "C187152609": "Natural Language Processing",
    "C203014093": "Computer Vision",
    "C206292854": "Robotics",
    "C155942298": "Algorithms",
    "C136952241": "Bioinformatics",
    "C176746249": "Computational Biology",
}
concept_ids = list(_concept_labels)

# Overall number of works wanted, and how many to request per page
# (100 is noted by the author as the OpenAlex per-page maximum)
num_works, page_size = 1000, 100

# All concept IDs joined with "|" so they travel as one filter value
concept_filter = "|".join(concept_ids)

# Destination CSV for the downloaded works
output_file = "openalex_10_concepts_works.csv"

# Function to fetch works from OpenAlex
def fetch_works(page, timeout=30):
    """Fetch one page of works matching the configured concept filter.

    Args:
        page: Page number sent as the API's ``page`` parameter (the caller
            starts counting at 1).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The list stored under ``"results"`` in the JSON response, or an empty
        list when the request fails, returns a non-200 status, or the
        response carries no ``"results"`` key.
    """
    params = {
        "per_page": page_size,
        "page": page,
        "filter": f"concepts.id:{concept_filter}"
    }
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failure (DNS, connection reset, timeout, ...): report
        # it the same way as a bad status so the caller can stop paging.
        print(f"Failed to fetch data for page {page}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data for page {page}: {response.status_code}")
        return []

    return response.json().get("results", [])

# Collect data for all works
# Round the page count up so that a final, partial page is still requested
# when num_works is not an exact multiple of page_size.
total_pages = -(-num_works // page_size)
all_works = []
for page in range(1, total_pages + 1):
    works = fetch_works(page)
    if not works:
        # fetch_works returns an empty list on failure or when a page has no
        # results; either way there is nothing more to collect.
        break
    all_works.extend(works)

# The last page may overshoot the requested total; keep exactly num_works.
all_works = all_works[:num_works]

# Extract relevant fields
def _unique_names(names):
    """Return the non-empty names, de-duplicated, in first-seen order."""
    return list(dict.fromkeys(name for name in names if name))


def _work_to_row(work):
    """Flatten one OpenAlex work dict into a CSV row dict.

    Missing or null nested values are tolerated: a null ``authorships``,
    ``institutions``, ``author`` or ``concepts`` entry is treated as empty,
    and a null display name never reaches ``str.join`` (which would raise
    ``TypeError``).
    """
    authors = []
    institutions = []
    for authorship in work.get("authorships") or []:
        institutions.extend(
            inst.get("display_name")
            for inst in authorship.get("institutions") or []
        )
        # Keep an empty-string placeholder when the author has no name, so
        # the number of entries still matches the number of authorships.
        authors.append((authorship.get("author") or {}).get("display_name") or "")

    concepts = work.get("concepts") or []
    topics = [concept.get("display_name") for concept in concepts]
    # Domains are level 0 in OpenAlex hierarchy
    domains = [
        concept.get("display_name")
        for concept in concepts
        if concept.get("level") == 0
    ]

    return {
        "Work_ID": work["id"],
        "Work_Title": work.get("title", ""),
        "Authors": ", ".join(authors),
        "Published_Year": work.get("publication_year", ""),
        # De-duplicate in first-seen order so the CSV is identical from run
        # to run (a plain set() has no stable iteration order).
        "Institutions": ", ".join(_unique_names(institutions)),
        "Topics": ", ".join(_unique_names(topics)),
        "Domains": ", ".join(_unique_names(domains)),
    }


# One row per fetched work
data = [_work_to_row(work) for work in all_works]

# Write the collected rows to disk, one CSV column per extracted field
csv_columns = [
    "Work_ID",
    "Work_Title",
    "Authors",
    "Published_Year",
    "Institutions",
    "Topics",
    "Domains",
]
with open(output_file, mode="w", newline="", encoding="utf-8") as out_handle:
    csv_out = csv.DictWriter(out_handle, fieldnames=csv_columns)
    csv_out.writeheader()
    for record in data:
        csv_out.writerow(record)

print(f"Data saved to {output_file}")


Data saved to openalex_10_concepts_works.csv


# Citation Data extraction

In [5]:
import requests
import csv

# Root endpoint for OpenAlex work records
base_url = "https://api.openalex.org/works"

# Overall number of works wanted, and how many to request per page
# (100 is noted by the author as the OpenAlex per-page limit)
num_works, page_size = 1000, 100

# Destination CSV for the per-work citation counts
output_file = "openalex_works_citations.csv"

# Fetch works and their citation details
all_data = []
# Round the page count up so a final partial page is still requested when
# num_works is not an exact multiple of page_size.
total_pages = -(-num_works // page_size)
for page in range(1, total_pages + 1):
    params = {
        "per_page": page_size,
        "page": page
    }
    try:
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(base_url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch page {page}: {exc}")
        break
    if response.status_code != 200:
        print(f"Failed to fetch page {page}: {response.status_code}")
        break

    for work in response.json().get("results", []):
        # `.get(key, default)` only applies the default when the key is
        # absent; a key that is present with a null value comes back as None.
        # `or` covers both cases.
        work_id = (work.get("id") or "").split("/")[-1]  # Extract work ID
        title = work.get("title") or "Unknown Title"
        cited_by_count = work.get("cited_by_count") or 0
        cites_count = len(work.get("referenced_works") or [])  # Count references
        all_data.append({
            "Work_ID": work_id,
            "Title": title,
            "Cited_By_Count": cited_by_count,
            "Cites_Count": cites_count
        })

# The last page may overshoot the requested total; keep exactly num_works.
all_data = all_data[:num_works]

# Write the citation rows to disk, one CSV column per collected field
csv_columns = ["Work_ID", "Title", "Cited_By_Count", "Cites_Count"]
with open(output_file, mode="w", newline="", encoding="utf-8") as out_handle:
    csv_out = csv.DictWriter(out_handle, fieldnames=csv_columns)
    csv_out.writeheader()
    for record in all_data:
        csv_out.writerow(record)

print(f"Data saved to {output_file}")


Data saved to openalex_works_citations.csv


# 1. Work Relationship with Author Entity

#### Step 1: Cypher Query to Create Work Nodes

In [None]:
// Create (or reuse) one Work node per CSV row, keyed by Work_ID, and record its title.
// Rows without a Work_ID are skipped: MERGE cannot use a null property value
// and would otherwise abort the whole load.
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
WITH row WHERE row.Work_ID IS NOT NULL
MERGE (work:Work {id: row.Work_ID})
SET work.title = row.Work_Title

#### Step 2: Create Nodes for Authors

In [None]:
// Create one Author node per distinct name in the comma-separated Authors column.
// Blank entries (left behind where an author had no name) are skipped so that
// no Author node with an empty name is created.
// NOTE(review): a name that itself contains a comma is split into fragments here.
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Authors, ",") AS author
WITH TRIM(author) AS authorName
WHERE authorName <> ''
MERGE (a:Author {name: authorName})

#### Step 3: Create Relationships Between Work and Authors

In [None]:
// Connect each Work to every author named in its Authors column.
// Blank entries are skipped so no work is linked to an empty-named Author.
// The relationship type stays AUTHORIZED_BY because the visualisation queries
// in this notebook match on that exact type.
// NOTE(review): AUTHORED_BY is probably the intended name - rename everywhere together.
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Authors, ",") AS author
WITH row, TRIM(author) AS authorName
WHERE authorName <> ''
MATCH (work:Work {id: row.Work_ID})
MERGE (a:Author {name: authorName})
MERGE (work)-[:AUTHORIZED_BY]->(a)


#### Step 4: Visualize the Work and Author relationship

In [None]:
// Sample of up to 100 work/author pairs
MATCH (w:Work)-[:AUTHORIZED_BY]->(a:Author)
RETURN w AS work, a AS author
LIMIT 100

# 2. Work Relationship with Published Year

#### Step 5: Create Year nodes


In [None]:
// One Year node per distinct Published_Year value; rows with no year are ignored
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
WITH row.Published_Year AS publishedYear
WHERE publishedYear IS NOT NULL
MERGE (y:Year {year: publishedYear})


#### Step 6: Create Work and Year relationship

In [None]:
// Attach each Work to the Year node for its publication year; rows with no year are ignored
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
WITH row.Work_ID AS workId, row.Published_Year AS publishedYear
WHERE publishedYear IS NOT NULL
MATCH (w:Work {id: workId})
MERGE (y:Year {year: publishedYear})
MERGE (w)-[:PUBLISHED_IN]->(y)

#### Step 7: Visualizing Work and Year relationship

In [None]:
// Sample of up to 10 work/year pairs
MATCH (w:Work)-[:PUBLISHED_IN]->(y:Year)
RETURN w AS work, y AS year
LIMIT 10

# 3. Authors affiliation relationship with Institutions

#### Step 7: Create Nodes for Institutions

In [None]:
// One Institution node per distinct, trimmed entry of the comma-separated Institutions column
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Institutions, ",") AS institution
WITH TRIM(institution) AS institutionName
MERGE (inst:Institution {name: institutionName})

#### Step 8: Create Relationships Between Authors and Institutions

In [None]:
// Link authors to institutions.
// Blank author or institution entries are skipped so that empty-named nodes
// are neither matched nor created.
// NOTE(review): the CSV stores authors and institutions as two independent
// lists per work, so the two UNWINDs pair EVERY author of a work with EVERY
// institution of that work. An AFFILIATED_WITH edge therefore means
// "appeared on the same work as", not a confirmed affiliation.
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Authors, ",") AS author
UNWIND SPLIT(row.Institutions, ",") AS institution
WITH TRIM(author) AS authorName, TRIM(institution) AS institutionName
WHERE authorName <> '' AND institutionName <> ''
MATCH (a:Author {name: authorName})
MERGE (inst:Institution {name: institutionName})
MERGE (a)-[:AFFILIATED_WITH]->(inst)


#### Step 9: Visualize the Author-Institution Relationship

In [None]:
// Sample of up to 100 author/institution pairs
MATCH (a:Author)-[:AFFILIATED_WITH]->(i:Institution)
RETURN a AS author, i AS inst
LIMIT 100


# 4. Work relationship with Domain

#### Step 10: Create Nodes for Domains

In [None]:
// One Domain node per distinct, trimmed entry of the comma-separated Domains column
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Domains, ",") AS domain
WITH TRIM(domain) AS domainName
MERGE (d:Domain {name: domainName})

#### Step 11: Create Relationships Between Work and Domain

In [None]:
// Connect each Work to every domain listed in its Domains column
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Domains, ",") AS domain
WITH row.Work_ID AS workId, TRIM(domain) AS domainName
MATCH (w:Work {id: workId})
MERGE (d:Domain {name: domainName})
MERGE (w)-[:BELONGS_TO]->(d)

#### Step 12: Visualize Relationships Between Work and Domain

In [None]:
// Up to 300 work/domain pairs, restricted to three domains of interest
WITH ["Art", "Materials science", "Business"] AS wantedDomains
MATCH (w:Work)-[:BELONGS_TO]->(d:Domain)
WHERE d.name IN wantedDomains
RETURN w AS work, d AS domain
LIMIT 300

# 5. Work relationship with Topics

#### Step 13: Create Nodes for Topics

In [None]:
// One Topic node per distinct, trimmed entry of the comma-separated Topics column
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Topics, ",") AS topic
WITH TRIM(topic) AS topicName
MERGE (t:Topic {name: topicName})

#### Step 14: Create Relationships Between Work and Topics

In [None]:
// Connect each Work to every topic listed in its Topics column
LOAD CSV WITH HEADERS FROM 'file:///OpenAlex_Main_Data.csv' AS row
UNWIND SPLIT(row.Topics, ",") AS topic
WITH row.Work_ID AS workId, TRIM(topic) AS topicName
MATCH (w:Work {id: workId})
MERGE (t:Topic {name: topicName})
MERGE (w)-[:HAS_TOPIC]->(t)

#### Step 15: Visualize Relationships Between Work and Topics

In [None]:
// Sample of up to 150 work/topic pairs
MATCH (w:Work)-[:HAS_TOPIC]->(t:Topic)
RETURN w AS work, t AS topic
LIMIT 150

# Example query to view knowledge graph of a single work.

In [None]:
// Pull one work, looked up by id, with everything attached to it: authors,
// their institutions, publication year, domains and topics. Every expansion
// is OPTIONAL, so the work is still returned when a facet is missing.
MATCH (work:Work)
WHERE work.id = "https://openalex.org/W2128635872"
OPTIONAL MATCH (work)-[:AUTHORIZED_BY]->(author:Author)
OPTIONAL MATCH (work)-[:PUBLISHED_IN]->(year:Year)
OPTIONAL MATCH (author)-[:AFFILIATED_WITH]->(institution:Institution)
OPTIONAL MATCH (work)-[:BELONGS_TO]->(domain:Domain)
OPTIONAL MATCH (work)-[:HAS_TOPIC]->(topic:Topic)
RETURN work, author, year, institution, domain, topic