In [1]:
# Required packages:
import arxiv
from openai import OpenAI
from neo4j import GraphDatabase
import pandas as pd

In [36]:
# In this project, I will use Neo4j for graph storage and query and 
# ChatGPT for text entity recognition and create triple.
# Hence, user config for both interfaces are required:
KEY = "openAI_user_key" # openAI
Neo4j_PWD = "neo4j_password" # Neo4j

# Arxiv API

In [23]:
class ArxivAPI:
    def __init__(self):
        self.client = arxiv.Client()
        
    def query(self, query_string:str="quantum", max_results:int=10):
        """
        PARAMS:
            query_string(str): The string to query. Detailed information about query parameters can be found at:
                            https://info.arxiv.org/help/api/user-manual.html#query_details
            max_results(int): The maximum number of returned articles.
        RETURNS:
            arxiv generator: An iterable generator of Result objects.
        """

        search = arxiv.Search(
          query = query_string,
          max_results = max_results,
          sort_by = arxiv.SortCriterion.SubmittedDate
        )

        results = client.results(search)
        return(client.results(results))
    

In [28]:
# testing for querying "alloy"
arxiv_interface = ArxivAPI()
query_string = "cat:cond-mat.mes-hall AND ti:alloy"
alloy_iter = arxiv_query(query_string)
for r in alloy_iter:
    print(r)
    print(r.title)

http://arxiv.org/abs/2406.05168v1
Topological photonic alloy
http://arxiv.org/abs/2405.16329v1
Engineering liquid-liquid interfaces for high-entropy alloy synthesis
http://arxiv.org/abs/2405.14324v1
Magnetic microstructure of nanocrystalline Fe-Nb-B alloys as seen by small-angle neutron and X-ray scattering
http://arxiv.org/abs/2405.13619v1
Drastic modification in thermal conductivity of TiCoSb Half-Heusler alloy: Phonon engineering by lattice softening and ionic polarization
http://arxiv.org/abs/2405.01832v2
Strategies for enhancing spin-shuttling fidelities in Si/SiGe quantum wells with random-alloy disorder
http://arxiv.org/abs/2404.05947v1
Thermoelectric transport and current noise through a multilevel Anderson impurity: Three-body Fermi-liquid corrections in quantum dots and magnetic alloys
http://arxiv.org/abs/2403.17166v2
Enhanced mobility of ternary InGaAs quantum wells through digital alloying
http://arxiv.org/abs/2403.11019v1
Carrier confinement and alloy disorder exacerbate 

# ChatGPT

In [35]:
class GPTAPI:
    def __init__(self, api_key: str):
        """
        PARAMS:
            api_key: The OpenAI API key.
        """
        self.client = OpenAI(api_key=api_key)
        
    def chat(self, question:str):
        """
        PARAMS:
            question(str): The prompt input (with single triple-quotes) to ChatGPT.
        RETURNS:
            ans(str): The results returned by ChatGPT.
        """

        completion = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
            {"role": "system", "content": "You are a material science professor and want to extract information from the paper's abstract."},
            {"role": "user", "content": f"{question}"}
            ],
            temperature=0
        )
        ans = completion.choices[0].message.content
        return(ans)
    

In [33]:
# testing GPTAPI with using paper: https://arxiv.org/abs/2207.05343v1

testing_prompt = """
    Given the abstract of a paper, can you generate a Cypher code to construct a knowledge graph in Neo4j? 
    It might be multiple materials.
    To create nodes, you must follow the pattern, you must follow the five node type below:
    MERGE (a: Application {{name: "[application]"}})
    MERGE (p:Property {{name: "[property]"}})
    MERGE (m:MaterialCategory {{name: "[material]"}})
    MERGE (f:Formula {{name: "[chemical formula]"}})
    MERGE (e:Element {{name: "[element]"}})

    where 
    1. "[...]" is the extract information
    2. Do not include elemental information in "[material]"
    3. "[element]" should be the element symbol in the periodic table.

    To create relationships, you must follow the four relationship below:
    MERGE (a)-[:NEED_PROPERTY]->(p)
    MERGE (m)-[: HAS_PROPERTY]->(p)
    MERGE (m)-[: HAS_FORMULA]->(f)
    MERGE (m)-[: HAS_ELEMENT]->(e)

    (don't include any explanations in your responses, and give code directly)


    The development of a materials platform that exhibits both superconducting and semiconducting properties is an important endeavour for a range of emerging quantum technologies. We investigate the formation of superconductivity in nanowires fabricated with silicon-on-insulator (SOI). Aluminium from deposited contact electrodes is found to interdiffuses with the Si nanowire structures to form an Al-Si alloy along the entire length of the predefined nanowire device over micron length scales at temperatures well below that of the Al-Si eutectic. The resultant transformed nanowire structures are layered in geometry with a continuous Al-Si alloy wire sitting on the buried oxide of the SOI and a residual Si cap sitting on top of the wire. The phase transformed material is conformal with any predefined device patterns and the resultant structures are exceptionally smooth-walled compared to similar nanowire devices formed by silicidation processes. The superconducting properties of a mesoscopic AlSi ring formed on a SOI platform are investigated. Low temperature magnetoresistance oscillations, quantized in units of the fluxoid, h/2e, are observed.
    """

chatgpt = GPTAPI(KEY)
chatgpt.chat(testing_prompt)

'```cypher\nMERGE (a: Application {name: "Quantum Technologies"})\nMERGE (p: Property {name: "Superconductivity"})\nMERGE (m: MaterialCategory {name: "Nanowires"})\nMERGE (f: Formula {name: "Al-Si"})\nMERGE (e: Element {name: "Si"})\nMERGE (e2: Element {name: "Al"})\n\nMERGE (a)-[:NEED_PROPERTY]->(p)\nMERGE (m)-[:HAS_PROPERTY]->(p)\nMERGE (m)-[:HAS_FORMULA]->(f)\nMERGE (m)-[:HAS_ELEMENT]->(e)\nMERGE (m)-[:HAS_ELEMENT]->(e2)\n```  '

# Neo4j

In [37]:
class Neo4j:
    def __init__(self, user: str, password: str, url = 'bolt://localhost:7687'):
        """
        PARAMS:
            user(str): The user ID for accessing the database.
            password(str): The user password to access the database.
            url(str): The URL link to the database.
        """
        self._driver = GraphDatabase.driver(url, auth=(user, password))
        self._driver.verify_connectivity()
        try:
            self._driver.verify_connectivity()
        except:
            raise ("Fail to connect, please check!")

    def close(self):
        if self._driver is not None:
            self._driver.close()

    
    def create(self, cypher_string:str):
        """
        PARAMS:
            cypher_string(str): The string with cypher creation
        """
        assert self._driver is not None, "Driver not initialized!"
        
        with self._driver.session() as session:
            session.run(cypher_string)
        

    def query(self, query_string:str):
        """
        PARAMS:
            query_string: The query string enclosed in single triple-quotes.
        RETURNS:
            query results(pd.DataFrame): The query results formatted as a DataFrame.
        """
        assert self._driver is not None, "Driver not initialized!"

        # create a session
        # A Session is a logical context for transactional units of work. Connections are drawn from the Driver connection pool as required.
        with self._driver.session() as session:

            # query part
            neo4j_result = session.run(query_string).to_df()
            return(neo4j_result)


In [43]:
driver = Neo4j("neo4j", Neo4j_PWD)
driver.query("""MATCH (e:Element) RETURN e.name ORDER BY e.name LIMIT 10""")

Unnamed: 0,e.name
0,
1,Ag
2,Al
3,Au
4,B
5,Bi
6,C
7,Cd
8,Co
9,Cr


# ActiveScience

In [360]:
def create_node(query_string: str):
    """
    PARAMS:
        query_string(str): The string to query in Arxiv API. Detailed information about query parameters can be found at:
                            https://info.arxiv.org/help/api/user-manual.html#query_details
    """

    prompt_pattern = """
    Given the abstract of a paper, can you generate a Cypher code to construct a knowledge graph in Neo4j? 
    It might be multiple materials.
    To create nodes, you must follow the pattern, you must follow the five node type below:
    MERGE (a: Application {{name: "[application]"}})
    MERGE (p:Property {{name: "[property]"}})
    MERGE (m:MaterialCategory {{name: "[material]"}})
    MERGE (f:Formula {{name: "[chemical formula]"}})
    MERGE (e:Element {{name: "[element]"}})

    where 
    1. "[...]" is the extract information
    2. Do not include elemental information in "[material]"
    3. "[element]" should be the element symbol in the periodic table.

    To create relationships, you must follow the four relationship below:
    MERGE (a)-[:NEED_PROPERTY]->(p)
    MERGE (m)-[: HAS_PROPERTY]->(p)
    MERGE (m)-[: HAS_FORMULA]->(f)
    MERGE (m)-[: HAS_ELEMENT]->(e)

    (don't include any explanations in your responses, and give code directly)


    {abstract}
    """

    ref_pattern = """
    MERGE (r: Reference {{url: "{url}"}})\nMERGE (m)-[: HAS_REF]->(r)
    """
    
    # arxiv
    arxiv_interface = ArxivAPI()
    arxiv_results = arxiv_query(query_string)
    
    # ChatGPT
    chatgpt = GPTAPI(KEY)
    
    # Neo4j
    driver = Neo4j("neo4j", Neo4j_PWD)
    
    
    # iterate all papers and create triple in Neo4j
    for r in arxiv_results:
        print('------')
        print(r)
        print(r.title)

        # prompt creation
        prompt = prompt_pattern.format(abstract = r.summary)

        # chatGPT
        answer = chatgpt.chat(prompt)
        
        # post-processing
        if answer.startswith('```cypher\n'):
            answer = answer[10:-5]
        prompt_ref_answer = prompt_answer + ref_pattern.format(url = r.pdf_url)
        
        # neo4j input
        try:
            driver.create(answer)
        except Exception as e:
            print(f"An error occurred: {e}")


In [361]:
create_node("cat:cond-mat.mes-hall AND ti:optical AND ti:alloy")

------
http://arxiv.org/abs/2402.09878v1
Explaining all-optical switching in ferrimagnets with heavy rare-earth elements by varying the spin-flip scattering probability of Gd in Co$_x$Gd$_{100-x}$ alloys and Co/Gd bilayers
An error occurred: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'e': expected ":" or "IS" (line 10, column 28 (offset: 352))
"MERGE (m)-[:HAS_ELEMENT]->(e"
                            ^}
------
http://arxiv.org/abs/2208.05337v1
Impact of random alloy fluctuations on the electronic and optical properties of (Al,Ga)N quantum wells: Insights from tight-binding calculations
------
http://arxiv.org/abs/2205.14342v1
Helicity-independent all-optical switching of magnetization in ferrimagnetic alloys
------
http://arxiv.org/abs/2204.06229v2
Computational Design of Alloy Nanostructures for Optical Sensing of Hydrogen
------
http://arxiv.org/abs/2203.15460v1
Realistic micromagnetic description of all-optical ultrafast switching processes in ferrimagnet