In [1]:
import arxiv

In [2]:
# Client with the library's default settings; it is used to execute the search.
client = arxiv.Client()

# Query definition: papers matching "quantum", at most 25 hits,
# sorted by submission date.
search_options = dict(
    query="quantum",
    max_results=25,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)
search = arxiv.Search(**search_options)

In [3]:
# Execute the search and materialize everything the client yields into a list.
results = [paper for paper in client.results(search)]

In [4]:
import pandas as pd

# Column layout of the paper table. Declaring it explicitly means an empty
# result list still produces a frame with these columns, instead of a
# column-less frame on which the "published" lookup below raises KeyError.
paper_columns = [
    'title',
    'authors',
    'published',
    'summary',
    'primary_category',
    'categories',
    'links',
]

# One record per search result; authors and categories are flattened into
# comma-separated strings, links are kept as a list of URLs.
data = [
    {
        'title': r.title,
        'authors': ', '.join([author.name for author in r.authors]),
        'published': r.published,
        'summary': r.summary,
        'primary_category': r.primary_category,
        'categories': ', '.join(r.categories),
        "links": [link.href for link in r.links]
    }
    for r in results
]

# Build the DataFrame from the collected records and keep only the calendar
# date of the publication timestamp.
df = pd.DataFrame(data, columns=paper_columns)
df["published"] = pd.to_datetime(df["published"]).dt.date


In [5]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
df

Unnamed: 0,title,authors,published,summary,primary_category,categories,links
0,Non-equilibrium dynamics of symmetry-resolved ...,Katja Klobas,2024-07-31,Symmetry resolved entanglement and entanglemen...,cond-mat.stat-mech,"cond-mat.stat-mech, nlin.CG, nlin.SI, quant-ph","[http://arxiv.org/abs/2407.21793v1, http://arx..."
1,Non-equilibrium dynamics of charged dual-unita...,"Alessandro Foligno, Pasquale Calabrese, Bruno ...",2024-07-31,The interplay between symmetries and entanglem...,cond-mat.stat-mech,"cond-mat.stat-mech, hep-th, math-ph, math.MP, ...","[http://arxiv.org/abs/2407.21786v1, http://arx..."
2,A concrete construction of a topological opera...,Masashi Kawahira,2024-07-31,Factorization algebras play a central role in ...,hep-th,"hep-th, math-ph, math.MP","[http://arxiv.org/abs/2407.21784v1, http://arx..."
3,Lanczos for lattice QCD matrix elements,"Daniel C. Hackett, Michael L. Wagman",2024-07-31,Recent work found that an analysis formalism b...,hep-lat,hep-lat,"[http://arxiv.org/abs/2407.21777v1, http://arx..."
4,Properties of Krylov state complexity in qubit...,"Siddharth Seetharaman, Chetanya Singh, Rejish ...",2024-07-31,We analyze the properties of Krylov state comp...,quant-ph,"quant-ph, cond-mat.quant-gas","[http://arxiv.org/abs/2407.21776v2, http://arx..."
5,Shadow Hamiltonian Simulation,"Rolando D. Somma, Robbie King, Robin Kothari, ...",2024-07-31,"We present shadow Hamiltonian simulation, a fr...",quant-ph,quant-ph,"[http://arxiv.org/abs/2407.21775v1, http://arx..."
6,Spurious Solar-Wind Effects on Acceleration No...,"Arnold Yang, Indie Desiderio-Sloane, Grant Dav...",2024-07-31,Spurious solar-wind effects are a potential no...,physics.space-ph,"physics.space-ph, astro-ph.IM, gr-qc","[http://arxiv.org/abs/2407.21774v1, http://arx..."
7,Engineering a multi-level bath for transmon wi...,"Xi Cao, Maria Mucci, Gangqiang Liu, David Pekk...",2024-07-31,A photonic system with a tunable bath environm...,quant-ph,quant-ph,"[http://arxiv.org/abs/2407.21765v1, http://arx..."
8,Energy Transport Among Highly-Polarized Atoms,"Catherine D. Opsahl, Yuan Jiang, Samantha A. G...",2024-07-31,A static electric field of a few V/cm shifts t...,physics.atom-ph,"physics.atom-ph, quant-ph","[http://arxiv.org/abs/2407.21764v1, http://arx..."
9,Minimal Quantum Circuits for Simulating Fibona...,"Sary Bseiso, Joel Pommerening, Richard R. Alle...",2024-07-31,The Fibonacci topological order is the prime c...,quant-ph,"quant-ph, cond-mat.str-el, math-ph, math.MP","[http://arxiv.org/abs/2407.21761v1, http://arx..."


In [25]:
from transformers import BertTokenizer, BertModel
import torch

# Pretrained BERT tokenizer and encoder used to embed every paper.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")


def _cls_embedding(text):
    """Encode `text` with BERT and return the last hidden state at position 0.

    The input is truncated to 512 tokens. Position 0 is where the BERT
    tokenizer places the [CLS] token. The result is a NumPy array.
    """
    encoded = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding=True)

    # Inference only, so no gradients are tracked.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    return hidden_states[:, 0, :].squeeze().numpy()


# Column that will hold one embedding array per paper.
df["emb_ti_auth_sum"] = None

paper_embeddings = []
for row_label, paper in df.iterrows():
    # Authors, title and abstract are embedded together as a single text.
    combined_text = paper["authors"] + ", " + paper["title"] + ", " + paper["summary"]
    embedding = _cls_embedding(combined_text)

    df.at[row_label, "emb_ti_auth_sum"] = embedding
    paper_embeddings.append(embedding)



In [40]:
from sklearn.decomposition import PCA
import plotly.express as px
from sklearn.metrics.pairwise import cosine_similarity

# Materialize the per-paper embedding vectors once; both computations below
# consume the same list.
embedding_matrix = df["emb_ti_auth_sum"].values.tolist()

# Pairwise cosine similarity between all paper embeddings.
cosine_similarities = cosine_similarity(embedding_matrix)

# Project the embeddings onto three principal components for plotting.
# The seed is pinned because scikit-learn's default solver choice may fall
# back to a randomized SVD for wide inputs like this one, which would make
# the projection differ between runs.
pca = PCA(n_components=3, random_state=42)
embeddings_reduced = pca.fit_transform(embedding_matrix)

# 3D scatter of the reduced embeddings, one color per primary category.
# Axis labels match the ones used by the graph_objects figure in this notebook.
fig = px.scatter_3d(
    x=embeddings_reduced[:, 0],
    y=embeddings_reduced[:, 1],
    z=embeddings_reduced[:, 2],
    title="3D Visualisierung der Paper Embeddings",
    color=df["primary_category"],
    labels={"x": "PCA 1", "y": "PCA 2", "z": "PCA 3"},
)

fig.show()

In [None]:
# TODO: adapt the code above: store idx, title, authors and primary category of each paper as a label, then match each label via idx to annotate the visualization.
# TODO (visualization): define the colorbar by how close the points are to each other (cosine similarity); add a function that colors the points by primary category.

In [117]:
# Inspect the embedding stored for the first paper (positional access).
df["emb_ti_auth_sum"].iloc[0]

array([-3.82961899e-01, -5.77112615e-01, -8.74850005e-02, -1.92240700e-01,
       -5.64849600e-02, -1.67696968e-01, -1.83353141e-01, -4.22380865e-01,
        5.91557562e-01, -1.09967172e+00, -3.33724022e-01,  1.32639885e-01,
       -6.98367000e-01, -2.52428055e-01, -3.11977476e-01,  4.47234362e-01,
        4.96635854e-01, -7.18828142e-02, -1.82825834e-01,  1.40252123e-02,
        3.66788834e-01, -1.58625737e-01,  9.74384323e-02, -3.85426134e-02,
        1.31820560e-01, -4.72510099e-01, -1.70363784e-01, -3.39535534e-01,
        2.49586523e-01,  4.32586133e-01, -4.12304759e-01,  6.54714167e-01,
       -3.72190058e-01, -6.44016564e-01,  8.10964823e-01,  6.15352094e-02,
        6.14458263e-01,  4.91400898e-01,  5.23263931e-01,  8.62905383e-02,
       -1.02233201e-01,  1.36117280e-01,  8.26833248e-02, -1.81768775e-01,
       -8.55005383e-01,  9.17503051e-03, -4.51417923e+00, -2.94986874e-01,
       -3.89297664e-01, -1.02606022e+00, -1.68631747e-01, -2.79952198e-01,
       -2.58737169e-02,  

In [130]:
from sklearn.decomposition import PCA
import plotly.graph_objects as go
import plotly.express as px

# Project the embeddings onto three principal components and store the
# reduced coordinates next to each paper.
# The seed is pinned because scikit-learn's default solver choice may fall
# back to a randomized SVD for wide inputs like this one, which would make
# the projection differ between runs.
pca = PCA(n_components=3, random_state=42)
embeddings_reduced = pca.fit_transform(df["emb_ti_auth_sum"].values.tolist())
df["emb_ti_auth_sum_reduced"] = embeddings_reduced.tolist()

# Assign one palette color per primary category. The palette is cycled so
# that every category gets a color even when there are more categories than
# palette entries; pairing them with zip() would silently drop the surplus
# categories and make later color lookups fail with KeyError.
unique_categories = df["primary_category"].unique()
palette = px.colors.qualitative.Plotly
color_mapping = {
    category: palette[position % len(palette)]
    for position, category in enumerate(unique_categories)
}

print(color_mapping)

# Draft (not enabled yet): one Scatter3d trace per primary category, with the
# DataFrame index shown as a text label next to each marker.
# traces = []
# for category in unique_categories:
#     category_mask = df["primary_category"] == category
#     trace = go.Scatter3d(
#         x=embeddings_reduced[category_mask, 0],
#         y=embeddings_reduced[category_mask, 1],
#         z=embeddings_reduced[category_mask, 2],
#         mode="markers+text",
#         marker=dict(
#             size=5,
#             color=color_mapping[category]
#         ),
#         text= df[category_mask].index.values,
#         name= category
#     )
#     traces.append(trace)

# fig = go.Figure(data=traces)

# fig.show()

{'cond-mat.stat-mech': '#636EFA', 'hep-th': '#EF553B', 'hep-lat': '#00CC96', 'quant-ph': '#AB63FA', 'physics.space-ph': '#FFA15A', 'physics.atom-ph': '#19D3F3', 'math.SG': '#FF6692', 'physics.chem-ph': '#B6E880', 'hep-ph': '#FF97FF', 'astro-ph.IM': '#FECB52'}


In [33]:
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
import plotly.graph_objects as go


# Materialize the per-paper embedding vectors once; PCA and the similarity
# computation below both consume the same list.
embedding_matrix = df["emb_ti_auth_sum"].values.tolist()

# Project the embeddings onto three principal components for plotting.
# The seed is pinned because scikit-learn's default solver choice may fall
# back to a randomized SVD for wide inputs like this one, which would make
# the projection differ between runs.
pca = PCA(n_components=3, random_state=42)
embeddings_reduced = pca.fit_transform(embedding_matrix)

# Numeric value driving the colorscale: each paper's mean cosine similarity
# to all papers (its own similarity of 1 is included in the mean). A
# colorscale only applies to numeric marker colors, so the former empty-string
# placeholder could not produce one.
mean_similarity = cosine_similarity(embedding_matrix).mean(axis=1)

# Hover text per paper, in row order so it lines up with the rows of
# `embeddings_reduced` regardless of the DataFrame's index labels.
hover_text = [
    f"Title: {title}<br>Authors: {authors}<br>Primary Category: {category}"
    for title, authors, category in zip(df["title"], df["authors"], df["primary_category"])
]

fig = go.Figure()

# Single trace holding all papers; hovering a marker shows its paper details.
fig.add_trace(go.Scatter3d(
    x=embeddings_reduced[:, 0],
    y=embeddings_reduced[:, 1],
    z=embeddings_reduced[:, 2],
    mode="markers",
    marker=dict(
        size=4,
        color=mean_similarity,
        colorscale="Viridis",
        colorbar=dict(title=dict(text="Mean cosine similarity")),
        opacity=0.8
    ),
    text=hover_text,
    hoverinfo="text"
))

fig.update_layout(
    title="3D Visualisierung der Paper Embeddings",
    scene=dict(
        xaxis_title="PCA 1",
        yaxis_title="PCA 2",
        zaxis_title="PCA 3"
    )
)

fig.show()