In [1]:
import arxiv

In [2]:
# Client object that is used further down to execute the search.
client = arxiv.Client()

# Search definition: at most 25 entries for the query "quantum",
# sorted by the SubmittedDate criterion.
search = arxiv.Search(
    query="quantum",
    max_results=25,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

In [3]:
# Run the search through the client and collect everything it yields into a list,
# so the results can be iterated over in later cells.
results = list(client.results(search))

In [4]:
import pandas as pd

# Collect one record (a plain dict) per search result; the list of records is
# turned into the DataFrame below.
data = [
    {
        'title': r.title,
        # Authors and categories are flattened into comma-separated strings.
        'authors': ', '.join(author.name for author in r.authors),
        'published': r.published,
        'summary': r.summary,
        'primary_category': r.primary_category,
        'categories': ', '.join(r.categories),
        "links": [link.href for link in r.links],
    }
    for r in results
]

# Build the DataFrame from the collected records. The columns are named
# explicitly so that an empty result list still produces a frame with the
# expected columns instead of raising a KeyError on "published" below.
df = pd.DataFrame(
    data,
    columns=[
        'title',
        'authors',
        'published',
        'summary',
        'primary_category',
        'categories',
        'links',
    ],
)

# Reduce the publication timestamp to its calendar date.
df["published"] = pd.to_datetime(df["published"]).dt.date


In [5]:
# Show the assembled DataFrame as this cell's output.
df

Unnamed: 0,title,authors,published,summary,primary_category,categories,links
0,Non-equilibrium dynamics of symmetry-resolved ...,Katja Klobas,2024-07-31,Symmetry resolved entanglement and entanglemen...,cond-mat.stat-mech,"cond-mat.stat-mech, nlin.CG, nlin.SI, quant-ph","[http://arxiv.org/abs/2407.21793v1, http://arx..."
1,Non-equilibrium dynamics of charged dual-unita...,"Alessandro Foligno, Pasquale Calabrese, Bruno ...",2024-07-31,The interplay between symmetries and entanglem...,cond-mat.stat-mech,"cond-mat.stat-mech, hep-th, math-ph, math.MP, ...","[http://arxiv.org/abs/2407.21786v1, http://arx..."
2,A concrete construction of a topological opera...,Masashi Kawahira,2024-07-31,Factorization algebras play a central role in ...,hep-th,"hep-th, math-ph, math.MP","[http://arxiv.org/abs/2407.21784v1, http://arx..."
3,Lanczos for lattice QCD matrix elements,"Daniel C. Hackett, Michael L. Wagman",2024-07-31,Recent work found that an analysis formalism b...,hep-lat,hep-lat,"[http://arxiv.org/abs/2407.21777v1, http://arx..."
4,Properties of Krylov state complexity in qubit...,"Siddharth Seetharaman, Chetanya Singh, Rejish ...",2024-07-31,We analyze the properties of Krylov state comp...,quant-ph,"quant-ph, cond-mat.quant-gas","[http://arxiv.org/abs/2407.21776v2, http://arx..."
5,Shadow Hamiltonian Simulation,"Rolando D. Somma, Robbie King, Robin Kothari, ...",2024-07-31,"We present shadow Hamiltonian simulation, a fr...",quant-ph,quant-ph,"[http://arxiv.org/abs/2407.21775v1, http://arx..."
6,Spurious Solar-Wind Effects on Acceleration No...,"Arnold Yang, Indie Desiderio-Sloane, Grant Dav...",2024-07-31,Spurious solar-wind effects are a potential no...,physics.space-ph,"physics.space-ph, astro-ph.IM, gr-qc","[http://arxiv.org/abs/2407.21774v1, http://arx..."
7,Engineering a multi-level bath for transmon wi...,"Xi Cao, Maria Mucci, Gangqiang Liu, David Pekk...",2024-07-31,A photonic system with a tunable bath environm...,quant-ph,quant-ph,"[http://arxiv.org/abs/2407.21765v1, http://arx..."
8,Energy Transport Among Highly-Polarized Atoms,"Catherine D. Opsahl, Yuan Jiang, Samantha A. G...",2024-07-31,A static electric field of a few V/cm shifts t...,physics.atom-ph,"physics.atom-ph, quant-ph","[http://arxiv.org/abs/2407.21764v1, http://arx..."
9,Minimal Quantum Circuits for Simulating Fibona...,"Sary Bseiso, Joel Pommerening, Richard R. Alle...",2024-07-31,The Fibonacci topological order is the prime c...,quant-ph,"quant-ph, cond-mat.str-el, math-ph, math.MP","[http://arxiv.org/abs/2407.21761v1, http://arx..."


In [25]:
from transformers import BertTokenizer, BertModel
import torch

# Load tokenizer and encoder from the same pretrained checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

# Column that receives one embedding vector per paper (filled row by row below).
df["emb_ti_auth_sum"] = None

paper_embeddings = []
for idx, row in df.iterrows():
    # Text to embed: authors, title and summary joined into a single string.
    paper_info = ", ".join((row["authors"], row["title"], row["summary"]))

    # Tokenize into PyTorch tensors, truncated to at most 512 tokens.
    inputs = tokenizer(
        paper_info,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding=True,
    )

    # Forward pass without gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)

    # Take the hidden state at sequence position 0 (the [CLS] position for BERT
    # inputs), drop the batch dimension and convert it to a NumPy array.
    first_token_state = outputs.last_hidden_state[:, 0, :]
    output_cls_token_emb = first_token_state.squeeze().numpy()

    # Store the vector both in the DataFrame and in the plain list.
    df.at[idx, "emb_ti_auth_sum"] = output_cls_token_emb
    paper_embeddings.append(output_cls_token_emb)



In [156]:
from sklearn.decomposition import PCA
import plotly.graph_objects as go
import plotly.express as px
import colorsys
from sklearn.metrics.pairwise import cosine_similarity


def generate_unique_colors(num_colors: int) -> list:
    """
    Builds a list of hex color strings whose hues are evenly spaced.

    Every color uses a fixed lightness of 0.5 and a fixed saturation of 0.9;
    only the hue varies (``i / num_colors`` for ``i`` in ``range(num_colors)``).

    :params:    - num_colors (int): number of colors to generate
    :returns:   - list: hex strings of the form ``#rrggbb``, one per color;
                  empty when ``num_colors`` is 0 or negative
    :example:
        >>> generate_unique_colors(2)
        ['#f20c0c', '#0cf2f2']
    """
    lightness, saturation = 0.5, 0.9

    def _to_hex(step: int) -> str:
        # Convert one HLS color to 8-bit RGB channels (truncating) and format as hex.
        red, green, blue = (
            int(channel * 255)
            for channel in colorsys.hls_to_rgb(step / num_colors, lightness, saturation)
        )
        return f"#{red:02x}{green:02x}{blue:02x}"

    return [_to_hex(step) for step in range(num_colors)]


# Reduce the stored embedding vectors to three principal components and keep
# the reduced coordinates as a list per row in a new DataFrame column.
pca = PCA(n_components=3)
embeddings_reduced = pca.fit_transform(df["emb_ti_auth_sum"].tolist())
df["emb_ti_auth_sum_reduced"] = embeddings_reduced.tolist()

def calculate_cosine_similarities(df: pd.DataFrame, index: int) -> pd.DataFrame:
    """
    Adds a column with the cosine similarity of every row to one reference row.

    The similarities are computed on the "emb_ti_auth_sum_reduced" column and
    written to a column named ``cossim<index>``. If that column already exists
    the DataFrame is returned untouched. The DataFrame is modified in place and
    the same object is returned.

    :params:    - df (pd.DataFrame): frame holding "emb_ti_auth_sum_reduced"
                - index (int): row label of the reference paper
    :returns:   - pd.DataFrame: the same ``df`` object, with the similarity column
    """
    column_name = f"cossim{index}"

    # Only compute when this reference row has not been processed before.
    if column_name not in df.columns:
        reference_embedding = df.loc[index, "emb_ti_auth_sum_reduced"]
        all_embeddings = df["emb_ti_auth_sum_reduced"].tolist()
        # cosine_similarity returns a 1 x n matrix here; row 0 holds the values.
        df[column_name] = cosine_similarity([reference_embedding], all_embeddings)[0]

    return df



index = 5 # example index
# The function adds the similarity column to `df` itself and returns that same
# object, so `similarities_df` and `df` refer to one and the same DataFrame.
similarities_df = calculate_cosine_similarities(df, index)
# Show the DataFrame (now including the similarity column) as the cell's output.
df

# print(similarities)

# unique_categories = df["primary_category"].unique()
# colors = generate_unique_colors(len(unique_categories))
# color_mapping = {category: color for category, color in zip(unique_categories, colors)}



# traces = []
# for category in unique_categories:
#     category_mask = df["primary_category"] == category
#     trace = go.Scatter3d(
#         x=embeddings_reduced[category_mask, 0],
#         y=embeddings_reduced[category_mask, 1],
#         z=embeddings_reduced[category_mask, 2],
#         mode="markers+text",
#         marker=dict(
#             size=5,
#             color=color_mapping[category]
#         ),
#         text= df[category_mask].index.values,
#         name= category
#     )
#     traces.append(trace)


# colors_paperwise = generate_unique_colors(len(df))
# color_mapping_paperwise = {idx+1: color for idx, color in zip(df.index, colors_paperwise)}
# traces = []
# for idx, row in df.iterrows():
#     trace = go.Scatter3d(
#         x= [row["emb_ti_auth_sum_reduced"][0]],
#         y= [row["emb_ti_auth_sum_reduced"][1]],
#         z= [row["emb_ti_auth_sum_reduced"][2]],
#         mode="markers+text",
#         marker=dict(
#             size= 5,
#             color= color_mapping_paperwise.get(idx+1, "black")
#         ),
#         text= idx+1,
#         name= f"{idx+1}. {row['title'][:25]}..."
#     )
#     traces.append(trace)

# fig = go.Figure(data=traces)

# fig.show()