In [None]:
import pandas as pd

# Load the three tab-separated DBLP snapshot tables, in this order:
# authors, papers, and the paper <-> author link table.
authors, papers, papers_authors = (
    pd.read_csv(csv_path, sep='\t')
    for csv_path in (
        'snapshot/dblp_authors.csv',
        'snapshot/dblp_papers.csv',
        'snapshot/dblp_papers_authors.csv',
    )
)

In [None]:
# Disabled: filter out papers that have "conf" in their DBLP column
# papers_filtered = papers[~papers['DBLP'].str.contains('/conf/')]
# Active: show every author row whose Name contains "Steffen Koch"
authors.loc[authors["Name"].str.contains("Steffen Koch")]

In [None]:
# Peek at the row labelled 0 of each table to see which columns it carries.
print(authors.loc[0], papers.loc[0], papers_authors.loc[0], sep="\n")

In [None]:
from datetime import datetime
from itables import init_notebook_mode, show
import ipywidgets as widgets
from IPython.display import display

# init_notebook_mode(all_interactive=True)
# pd.set_option('display.width', 1000)
# pd.set_option('display.max_rows', 1000)

# Values the form starts out with
search, cutoff, explain = "Guido Reina", 5, True

# Build the input controls, the trigger button and the area results are written to
search_input = widgets.Text(
    value=search,
    description='Search:',
)
cutoff_slider = widgets.IntSlider(
    value=cutoff,
    min=1,
    max=10,
    step=1,
    description='Cutoff:',
)
explain_toggle = widgets.Checkbox(
    value=explain,
    description='Explain',
)
search_button = widgets.Button(description="Go")
output = widgets.Output()

# Render all controls, in this order
display(
    search_input,
    cutoff_slider,
    explain_toggle,
    search_button,
    output,
)

def _as_link(value):
    """Return an HTML anchor whose target and label are both ``value``.

    Missing values (NaN/None) yield an empty string instead of a link to "nan".
    """
    if pd.isna(value):
        return ""
    return '<a href="{}">{}</a>'.format(value, value)


# Define the button click event
def on_search_button_clicked(b):
    """Button callback: show the recent co-authors of every author matching the search.

    Reads the search text, the cutoff (in years) and the explain flag from the
    widgets, then writes the result into ``output``:

    * every author whose ``Name`` contains the search text is printed;
    * their papers from the last ``cutoff`` years (by ``Year``) are collected;
    * all other authors of those papers are shown, either as a plain author
      table or, when "Explain" is ticked, joined with the papers that link them.

    Args:
        b: The object passed in by the button's click event; unused.
    """
    # Update variables based on UI input
    search = search_input.value
    cutoff = cutoff_slider.value
    explain = explain_toggle.value

    with output:
        output.clear_output()

        # An empty pattern is contained in every name and would list the whole table.
        if not search.strip():
            print("Please enter an author name to search for.")
            return

        cutoff_year = datetime.now().year - cutoff

        # Literal (non-regex) matching: characters such as "(" or "+" in the input
        # cannot raise a regex error. na=False turns missing names into non-matches
        # so the mask stays purely boolean.
        name_matches = authors["Name"].str.contains(search, regex=False, na=False)
        hits = authors[name_matches]
        if hits.empty:
            print(f"No authors found for {search}")
            return

        for h in hits.itertuples():
            print(f"Author: {h.Name}, NumericID: {h.NumericID}, ORCID: {h.ORCID}, DBLP: {h.DBLP}")
        author_ids = hits["NumericID"].tolist()

        # All papers of the matched authors, restricted to the cutoff window
        is_searched_author = papers_authors["AuthorID"].isin(author_ids)
        paper_ids = papers_authors.loc[is_searched_author, "PaperID"]
        papers_data = papers[papers["NumericID"].isin(paper_ids) & (papers["Year"] >= cutoff_year)]

        # Everyone else on those papers. Every matched author is excluded, not just
        # the last one, so a search that hits several records does not report the
        # searched people as their own co-authors.
        on_recent_paper = papers_authors["PaperID"].isin(papers_data["NumericID"])
        co_author_ids = papers_authors.loc[on_recent_paper & ~is_searched_author, "AuthorID"].unique()

        # Look up the co-authors' rows in the authors table
        co_authors_data = authors[authors["NumericID"].isin(co_author_ids)]

        if explain:
            # Join the co-authors with the recent papers that connect them to the
            # searched author(s); clashing column names get the _x (author side)
            # and _y (paper side) suffixes.
            merged = pd.merge(co_authors_data, papers_authors, left_on="NumericID", right_on="AuthorID")
            merged = pd.merge(merged, papers_data, left_on="PaperID", right_on="NumericID")
            # .copy() so the column assignments below write to an independent frame
            # (avoids SettingWithCopyWarning on the column subset).
            merged = merged[['DBLP_x', 'Name', 'ORCID', 'DBLP_y', 'Title', 'Year', 'PaperID', 'AuthorID', 'NumericID_x', 'NumericID_y']].copy()
            for column in ("DBLP_x", "DBLP_y", "ORCID"):
                merged[column] = [_as_link(value) for value in merged[column]]
            merged = merged.rename(columns={"DBLP_x": "DBLP Author", "DBLP_y": "DBLP Paper"})
            show(merged)
        else:
            cod = co_authors_data.copy()
            for column in ("DBLP", "ORCID"):
                cod[column] = [_as_link(value) for value in cod[column]]
            show(cod)

# Register the handler so that clicking the "Go" button runs the search.
search_button.on_click(on_search_button_clicked)
