In [1]:
import pandas as pd
import json

In [9]:
# Load one table per contribution track from the working directory.
df_papers = pd.read_csv("./df_papers_final.csv")
df_posters = pd.read_csv("./df_posters_final.csv")
df_plenary = pd.read_csv("./df_plenary_final.csv")
df_keynotes = pd.read_csv("./df_keynotes.csv")
df_tutorial = pd.read_csv("./df_tutorials.csv")
df_other = pd.read_csv("./df_other.csv")

# Posters: session id and name are derived from the "day" column; the room is
# fixed and the per-presentation time is left blank.
poster_day = df_posters["day"].map(str)
df_posters["session_id"] = "PT" + poster_day
df_posters["session_name"] = "Poster session " + poster_day
df_posters["room"] = "Atrium"
df_posters["time"] = ""

# Plenary times use "." as separator in the CSV; switch to ":".
df_plenary["time"] = df_plenary["time"].map(lambda raw: str(raw).replace(".", ":"))

# Tag every table with the kind of contribution it holds.
for track_frame, track_label in [
    (df_posters, "poster"),
    (df_plenary, "plenary"),
    (df_papers, "parallel"),
    (df_keynotes, "keynote"),
    (df_other, "other"),
    (df_tutorial, "tutorial"),
]:
    track_frame["type"] = track_label

# "Other" sessions whose id starts with "SD" are re-tagged as dinners.
other_prefix = df_other["session_id"].map(lambda sid: sid[:2])
df_other.loc[other_prefix == "SD", "type"] = "dinner"

# Tutorials list presenters separated by "|"; show them comma-separated.
df_tutorial["presenter_name"] = df_tutorial["presenters"].map(lambda names: names.replace("|", ", "))

# Keep only the columns each track contributes to the combined table.
core_cols = ["session_id", "session_name", "authors", "time", "submission_id", "presenter_name", "title", "type"]
df_papers = df_papers[core_cols].copy()
df_plenary = df_plenary[core_cols].copy()
df_posters = df_posters[core_cols + ["poster_board_id"]].copy()
df_keynotes = df_keynotes[["session_id", "session_name", "time", "presenter_name", "title", "type", "abstract", "Bio", "Sponsor"]].copy()
# NOTE(review): "Link" is not among the selected columns, so this rename
# currently changes nothing - confirm whether that column was meant to be kept.
df_other = (
    df_other[["session_id", "session_name", "time", "presenter_name", "title", "type", "abstract"]]
    .copy()
    .rename(columns={"Link": "link"})
)
df_tutorial = (
    df_tutorial[["session_id", "presenter_name", "time", "title", "type", "url", "description"]]
    .copy()
    .rename(columns={"description": "abstract", "url": "link"})
)

# Stack all tracks into one table (original row indices are kept, so index
# values repeat across tracks).
presentations = pd.concat(
    [df_papers, df_plenary, df_posters, df_keynotes, df_other, df_tutorial]
).rename(columns={"presenter_name": "presenter", "time": "presentation_time"})

# Every type except tutorials and dinners gets its link rebuilt from the
# submission id; rows without a submission id keep a missing link.
needs_pdf_link = ~presentations["type"].isin(["tutorial", "dinner"])
presentations.loc[needs_pdf_link, "link"] = presentations.loc[needs_pdf_link, "submission_id"].apply(
    lambda sub_id: sub_id
    if pd.isna(sub_id)
    else "https://laura.alessandretti.com/public/pdf_accepted/paper{}.pdf".format(int(sub_id))
)

In [10]:
# Display the "other" sessions table as it stands after the column selection above.
df_other

Unnamed: 0,session_id,session_name,time,presenter_name,title,type,abstract
0,LL1,Too Lazy to Read the Paper LIVESHOW EXTRAVAGAN...,13:00,Sune Lehmann,Too Lazy to Read the Paper LIVESHOW EXTRAVAGAN...,other,The five star rated (by Sune’s mom) science po...
1,LL2,Panel Debate - Using Computational Social Scie...,13:00,Host: Vedran Sekara,Panel Debate - Using Computational Social Scie...,other,"The 2030 Agenda for Sustainable Development, a..."


In [11]:
def f(x):
    """Normalise a clock string to ``hours:minutes`` with two-character minutes.

    The text before the first ":" is kept as the hour part; the text between
    the first and second ":" is right-padded with "0" to two characters, so
    "10:3" becomes "10:30". Anything after a second ":" is dropped.

    Parameters
    ----------
    x : str
        Time string, expected to use ":" as separator.

    Returns
    -------
    str
        The normalised time. Falsy input (e.g. "") and strings that contain
        no ":" at all (e.g. "nan" or "13") are returned unchanged instead of
        raising ``IndexError``.
    """
    if not x:
        return x
    hours, colon, remainder = x.partition(":")
    if not colon:
        # Nothing to normalise: there is no minute part to pad.
        return x
    minutes = remainder.split(":")[0]
    return hours + ":" + minutes.ljust(2, '0')
# Fill missing times *before* casting to str: casting first typically turns
# NaN into the literal text "nan", after which fillna("") has nothing to fill.
# Dots are then swapped for colons and the minutes padded by f().
presentations["presentation_time"] = (
    presentations["presentation_time"]
    .fillna("")
    .astype(str)
    .apply(lambda x: f(x.replace('.', ":")))
)

In [12]:
# Sessions whose id starts with one of these two-character prefixes get a
# duplicated "streaming" entry. The comparison uses only the first two
# characters of the id, so the three-character "K10" entry can never match on
# its own; ids beginning with "K10" are already caught by "K1".
stream_prefixes = ["P1", "P2", "K1", "K2", "K3", "K4", "K5", "K6", "K7", "K8", "K9", "K10"]
is_streamed = presentations["session_id"].map(lambda sid: sid[:2]).isin(stream_prefixes)

# Copy the matching rows, mark them with an "_S" id suffix and a title prefix.
streams = presentations.loc[is_streamed].copy()
streams["session_id"] = streams["session_id"].map(lambda sid: sid + "_S")
streams["session_name"] = "STREAMING:" + streams["session_name"]

# Append the streaming copies to the full table.
presentations = pd.concat([presentations, streams])


def _html_break_after_streaming(name):
    """Insert an HTML line break after the "STREAMING:" prefix; pass missing values through."""
    if pd.isna(name):
        return name
    return name.replace("STREAMING:", "STREAMING:<br>")


presentations["session_name"] = presentations["session_name"].apply(_html_break_after_streaming)

# Merge all data

In [13]:
# Session-level table; its "location" column is renamed to "room".
sessions = pd.read_csv("./data_for_agenda - sessions.csv")
sessions = sessions.rename(columns={"location": "room"})


def _newline_after_streaming(title):
    """Put a newline after the "STREAMING:" prefix; pass missing values through."""
    return title if pd.isna(title) else title.replace("STREAMING:", "STREAMING:\n")


sessions["session_title"] = sessions["session_title"].map(_newline_after_streaming)

# NOTE(review): absolute, machine-specific path, and pickle files execute
# arbitrary code on load - only read this from a trusted source.
contribution_cols = ["number", "keywords", "presenter", "title", "abstract", "authors", "id"]
contributions = pd.read_pickle(
    "/Users/lauale/Dropbox/Projects/openreview-py/IC2S2_stuff/data_for_programme2.pkl"
)[contribution_cols]

# Three colours are shared among the session-id prefixes.
_BLUE = "#7084AB"
_RED = "#CB746D"
_PALE = "#ECF8F9"

color_dict = {
    "RA": _BLUE,
    "T": _PALE,
    "OP": _RED,
    "CL": _RED,
    "K": _RED,
    "CB": _BLUE,
    "LU": _BLUE,
    "LL": _BLUE,
    "PT": _RED,
    "P": _RED,
    "A": _PALE,
    "B": _PALE,
    "C": _PALE,
    "D": _PALE,
    "E": _PALE,
    "F": _PALE,
    "G": _PALE,
    "H": _PALE,
    "SD": _BLUE,
}


def _color_key(session_id):
    """Return the part of the id before the first "_" with all digits removed."""
    head = session_id.split("_")[0]
    return "".join(ch for ch in head if not ch.isdigit())


# Unknown prefixes raise KeyError, exactly like a direct dict lookup.
sessions["color"] = sessions["session_id"].map(lambda sid: color_dict[_color_key(sid)])



In [14]:
# Outer-join presentations with session metadata so rows that exist on only
# one side are kept as well.
data = presentations.merge(sessions, on="session_id", how="outer")
data["time"] = data["time_start"] + " - " + data["time_end"]

# Left-join the contribution records on the submission number; columns that
# exist on both sides get a "_s" suffix on the contributions side.
data = data.merge(
    contributions,
    how="left",
    left_on="submission_id",
    right_on="number",
    suffixes=["", "_s"],
)

# Prefer presenter/title from the contribution record, falling back to the
# value already present, then discard the suffixed helper columns.
for field in ("presenter", "title"):
    data[field] = data[field + "_s"].fillna(data[field])
data = data.drop(columns=["presenter_s", "title_s"])


def _join_if_list(value):
    """Turn a list into a comma-separated string; leave any other value as is."""
    return ", ".join(value) if type(value) == list else value


for field in ("authors", "keywords"):
    data[field] = data[field].apply(_join_if_list)

# Expose the session bounds under the names used further down.
data["start_time"] = data["time_start"]
data["end_time"] = data["time_end"]

In [15]:
# Columns carried into the agenda, in output order.
agenda_columns = [
    "day", "authors", "session_title", "time", "title", "presenter", "room",
    "color", "start_time", "end_time", "type", "presentation_time",
    "session_id", "link", "abstract", "Bio", "poster_board_id",
    "session_chair", "Sponsor",
]
data = data[agenda_columns].rename(columns={"session_title": "session"})

# The closing session and its streaming twin get fixed titles.
closing_titles = {
    "CL": "Awards and closing",
    "CL_S": "STREAMING: Awards and closing",
}
for closing_id, closing_title in closing_titles.items():
    data.loc[data["session_id"] == closing_id, "session"] = closing_title

In [16]:
from difflib import SequenceMatcher


def underline_presenter(x):
    """Format a row's "|"-separated authors, underlining the presenter.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide string values under "presenter" and "authors"; the
        authors are separated by "|".

    Returns
    -------
    str
        The authors joined with ", ", where the author that best matches the
        presenter (as chosen by ``similarity``) is wrapped in ``<u>...</u>``.
        An empty authors field yields "" (instead of a stray "<u></u>"), and
        an empty presenter yields the plain author list (instead of
        underlining the first author, which wins when every score ties at 0).
    """
    authors_field = x["authors"]
    if not authors_field:
        return ""

    names = authors_field.split("|")
    if not x["presenter"]:
        # No presenter to match against: nothing should be underlined.
        return ", ".join(names)

    presenter = similarity(x["presenter"], names)
    return ", ".join("<u>{}</u>".format(name) if name == presenter else name for name in names)

def similar(a, b):
    """Return difflib's similarity ratio between sequences ``a`` and ``b``.

    The value is a float between 0 and 1 as computed by
    ``SequenceMatcher.ratio()``; no junk filter is used.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

def similarity(presenter, authors):
    """Return the entry of ``authors`` most similar to ``presenter``.

    Names are compared case-insensitively with ``similar``; when several
    entries share the top score, the earliest one is returned. An empty
    ``authors`` sequence raises ``ValueError`` (from ``max``).
    """
    def score(candidate):
        return similar(presenter.lower(), candidate.lower())

    return max(authors, key=score)


# Replace missing values with "" first, so rows without authors or presenter
# reach the helper as empty strings rather than NaN.
rows_without_nan = data.fillna("")
data["authors"] = rows_without_nan.apply(underline_presenter, axis=1)


#  New

In [17]:

def _json_default(value):
    """Fallback for ``json.dumps``: unwrap scalar objects exposing ``.item()``.

    ``json.dumps`` calls this only for objects it cannot serialise itself.
    NumPy scalars (e.g. ``int64`` group keys or ``.values[0]`` results) offer
    ``.item()``, which returns the equivalent built-in Python value. Anything
    else raises ``TypeError``, matching the encoder's default behaviour.
    """
    if hasattr(value, "item"):
        return value.item()
    raise TypeError("Object of type {} is not JSON serializable".format(type(value).__name__))


# Group the DataFrame by day and session (missing values become "" so they
# serialise as empty strings rather than NaN).
grouped = data.fillna("").groupby(['day', 'session_id'])

# Create the agenda dictionary
agenda = {
    'days': []
}

# Maps each day to its entry inside agenda['days'], so the entry can be found
# without scanning the list for every session.
day_entries = {}

# Iterate over each group and create the agenda structure
for (day, session), group in grouped:
    # Session-level fields are taken from the first row of the group.
    session_data = {
        'title': group["session"].values[0],
        'time': group['time'].values[0],
        "start_time": group['start_time'].values[0],
        "end_time": group['end_time'].values[0],
        'presentations': [],
        "room": group["room"].values[0],
        "color": group["color"].values[0],
        "type": group["type"].values[0],
        "sessionId": session,
        "session_chair": group["session_chair"].values[0],
        "sponsor": group["Sponsor"].values[0]
    }

    # One entry per row (presentation) in the session.
    for _, row in group.iterrows():
        presentation_data = {
            'title': row['title'],
            'presenter': row['presenter'],
            "presentation_time": row["presentation_time"],
            "link": row["link"],
            "authors": row["authors"],
            "bio": row["Bio"],
            "abstract": row["abstract"],
            "poster_board_id": row["poster_board_id"],
        }
        session_data['presentations'].append(presentation_data)

    # Create the day entry on first sight, then attach the session to it.
    day_data = day_entries.get(day)
    if day_data is None:
        day_data = {
            'day': day,
            'sessions': []
        }
        day_entries[day] = day_data
        agenda['days'].append(day_data)
    day_data['sessions'].append(session_data)

# Convert the agenda dictionary to JSON
agenda_json = json.dumps(agenda, indent=4, default=_json_default)

# Save the agenda JSON to a file.
# NOTE(review): absolute, machine-specific output path.
# The handle is not called `f`, so it does not shadow the helper function
# `f` defined in an earlier cell.
with open('/Users/lauale/Desktop/website_v3/agenda2.json', 'w') as agenda_file:
    agenda_file.write(agenda_json)