# Session assignments
Interactively determines the assignments of presentations to sessions.

## Paths and parameters

In [1]:
# Setup cell: defines the run parameters, resolves the data/code folders for
# the current environment (Google Colab or local), imports the project
# modules, and loads the ratings table with its assignment columns cleared.

# Division to analyze
division = 'dcb_dvm'

# Presentations ('talks' or 'posters')
presentation_type = 'talks'

# Minimum and maximum number of presentations per session
min_size = 6
max_size = 8

# Import outside packages
import os
import sys

import pandas as pd

# Mount Google Drive, if running on Google Colab
if 'COLAB_GPU' in os.environ:
    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')

    # Set the data and code roots to Google Drive folders
    data_root = '/content/drive/MyDrive/meeting_planning_2024/' + division
    code_root = '/content/drive/MyDrive/Colab Notebooks/conference_planner'

# If running locally, set the data and code roots to local folders
else:
    code_root = '/Users/mmchenry/Documents/code/conference_planner'
    data_root = '/Users/mmchenry/Documents/Projects/meeting_planner_test/division_files/' + division

# Add the code folder to the import path in BOTH environments, so that the
# custom packages below can be imported regardless of the notebook's working
# directory. The membership check keeps re-running this cell from adding
# duplicate entries.
if code_root not in sys.path:
    sys.path.append(code_root)

# Custom packages
import make_sessions as ms
import compile_program as cp

# Load abstract data for presentation type
in_path = os.path.join(data_root, presentation_type + '_ratings.csv')
df = pd.read_csv(in_path)

# Clear out any previous assignments and write the table back to the SAME
# file it was read from (this overwrites the ratings csv in place; it does
# not create a separate backup copy).
# NOTE(review): presumably ms.run_hierarchical re-reads this file from
# data_root, which is why the cleared table is saved - confirm.
df['session_num'] = None
df['talk_num'] = None
df['major_group'] = None
df.to_csv(in_path, index=False)

# Adjust table display setting so that long text columns are not truncated
pd.set_option('display.max_colwidth', None)


## Hierarchical clustering
Creates a phylogeny (i.e., dendrogram) of relationships between talks and defines the major branches, using a distance threshold. 

Note that the weighting of each keyword, defined in keyword_weights.xlsx, is critical in defining which keywords will be used for the major branches. 
In particular, the 'weight_clustering' column values are multiplied by the keyword ratings from GPT. 

Any zero weighting values are excluded from consideration in determining the major branches.
It is a good idea at this stage to zero out any keywords that are non-essential in differentiating major categories of talks.


In [1]:
# Clustering cell: clusters the presentations into major branches, summarizes
# and plots them, and writes an html listing of the branches.

# Load keyword weights. This must happen here: df_weights is needed by the
# summary and dendrogram calls below, and no earlier cell defines it, so
# running the notebook top-to-bottom would otherwise raise a NameError.
df_weights = pd.read_excel(os.path.join(data_root, 'keyword_weights.xlsx'))

# Run initial clustering
# NOTE(review): min_size=16 is a literal specific to this call and differs
# from the per-session min_size set in the parameters cell - presumably it is
# the minimum branch size; confirm against ms.run_hierarchical.
df, distance_threshold = ms.run_hierarchical(data_root, data_type='similarity', presentation_type=presentation_type, min_size=16)

# Summarize the major keywords for each branch
branch_summ = ms.summarize_branch_keywords(df, df_weights, num_keywords=9)

# Plot dendrogram
ms.plot_dendrogram(df, df_weights, distance_threshold)

# Make complete dataframe for the division
df_full = cp.merge_dataframes(df, data_root)

# Output the branch listing to html, including the branch keyword summary
cp.list_branches_html(df_full, data_root, presentation_type, branch_summ, include_summary=True)

Traceback (most recent call last):
  File "_pydevd_bundle/pydevd_cython.pyx", line 577, in _pydevd_bundle.pydevd_cython.PyDBFrame._handle_exception
  File "_pydevd_bundle/pydevd_cython.pyx", line 312, in _pydevd_bundle.pydevd_cython.PyDBFrame.do_wait_suspend
  File "/Users/mmchenry/miniforge3/envs/sicb/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd.py", line 2070, in do_wait_suspend
    keep_suspended = self._do_wait_suspend(thread, frame, event, arg, suspend_type, from_this_thread, frames_tracker)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mmchenry/miniforge3/envs/sicb/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd.py", line 2106, in _do_wait_suspend
    time.sleep(0.01)
KeyboardInterrupt


NameError: name 'pd' is not defined

## Session making (i.e., sequencing)
Finally, each branch is divided into sessions of 6-8 talks that are in an optimal order by their keyword ratings.

This stage, called sequencing, ignores all keywords that were used for hierarchical clustering (i.e., those whose weight_clustering value is non-zero).
The weightings of the remaining keywords are specified in the 'weight_sequencing' column in keyword_weights.xlsx.

In [None]:
# Sequencing cell: reads the keyword weights, divides each branch into
# sessions, and rebuilds the full divisional dataframe.

# Read the keyword weighting table from the division's data folder
weights_path = os.path.join(data_root, 'keyword_weights.xlsx')
df_weights = pd.read_excel(weights_path)

# Group similar talks into sessions, bounded by the configured session sizes
df = ms.process_each_branch(
    df,
    df_weights,
    min_size=min_size,
    max_size=max_size,
    echo=True,
)

# Rebuild the complete divisional dataframe, now with session numbers
df_full = cp.merge_dataframes(df, data_root)

## Render the schedule in html

In [3]:
# Write the divisional schedule out as an html page, with the ratings shown
# (up to num_ratings per entry) and the summary left out
cp.render_div_schedule_html(
    df_full,
    df,
    df_weights,
    data_root,
    presentation_type,
    include_summary=False,
    include_ratings=True,
    num_ratings=8,
)

Schedule written to /Users/mmchenry/Documents/Projects/meeting_planner_test/division_files/dcb_dvm/talks.html
Copy and paste path into a web browser
