M/D/Time
Poster
1012.

12/4/1900 x225
12/5/1900 x225
12/6/1900 x230

Oral x2 tracks
12/5/1040 x3
12/5/1450 x2
12/5/1620 x3

12/6/1020 x4
12/6/1450 x2
12/6/1620 x3

12/7/1110 x3

Spotlight x2 tracks
12/5/1040 x7
12/5/1450 x6
12/5/1620 x11

12/6/1020 x8
12/6/1450 x6
12/6/1620 x11

12/7/1110 x7




In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from collections import defaultdict
from sklearn.manifold import MDS, TSNE
from sklearn.feature_extraction.text import CountVectorizer
from lxml import etree
from tqdm import tqdm_notebook, tnrange

from ortools.linear_solver import pywraplp
from ortools.constraint_solver import pywrapcp

from IPython.display import display, clear_output

import sys
from cmtutils import extract_stem_words as bow

In [2]:

def paper2tmpsvec(xmlfn, papers):    
    rids = set()
    pids = set()
    paper_reviewer_tpms = {}
    doc = etree.parse(xmlfn)
    for s in tqdm_notebook(doc.xpath('submission')):
        pid = int(s.get('submissionId'))
        if pid not in papers:
            continue
        pids.add(pid)
        rev_rels = {}
        for r in s.xpath('metareviewer|reviewer'):
            rid = r.get('email').lower()
            score = float(r.get('score'))
            rev_rels[rid] = score
            rids.add(rid)            
        paper_reviewer_tpms[pid] = rev_rels
        
    rid2idx = dict(zip(rids, range(len(rids))))
    paper_tpmsvec = []
    for p, rid_rels in tqdm_notebook(paper_reviewer_tpms.items()):
        vec = np.zeros(len(rids))
        for rid, rel in rid_rels.items():
            vec[rid2idx[rid]] = rel
        paper_tpmsvec.append((p, vec))
    
    for p in papers:
        if p not in pids:
            paper_tpmsvec.append((int(p), np.zeros(len(rids))))
    df = pd.DataFrame.from_records(paper_tpmsvec, columns=["pid", "tpms"])
    return df, rid2idx

# df [pid, [np array]]
# dict reviewers to paper scores

In [3]:
def paper2tmp(fname, papers):
    rids = set()
    rids = set()
    pids = set()
    paper_reviewer_tpms = {}
    
    #  Paper ID                               1
    #  Email          16211020010@fudan.edu.cn
    #  TPMS Score                     0.553318
    
    tpms = pd.read_csv(fname)
    
    for index, row in tqdm_notebook(tpms.iterrows(), total=14535185):
        
        Id = row['Paper ID']
        
        if Id in papers:
            email = row[' Email'].lower()
            score = row[' TPMS Score']
            pids.add(Id)

            if Id not in paper_reviewer_tpms.keys():
                paper_reviewer_tpms[Id] = {}

            paper_reviewer_tpms[Id][email] = score
            rids.add(email)
    
    rid2idx = dict(zip(rids, range(len(rids))))
    paper_tpmsvec = []
    
    
    for p, rid_rels in tqdm_notebook(paper_reviewer_tpms.items()):
        vec = np.zeros(len(rids))
        for rid, rel in rid_rels.items():
            vec[rid2idx[rid]] = rel
        paper_tpmsvec.append((p, vec))
    
    for p in papers:
        if p not in pids:
            paper_tpmsvec.append((int(p), np.zeros(len(rids))))
    df = pd.DataFrame.from_records(paper_tpmsvec, columns=["pid", "tpms"])
      
    return df, rid2idx

        
        
        
        
    

In [4]:
# each row is a paper
def cosinesim(A, znorm=True):    
    if znorm:
        mean = np.mean(A, axis=1).reshape((-1, 1))
        std = np.std(A, axis=1).reshape((-1, 1))    
        A = (A - mean) / std        
    A[np.isnan(A)] = 0
    sim = np.dot(A, A.T)    
    D = np.diag(sim)    
    invD = 1./D
    invD[np.isinf(invD)] = 0    
    invD = np.sqrt(invD)    
    cosine = sim * invD
    cosine = cosine.T * invD
    return cosine

In [5]:
def pairwise_equal(strings):
    A = [hash(a) for a in strings]
    return np.equal.outer(A, A).astype(float)

In [6]:
# papers metadata and decision downloaded from: https://docs.google.com/spreadsheets/d/1KHytYGNwTWuBpr7MhxEc9HL_Bd9hE48VA9SOFE6EsO0/edit#gid=0
papers_csv = 'Arangements.csv'

papers_df = pd.read_csv(papers_csv, usecols=['Paper ID', 'Paper Title', 'Abstract', 'Subject Areas', 'Decision', 'Author Emails'], encoding='utf-8')
papers_df['Paper Title'] = papers_df['Paper Title'].apply(lambda x: x.strip())
papers_df['Abstract'] = papers_df['Abstract'].apply(lambda x: x.strip())
papers_df['Primary Subject Area'] = papers_df['Subject Areas'].apply(lambda x: x.split(';')[0][:-1])
papers_df['Top-level Primary Subject Area'] = papers_df['Primary Subject Area'].apply(lambda x: x.split('/')[0])
papers_df[['Paper Title', 'Abstract']]
papers_df['bow'] = papers_df[['Paper Title', 'Abstract']].apply(lambda x: bow(x[0]) + bow(x[1]), axis=1)
# papers_df['bow'] = papers_df[['Paper Title', 'Abstract']].apply(lambda x: bow(x[0]) + bow(x[1]), axis=1)

papers_df[['Paper Title', 'bow']]


Unnamed: 0,Paper Title,bow
0,Efficient Algorithms for Non-convex Isotonic R...,"[effici, algorithm, non, convex, isoton, regre..."
1,Structure-Aware Convolutional Neural Networks,"[structur, awar, convolut, neural, network, co..."
2,Kalman Normalization,"[kalman, normal, indispens, compon, batch, nor..."
3,HOGWILD!-Gibbs can be PanAccurate,"[hogwild, gibb, panaccur, asynchron, gibb, sam..."
4,Text-Adaptive Generative Adversarial Networks:...,"[text, adapt, gener, adversari, network, manip..."
5,IntroVAE: Introspective Variational Autoencode...,"[introva, introspect, variat, autoencod, photo..."
6,Doubly Robust Bayesian Inference for Non-Stati...,"[doubli, robust, bayesian, infer, non, station..."
7,Adapted Deep Embeddings: A Synthesis of Method...,"[adapt, deep, embed, synthesi, method, shot, i..."
8,Generalized Inverse Optimization through Onlin...,"[gener, invers, optim, onlin, learn, invers, o..."
9,An Off-policy Policy Gradient Theorem Using Em...,"[polici, polici, gradient, theorem, use, empha..."


In [7]:
papers_df[['Paper Title', 'Top-level Primary Subject Area']]

Unnamed: 0,Paper Title,Top-level Primary Subject Area
0,Efficient Algorithms for Non-convex Isotonic R...,Optimization
1,Structure-Aware Convolutional Neural Networks,Deep Learning
2,Kalman Normalization,Deep Learning
3,HOGWILD!-Gibbs can be PanAccurate,Probabilistic Methods
4,Text-Adaptive Generative Adversarial Networks:...,Applications
5,IntroVAE: Introspective Variational Autoencode...,Deep Learning
6,Doubly Robust Bayesian Inference for Non-Stati...,Applications
7,Adapted Deep Embeddings: A Synthesis of Method...,Algorithms
8,Generalized Inverse Optimization through Onlin...,Algorithms
9,An Off-policy Policy Gradient Theorem Using Em...,Reinforcement Learning and Planning


In [8]:
# TPMS scores downloaded from: https://cmt.research.microsoft.com/NIPS2017/Protected/Chair/ManageMetaReviewAssignmentsExport.aspx?data=externalmatching&view=cs&format=xml&serviceid=1)
# tpms_df, rid2idx = paper2tmpsvec('../../data/assignment/rev_tpms.xml', set(papers_df['Paper ID']))

tpms_df, rid2idx = paper2tmp('ReviewerTpmsScores_NIPS2018.csv', set(papers_df['Paper ID']))
# paper2tmp('ReviewerTpmsScores_NIPS2018.csv')

# print tpms_df
# print rid2idx



HBox(children=(IntProgress(value=0, max=14535185), HTML(value=u'')))




HBox(children=(IntProgress(value=0, max=1008), HTML(value=u'')))




In [9]:
df = pd.merge(left=papers_df, right=tpms_df, how='left', left_on='Paper ID', right_on='pid')

## Project papers into one-dimensional space

In [10]:
TPMS = cosinesim(np.asarray(df.tpms.tolist()))
TPSA = pairwise_equal(df['Top-level Primary Subject Area'])
PSA = pairwise_equal(df['Primary Subject Area'])
BOW = CountVectorizer('content', tokenizer=lambda x: x, lowercase=False, binary=True).fit_transform(df.bow).todense()
BOW = np.asarray(BOW, dtype=float)
BOW = cosinesim(BOW, znorm=False)

  
  # Remove the CWD from sys.path while we load stuff.


In [11]:
#dissim = np.clip(3. - TPSA - PSA - TPMS, a_min=0., a_max=3.)
sims_all = [TPMS, TPSA, PSA, BOW]
dissim_all = (float(len(sims_all)) - sum(sims_all)) / len(sims_all)
dissim_all = np.clip(dissim_all, a_min=0., a_max=float(len(sims_all)))
dissim_tpms = np.clip(1. - TPMS, a_min=0., a_max=1.)

In [12]:
tsne_all = TSNE(n_components=1, metric='precomputed').fit_transform(dissim_all)
tsne_tpms = TSNE(n_components=1, metric='precomputed').fit_transform(dissim_tpms)

In [13]:
df['tsne_all'] = tsne_all
df['tsne_tpms'] = tsne_tpms

In [14]:
# df.to_csv('pre_arrangement_final.csv', encoding='utf-8', index=False, columns=['Paper ID', 'Paper Title', 'Abstract', 'Subject Areas', 
#                                                                    'Primary Subject Area', 'Top-level Primary Subject Area',
#                                                                    'tsne_all', 'tsne_tpms', 'Decision'])

# df = pd.read_csv('pre_arrangement.csv', encoding='utf-8')


In [14]:
df

Unnamed: 0,Paper ID,Paper Title,Abstract,Author Emails,Subject Areas,Decision,Primary Subject Area,Top-level Primary Subject Area,bow,pid,tpms,tsne_all,tsne_tpms
0,29,Efficient Algorithms for Non-convex Isotonic R...,We consider the minimization of submodular fun...,francis.bach@inria.fr,Optimization/Submodular Optimization*; Optimiz...,Poster,Optimization/Submodular Optimization,Optimization,"[effici, algorithm, non, convex, isoton, regre...",29,"[0.724210006, 0.547847061, 0.62121431, 0.69992...",-65.037216,43.380173
1,33,Structure-Aware Convolutional Neural Networks,Convolutional neural networks (CNNs) are inher...,jianlong.chang@nlpr.ia.ac.cn;jie.gu@nlpr.ia.ac...,Deep Learning*; Deep Learning/CNN Architecture...,Poster,Deep Learning,Deep Learning,"[structur, awar, convolut, neural, network, co...",33,"[0.722796601, 0.7524726279999999, 0.681080612,...",-27.842775,-41.931042
2,34,Kalman Normalization,"As an indispensable component, Batch Normaliza...",wanggrun@mail2.sysu.edu.cn;jiefengpeng@gmail.c...,Deep Learning/CNN Architectures*; Applications...,Poster,Deep Learning/CNN Architectures,Deep Learning,"[kalman, normal, indispens, compon, batch, nor...",34,"[0.605652029, 0.624528822, 0.5549479039999999,...",-45.000961,-22.988417
3,37,HOGWILD!-Gibbs can be PanAccurate,Asynchronous Gibbs sampling has been recently ...,costis@csail.mit.edu;ndikkala@mit.edu;jayanti@...,Probabilistic Methods/Distributed Inference*; ...,Poster,Probabilistic Methods/Distributed Inference,Probabilistic Methods,"[hogwild, gibb, panaccur, asynchron, gibb, sam...",37,"[0.6039084920000001, 0.571028136, 0.547187392,...",-86.396667,24.022959
4,40,Text-Adaptive Generative Adversarial Networks:...,This paper addresses the problem of manipulati...,shnnam@yonsei.ac.kr;kim_yunji@yonsei.ac.kr;seo...,Applications/Computational Photography*; Appli...,Spotlight,Applications/Computational Photography,Applications,"[text, adapt, gener, adversari, network, manip...",40,"[0.6074082789999999, 0.604121554, 0.5843663610...",10.959604,-44.323528
5,59,IntroVAE: Introspective Variational Autoencode...,We present a novel introspective variational a...,huaibo.huang@cripac.ia.ac.cn;zhihang.li@nlpr.i...,Deep Learning/Generative Models*; Deep Learnin...,Poster,Deep Learning/Generative Models,Deep Learning,"[introva, introspect, variat, autoencod, photo...",59,"[0.554898813, 0.566786583, 0.514959909, 0.5606...",-52.689449,-14.038417
6,68,Doubly Robust Bayesian Inference for Non-Stati...,We present the very first robust Bayesian Onli...,j.knoblauch@warwick.ac.uk;j.e.jewson@warwick.a...,Applications/Time Series Analysis*; Algorithms...,Poster,Applications/Time Series Analysis,Applications,"[doubli, robust, bayesian, infer, non, station...",68,"[0.811906661, 0.7286812490000001, 0.745909935,...",21.148312,-1.963071
7,75,Adapted Deep Embeddings: A Synthesis of Method...,The focus in machine learning has branched bey...,tysc7237@colorado.edu;karl.ridgeway@colorado.e...,Algorithms/Multitask and Transfer Learning*; A...,Spotlight,Algorithms/Multitask and Transfer Learning,Algorithms,"[adapt, deep, embed, synthesi, method, shot, i...",75,"[0.7246898359999999, 0.74860931, 0.72164041200...",57.467770,-28.714304
8,77,Generalized Inverse Optimization through Onlin...,Inverse optimization is a powerful paradigm fo...,chaosheng@pitt.edu;yiran.chen@duke.edu;bzeng@p...,Algorithms/Online Learning*; Applications/Quan...,Poster,Algorithms/Online Learning,Algorithms,"[gener, invers, optim, onlin, learn, invers, o...",77,"[0.8553071390000001, 0.729569048, 0.771284843,...",62.712032,38.852795
9,85,An Off-policy Policy Gradient Theorem Using Em...,Policy gradient methods are widely used for co...,imani@ualberta.ca;graves@ualberta.ca;whitem@ua...,Reinforcement Learning and Planning/Reinforcem...,Poster,Reinforcement Learning and Planning/Reinforcem...,Reinforcement Learning and Planning,"[polici, polici, gradient, theorem, use, empha...",85,"[0.5770863589999999, 0.523376441, 0.518179388,...",85.118027,6.052812


In [16]:
df['Top-level Primary Subject Area'].value_counts()

Algorithms                                           207
Deep Learning                                        194
Applications                                         193
Probabilistic Methods                                106
Theory                                                97
Reinforcement Learning and Planning                   85
Optimization                                          80
Neuroscience and Cognitive Science                    33
Deep Learnin                                           7
Data, Competitions, Implementations, and Software      6
Application                                            2
Optimizatio                                            1
Reinforcement Learning and Plannin                     1
Name: Top-level Primary Subject Area, dtype: int64

In [17]:
import matplotlib.pyplot as plt

df[df['Top-level Primary Subject Area'] == 'Deep Learning'].sample(20).tsne_all.describe()


count    20.000000
mean     77.250687
std       8.525471
min      64.499245
25%      69.843943
50%      77.044952
75%      82.516861
max      91.969955
Name: tsne_all, dtype: float64

In [18]:
df[df['Top-level Primary Subject Area'] == 'Applications'].sample(20).tsne_all.describe()


count    20.000000
mean    -14.494806
std       6.922783
min     -28.435780
25%     -19.532537
50%     -14.800924
75%      -8.996397
max      -3.179419
Name: tsne_all, dtype: float64

In [19]:
df[df['Primary Subject Area'] == 'Reinforcement Learning and Planning/Reinforcement Learning'].sample(20).tsne_all.describe()


count    20.000000
mean     48.934467
std       0.504604
min      48.104179
25%      48.520535
50%      48.901932
75%      49.327170
max      49.699711
Name: tsne_all, dtype: float64

### Spotlight paper-session assignment

In [15]:
# Day 1
# 10:05-10:45 3 tracks (1 oral, 2 spotlights, 1 oral in each)
# 3:30-5 3 tracks (4 spotlights, 1 oral, 4 spotlights, 1 oral, 4 spotlights in each)

# Day 2
# 9:45-10:45 3 tracks (4 spotlights, 1 oral, 5 spotlights in each)
# 3:30-5 3 tracks (4 spotlights, 1 oral, 4 spotlights, 1 oral, 4 spotlights in each)

# Day 3
# 9:45-10:45 3 tracks (4 spotlights, 1 oral, 5 spotlights in each)
# 3:30-5 3 tracks (4 spotlights, 1 oral, 4 spotlights, 1 oral, 4 spotlights in each)

# set by Program Chairs
# Day_Session_Time = Size
sess_size = dict(
D1_S1_T1=2,
D1_S1_T2=2,
D1_S1_T3=2,
D1_S2_T1=12,
D1_S2_T2=12,
D1_S2_T3=12,
D2_S1_T1=9,
D2_S1_T2=9,
D2_S1_T3=9,
D2_S2_T1=12,
D2_S2_T2=12,
D2_S2_T3=12,
D3_S1_T1=9,
D3_S1_T2=9,
D3_S1_T3=9,
D3_S2_T1=12,
D3_S2_T2=12,
D3_S2_T3=12)

In [16]:
sum(sess_size.values())

168

In [22]:
# oral paper-session assignments set by Program Chairs
# for appending: orals done manually. 
# cur_sess_asgn = pd.read_csv('/Users/choonhui/Downloads/session_arrangement.20170927_rob.csv')
# orals = cur_sess_asgn[cur_sess_asgn.Decision == 'Oral'][['Paper ID', 'Oral Session']]
# newdf = df.merge(orals, on='Paper ID', how='left', suffixes=('','_'))

cur_sess_asgn = pd.read_csv('Oral_Arrange.csv', encoding='utf-8')
orals = cur_sess_asgn[['Paper ID', 'Session']]
newdf = df.merge(orals, on='Paper ID', how='left', suffixes=('','_'))



In [23]:
orals

Unnamed: 0,Paper ID,Session
0,431,D2_S1_T2
1,496,D1_S1_T2
2,1026,D3_S2_T2
3,1143,D3_S2_T2
4,1147,D3_S2_T3
5,1242,D2_S2_T1
6,1246,D1_S2_T2
7,1319,D2_S2_T2
8,1446,D3_S2_T3
9,1520,D2_S2_T3


#### Build a Spotlight-to-oral-session similarity matrix

In [24]:
oral_sess = newdf[newdf.Decision == 'Oral']['Session']
oral_idx = newdf[newdf.Decision == 'Oral'].index
spot_idx = newdf[newdf.Decision == 'Spotlight'].index

In [25]:
# Assigns orals to groups
groups = {}
for g, idx in zip(oral_sess, oral_idx):
    if g not in groups:
        groups[g] = []
    groups[g].append(idx)
groups = sorted(groups.items())

In [26]:
groups

[(u'D1_S1_T1', [714L, 805L]),
 (u'D1_S1_T2', [82L, 925L]),
 (u'D1_S1_T3', [315L, 843L]),
 (u'D1_S2_T1', [653L, 783L]),
 (u'D1_S2_T2', [229L, 607L]),
 (u'D1_S2_T3', [656L, 949L]),
 (u'D2_S1_T1', [819L]),
 (u'D2_S1_T2', [71L]),
 (u'D2_S1_T3', [428L]),
 (u'D2_S2_T1', [226L, 503L]),
 (u'D2_S2_T2', [241L, 741L]),
 (u'D2_S2_T3', [268L, 985L]),
 (u'D3_S1_T1', [391L]),
 (u'D3_S1_T2', [531L]),
 (u'D3_S1_T3', [884L]),
 (u'D3_S2_T1', [759L, 916L]),
 (u'D3_S2_T2', [190L, 208L]),
 (u'D3_S2_T3', [210L, 253L])]

In [27]:
# for the spotlight
# 
TPMS_s2g = np.zeros((len(spot_idx), len(groups)))
TPSA_s2g = np.zeros((len(spot_idx), len(groups)))
PSA_s2g = np.zeros((len(spot_idx), len(groups)))
BOW_s2g = np.zeros((len(spot_idx), len(groups)))

for i, si in enumerate(spot_idx):
    for j, (g, ois) in enumerate(groups):
        max_tpms = max(TPMS[si][ois])
        TPMS_s2g[i,j] = max_tpms
        
        max_tpsa = max(TPSA[si][ois])
        TPSA_s2g[i,j] = max_tpsa

        max_psa = max(PSA[si][ois])
        PSA_s2g[i,j] = max_psa

        max_bow = max(BOW[si][ois])
        BOW_s2g[i,j] = max_bow

sim_tpms_tpsa_psa_bow = TPMS_s2g + TPSA_s2g + PSA_s2g + BOW_s2g
print(sim_tpms_tpsa_psa_bow.shape)

(168L, 18L)


In [28]:
sim_tpms_tpsa_psa_bow

array([[0.78987471, 1.99515441, 0.6408277 , ..., 0.72701925, 2.11745715,
        0.32900658],
       [0.88371475, 1.04764761, 1.63510003, ..., 0.84653585, 0.99328952,
        0.35412792],
       [0.90716266, 0.86574053, 2.1539758 , ..., 0.95200101, 0.52260961,
        0.9137021 ],
       ...,
       [0.87563601, 0.73612839, 3.12871888, ..., 0.79650968, 0.36631355,
        1.14169679],
       [0.71338357, 0.80920969, 0.71997002, ..., 0.81354574, 0.58068742,
        0.54964782],
       [0.8442645 , 0.78832288, 0.7234235 , ..., 0.91279386, 0.65770574,
        0.55635297]])

#### Integer programming for finding best spotlight session assignment

In [29]:
def assign_spotlights(similarities, constraints):
    solver = pywraplp.Solver('solver', pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)
    
    print solver
    
    num_spots, num_groups = similarities.shape
    spots = range(num_spots)
    groups = range(num_groups)
    
    # defind variables
    x = {}
    for i in spots:
        for j in groups:
            x[i, j] = solver.BoolVar('x[%i,%i]'%(i,j))
    
    # define objective
    solver.Maximize(solver.Sum(similarities[i,j]*x[i,j] for i in spots for j in groups))
    
    # define constraints per group
    for j in groups:
        solver.Add(solver.Sum(x[i,j] for i in spots) == constraints[j])
                
    # define constraints per spot
    for i in spots:
        solver.Add(solver.Sum(x[i,j] for j in groups) == 1)
        
    sol = solver.Solve()
    
    print 'objective:', solver.Objective().Value()
    
    assigned_groups = {}
    for j in groups:
        assigned_groups[j] = []
        for i in spots:
            if x[i,j].solution_value() > 0:
                assigned_groups[j].append(i)
                
    #assignments = [(i,j) for i in spots for j in groups if x[i,j].solution_value() > 0]
    return assigned_groups
    

In [30]:
constraints = [sess_size[g] for g, ois in groups]
print(constraints)

[2, 2, 2, 12, 12, 12, 9, 9, 9, 12, 12, 12, 9, 9, 9, 12, 12, 12]


In [31]:
# should be larger than random
assgn_tpms_tpsa_psa_bow = assign_spotlights(sim_tpms_tpsa_psa_bow, constraints)

<ortools.linear_solver.pywraplp.Solver; proxy of <Swig Object of type 'operations_research::MPSolver *' at 0x00000000D05DA960> >
objective: 334.728593321


In [32]:
def print_assignment(assgn, session_topic_only=False):    
    pid_session = []
    for g in assgn:           
        output = []
        psa = defaultdict(int)
        for oi in groups[g][1]:            
            record = newdf.iloc[oi]
            psa[record['Primary Subject Area']] += 1
            output.append('  [Oral] %4s : %s' % (record['Paper ID'], record['Paper Title']))
            pid_session.append((int(record['Paper ID']), groups[g][0]))
            
        for i in assgn[g]:
            record = newdf.iloc[spot_idx[i]]
            psa[record['Primary Subject Area']] += 1
            output.append('  [Spot] %4s : %s' % (record['Paper ID'], record['Paper Title']))
            pid_session.append((int(record['Paper ID']), groups[g][0]))
            
        print 'Session: ', groups[g][0], sorted(psa.items(), key=lambda x: x[1], reverse=True)
        if not session_topic_only:
            print '\n'.join(output)
        print
    return pd.DataFrame.from_records(pid_session, columns=['Paper ID', 'Session'])

In [33]:
spot_df = print_assignment(assgn_tpms_tpsa_psa_bow)


Session:  D1_S1_T1 [(u'Theory', 3), (u'Neuroscience and Cognitive Science/Plasticity and Adaptation', 1)]
  [Oral] 3826 : On Neuronal Capacity
  [Oral] 5269 : Dendritic cortical microcircuits approximate the backpropagation algorithm
  [Spot] 3213 : Size-Noise Tradeoffs in Generative Networks
  [Spot] 3300 : On Coresets for Logistic Regression

Session:  D1_S1_T2 [(u'Applications/Natural Language Processing', 2), (u'Deep Learning/Embedding Approaches', 1), (u'Deep Learning/Program Inductio', 1)]
  [Oral]  496 : On Word Embedding Dimensionality
  [Oral] 6491 : A Retrieve-and-Edit Framework for Predicting Structured Outputs
  [Spot] 3765 : Diffusion Maps for Textual Network Embedding
  [Spot] 4859 : Learning Libraries of Subroutines for Neurally–Guided Bayesian Program Learning

Session:  D1_S1_T3 [(u'Algorithms/Sparsity and Compressed Sensing', 3), (u'Theory/Learning Theory', 1)]
  [Oral] 1737 : Nearly tight sample complexity bounds for learning mixtures of Gaussians via sample compress

  [Spot] 6793 : Robust Subspace Approximation in a Stream

Session:  D3_S2_T1 [(u'Reinforcement Learning and Planning/Reinforcement Learning', 4), (u'Algorithms/Classification', 2), (u'Reinforcement Learning and Planning/Model-Based R', 1), (u'Deep Learning/Meta-Learning', 1), (u'Reinforcement Learning and Planning', 1), (u'Reinforcement Learning and Planning/Decision and Contro', 1), (u'Reinforcement Learning and Planning/Model-Based RL', 1), (u'Deep Learning/Meta-Learnin', 1), (u'Theory/Regularization', 1), (u'Reinforcement Learning and Planning/Exploration', 1)]
  [Oral] 5026 : Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion
  [Oral] 6460 : Non-delusional Q-learning and Value-iteration
  [Spot]  454 : Connectionist Temporal Classification with Maximum Entropy Regularization
  [Spot] 2298 : Data-Efficient Model-based Reinforcement Learning with Deep Probabilistic Dynamics Models
  [Spot] 2491 : Contour location via entropy reduction leveraging multipl

In [34]:
spot_df

Unnamed: 0,Paper ID,Session
0,3826,D1_S1_T1
1,5269,D1_S1_T1
2,3213,D1_S1_T1
3,3300,D1_S1_T1
4,496,D1_S1_T2
5,6491,D1_S1_T2
6,3765,D1_S1_T2
7,4859,D1_S1_T2
8,1737,D1_S1_T3
9,5491,D1_S1_T3


### Check for papers with overlapping authors across parallel tracks

In [35]:
def check_parallel_track_conficts(assgn):
    # group sessions by track number and order by day and session
    T1s = sorted([g for g in assgn if groups[g][0].endswith('T1')])
    T2s = sorted([g for g in assgn if groups[g][0].endswith('T2')])
    clashes = {}
    for g1, g2 in zip(T1s, T2s):        
        session = 'Sessions: %s vs %s' % (groups[g1][0], groups[g2][0])        
        t1data = []
        t1_idx = groups[g1][1] + [spot_idx[i] for i in assgn[g1]]
        for i in t1_idx:
            record = newdf.iloc[i]
            t1data.append((
                record['Paper ID'],
                record['Paper Title'],
                set([e.lower() for e in record['Author Emails'].split(';')])
            ))

        t2data = []
        t2_idx = groups[g2][1] + [spot_idx[i] for i in assgn[g2]]
        for i in t2_idx:
            record = newdf.iloc[i]
            t2data.append((
                record['Paper ID'],
                record['Paper Title'],
                set([e.lower() for e in record['Author Emails'].split(';')])
            ))

        for pid1, title1, emails1 in t1data:
            for pid2, title2, emails2 in t2data:
                conflicts = emails1.intersection(emails2)
                if len(conflicts) > 0:
                    if session not in clashes:
                        clashes[session] = []
                    clashes[session].append((pid1, pid2, conflicts))
            break
    
    if len(clashes) > 0:
        print clashes
        

In [36]:
check_parallel_track_conficts(assgn_tpms_tpsa_psa_bow)

In [17]:
# add Session from file
adf = pd.read_csv('S&O2.csv')
spot = adf[['Paper ID','Session']]

tmp = df.merge(spot, on='Paper ID', how='left', suffixes=('','_'))
tmp



Unnamed: 0,Paper ID,Paper Title,Abstract,Author Emails,Subject Areas,Decision,Primary Subject Area,Top-level Primary Subject Area,bow,pid,tpms,tsne_all,tsne_tpms,Session
0,29,Efficient Algorithms for Non-convex Isotonic R...,We consider the minimization of submodular fun...,francis.bach@inria.fr,Optimization/Submodular Optimization*; Optimiz...,Poster,Optimization/Submodular Optimization,Optimization,"[effici, algorithm, non, convex, isoton, regre...",29,"[0.724210006, 0.547847061, 0.62121431, 0.69992...",-65.037216,43.380173,
1,33,Structure-Aware Convolutional Neural Networks,Convolutional neural networks (CNNs) are inher...,jianlong.chang@nlpr.ia.ac.cn;jie.gu@nlpr.ia.ac...,Deep Learning*; Deep Learning/CNN Architecture...,Poster,Deep Learning,Deep Learning,"[structur, awar, convolut, neural, network, co...",33,"[0.722796601, 0.7524726279999999, 0.681080612,...",-27.842775,-41.931042,
2,34,Kalman Normalization,"As an indispensable component, Batch Normaliza...",wanggrun@mail2.sysu.edu.cn;jiefengpeng@gmail.c...,Deep Learning/CNN Architectures*; Applications...,Poster,Deep Learning/CNN Architectures,Deep Learning,"[kalman, normal, indispens, compon, batch, nor...",34,"[0.605652029, 0.624528822, 0.5549479039999999,...",-45.000961,-22.988417,
3,37,HOGWILD!-Gibbs can be PanAccurate,Asynchronous Gibbs sampling has been recently ...,costis@csail.mit.edu;ndikkala@mit.edu;jayanti@...,Probabilistic Methods/Distributed Inference*; ...,Poster,Probabilistic Methods/Distributed Inference,Probabilistic Methods,"[hogwild, gibb, panaccur, asynchron, gibb, sam...",37,"[0.6039084920000001, 0.571028136, 0.547187392,...",-86.396667,24.022959,
4,40,Text-Adaptive Generative Adversarial Networks:...,This paper addresses the problem of manipulati...,shnnam@yonsei.ac.kr;kim_yunji@yonsei.ac.kr;seo...,Applications/Computational Photography*; Appli...,Spotlight,Applications/Computational Photography,Applications,"[text, adapt, gener, adversari, network, manip...",40,"[0.6074082789999999, 0.604121554, 0.5843663610...",10.959604,-44.323528,D2_S1_T2
5,59,IntroVAE: Introspective Variational Autoencode...,We present a novel introspective variational a...,huaibo.huang@cripac.ia.ac.cn;zhihang.li@nlpr.i...,Deep Learning/Generative Models*; Deep Learnin...,Poster,Deep Learning/Generative Models,Deep Learning,"[introva, introspect, variat, autoencod, photo...",59,"[0.554898813, 0.566786583, 0.514959909, 0.5606...",-52.689449,-14.038417,
6,68,Doubly Robust Bayesian Inference for Non-Stati...,We present the very first robust Bayesian Onli...,j.knoblauch@warwick.ac.uk;j.e.jewson@warwick.a...,Applications/Time Series Analysis*; Algorithms...,Poster,Applications/Time Series Analysis,Applications,"[doubli, robust, bayesian, infer, non, station...",68,"[0.811906661, 0.7286812490000001, 0.745909935,...",21.148312,-1.963071,
7,75,Adapted Deep Embeddings: A Synthesis of Method...,The focus in machine learning has branched bey...,tysc7237@colorado.edu;karl.ridgeway@colorado.e...,Algorithms/Multitask and Transfer Learning*; A...,Spotlight,Algorithms/Multitask and Transfer Learning,Algorithms,"[adapt, deep, embed, synthesi, method, shot, i...",75,"[0.7246898359999999, 0.74860931, 0.72164041200...",57.467770,-28.714304,D1_S2_T2
8,77,Generalized Inverse Optimization through Onlin...,Inverse optimization is a powerful paradigm fo...,chaosheng@pitt.edu;yiran.chen@duke.edu;bzeng@p...,Algorithms/Online Learning*; Applications/Quan...,Poster,Algorithms/Online Learning,Algorithms,"[gener, invers, optim, onlin, learn, invers, o...",77,"[0.8553071390000001, 0.729569048, 0.771284843,...",62.712032,38.852795,
9,85,An Off-policy Policy Gradient Theorem Using Em...,Policy gradient methods are widely used for co...,imani@ualberta.ca;graves@ualberta.ca;whitem@ua...,Reinforcement Learning and Planning/Reinforcem...,Poster,Reinforcement Learning and Planning/Reinforcem...,Reinforcement Learning and Planning,"[polici, polici, gradient, theorem, use, empha...",85,"[0.5770863589999999, 0.523376441, 0.518179388,...",85.118027,6.052812,


In [37]:
newdf = newdf.drop(columns=['Session'])
tmp = newdf.merge(spot_df, left_on='Paper ID', right_on='Paper ID', how='left')

In [38]:
tmp

Unnamed: 0,Paper ID,Paper Title,Abstract,Author Emails,Subject Areas,Decision,Primary Subject Area,Top-level Primary Subject Area,bow,pid,tpms,tsne_all,tsne_tpms,Session
0,29,Efficient Algorithms for Non-convex Isotonic R...,We consider the minimization of submodular fun...,francis.bach@inria.fr,Optimization/Submodular Optimization*; Optimiz...,Poster,Optimization/Submodular Optimization,Optimization,"[effici, algorithm, non, convex, isoton, regre...",29,"[0.724210006, 0.547847061, 0.62121431, 0.69992...",29.950420,41.906616,
1,33,Structure-Aware Convolutional Neural Networks,Convolutional neural networks (CNNs) are inher...,jianlong.chang@nlpr.ia.ac.cn;jie.gu@nlpr.ia.ac...,Deep Learning*; Deep Learning/CNN Architecture...,Poster,Deep Learning,Deep Learning,"[structur, awar, convolut, neural, network, co...",33,"[0.722796601, 0.7524726279999999, 0.681080612,...",69.435448,-41.231922,
2,34,Kalman Normalization,"As an indispensable component, Batch Normaliza...",wanggrun@mail2.sysu.edu.cn;jiefengpeng@gmail.c...,Deep Learning/CNN Architectures*; Applications...,Poster,Deep Learning/CNN Architectures,Deep Learning,"[kalman, normal, indispens, compon, batch, nor...",34,"[0.605652029, 0.624528822, 0.5549479039999999,...",75.101242,-22.875866,
3,37,HOGWILD!-Gibbs can be PanAccurate,Asynchronous Gibbs sampling has been recently ...,costis@csail.mit.edu;ndikkala@mit.edu;jayanti@...,Probabilistic Methods/Distributed Inference*; ...,Poster,Probabilistic Methods/Distributed Inference,Probabilistic Methods,"[hogwild, gibb, panaccur, asynchron, gibb, sam...",37,"[0.6039084920000001, 0.571028136, 0.547187392,...",-42.259544,22.692909,
4,40,Text-Adaptive Generative Adversarial Networks:...,This paper addresses the problem of manipulati...,shnnam@yonsei.ac.kr;kim_yunji@yonsei.ac.kr;seo...,Applications/Computational Photography*; Appli...,Spotlight,Applications/Computational Photography,Applications,"[text, adapt, gener, adversari, network, manip...",40,"[0.6074082789999999, 0.604121554, 0.5843663610...",-8.646683,-43.590607,D3_S2_T2
5,59,IntroVAE: Introspective Variational Autoencode...,We present a novel introspective variational a...,huaibo.huang@cripac.ia.ac.cn;zhihang.li@nlpr.i...,Deep Learning/Generative Models*; Deep Learnin...,Poster,Deep Learning/Generative Models,Deep Learning,"[introva, introspect, variat, autoencod, photo...",59,"[0.554898813, 0.566786583, 0.514959909, 0.5606...",81.031715,-14.190269,
6,68,Doubly Robust Bayesian Inference for Non-Stati...,We present the very first robust Bayesian Onli...,j.knoblauch@warwick.ac.uk;j.e.jewson@warwick.a...,Applications/Time Series Analysis*; Algorithms...,Poster,Applications/Time Series Analysis,Applications,"[doubli, robust, bayesian, infer, non, station...",68,"[0.811906661, 0.7286812490000001, 0.745909935,...",-18.716530,-3.146081,
7,75,Adapted Deep Embeddings: A Synthesis of Method...,The focus in machine learning has branched bey...,tysc7237@colorado.edu;karl.ridgeway@colorado.e...,Algorithms/Multitask and Transfer Learning*; A...,Spotlight,Algorithms/Multitask and Transfer Learning,Algorithms,"[adapt, deep, embed, synthesi, method, shot, i...",75,"[0.7246898359999999, 0.74860931, 0.72164041200...",-55.623451,-29.995140,D3_S1_T1
8,77,Generalized Inverse Optimization through Onlin...,Inverse optimization is a powerful paradigm fo...,chaosheng@pitt.edu;yiran.chen@duke.edu;bzeng@p...,Algorithms/Online Learning*; Applications/Quan...,Poster,Algorithms/Online Learning,Algorithms,"[gener, invers, optim, onlin, learn, invers, o...",77,"[0.8553071390000001, 0.729569048, 0.771284843,...",-79.361519,25.245207,
9,85,An Off-policy Policy Gradient Theorem Using Em...,Policy gradient methods are widely used for co...,imani@ualberta.ca;graves@ualberta.ca;whitem@ua...,Reinforcement Learning and Planning/Reinforcem...,Poster,Reinforcement Learning and Planning/Reinforcem...,Reinforcement Learning and Planning,"[polici, polici, gradient, theorem, use, empha...",85,"[0.5770863589999999, 0.523376441, 0.518179388,...",44.501297,7.835011,


In [21]:
tmp.to_csv('session_arrangement_with_oral_and_spot_final_hanna.csv', index=False, columns=['Paper ID', 'Paper Title', 'Abstract', 'Subject Areas', 
                                                                      'Primary Subject Area', 'Top-level Primary Subject Area',
                                                                      'tsne_all', 'tsne_tpms', 'Decision', 'Session'], encoding='utf-8')

In [18]:
# Moving day 4 orals and spotlight to 1.
poster_df = tmp.copy()
poster_df['PosterSession'] = poster_df['Session'].apply(lambda x: 0 if isinstance(x, float) else int(x.split('_')[0][1])*2 + int(x.split('_')[1][1])-2)
# poster_df['PosterSession'] = poster_df['PosterSession'].apply(lambda x: 1 if x == 4 else x)

In [19]:
poster_df[poster_df['PosterSession'] == 6]

Unnamed: 0,Paper ID,Paper Title,Abstract,Author Emails,Subject Areas,Decision,Primary Subject Area,Top-level Primary Subject Area,bow,pid,tpms,tsne_all,tsne_tpms,Session,PosterSession
37,267,(Probably) Concave Graph Matching,In this paper we address the graph matching pr...,haggai.maron@weizmann.ac.il;yaron.lipman@weizm...,Optimization/Convex Optimization*; Optimizatio...,Spotlight,Optimization/Convex Optimization,Optimization,"[probabl, concav, graph, match, thi, paper, ad...",267,"[0.858271366, 0.682665183, 0.7683895479999999,...",-67.131195,29.937187,D3_S2_T3,6
101,576,Efficient nonmyopic batch active search,Active search is a learning paradigm for activ...,jiang.s@wustl.edu;luizgustavo@wustl.edu;mbabbo...,Algorithms/Active Learning,Spotlight,Algorithms/Active Learnin,Algorithms,"[effici, nonmyop, batch, activ, search, activ,...",576,"[0.5437475, 0.48220424799999995, 0.508575226, ...",60.245289,9.279699,D3_S2_T1,6
103,597,Interactive Structure Learning with Structural...,"In this work, we introduce interactive structu...",ctosh@cs.ucsd.edu;dasgupta@cs.ucsd.edu,Algorithms/Active Learning*; Algorithms/Semi-S...,Spotlight,Algorithms/Active Learning,Algorithms,"[interact, structur, learn, structur, queri, c...",597,"[0.529437719, 0.45932953600000004, 0.490521479...",59.319515,37.968044,D3_S2_T1,6
190,1026,Discovery of Latent 3D Keypoints via End-to-en...,"This paper presents KeypointNet, an end-to-end...",supasorn@gmail.com;snavely@google.com;tompson@...,Applications/Computer Vision,Oral,Applications/Computer Visio,Applications,"[discoveri, latent, 3d, keypoint, via, end, en...",1026,"[0.581163477, 0.5984493120000001, 0.542658568,...",9.342001,-47.488724,D3_S2_T2,6
199,1099,Norm matters: efficient and accurate normaliza...,"Over the past few years, Batch-Normalization h...",elad.hoffer@gmail.com;ron.banner@intel.com;ita...,Deep Learning*; Deep Learning/CNN Architecture...,Spotlight,Deep Learning,Deep Learning,"[norm, matter, effici, accur, normal, scheme, ...",1099,"[0.546826715, 0.578286664, 0.504709097, 0.5672...",-26.945894,-22.626886,D3_S2_T2,6
208,1143,Learning to Reconstruct Shapes from Unseen Cat...,"From a single view, humans are able to halluci...",xiuming@mit.edu;ztzhang@mit.edu;ckzhang@mit.ed...,Applications/Object Recognition*; Applications...,Oral,Applications/Object Recognition,Applications,"[learn, reconstruct, shape, unseen, categori, ...",1143,"[0.572495249, 0.567489183, 0.534011075, 0.5716...",10.185015,-47.350956,D3_S2_T2,6
210,1147,Smoothed analysis of the low-rank approach for...,We consider semidefinite programs (SDPs) of si...,tpumir@princeton.edu;sjelassi@princeton.edu;nb...,Optimization*; Optimization/Convex Optimizatio...,Oral,Optimization,Optimization,"[smooth, analysi, low, rank, approach, smooth,...",1147,"[0.460976129, 0.33447025, 0.36871131700000004,...",-70.034546,43.80368,D3_S2_T3,6
253,1446,Optimal Algorithms for Non-Smooth Distributed ...,"In this work, we consider the distributed opti...",kevin.scaman@gmail.com;francis.bach@inria.fr;s...,Optimization/Convex Optimization*; Application...,Oral,Optimization/Convex Optimization,Optimization,"[optim, algorithm, non, smooth, distribut, opt...",1446,"[0.637403434, 0.535576182, 0.5500409629999999,...",-67.13916,45.827702,D3_S2_T3,6
346,1878,DeepProbLog: Neural Probabilistic Logic Progr...,"We introduce DeepProbLog, a probabilistic logi...",robin.manhaeve@cs.kuleuven.be;sebastijan.duman...,Algorithms/Relational Learning*; Deep Learning...,Spotlight,Algorithms/Relational Learning,Algorithms,"[deepproblog, neural, probabilist, logic, prog...",1878,"[0.6685511, 0.718956288, 0.641330434, 0.673511...",53.303432,-30.856102,D3_S2_T2,6
347,1881,Convergence of Cubic Regularization for Noncon...,Cubic-regularized Newton's method (CR) is a po...,zhou.1172@osu.edu;wang.10982@osu.edu;liang.889...,Optimization*; Optimization/Non-Convex Optimiz...,Spotlight,Optimization,Optimization,"[converg, cubic, regular, nonconvex, optim, kl...",1881,"[0.7832019729999999, 0.517215119, 0.519890496,...",-69.925735,46.154724,D3_S2_T3,6


In [41]:
# 
poster_df['day_v3'] = tmp['Session'].apply(lambda x: 0 if isinstance(x, float) else int(x.split('_')[0][1]))

In [42]:
#
day4_talks = poster_df[poster_df['day_v3'] == 4].index

### Poster session assignment

#### Poster conflicts is defined as the number of common authors between two papers

In [20]:
poster_conflicts = {}
for i, irow in tqdm_notebook(poster_df.iterrows()):
    iemails = set(e.strip() for e in irow['Author Emails'].split(';'))
    for j, jrow in poster_df.iterrows():
        if j <= i:
            continue
        jemails = set(e.strip() for e in jrow['Author Emails'].split(';'))
        c = len(iemails.intersection(jemails))
        if c > 0:
            poster_conflicts[i,j] = poster_conflicts[j,i] = c
    

HBox(children=(IntProgress(value=1, bar_style=u'info', max=1), HTML(value=u'')))




In [21]:
poster_conflicts_first_author = {}
for i, irow in tqdm_notebook(poster_df.iterrows()):
    iemail = irow['Author Emails'].split(';')[0].strip()
    for j, jrow in poster_df.iterrows():
        if j <= i:
            continue
        jemail = jrow['Author Emails'].split(';')[0].strip()
        if iemail == jemail:
            poster_conflicts_first_author[i,j] = poster_conflicts_first_author[j,i] = 1

HBox(children=(IntProgress(value=1, bar_style=u'info', max=1), HTML(value=u'')))




In [22]:
# number of papers with at least one conflict paper
len(poster_conflicts)

1674

In [23]:
poster_conflicts_first_author

{(3L, 852L): 1,
 (34L, 35L): 1,
 (35L, 34L): 1,
 (52L, 53L): 1,
 (53L, 52L): 1,
 (54L, 947L): 1,
 (56L, 80L): 1,
 (59L, 815L): 1,
 (78L, 361L): 1,
 (80L, 56L): 1,
 (82L, 868L): 1,
 (97L, 206L): 1,
 (99L, 121L): 1,
 (106L, 114L): 1,
 (106L, 247L): 1,
 (106L, 343L): 1,
 (113L, 183L): 1,
 (114L, 106L): 1,
 (114L, 247L): 1,
 (114L, 343L): 1,
 (121L, 99L): 1,
 (183L, 113L): 1,
 (200L, 476L): 1,
 (206L, 97L): 1,
 (207L, 332L): 1,
 (241L, 607L): 1,
 (242L, 807L): 1,
 (247L, 106L): 1,
 (247L, 114L): 1,
 (247L, 343L): 1,
 (257L, 377L): 1,
 (287L, 401L): 1,
 (291L, 1006L): 1,
 (332L, 207L): 1,
 (343L, 106L): 1,
 (343L, 114L): 1,
 (343L, 247L): 1,
 (361L, 78L): 1,
 (377L, 257L): 1,
 (382L, 622L): 1,
 (401L, 287L): 1,
 (426L, 552L): 1,
 (428L, 523L): 1,
 (432L, 448L): 1,
 (437L, 453L): 1,
 (448L, 432L): 1,
 (449L, 452L): 1,
 (452L, 449L): 1,
 (453L, 437L): 1,
 (461L, 842L): 1,
 (465L, 888L): 1,
 (476L, 200L): 1,
 (486L, 797L): 1,
 (495L, 817L): 1,
 (519L, 525L): 1,
 (523L, 428L): 1,
 (525L, 519L):

In [24]:
poster_df['Top-level Primary Subject Area'].value_counts()

Algorithms                                           207
Deep Learning                                        194
Applications                                         193
Probabilistic Methods                                106
Theory                                                97
Reinforcement Learning and Planning                   85
Optimization                                          80
Neuroscience and Cognitive Science                    33
Deep Learnin                                           7
Data, Competitions, Implementations, and Software      6
Application                                            2
Optimizatio                                            1
Reinforcement Learning and Plannin                     1
Name: Top-level Primary Subject Area, dtype: int64

#### Poster similarity is defined as the sum of tpms, tpsa, psa, and bow minus the number of common authors

In [25]:
poster_sim = TPMS + TPSA + PSA + BOW

In [26]:
poster_sim.shape

(1012L, 1012L)

In [27]:
poster_day = list(poster_df.PosterSession.values)
poster_fixed = set(poster_df[poster_df.Decision != 'Poster'].index.values)
poster_free = list(poster_df[poster_df.Decision == 'Poster'].index.values)

In [28]:
poster_free

[0,
 1,
 2,
 3,
 5,
 6,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 38,
 39,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 58,
 59,
 60,
 61,
 62,
 64,
 65,
 66,
 69,
 70,
 72,
 73,
 74,
 75,
 76,
 78,
 79,
 81,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 96,
 97,
 98,
 99,
 100,
 102,
 105,
 106,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 129,
 131,
 133,
 134,
 135,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 177,
 179,
 180,
 181,
 182,
 183,
 184,
 185,
 187,
 188,
 189,
 191,
 192,
 193,
 194,
 195,
 197,
 198,
 200,
 201,
 202,
 203,
 204,
 206,
 207,
 211,
 212,
 213,
 214,
 215,
 2

In [29]:
tpsa_vc = poster_df['Top-level Primary Subject Area'].value_counts()
tpsa_idx = dict(zip(tpsa_vc.index, range(len(tpsa_vc))))
poster_df['tpsa'] = poster_df['Top-level Primary Subject Area'].apply(lambda x: tpsa_idx[x])
poster_tpsa = poster_df['tpsa'].values

In [30]:
# poster size contraint
def count_violators(label, constraints):
    for day, size in enumerate(constraints):
        day += 1
        cnt = sum(1 for l in label if l == day)
        if cnt != size:
            print 'day %d: size (%d) != count (%d)' % (day, size, cnt)

In [31]:
def find_conflicts(label, conflicts):
    return [((i,j),v,label[i]) for (i,j),v in conflicts.iteritems() if label[i] == label[j] and j > i]

In [32]:
from collections import Counter
def check_label_topic_dist(label, topic):    
    for g, v in sorted(Counter(zip(label,topic)).items()):
        print g, v        
    

In [33]:
pairs = [(i,j) for i in range(len(poster_day)-1) for j in range(i+1, len(poster_day))]

def metric(label, sim, conflicts):
    total_sim = sum(sim[i,j] for i,j in pairs if label[i] == label[j])
    total_conflict = sum(1 for (i,j),v in conflicts.iteritems() if label[i] == label[j])
    return total_sim, total_conflict    


In [34]:
def balance(vals, swap=None):
    # do the swap before computation
    if swap is not None:
        src, dst = swap
        vals[src] -= 1
        vals[dst] += 1
        
    m = 1.0*sum(vals)/len(vals)    
    res = max(0, max(vals) - m)
    
    # undo the swap after computation
    if swap is not None:
        src, dst = swap
        vals[src] += 1
        vals[dst] -= 1
    
    return res


class Searcher(object):
    def __init__(self, label, topic, sim, conflicts):
        self.L = label
        self.N = len(label)
        self.nLabel = max(label)+1
        
        self.T = topic        
        self.nTopic = len(set(topic))
        self.Bcnt = dict((i, [0]*self.nLabel) for i in xrange(self.nTopic))
        for i in range(len(label)):
            self.Bcnt[topic[i]][label[i]] += 1
            
        self.Bval = sum(balance(vals) for t,vals in self.Bcnt.iteritems())        
        
        self.S = sim
        self.Sval = sum(sim[i,j] for i in xrange(len(label)-1) for j in xrange(i+1, len(label)) if label[i] == label[j])        
        
        self.C = conflicts        
        self.Cval = sum(1 for (i,j),v in conflicts.iteritems() if label[i] == label[j])                
            
    def value(self):        
        return self.Sval, self.Cval, self.Bval
        
    def update(self, i, j, mode):        
        N, L, S, C, T, Bcnt = self.N, self.L, self.S, self.C, self.T, self.Bcnt
        Li = L[i]
        Lj = L[j]
        if Li != Lj:
            # changes due to swap of label between i and j            
            oldSi = sum(S[i,k] for k in xrange(i+1, N) if Li == L[k]) + sum(S[k,i] for k in xrange(i) if Li == L[k])
            newSi = sum(S[i,k] for k in xrange(i+1, N) if Lj == L[k]) + sum(S[k,i] for k in xrange(i) if Lj == L[k])
            oldSj = sum(S[j,k] for k in xrange(j+1, N) if Lj == L[k]) + sum(S[k,j] for k in xrange(j) if Lj == L[k])
            newSj = sum(S[j,k] for k in xrange(j+1, N) if Li == L[k]) + sum(S[k,j] for k in xrange(j) if Li == L[k])                                                
            newSval = self.Sval - oldSi + newSi - oldSj + newSj
            
            oldCi = sum(1 for (a,b),v in C.iteritems() if (a == i and Li == L[b]) or (b == i and Li == L[a]))
            newCi = sum(1 for (a,b),v in C.iteritems() if (a == i and Lj == L[b]) or (b == i and Lj == L[a]))
            oldCj = sum(1 for (a,b),v in C.iteritems() if (a == j and Lj == L[b]) or (b == i and Lj == L[a]))
            newCj = sum(1 for (a,b),v in C.iteritems() if (a == j and Li == L[b]) or (b == i and Li == L[a]))
            newCval = self.Cval - oldCi + newCi - oldCj + newCj
            
            oldBi = balance(Bcnt[T[i]])
            newBi = balance(Bcnt[T[i]], (Li, Lj))
            oldBj = balance(Bcnt[T[j]])
            newBj = balance(Bcnt[T[j]], (Lj, Li))            
            newBval = self.Bval - oldBi + newBi - oldBj + newBj

            do_update = False
            if mode == 'sim':
                if newSval > self.Sval and self.Cval >= newCval and self.Bval >= newBval:
                    do_update = True
            elif mode == 'con':
                if newSval >= self.Sval*0.95 and self.Cval > newCval and self.Bval >= newBval:
                    do_update = True
            elif mode == 'bal':
                if newSval >= self.Sval*0.95 and self.Cval >= newCval and self.Bval > newBval:
                    do_update = True
            elif mode == 'force':
                do_update = True
                                    
            if do_update:
                self.Cval = newCval
                self.Sval = newSval
                self.Bval = newBval
                self.Bcnt[T[i]][Li] -= 1
                self.Bcnt[T[i]][Lj] += 1
                self.Bcnt[T[j]][Lj] -= 1                
                self.Bcnt[T[j]][Li] += 1                               
                self.L[i], self.L[j] = self.L[j], self.L[i]      

        return self.value()
        
    def optimize_similarities(self, i, j):
        return self.update(i,j,'sim')
    
    def optimize_conflicts(self, i, j):
        return self.update(i,j,'con')
    
    def optimize_balance(self, i, j):
        return self.update(i,j,'bal')


In [35]:
%time metric(poster_day, poster_sim, poster_conflicts)

Wall time: 246 ms


(314320.6206786493, 1042)

In [36]:
# random initialization
num_posters_per_day = [169, 169, 169, 169, 168,168]
np.random.shuffle(poster_free)
start = 0
for day in [1,2,3,4,5,6]:
    n = sum(1 for i in poster_day if i == day)
    gap = num_posters_per_day[day-1] - n    
    for p in poster_free[start:start+gap]:
        poster_day[p] = day
    start = start + gap

In [37]:
min(poster_day)

1

In [38]:
searcher = Searcher(poster_day, poster_tpsa, poster_sim, poster_conflicts)
update_cnt = 0
for epoch in tnrange(1):
    np.random.shuffle(poster_free)
    for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
        sval, cval, bval = searcher.optimize_balance(i, j)
        update_cnt += 1
        if update_cnt % 500 == 1:
            clear_output()
            display('[bal] %.2f, %d, %d' % (sval, cval, bval))        
    
    np.random.shuffle(poster_free)
    for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
        sval, cval, bval = searcher.optimize_conflicts(i, j)
        update_cnt += 1
        if update_cnt % 500 == 1:
            clear_output()
            display('[con] %.2f, %d, %d' % (sval, cval, bval))

    np.random.shuffle(poster_free)
    for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
        sval, cval, bval = searcher.optimize_similarities(i, j)
        update_cnt += 1
        if update_cnt % 500 == 1:
            clear_output()
            display('[sim] %.2f, %d, %d' % (sval, cval, bval))

'[sim] 79411.78, 264, 64'




In [39]:
count_violators(poster_day, num_posters_per_day)

In [40]:
check_label_topic_dist(poster_day, poster_tpsa)

(1, 0) 33
(1, 1) 37
(1, 2) 36
(1, 3) 14
(1, 4) 19
(1, 5) 11
(1, 6) 6
(1, 7) 7
(1, 8) 2
(1, 9) 2
(1, 11) 1
(1, 12) 1
(2, 0) 38
(2, 1) 37
(2, 2) 36
(2, 3) 11
(2, 4) 15
(2, 5) 13
(2, 6) 11
(2, 7) 7
(2, 8) 1
(3, 0) 27
(3, 1) 37
(3, 2) 33
(3, 3) 22
(3, 4) 19
(3, 5) 9
(3, 6) 15
(3, 7) 5
(3, 8) 1
(3, 10) 1
(4, 0) 33
(4, 1) 19
(4, 2) 36
(4, 3) 24
(4, 4) 15
(4, 5) 21
(4, 6) 18
(4, 7) 3
(5, 0) 38
(5, 1) 27
(5, 2) 29
(5, 3) 24
(5, 4) 19
(5, 5) 10
(5, 6) 12
(5, 7) 6
(5, 9) 2
(5, 10) 1
(6, 0) 38
(6, 1) 37
(6, 2) 23
(6, 3) 11
(6, 4) 10
(6, 5) 21
(6, 6) 18
(6, 7) 5
(6, 8) 3
(6, 9) 2


In [41]:
len(sorted(find_conflicts(poster_day, poster_conflicts)))

94

In [42]:
for (i,j),_,_ in find_conflicts(poster_day, poster_conflicts):
    ni = len(poster_df.iloc[i]['Author Emails'].split(';'))
    nj = len(poster_df.iloc[j]['Author Emails'].split(';'))
    if ni == 1 or nj == 1:
        print i,j,ni,nj

106 247 1 1
42 809 1 3


In [43]:
from itertools import combinations

In [44]:
best_sim_score, best_conflict_loss = metric(poster_day, poster_sim, poster_conflicts)
# set large number
for epoch in range(400):
    np.random.shuffle(poster_free)
    for i,j in combinations(poster_free[:20], 2):
        if (i not in poster_fixed) and (j not in poster_fixed) and (poster_day[i] != poster_day[j]):
            poster_day[i], poster_day[j] = poster_day[j], poster_day[i]
            s, c = metric(poster_day, poster_sim, poster_conflicts)
            if s >= best_sim_score and c <= best_conflict_loss:
                best_sim_score = s
                best_conflict_loss = c
            else:
                poster_day[i], poster_day[j] = poster_day[j], poster_day[i]
    print epoch
    print '%.2f, %d' % metric(poster_day, poster_sim, poster_conflicts)

0
79512.18, 186
1
79590.84, 186
2
79683.57, 184
3
79780.75, 184
4
79881.14, 184
5
80024.91, 178
6
80165.12, 176
7
80286.04, 168
8
80390.09, 168
9
80561.63, 166
10
80749.75, 164
11
80781.21, 160
12
80881.42, 156
13
81054.12, 156
14
81190.36, 154
15
81359.00, 148
16
81514.38, 142
17
81744.73, 142
18
81931.88, 142
19
82141.19, 142
20
82273.57, 142
21
82519.25, 134
22
82705.31, 134
23
82951.89, 134
24
83123.45, 132
25
83247.63, 132
26
83426.24, 132
27
83647.67, 132
28
84101.04, 132
29
84254.24, 132
30
84305.46, 130
31
84516.31, 130
32
84749.94, 128
33
85051.96, 128
34
85115.54, 124
35
85393.56, 122
36
85597.13, 118
37
85677.16, 118
38
85941.78, 114
39
86056.02, 112
40
86135.97, 110
41
86485.82, 110
42
86638.45, 110
43
86813.80, 110
44
86863.06, 110
45
87112.83, 106
46
87311.92, 106
47
87493.90, 106
48
87585.96, 106
49
87664.42, 106
50
87899.82, 106
51
88289.22, 106
52
88509.63, 104
53
88786.12, 104
54
88974.22, 104
55
89058.66, 104
56
89221.99, 104
57
89539.58, 104
58
89905.04, 102
59
9011

In [45]:
poster_day

[4,
 1,
 1,
 4,
 3,
 1,
 2,
 2,
 6,
 3,
 6,
 5,
 3,
 3,
 1,
 2,
 6,
 5,
 4,
 2,
 6,
 3,
 6,
 2,
 2,
 2,
 1,
 3,
 2,
 2,
 2,
 3,
 1,
 5,
 5,
 4,
 4,
 6,
 1,
 3,
 5,
 1,
 6,
 1,
 6,
 5,
 4,
 3,
 1,
 5,
 2,
 2,
 3,
 1,
 2,
 1,
 5,
 5,
 5,
 4,
 3,
 3,
 2,
 4,
 1,
 5,
 1,
 5,
 4,
 1,
 1,
 3,
 2,
 4,
 2,
 6,
 1,
 5,
 5,
 4,
 3,
 5,
 1,
 3,
 4,
 1,
 5,
 1,
 4,
 4,
 1,
 6,
 5,
 6,
 6,
 2,
 1,
 1,
 3,
 3,
 2,
 6,
 3,
 6,
 1,
 1,
 6,
 3,
 3,
 4,
 5,
 4,
 2,
 6,
 1,
 6,
 3,
 1,
 5,
 1,
 2,
 6,
 1,
 4,
 3,
 3,
 2,
 5,
 2,
 1,
 3,
 4,
 2,
 1,
 2,
 6,
 3,
 1,
 1,
 3,
 2,
 2,
 2,
 1,
 1,
 6,
 6,
 1,
 4,
 2,
 1,
 3,
 5,
 2,
 5,
 2,
 2,
 2,
 3,
 1,
 4,
 4,
 2,
 4,
 6,
 4,
 2,
 5,
 1,
 3,
 5,
 1,
 2,
 2,
 6,
 4,
 2,
 1,
 3,
 2,
 1,
 3,
 5,
 5,
 1,
 1,
 2,
 1,
 3,
 2,
 6,
 5,
 3,
 3,
 2,
 1,
 5,
 5,
 2,
 6,
 6,
 3,
 6,
 2,
 3,
 3,
 4,
 6,
 6,
 2,
 6,
 4,
 5,
 4,
 4,
 6,
 1,
 3,
 3,
 2,
 6,
 4,
 5,
 4,
 6,
 3,
 4,
 6,
 5,
 2,
 4,
 3,
 2,
 1,
 6,
 1,
 1,
 5,
 2,
 2,
 4,
 4,
 5,
 6,
 4,
 2,
 4,
 4,
 3,
 5,


In [75]:
poster_df

Unnamed: 0,Paper ID,Paper Title,Abstract,Author Emails,Subject Areas,Decision,Primary Subject Area,Top-level Primary Subject Area,bow,pid,tpms,tsne_all,tsne_tpms,Session,PosterSession,tpsa
0,29,Efficient Algorithms for Non-convex Isotonic R...,We consider the minimization of submodular fun...,francis.bach@inria.fr,Optimization/Submodular Optimization*; Optimiz...,Poster,Optimization/Submodular Optimization,Optimization,"[effici, algorithm, non, convex, isoton, regre...",29,"[0.724210006, 0.547847061, 0.62121431, 0.69992...",29.950420,41.906616,,0,6
1,33,Structure-Aware Convolutional Neural Networks,Convolutional neural networks (CNNs) are inher...,jianlong.chang@nlpr.ia.ac.cn;jie.gu@nlpr.ia.ac...,Deep Learning*; Deep Learning/CNN Architecture...,Poster,Deep Learning,Deep Learning,"[structur, awar, convolut, neural, network, co...",33,"[0.722796601, 0.7524726279999999, 0.681080612,...",69.435448,-41.231922,,0,1
2,34,Kalman Normalization,"As an indispensable component, Batch Normaliza...",wanggrun@mail2.sysu.edu.cn;jiefengpeng@gmail.c...,Deep Learning/CNN Architectures*; Applications...,Poster,Deep Learning/CNN Architectures,Deep Learning,"[kalman, normal, indispens, compon, batch, nor...",34,"[0.605652029, 0.624528822, 0.5549479039999999,...",75.101242,-22.875866,,0,1
3,37,HOGWILD!-Gibbs can be PanAccurate,Asynchronous Gibbs sampling has been recently ...,costis@csail.mit.edu;ndikkala@mit.edu;jayanti@...,Probabilistic Methods/Distributed Inference*; ...,Poster,Probabilistic Methods/Distributed Inference,Probabilistic Methods,"[hogwild, gibb, panaccur, asynchron, gibb, sam...",37,"[0.6039084920000001, 0.571028136, 0.547187392,...",-42.259544,22.692909,,0,3
4,40,Text-Adaptive Generative Adversarial Networks:...,This paper addresses the problem of manipulati...,shnnam@yonsei.ac.kr;kim_yunji@yonsei.ac.kr;seo...,Applications/Computational Photography*; Appli...,Spotlight,Applications/Computational Photography,Applications,"[text, adapt, gener, adversari, network, manip...",40,"[0.6074082789999999, 0.604121554, 0.5843663610...",-8.646683,-43.590607,D3_S2_T2,6,2
5,59,IntroVAE: Introspective Variational Autoencode...,We present a novel introspective variational a...,huaibo.huang@cripac.ia.ac.cn;zhihang.li@nlpr.i...,Deep Learning/Generative Models*; Deep Learnin...,Poster,Deep Learning/Generative Models,Deep Learning,"[introva, introspect, variat, autoencod, photo...",59,"[0.554898813, 0.566786583, 0.514959909, 0.5606...",81.031715,-14.190269,,0,1
6,68,Doubly Robust Bayesian Inference for Non-Stati...,We present the very first robust Bayesian Onli...,j.knoblauch@warwick.ac.uk;j.e.jewson@warwick.a...,Applications/Time Series Analysis*; Algorithms...,Poster,Applications/Time Series Analysis,Applications,"[doubli, robust, bayesian, infer, non, station...",68,"[0.811906661, 0.7286812490000001, 0.745909935,...",-18.716530,-3.146081,,0,2
7,75,Adapted Deep Embeddings: A Synthesis of Method...,The focus in machine learning has branched bey...,tysc7237@colorado.edu;karl.ridgeway@colorado.e...,Algorithms/Multitask and Transfer Learning*; A...,Spotlight,Algorithms/Multitask and Transfer Learning,Algorithms,"[adapt, deep, embed, synthesi, method, shot, i...",75,"[0.7246898359999999, 0.74860931, 0.72164041200...",-55.623451,-29.995140,D3_S1_T1,5,0
8,77,Generalized Inverse Optimization through Onlin...,Inverse optimization is a powerful paradigm fo...,chaosheng@pitt.edu;yiran.chen@duke.edu;bzeng@p...,Algorithms/Online Learning*; Applications/Quan...,Poster,Algorithms/Online Learning,Algorithms,"[gener, invers, optim, onlin, learn, invers, o...",77,"[0.8553071390000001, 0.729569048, 0.771284843,...",-79.361519,25.245207,,0,0
9,85,An Off-policy Policy Gradient Theorem Using Em...,Policy gradient methods are widely used for co...,imani@ualberta.ca;graves@ualberta.ca;whitem@ua...,Reinforcement Learning and Planning/Reinforcem...,Poster,Reinforcement Learning and Planning/Reinforcem...,Reinforcement Learning and Planning,"[polici, polici, gradient, theorem, use, empha...",85,"[0.5770863589999999, 0.523376441, 0.518179388,...",44.501297,7.835011,,0,5


In [66]:
poster_df[poster_df['PosterSession']==0].count() 

Paper ID                          325
Paper Title                       325
Abstract                          325
Author Emails                     325
Subject Areas                     325
Decision                          325
Primary Subject Area              325
Top-level Primary Subject Area    325
bow                               325
pid                               325
tpms                              325
tsne_all                          325
tsne_tpms                         325
Session                             0
PosterSession                     325
day_v3                            325
tpsa                              325
dtype: int64

In [67]:
poster_df[['Top-level Primary Subject Area', 'Decision', 'Session', 'Oral Session', 'PosterSession']]

KeyError: "['Oral Session'] not in index"

In [145]:
poster_df['Top-level Primary Subject Area'].groupby(poster_df.day).apply(pd.value_counts)

AttributeError: 'DataFrame' object has no attribute 'day'

In [559]:
# poster_df.to_csv('../../data/session_arrangement_20171012.csv',
#                  index=False, columns=['Paper ID', 'Paper Title',
#                                 'Abstract', 'Subject Areas',
#                                 'Primary Subject Area', 'Top-level Primary Subject Area',
#                                 'tsne_all', 'tsne_tpms', 'Decision',
#                                 'Session', 'PosterSession'])

In [None]:
poster_df

#### Manually group a few posters to the same day

In [69]:
poster_df[poster_df['Top-level Primary Subject Area'].isin(['None of the above', 'Data, Competitions, Implementations, and Software'])][['Top-level Primary Subject Area', 'Decision', 'Session', 'PosterSession']]

Unnamed: 0,Top-level Primary Subject Area,Decision,Session,PosterSession
299,"Data, Competitions, Implementations, and Software",Spotlight,D3_S1_T1,3
578,"Data, Competitions, Implementations, and Software",Poster,,0
702,"Data, Competitions, Implementations, and Software",Poster,,0
808,"Data, Competitions, Implementations, and Software",Spotlight,D3_S1_T1,3
905,"Data, Competitions, Implementations, and Software",Poster,,1
937,"Data, Competitions, Implementations, and Software",Poster,,0


In [70]:
poster_df[poster_df['Top-level Primary Subject Area'].isin(['Algorithms']) & (poster_df.Decision == 'Poster') & (poster_df.PosterSession == 3)][['Paper Title', 'Decision', 'Session', 'PosterSession']].sample(2)

Unnamed: 0,Paper Title,Decision,Session,PosterSession
534,Learning to Multitask,Poster,,3
276,When do random forests fail?,Poster,,3


In [588]:
searcher.update(325, 330, 'force')
searcher.update(677, 476, 'force')

(72999.511335739619, 85, 63.25)

In [590]:
poster_fixed.update([325, 677])
poster_free.remove(325)
poster_free.remove(677)

In [76]:
poster_df['day_v2'] = poster_day

In [77]:
poster_df['Top-level Primary Subject Area'].groupby(poster_df.day_v2).apply(pd.value_counts)

day_v2                                                   
1       Algorithms                                           143
        Deep Learning                                          6
        Theory                                                 6
        Applications                                           4
        Reinforcement Learning and Planning                    3
        Neuroscience and Cognitive Science                     3
        Probabilistic Methods                                  2
        Optimization                                           2
2       Theory                                                66
        Deep Learning                                         48
        Neuroscience and Cognitive Science                    12
        Probabilistic Methods                                 11
        Algorithms                                            10
        Applications                                           9
        Deep Learnin            

In [619]:
searcher = Searcher(poster_day, poster_tpsa, poster_sim, poster_conflicts_first_author)

update_cnt = 0
for epoch in tnrange(5):
#     np.random.shuffle(poster_free)
#     for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
#         sval, cval, bval = searcher.optimize_balance(i, j)
#         update_cnt += 1
#         if update_cnt % 500 == 1:
#             clear_output()
#             display('[bal] %.2f, %d, %d' % (sval, cval, bval))        
    
    np.random.shuffle(poster_free)
    for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
        sval, cval, bval = searcher.optimize_conflicts(i, j)
        update_cnt += 1
        if update_cnt % 100 == 1:
            clear_output()
            display('[con] %.2f, %d, %d' % (sval, cval, bval))

#     np.random.shuffle(poster_free)
#     for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
#         sval, cval, bval = searcher.optimize_similarities(i, j)
#         update_cnt += 1
#         if update_cnt % 500 == 1:
#             clear_output()
#             display('[sim] %.2f, %d, %d' % (sval, cval, bval))

'[con] 72994.22, 2, 62'




In [78]:
poster_df['PosterSession'] = poster_day

In [79]:
poster_df.to_csv('session_arr_final.csv',
                 index=False, columns=['Paper ID', 'Paper Title',
                                'Abstract', 'Subject Areas',
                                'Primary Subject Area', 'Top-level Primary Subject Area',
                                'tsne_all', 'tsne_tpms', 'Decision',
                                'Session', 'PosterSession'], encoding='utf-8')

#### Moving the posters of Day 4 talks from Day 1 to Day 3

In [657]:
for i in day4_talks:
    poster_day[i] = 3

In [662]:
# move free posters from day 3 to day 1
day3_free = [i for i in poster_free if poster_day[i] == 3]
np.random.shuffle(day3_free)
for i in day3_free[:len(day4_talks)]:
    poster_day[i] = 1

In [682]:
len(find_conflicts(poster_day, poster_conflicts))

135

In [683]:
len(find_conflicts(poster_day, poster_conflicts_first_author))

0

In [684]:
count_violators(poster_day, num_posters_per_day)

In [681]:
searcher = Searcher(poster_day, poster_tpsa, poster_sim, poster_conflicts_first_author)

update_cnt = 0
for epoch in tnrange(5000):
#     np.random.shuffle(poster_free)
#     for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
#         sval, cval, bval = searcher.optimize_balance(i, j)
#         update_cnt += 1
#         if update_cnt % 500 == 1:
#             clear_output()
#             display('[bal] %.2f, %d, %d' % (sval, cval, bval))        
    
#     np.random.shuffle(poster_free)
#     for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
#         sval, cval, bval = searcher.optimize_conflicts(i, j)
#         update_cnt += 1
#         if update_cnt % 100 == 1:
#             clear_output()
#             display('[con] %.2f, %d, %d' % (sval, cval, bval))

    np.random.shuffle(poster_free)
    for i,j in zip(poster_free[:-1:2], poster_free[1::2]):        
        sval, cval, bval = searcher.optimize_similarities(i, j)
        update_cnt += 1
        if update_cnt % 500 == 1:
            clear_output()
            display('[sim] %.2f, %d, %d' % (sval, cval, bval))

'[sim] 75397.44, 0, 62'




In [685]:
poster_df['day_v3'] = poster_day

In [672]:
poster_df['Top-level Primary Subject Area'].groupby(poster_df.day_v2).apply(pd.value_counts)

day_v2                                                   
1       Algorithms                                           66
        Deep Learning                                        45
        Applications                                         32
        Probabilistic Methods                                23
        Optimization                                         21
        Theory                                               21
        Neuroscience and cognitive science                   11
        Reinforcement Learning and Planning                   8
2       Algorithms                                           66
        Deep Learning                                        45
        Applications                                         32
        Probabilistic Methods                                23
        Theory                                               22
        Optimization                                         21
        Neuroscience and cognitive science    

In [686]:
poster_df['Top-level Primary Subject Area'].groupby(poster_df.day_v3).apply(pd.value_counts)

day_v3                                                   
1       Algorithms                                           66
        Deep Learning                                        45
        Applications                                         32
        Probabilistic Methods                                23
        Optimization                                         21
        Theory                                               21
        Neuroscience and cognitive science                   11
        Reinforcement Learning and Planning                   8
2       Algorithms                                           66
        Deep Learning                                        45
        Applications                                         32
        Probabilistic Methods                                23
        Theory                                               22
        Optimization                                         21
        Neuroscience and cognitive science    

In [46]:
poster_df['PosterSession'] = poster_day

In [47]:
poster_df['Top-level Primary Subject Area'].groupby(poster_df.PosterSession).apply(pd.value_counts)

PosterSession                                                   
1              Deep Learning                                        134
               Applications                                          16
               Algorithms                                             7
               Theory                                                 3
               Optimization                                           3
               Neuroscience and Cognitive Science                     2
               Deep Learnin                                           1
               Application                                            1
               Probabilistic Methods                                  1
               Reinforcement Learning and Planning                    1
2              Applications                                         128
               Deep Learning                                         15
               Algorithms                                             8

In [48]:
poster_df.to_csv('poster_new.csv',
                 index=False, columns=['Paper ID', 'Paper Title',
                                'Abstract', 'Subject Areas',
                                'Primary Subject Area', 'Top-level Primary Subject Area',
                                'tsne_all', 'tsne_tpms', 'Decision',
                                'Session', 'PosterSession'], encoding='utf-8')