# This notebook contains the figures relating to Tables 1 and 2 of the paper

The performance may differ slightly from the figures reported in the paper because we did not fix the random seed used to draw the datasets. However, this does not change any of the conclusions!

In [1]:
import utils
import numpy as np
from IPython.display import HTML, display, Markdown
import pandas as pd

import ot
import partial_gw as pgw
import matplotlib.pyplot as plt



## UCI dataset - SCAR scenario

In [2]:
# Shared settings passed to the pgw.compute_perf_* calls of the UCI section.
# NOTE(review): n_unl / n_pos are presumably the numbers of unlabeled and
# positive points drawn per run, and nb_reps the number of repetitions;
# confirm against partial_gw.
n_unl, n_pos = 800, 400
nb_reps = nb_dummies = 10


### Partial-W

In [3]:
# (dataset name, prior) pairs for the UCI benchmarks, in the order they are
# evaluated. Each name is passed as both dataset arguments of compute_perf_emd.
uci_emd_runs = [
    ('mushrooms', 0.518),
    ('shuttle', 0.786),
    ('pageblocks', 0.898),
    ('usps', 0.167),
    ('connect-4', 0.658),
    ('spambase', 0.394),
]

# One partial-W evaluation per dataset. `prior` is the loop variable, so it is
# left bound to the last value (0.394) once the loop finishes.
uci_emd_out = {}
for uci_name, prior in uci_emd_runs:
    uci_emd_out[uci_name] = pgw.compute_perf_emd(
        uci_name, uci_name, n_unl, n_pos, prior, nb_reps, nb_dummies
    )

# Expose the two returned values of every run under per-dataset names.
perfs_mushrooms, perfs_list_mushrooms = uci_emd_out['mushrooms']
perfs_shuttle, perfs_list_shuttle = uci_emd_out['shuttle']
perfs_pageblocks, perfs_list_pageblocks = uci_emd_out['pageblocks']
perfs_usps, perfs_list_usps = uci_emd_out['usps']
perfs_connect4, perfs_list_connect4 = uci_emd_out['connect-4']
perfs_spambase, perfs_list_spambase = uci_emd_out['spambase']

# Keep the 'emd_groups' entry of each run for the results table.
avg_mush_emd_groups = perfs_mushrooms['emd_groups']
avg_shut_emd_groups = perfs_shuttle['emd_groups']
avg_page_emd_groups = perfs_pageblocks['emd_groups']
avg_usps_emd_groups = perfs_usps['emd_groups']
avg_connect4_emd_groups = perfs_connect4['emd_groups']
avg_spambase_emd_groups = perfs_spambase['emd_groups']

### Partial-GW

In [4]:
# (dataset name, prior) pairs for the UCI benchmarks, in the order they are
# evaluated. Each name is passed as both dataset arguments of compute_perf_pgw.
uci_pgw_runs = [
    ('mushrooms', 0.518),
    ('shuttle', 0.786),
    ('pageblocks', 0.898),
    ('usps', 0.167),
    ('connect-4', 0.658),
    ('spambase', 0.394),
]

# One partial-GW evaluation per dataset. `prior` is the loop variable, so it is
# left bound to the last value (0.394) once the loop finishes.
uci_pgw_out = {}
for uci_name, prior in uci_pgw_runs:
    uci_pgw_out[uci_name] = pgw.compute_perf_pgw(
        uci_name, uci_name, n_unl, n_pos, prior, nb_reps, nb_dummies
    )

# Expose the two returned values of every run under per-dataset names.
perfs_mushrooms, perfs_list_mushrooms = uci_pgw_out['mushrooms']
perfs_shuttle, perfs_list_shuttle = uci_pgw_out['shuttle']
perfs_pageblocks, perfs_list_pageblocks = uci_pgw_out['pageblocks']
perfs_usps, perfs_list_usps = uci_pgw_out['usps']
perfs_connect4, perfs_list_connect4 = uci_pgw_out['connect-4']
perfs_spambase, perfs_list_spambase = uci_pgw_out['spambase']

# Keep the 'pgw' entry of each run for the results table.
avg_mush_gw_groups = perfs_mushrooms['pgw']
avg_shut_gw_groups = perfs_shuttle['pgw']
avg_page_gw_groups = perfs_pageblocks['pgw']
avg_usps_gw_groups = perfs_usps['pgw']
avg_connect4_gw_groups = perfs_connect4['pgw']
avg_spambase_gw_groups = perfs_spambase['pgw']

### Tab. 1

In [5]:
# Assemble the UCI rows of Table 1: one row per dataset, with the prior used
# for the run and the partial-W / partial-GW scores computed above.
# The column label is a raw string: '\p' is not a valid escape sequence, so the
# non-raw form triggers an invalid-escape warning on recent Python versions.
# The resulting label (backslash included) is unchanged.
results_UCI = pd.DataFrame(data={
    'dataset': ['mushrooms', 'shuttle', 'pageblocks', 'usps', 'connect-4', 'spambase'],
    r'$\pi$': [0.518, 0.786, 0.898, 0.167, 0.658, 0.394],
    'p-W': [
        avg_mush_emd_groups, avg_shut_emd_groups, avg_page_emd_groups,
        avg_usps_emd_groups, avg_connect4_emd_groups, avg_spambase_emd_groups,
    ],
    'p-GW': [
        avg_mush_gw_groups, avg_shut_gw_groups, avg_page_gw_groups,
        avg_usps_gw_groups, avg_connect4_gw_groups, avg_spambase_gw_groups,
    ],
})
results_UCI

Unnamed: 0,dataset,$\pi$,p-W,p-GW
0,mushrooms,0.518,0.9605,0.9465
1,shuttle,0.786,0.957,0.943
2,pageblocks,0.898,0.921,0.907
3,usps,0.167,0.985,0.9415
4,connect-4,0.658,0.607,0.595
5,spambase,0.394,0.7825,0.7075


## Colored MNIST dataset - SAR scenario

In [6]:
# Shared settings passed to the pgw.compute_perf_* calls of the MNIST section.
# A single prior is used for both MNIST experiments.
# NOTE(review): n_unl / n_pos are presumably the numbers of unlabeled and
# positive points drawn per run; confirm against partial_gw.
n_unl, n_pos = 800, 400
nb_reps = nb_dummies = 10
prior = 0.1

### Partial-W

In [7]:
# Dataset pairs passed as the first two arguments of compute_perf_emd:
# plain MNIST against itself, then the two colour-changed variants.
# NOTE(review): the '_p' / '_u' suffixes presumably mean positive / unlabeled;
# confirm against partial_gw.
mnist_emd_pairs = [
    ('mnist', 'mnist'),
    ('mnist_color_change_p', 'mnist_color_change_u'),
]

# perfs_mnist / perfs_list_mnist are rebound on each pass and end up holding
# the result of the colour-changed run.
mnist_emd_scores = []
for first_name, second_name in mnist_emd_pairs:
    perfs_mnist, perfs_list_mnist = pgw.compute_perf_emd(
        first_name, second_name, n_unl, n_pos, prior, nb_reps, nb_dummies
    )
    mnist_emd_scores.append(perfs_mnist['emd_groups'])

avg_mnist_emd_groups, avg_colored_mnist_emd_groups = mnist_emd_scores

### Partial-GW

In [8]:
# Dataset pairs passed as the first two arguments of compute_perf_pgw:
# plain MNIST against itself, then the two colour-changed variants.
# NOTE(review): the '_p' / '_u' suffixes presumably mean positive / unlabeled;
# confirm against partial_gw.
mnist_pgw_pairs = [
    ('mnist', 'mnist'),
    ('mnist_color_change_p', 'mnist_color_change_u'),
]

# perfs_mnist / perfs_list_mnist are rebound on each pass and end up holding
# the result of the colour-changed run.
mnist_pgw_scores = []
for first_name, second_name in mnist_pgw_pairs:
    perfs_mnist, perfs_list_mnist = pgw.compute_perf_pgw(
        first_name, second_name, n_unl, n_pos, prior, nb_reps, nb_dummies
    )
    mnist_pgw_scores.append(perfs_mnist['pgw'])

avg_mnist_gw_groups, avg_colored_mnist_gw_groups = mnist_pgw_scores


### Tab. 1 (continued)

In [9]:
# Assemble the MNIST rows of Table 1 from the partial-W / partial-GW scores
# computed above.
# The column label is a raw string: '\p' is not a valid escape sequence, so the
# non-raw form triggers an invalid-escape warning on recent Python versions.
# The resulting label (backslash included) is unchanged.
results_MNIST = pd.DataFrame(data={
    'dataset': ['mnist', 'colored mnist'],
    r'$\pi$': [0.1, 0.1],
    'p-W': [avg_mnist_emd_groups, avg_colored_mnist_emd_groups],
    'p-GW': [avg_mnist_gw_groups, avg_colored_mnist_gw_groups],
})
results_MNIST

Unnamed: 0,dataset,$\pi$,p-W,p-GW
0,mnist,0.1,0.9875,0.9785
1,colored mnist,0.1,0.9145,0.968


## Caltech dataset - PU on different domains

In [10]:
# Shared settings passed to the pgw.compute_perf_* calls of the Caltech sections.
# NOTE(review): n_unl / n_pos are presumably the numbers of unlabeled and
# positive points drawn per run; confirm against partial_gw.
n_unl = n_pos = 100
nb_reps = nb_dummies = 10
prior = 0.1

In [11]:
# Scores are accumulated in run order: the four surf runs first, then the
# four decaf runs.
avg_caltech_emd_groups, avg_caltech_gw_groups = [], []

# For each feature type: the dataset passed as first argument, and the list of
# datasets passed in turn as second argument.
same_feature_runs = [
    ('surf_Caltech10', ['surf_Caltech10', 'surf_amazon', 'surf_webcam', 'surf_dslr']),
    ('decaf_caltech', ['decaf_caltech', 'decaf_amazon', 'decaf_webcam', 'decaf_dslr']),
]

for caltech_source, domain_u in same_feature_runs:
    for d in domain_u:
        # Partial-W first, then partial-GW, on the same dataset pair.
        perfs_caltech_surf, perfs_list_caltech_surf = pgw.compute_perf_emd(
            caltech_source, d, n_unl, n_pos, prior, nb_reps, nb_dummies
        )
        avg_caltech_emd_groups.append(perfs_caltech_surf['emd_groups'])

        perfs_caltech_surf, perfs_list_caltech_surf = pgw.compute_perf_pgw(
            caltech_source, d, n_unl, n_pos, prior, nb_reps, nb_dummies
        )
        avg_caltech_gw_groups.append(perfs_caltech_surf['pgw'])

### Tab. 1 (continued)

In [12]:
# Assemble the Caltech rows of Table 1: four surf runs followed by four decaf
# runs, matching the order in which the score lists were filled.
# The column label is a raw string: '\p' is not a valid escape sequence, so the
# non-raw form triggers an invalid-escape warning on recent Python versions.
# The resulting label (backslash included) is unchanged.
results_caltech_diff_domains = pd.DataFrame(data={
    'dataset': [
        'surf C -> surf C', 'surf C -> surf A', 'surf C -> surf W', 'surf C -> surf D',
        'decaf C -> decaf C', 'decaf C -> decaf A', 'decaf C -> decaf W', 'decaf C -> decaf D',
    ],
    r'$\pi$': [0.1] * 8,
    'p-W': avg_caltech_emd_groups,
    'p-GW': avg_caltech_gw_groups,
})
results_caltech_diff_domains

Unnamed: 0,dataset,$\pi$,p-W,p-GW
0,surf C -> surf C,0.1,0.9,0.864
1,surf C -> surf A,0.1,0.816,0.87
2,surf C -> surf W,0.1,0.822,0.862
3,surf C -> surf D,0.1,0.8,0.88
4,decaf C -> decaf C,0.1,0.94,0.862
5,decaf C -> decaf A,0.1,0.802,0.878
6,decaf C -> decaf W,0.1,0.802,0.886
7,decaf C -> decaf D,0.1,0.808,0.924


## Caltech dataset - PU on different feature spaces

In [13]:
avg_caltech_gw_groups_surf = []
avg_caltech_gw_groups_decaf = []

# Each entry: dataset passed as first argument, datasets passed in turn as
# second argument, and the list that collects the scores. The first and second
# datasets always come from different feature types (decaf vs. surf).
cross_feature_runs = [
    ('decaf_caltech',
     ['surf_Caltech10', 'surf_amazon', 'surf_webcam', 'surf_dslr'],
     avg_caltech_gw_groups_decaf),
    ('surf_Caltech10',
     ['decaf_caltech', 'decaf_amazon', 'decaf_webcam', 'decaf_dslr'],
     avg_caltech_gw_groups_surf),
]

for caltech_source, domain_u, score_sink in cross_feature_runs:
    for d in domain_u:
        perfs_caltech_surf, perfs_list_caltech_surf = pgw.compute_perf_pgw(
            caltech_source, d, n_unl, n_pos, prior, nb_reps, nb_dummies
        )
        score_sink.append(perfs_caltech_surf['pgw'])

### Tab. 2

In [14]:
# Table 2: partial-GW scores when the two datasets use different feature
# types; one row per second-argument domain (C, A, W, D).
results_caltech_diff_features = pd.DataFrame(
    data={
        'domains': ['*= C', '*= A', '*= W', '*= D'],
        'surf C -> decaf *': avg_caltech_gw_groups_surf,
        'decaf C -> surf *': avg_caltech_gw_groups_decaf,
    }
)
results_caltech_diff_features

Unnamed: 0,domains,surf C -> decaf *,decaf C -> surf *
0,*= C,0.874,0.86
1,*= A,0.94,0.866
2,*= W,0.944,0.894
3,*= D,0.966,0.866
