In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from scipy.stats import mode
from tqdm.notebook import tqdm
from itertools import product
from multiprocessing import Pool

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
df1 = pd.read_csv('../../finematching_result_phone1.csv', index_col=None)
df2 = pd.read_csv('../../finematching_result_phone2.csv', index_col=None)
df3 = pd.read_csv('../../finematching_result_phone3.csv', index_col=None)

In [3]:
df = pd.concat([df1, df2, df3])

In [4]:
#df = pd.read_csv('../../finematching_result_phone1.csv', index_col=None)
df = df.drop('Unnamed: 0', axis=1)
df = df.rename(columns={
    '0':'sample_actual',
    '1':'segmentation_algorithm',
    '2':'package_actual',
    '3':'phone',
    '4':'package_candidate',
    '5':'descriptor',
    '6':'kp_amount_actual',
    '7':'kp_amount_candidate',
    '8':'matches_amount',
    '9':'good_matches_amount',
    '10':'ransac_matches_amount',
})
df.head(10)
excluded_packages = ['0235_D01_S001', '0200_D01_S001', '0049_D01_S001', '0842_D01_S001', '0337_D01_S001', '0263_D01_S001', '0735_D01_S001', '0541_D01_S001', '0311_D01_S001']
df = df[~df.package_actual.isin(excluded_packages)]

In [5]:
df.sample(10)

Unnamed: 0,sample_actual,segmentation_algorithm,package_actual,phone,package_candidate,descriptor,kp_amount_actual,kp_amount_candidate,matches_amount,good_matches_amount,ransac_matches_amount
614136,Ph1_P0516_D01_S001_C1_az120,MSER,0516_D01_S001,3,0516_D01_S001,SIFT,1285.0,1046.0,1285.0,382.0,307.0
1313008,Ph2_P0237_D01_S001_C3_az120,MI1,0237_D01_S001,2,0744_D01_S001,SIFT,685.0,1649.0,685.0,5.0,
1591523,Ph2_P0484_D01_S001_C2_az140,MI1,0484_D01_S001,1,0888_D01_S001,AKAZE,1939.0,3347.0,1939.0,7.0,
2857595,Ph3_P0711_D01_S001_C4_az280,MSER,0711_D01_S001,3,0005_D01_S003,AKAZE,1242.0,1778.0,1242.0,0.0,
542454,Ph1_P0459_D01_S001_C2_az140,MSER,0459_D01_S001,2,0459_D01_S001,ORB,500.0,500.0,500.0,16.0,13.0
2050841,Ph3_P0011_D01_S001_C4_az060,MI1,0011_D01_S001,1,0390_D01_S001,AKAZE,539.0,800.0,539.0,4.0,
1713972,Ph2_P0590_D01_S001_C1_az220,MI1,0590_D01_S001,1,0863_D01_S001,AKAZE,1491.0,2952.0,1491.0,4.0,
1607557,Ph2_P0491_D02_S003_C3_az040,MI1,0491_D02_S003,1,0491_D01_S006,AKAZE,730.0,1948.0,730.0,32.0,31.0
2406303,Ph3_P0299_D01_S009_C2_az160,MSER,0299_D01_S009,2,0299_D01_S001,ORB,500.0,500.0,500.0,16.0,14.0
2874799,Ph3_P0728_D01_S001_C2_az260,MSER,0728_D01_S001,3,0728_D01_S001,SIFT,1374.0,1353.0,1374.0,449.0,395.0


In [6]:
df = df[~df.ransac_matches_amount.isna()]

In [7]:
df.matches_kp_ratio.isna().value_counts()

AttributeError: 'DataFrame' object has no attribute 'matches_kp_ratio'

In [8]:
df['package_match'] = (df.package_actual == df.package_candidate)
df['ransac_kp_ratio'] = df.ransac_matches_amount / df.kp_amount_actual
df['ransac_matches_ratio'] = df.ransac_matches_amount / df.good_matches_amount
df['matches_kp_ratio'] = df.good_matches_amount / df.kp_amount_actual
df['kp_ratio'] = df.kp_amount_actual / df.kp_amount_candidate

In [None]:
plt.figure(figsize=(8, 8))
s = plt.hist(df[df.package_match].ransac_matches_ratio, alpha=0.5, bins=100, label='match')
s = plt.hist(df[~df.package_match].ransac_matches_ratio, alpha=0.5, bins=100, label='~match')
plt.title('DEGENSAC inliers ratio')
plt.legend(loc='upper left')

In [None]:
plt.figure(figsize=(8, 8))
s = plt.hist(df[df.package_match].matches_kp_ratio, alpha=0.5, bins=200, label='match')
s = plt.hist(df[~df.package_match].matches_kp_ratio, alpha=0.5, bins=200, label='~match')
#plt.yscale('log')
#plt.xscale('log')
plt.title('Matches to verification keypoints ratio')
plt.legend(loc='upper left')

In [None]:
plt.figure(figsize=(8, 8))
s = plt.hist(df[df.package_match].ransac_kp_ratio, alpha=0.5, bins=200, label='match')
s = plt.hist(df[~df.package_match].ransac_kp_ratio, alpha=0.5, bins=200, label='~match')
#plt.yscale('log')
#plt.xscale('log')
plt.title('Inliers to verification keypoints ratio')
plt.legend(loc='upper left')

In [None]:
(df[~df.package_match].matches_kp_ratio < df[df.package_match].matches_kp_ratio.min()).value_counts()

In [None]:
list(product(df.segmentation_algorithm.unique(), df.descriptor.unique(), df.phone.unique()))

In [None]:
accuracies = {}
for algorithm, descriptor, phone in product(df.segmentation_algorithm.unique(), df.descriptor.unique(), df.phone.unique()):
    df_sub = df[(df.segmentation_algorithm == algorithm) & (df.descriptor == descriptor) & (df.phone == phone)]
    df_m = df_sub.loc[df_sub.groupby("sample_actual")["ransac_kp_ratio"].idxmax()]
    ac = df_m['package_match'].value_counts().to_dict()[True] / df_sub.sample_actual.nunique()
    print(algorithm, descriptor, phone, ac)
    accuracies[(algorithm, descriptor, phone)] = ac

In [None]:
df_sub.shape

In [None]:
df_s = df[(df.descriptor == 'SIFT') & (df.segmentation_algorithm == 'MI1')]
df_m = df_s.loc[df_s.groupby(["sample_actual", "segmentation_algorithm", 'descriptor'])["ransac_kp_ratio"].idxmax()]
df_p = df_m.pivot_table(index='sample_actual', columns='package_match', aggfunc='size', fill_value=0).reset_index()
df_p

In [None]:
len(df_p[df_piv[True] > df_p[False]].index) / df_p.sample_actual.nunique()

In [None]:
1 - df[df.package_match].sample_actual.nunique() / df.sample_actual.nunique()

In [None]:
df_piv

In [None]:
.shape

In [None]:
total_samples_amount = df.sample_actual.nunique()

In [None]:
df_sub = df[(df.segmentation_algorithm == 'MI1') & (df.descriptor == 'AKAZE')]

In [None]:
1 - (df[(df.descriptor == 'SIFT') & (~df.ransac_matches_amount.isna())].sample_actual.nunique() / total_samples_amount)

In [None]:
df[['sample_actual', 'package_actual', 'segmentation_algorithm', 'package_candidate', 'descriptor']].\
pivot_table(index=['sample_actual','package_actual'], columns=['segmentation_algorithm', 'descriptor'], values='package_candidate', aggfunc=lambda x: [s for s in x])

In [None]:
df.package_candidate.nunique()

In [49]:
df_sub = df[(df.phone == 1) & (df.descriptor == 'SIFT')][['descriptor', 'sample_actual', 'package_actual', 'segmentation_algorithm', 'package_candidate', 'ransac_kp_ratio', 'ransac_matches_ratio', 'matches_kp_ratio']]
df_sub.sample(10)

Unnamed: 0,descriptor,sample_actual,package_actual,segmentation_algorithm,package_candidate,ransac_kp_ratio,ransac_matches_ratio,matches_kp_ratio
911220,SIFT,Ph1_P0795_D01_S001_C1_az340,0795_D01_S001,MI1,0099_D01_S001,0.017871,0.389831,0.045843
2577082,SIFT,Ph3_P0456_D01_S001_C2_az180,0456_D01_S001,MI1,0003_D02_S001,0.022727,0.75,0.030303
1828541,SIFT,Ph2_P0698_D01_S001_C2_az340,0698_D01_S001,MSER,0238_D01_S003,0.020228,0.217877,0.092842
310101,SIFT,Ph1_P0248_D01_S001_C4_az200,0248_D01_S001,MI1,0265_D01_S001,0.003711,0.287129,0.012924
1960963,SIFT,Ph2_P0825_D01_S001_C2_az280,0825_D01_S001,MI1,0728_D01_S001,0.029412,0.346154,0.084967
681924,SIFT,Ph1_P0578_D01_S001_C2_az080,0578_D01_S001,MSER,0578_D01_S001,0.39916,0.866788,0.460504
2764015,SIFT,Ph3_P0620_D01_S001_C1_az320,0620_D01_S001,MI1,0531_D01_S001,0.009319,0.180556,0.051613
1857757,SIFT,Ph2_P0727_D01_S001_C2_az220,0727_D01_S001,MSER,0588_D01_S001,0.011609,0.358974,0.032338
1973484,SIFT,Ph2_P0837_D01_S001_C1_az040,0837_D01_S001,MI1,0083_D01_S001,0.008615,0.351351,0.02452
1979583,SIFT,Ph2_P0843_D01_S001_C2_az040,0843_D01_S001,MSER,0223_D02_S001,0.018182,0.180995,0.100455


In [50]:
dfs = [x for _, x in df_sub.groupby('sample_actual')]

In [51]:
def calc_one(df):
    return df.pivot_table(
    index=['sample_actual', 'package_actual', 'package_candidate'],
    columns=['segmentation_algorithm'], # , 'descriptor'
).rank(
    method='max', na_option='bottom', ascending=False
).sum(axis=1).idxmin()

In [52]:
pool = Pool(processes=42)
res = pool.map(calc_one, dfs)
pool.close()

In [53]:
success = 0
fail = []
for r in res:
    if r[1][:-5] == r[2][:-5]:
        success += 1
    else:
        fail.append(r)
success / len(res)

0.9787508176574362

In [47]:
fail

[('Ph1_P0007_D01_S001_C2_az100', '0007_D01_S001', '0389_D01_S001'),
 ('Ph1_P0009_D01_S001_C3_az080', '0009_D01_S001', '0733_D01_S001'),
 ('Ph1_P0009_D01_S001_C3_az300', '0009_D01_S001', '0733_D01_S001'),
 ('Ph1_P0009_D01_S001_C3_az320', '0009_D01_S001', '0441_D01_S001'),
 ('Ph1_P0011_D01_S001_C4_az060', '0011_D01_S001', '0361_D01_S001'),
 ('Ph1_P0011_D01_S001_C4_az340', '0011_D01_S001', '0012_D01_S001'),
 ('Ph1_P0019_D01_S002_C2_az220', '0019_D01_S002', '0080_D01_S001'),
 ('Ph1_P0019_D01_S002_C2_az280', '0019_D01_S002', '0080_D01_S001'),
 ('Ph1_P0031_D01_S001_C2_az180', '0031_D01_S001', '0274_D01_S001'),
 ('Ph1_P0033_D01_S001_C2_az060', '0033_D01_S001', '0085_D01_S001'),
 ('Ph1_P0033_D01_S001_C2_az260', '0033_D01_S001', '0085_D01_S001'),
 ('Ph1_P0048_D02_S001_C2_az020', '0048_D02_S001', '0495_D01_S001'),
 ('Ph1_P0056_D01_S001_C1_az080', '0056_D01_S001', '0085_D01_S001'),
 ('Ph1_P0056_D01_S001_C1_az180', '0056_D01_S001', '0501_D01_S001'),
 ('Ph1_P0056_D01_S001_C1_az300', '0056_D01_S001'

In [35]:
dfs[0].pivot_table(
    index=['sample_actual', 'package_actual', 'package_candidate'],
    columns=['phone', 'segmentation_algorithm', 'descriptor'],
).rank(
    method='max', na_option='bottom', ascending=False
).sum(axis=1).idxmin()

('Ph1_P0001_D01_S001_C4_az020', '0001_D01_S001', '0001_D01_S001')