In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from scipy.stats import mode
from tqdm.notebook import tqdm
from p_tqdm import p_map

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [16]:
# Folder holding the LOPQ result CSVs that the cells below read from and write to.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
csv_folder = Path("/ndata/chaban/pharmapack/CSVs/lopq/")

In [9]:
# Load the raw per-algorithm result tables (MSER first, then MI1).
df_mser, df_mi1 = (
    pd.read_csv(csv_folder / csv_name, index_col=None)
    for csv_name in (
        "LOPQ:MSER:resnet50:512:20.csv",
        "LOPQ:MI1:resnet50:512:20.csv",
    )
)

In [10]:
def prepare_df(df, margin):
    """Build a tidy comparison table from a raw LOPQ result frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw results whose columns '0', '1' and '2' hold the actual uuid, the
        predicted uuid and the distance. A frame that already uses the names
        'uuid_actual', 'uuid_predicted' and 'distance' is accepted as well.
        The input frame is left untouched.
    margin : int
        Number of leading characters dropped from both uuid columns before
        any field is sliced out of them.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed like ``df`` with the columns ``distance``,
        ``uuid_actual``, ``uuid_predicted``, ``sample_actual``,
        ``package_actual``, ``descriptor_actual`` and ``package_predicted``.

    Notes
    -----
    The sample, package and descriptor fields are taken at fixed character
    positions of the trimmed uuid — presumably the uuid has a fixed-width
    layout; TODO confirm against the code that generates the uuids.
    """
    column_names = {'0': 'uuid_actual', '1': 'uuid_predicted', '2': 'distance'}
    # Rename on a copy: an in-place rename would silently change the caller's frame.
    named = df.rename(columns=column_names)

    # Fixed character positions inside the trimmed uuid.
    sample_chars = slice(4, 31)
    package_chars = slice(9, 22)
    descriptor_chars = slice(38, None)

    uuid_actual = named['uuid_actual'].str[margin:]
    uuid_predicted = named['uuid_predicted'].str[margin:]

    # Builtin float/int are used because the np.float/np.int aliases
    # (which pointed at these builtins) were removed in NumPy 1.24.
    df_ret = pd.DataFrame({
        'distance': named['distance'].astype(float),
        'uuid_actual': uuid_actual,
        'uuid_predicted': uuid_predicted,
        'sample_actual': uuid_actual.str[sample_chars],
        'package_actual': uuid_actual.str[package_chars],
        'descriptor_actual': uuid_actual.str[descriptor_chars].astype(int),
        'package_predicted': uuid_predicted.str[package_chars],
    })
    return df_ret
    

In [11]:
# The margin differs per algorithm — presumably the uuid prefix embeds the
# algorithm name, making MSER's one character longer than MI1's; TODO confirm.
df_mser_prep = prepare_df(df=df_mser, margin=18)
df_mi1_prep = prepare_df(df=df_mi1, margin=17)

In [12]:
# Tag every row with the algorithm that produced it, so the rows remain
# distinguishable once both frames are combined into one table.
df_mser_prep['algorithm'] = 'MSER'
df_mi1_prep['algorithm'] = 'MI1'

In [13]:
# Stack both algorithms' rows into a single table. ignore_index=True gives the
# result a fresh 0..n-1 index; without it each input keeps its own row labels,
# so overlapping labels would be duplicated and label-based lookups ambiguous.
df_combined = pd.concat([df_mser_prep, df_mi1_prep], ignore_index=True)

In [14]:
# A row is a match when the predicted package equals the actual package.
df_combined['match'] = df_combined['package_actual'].eq(
    df_combined['package_predicted']
)

In [23]:
# Persist the full combined table (row index omitted).
df_combined.to_csv(
    path_or_buf=csv_folder / "LOPQ:BOTH:resnet50:512:20.csv",
    index=False,
)

In [24]:
# Persist a smaller extract: only rows belonging to the first 20 distinct
# sample_actual values (in order of first appearance), row index omitted.
df_combined.loc[
    df_combined['sample_actual'].isin(df_combined['sample_actual'].unique()[:20])
].to_csv(csv_folder / "LOPQ:BOTH:resnet50:512:20:part.csv", index=False)

In [25]:
# Sanity check: (rows, columns) of the combined table.
df_combined.shape

(44635520, 9)

In [26]:
# Sanity check: both algorithm labels should be present after combining.
df_combined['algorithm'].unique()

array(['MSER', 'MI1'], dtype=object)