In [113]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Embedding the title and query into vector space

In [2]:
model = SentenceTransformer('all-mpnet-base-v2')

In [3]:
data_df = pd.read_csv('data/final_data.csv')
data_df.drop(columns=['Unnamed: 0'], inplace=True)
data_df['abstract'] = data_df['abstract'].astype(str)
data_df['title'] = data_df['title'].astype(str)

In [4]:
data_df.head()

Unnamed: 0,area,rank_gs,title,rate_gs,rank_pc,rate_pc,rank_ss,rate_ss,agg_rate,agg_rank,rank,pdf_link,abstract,file_name,citationCount,referenceCount,influentialCitationCount
0,3D Reconstruction,9.0,occupancy networks: learning 3d reconstruction...,5.0,18.0,5.0,1.0,5.0,5.0,10.2,1,http://openaccess.thecvf.com/content_CVPR_2019...,"With the advent of deep neural networks, learn...",3ocnele3dreinfusp,623.0,86.0,136.0
1,3D Reconstruction,68.0,weakly supervised 3d reconstruction with adver...,4.0,4.0,5.0,29.0,5.0,4.7,30.7,2,https://arxiv.org/pdf/1705.10904,Supervised 3D reconstruction has witnessed a s...,3wesu3drewiadco,85.0,60.0,3.0
2,3D Reconstruction,95.0,infinitam v3: a framework for large-scale 3d r...,4.0,17.0,5.0,32.0,5.0,4.7,44.9,3,https://arxiv.org/pdf/1708.00783,Volumetric models have become a popular repres...,3inv3afrfola3drewilocl,59.0,13.0,6.0
3,3D Reconstruction,30.0,pix2vox: context-aware 3d reconstruction from ...,5.0,45.0,4.0,5.0,5.0,4.6,28.5,4,http://openaccess.thecvf.com/content_ICCV_2019...,Recovering the 3D representation of an object ...,3pico3drefrsianmuim,70.0,49.0,8.0
4,3D Reconstruction,33.0,image2mesh: a learning framework for single im...,5.0,94.0,4.0,15.0,5.0,4.6,52.0,5,https://arxiv.org/pdf/1711.10669,One challenge that remains open in 3D deep lea...,3imalefrfosiim3dre,70.0,53.0,3.0


In [5]:
text_info = data_df[['title', 'abstract']].copy()
text_info['text'] = text_info['title'] + '\n' + text_info['abstract']

In [6]:
text_info.head()

Unnamed: 0,title,abstract,text
0,occupancy networks: learning 3d reconstruction...,"With the advent of deep neural networks, learn...",occupancy networks: learning 3d reconstruction...
1,weakly supervised 3d reconstruction with adver...,Supervised 3D reconstruction has witnessed a s...,weakly supervised 3d reconstruction with adver...
2,infinitam v3: a framework for large-scale 3d r...,Volumetric models have become a popular repres...,infinitam v3: a framework for large-scale 3d r...
3,pix2vox: context-aware 3d reconstruction from ...,Recovering the 3D representation of an object ...,pix2vox: context-aware 3d reconstruction from ...
4,image2mesh: a learning framework for single im...,One challenge that remains open in 3D deep lea...,image2mesh: a learning framework for single im...


In [7]:
title_embeddings = []
for title in tqdm(text_info['title']):
    title_embeddings.append(model.encode(title))

100%|██████████| 8255/8255 [02:56<00:00, 46.72it/s]


In [8]:
abstract_embeddings = []
for abstract in tqdm(text_info['abstract']):
    abstract_embeddings.append(model.encode('abstract'))

100%|██████████| 8255/8255 [02:41<00:00, 51.16it/s]


In [9]:
total_embeddings = []
for total in tqdm(text_info['text']):
    total_embeddings.append(model.encode(total))

100%|██████████| 8255/8255 [06:42<00:00, 20.50it/s]


In [10]:
data_df['title_embeddings'] = title_embeddings
data_df['abstract_embeddings'] = abstract_embeddings
data_df['total_embeddings'] = total_embeddings

In [11]:
data_df.to_pickle('data/data.pkl')

# Calculate centroids

In [114]:
import numpy as np
import pickle

In [115]:
data_df = pd.read_pickle('data/data.pkl')

In [74]:
temp_df = data_df[['area','title_embeddings', 'abstract_embeddings', 'total_embeddings']].copy()

In [75]:
centroid_df = temp_df.groupby('area').mean()

In [78]:
centroid_dict = {area: centroid_df.loc[area, ['title_embeddings', 'abstract_embeddings', 'total_embeddings']].tolist() for area in centroid_df.index}

In [70]:
centroid_df.to_pickle('data/centroids.pkl')

In [83]:
with open ('data/centroids_dict.pkl', 'wb') as f:
    pickle.dump(centroid_dict, f)

# Create PT indexing

In [71]:
import pyterrier as pt
import pandas as pd
import numpy as np

In [27]:
if not pt.started():
    pt.init()

In [28]:
index_dir = './data/index'
indexer = pt.DFIndexer(index_dir, overwrite=True)

In [29]:
data_df = pd.read_pickle('data/data.pkl')

In [30]:
data_df.head()

Unnamed: 0,area,rank_gs,title,rate_gs,rank_pc,rate_pc,rank_ss,rate_ss,agg_rate,agg_rank,rank,pdf_link,abstract,file_name,citationCount,referenceCount,influentialCitationCount,title_embeddings,abstract_embeddings,total_embeddings
0,3D Reconstruction,9.0,occupancy networks: learning 3d reconstruction...,5.0,18.0,5.0,1.0,5.0,5.0,10.2,1,http://openaccess.thecvf.com/content_CVPR_2019...,"With the advent of deep neural networks, learn...",3ocnele3dreinfusp,623.0,86.0,136.0,"[-0.05088244, 0.053124595, 0.0023723403, 0.022...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.060328517, 0.06280401, 0.026933515, 0.0345..."
1,3D Reconstruction,68.0,weakly supervised 3d reconstruction with adver...,4.0,4.0,5.0,29.0,5.0,4.7,30.7,2,https://arxiv.org/pdf/1705.10904,Supervised 3D reconstruction has witnessed a s...,3wesu3drewiadco,85.0,60.0,3.0,"[0.041443087, 0.09971384, 0.034612227, 0.05238...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.014805883, 0.093130484, 0.03753042, 0.0462..."
2,3D Reconstruction,95.0,infinitam v3: a framework for large-scale 3d r...,4.0,17.0,5.0,32.0,5.0,4.7,44.9,3,https://arxiv.org/pdf/1708.00783,Volumetric models have become a popular repres...,3inv3afrfola3drewilocl,59.0,13.0,6.0,"[-0.027944634, 0.058888804, 0.016118925, 0.007...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[0.007331516, -0.0009151098, 0.054103583, 0.03..."
3,3D Reconstruction,30.0,pix2vox: context-aware 3d reconstruction from ...,5.0,45.0,4.0,5.0,5.0,4.6,28.5,4,http://openaccess.thecvf.com/content_ICCV_2019...,Recovering the 3D representation of an object ...,3pico3drefrsianmuim,70.0,49.0,8.0,"[-0.03636022, 0.038363226, 0.06699201, 0.05293...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.021163054, 0.089947246, 0.057269774, 0.069..."
4,3D Reconstruction,33.0,image2mesh: a learning framework for single im...,5.0,94.0,4.0,15.0,5.0,4.6,52.0,5,https://arxiv.org/pdf/1711.10669,One challenge that remains open in 3D deep lea...,3imalefrfosiim3dre,70.0,53.0,3.0,"[-0.046021443, 0.087248586, 0.038778853, 0.037...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.08701608, 0.06889409, 0.028026856, 0.01509..."


In [116]:
papers = data_df[['title', 'abstract', 'title_embeddings', 'abstract_embeddings', 'area', 'total_embeddings']].copy()
papers = papers.groupby(['title']).agg({'title': 'first', 'abstract': 'first', 'title_embeddings': 'first', 'abstract_embeddings': 'first', 'area': ','.join}).reset_index(drop=True)
# papers = papers.drop_duplicates(subset=['title']).reset_index(drop=True)
papers['docno'] = papers.index.to_series().apply(lambda x: str(x))
papers.rename(columns={'abstract': 'text'}, inplace=True)
papers['text'] = papers['text'].astype(str)

In [42]:
papers.head()

Unnamed: 0,title,text,title_embeddings,abstract_embeddings,area,docno
0,"""zero-shot"" super-resolution using deep intern...",Generative adversarial networks (GANs) are amo...,"[-0.07775616, 0.05288399, 0.03650218, 0.038884...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","Image Compression,Super-Resolution",0
1,(blockchain technology and industry 4.0 at aut...,"Through a literature review, this paper aims t...","[-0.020874746, 0.028415756, -0.016895752, 0.05...","[0.012619468, 0.06828206, -0.0018017464, 0.002...",Autonomous vehicles,1
2,"(male, bachelor) and (female, ph.d) have diffe...",Stylistic variation in text needs to be studie...,"[0.05497964, 0.056969024, -0.04139412, 0.01792...","[0.012619468, 0.06828206, -0.0018017464, 0.002...",Style Transfer,2
3,124-color super-resolution imaging by engineer...,Optical super-resolution techniques reach unpr...,"[0.0032263368, 0.015332368, -0.0021535154, -0....","[0.012619468, 0.06828206, -0.0018017464, 0.002...",Super-Resolution,3
4,1st place solution for ava-kinetics crossover ...,This technical report introduces our winning s...,"[-0.06893435, -0.011644914, -0.035723064, 0.04...","[0.012619468, 0.06828206, -0.0018017464, 0.002...",Action Recognition,4


In [43]:
indexref = indexer.index(papers['text'], papers['docno'], papers['title'], papers['area'])

In [45]:
papers[['docno', 'title_embeddings', 'abstract_embeddings']].to_pickle('data/paper_embeddings.pkl')

# Other information

In [100]:
extra_info_df = data_df[['title', 'area', 'rank_gs', 'rate_gs', 'rank_pc', 'rate_pc', 'rank_ss',
       'rate_ss', 'agg_rate', 'agg_rank', 'rank', 'citationCount', 'referenceCount',
       'influentialCitationCount']].copy()

In [90]:
ref_dict = {docno: title for docno, title in zip(papers['docno'].to_list(), papers['title'].to_list())}
with open ('data/ref_dict.pkl', 'wb') as f:
    pickle.dump(ref_dict, f)

In [101]:
extra_info_df = extra_info_df.merge(papers[['docno', 'title']], on='title', how='left')
extra_info_df.drop(columns=['title'], inplace=True)

In [102]:
extra_info_df.head()

Unnamed: 0,title,area,rank_gs,rate_gs,rank_pc,rate_pc,rank_ss,rate_ss,agg_rate,agg_rank,rank,citationCount,referenceCount,influentialCitationCount,docno
0,occupancy networks: learning 3d reconstruction...,3D Reconstruction,9.0,5.0,18.0,5.0,1.0,5.0,5.0,10.2,1,623.0,86.0,136.0,5295
1,weakly supervised 3d reconstruction with adver...,3D Reconstruction,68.0,4.0,4.0,5.0,29.0,5.0,4.7,30.7,2,85.0,60.0,3.0,7753
2,infinitam v3: a framework for large-scale 3d r...,3D Reconstruction,95.0,4.0,17.0,5.0,32.0,5.0,4.7,44.9,3,59.0,13.0,6.0,3979
3,pix2vox: context-aware 3d reconstruction from ...,3D Reconstruction,30.0,5.0,45.0,4.0,5.0,5.0,4.6,28.5,4,70.0,49.0,8.0,5646
4,image2mesh: a learning framework for single im...,3D Reconstruction,33.0,5.0,94.0,4.0,15.0,5.0,4.6,52.0,5,70.0,53.0,3.0,3809


In [103]:
extra_info_df.to_pickle('data/extra_info.pkl')

# For similarity model

In [108]:
papers_info_df = data_df[['title', 'area', 'title_embeddings', 'abstract_embeddings', 'total_embeddings']].copy()

In [117]:
papers_info_df = papers_info_df.groupby(['title']).agg({'title': 'first', 'title_embeddings': 'first', 'abstract_embeddings': 'first',  'total_embeddings': 'first', 'area': lambda x: set(*x)}).reset_index(drop=True)

In [118]:
papers_info_df = papers_info_df.merge(papers[['docno', 'title']], on='title', how='left')

In [119]:
papers_info_df.head()

Unnamed: 0,title,title_embeddings,abstract_embeddings,total_embeddings,area,docno
0,"""zero-shot"" super-resolution using deep intern...","[-0.07775616, 0.05288399, 0.03650218, 0.038884...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.022982784, 0.07698608, 0.0030802086, 0.034...","{Image Compression, Super-Resolution}",0
1,(blockchain technology and industry 4.0 at aut...,"[-0.020874746, 0.028415756, -0.016895752, 0.05...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.021961384, 0.0038702078, -0.02499763, 0.01...",{Autonomous vehicles},1
2,"(male, bachelor) and (female, ph.d) have diffe...","[0.05497964, 0.056969024, -0.04139412, 0.01792...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[0.038503695, 0.093446776, -0.04756615, 0.0151...",{Style Transfer},2
3,124-color super-resolution imaging by engineer...,"[0.0032263368, 0.015332368, -0.0021535154, -0....","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.0019260754, -0.0055030133, 0.005773849, -0...",{Super-Resolution},3
4,1st place solution for ava-kinetics crossover ...,"[-0.06893435, -0.011644914, -0.035723064, 0.04...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.05613881, -0.00845654, -0.020145206, 0.050...",{Action Recognition},4


In [120]:
papers_info_df.to_pickle('data/papers_info.pkl')

# test set

In [183]:
test_df = pd.read_csv('test_set.csv')

In [184]:
test_df

Unnamed: 0,query,Unnamed: 1,Unnamed: 2,title,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,abstract
0,I would like to reconstruct 3D models from ima...,,,infinitam v3: a framework for large-scale 3d r...,,,,,,,,,,Volumetric models have become a popular repres...
1,I would like to reconstruct 3D models from ima...,,,image2mesh: a learning framework for single im...,,,,,,,,,,One challenge that remains open in 3D deep lea...
2,I would like to reconstruct 3D models from ima...,,,video based reconstruction of 3d people models,,,,,,,,,,This paper describes a method to obtain accura...
3,I would like to reconstruct 3D models from ima...,,,differentiable volumetric rendering: learning ...,,,,,,,,,,Learning-based 3D reconstruction methods have ...
4,I would like to reconstruct 3D models from ima...,,,implicit functions in feature space for 3d sha...,,,,,,,,,,While many works focus on 3D reconstruction fr...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,I would like to imprve the resolution of an im...,,,towards a mathematical theory of superâ€resol...,,,,,,,,,,After motivating the need of a multiscale vers...
2996,I would like to imprve the resolution of an im...,,,super-resolution image reconstruction: a techn...,,,,,,,,,,"Face super-resolution (FSR), also known as fac..."
2997,I would like to imprve the resolution of an im...,,,space-time super-resolution,,,,,,,,,,The concept of the space-time as emerging in t...
2998,I would like to imprve the resolution of an im...,,,super-resolution in medical imaging,,,,,,,,,,Although high resolution isotropic 3D medical ...


In [185]:
test_df = test_df[['title', 'query']]

In [186]:
papers_info_df

Unnamed: 0,title,title_embeddings,abstract_embeddings,total_embeddings,area,docno
0,"""zero-shot"" super-resolution using deep intern...","[-0.07775616, 0.05288399, 0.03650218, 0.038884...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.022982784, 0.07698608, 0.0030802086, 0.034...","{Image Compression, Super-Resolution}",0
1,(blockchain technology and industry 4.0 at aut...,"[-0.020874746, 0.028415756, -0.016895752, 0.05...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.021961384, 0.0038702078, -0.02499763, 0.01...",{Autonomous vehicles},1
2,"(male, bachelor) and (female, ph.d) have diffe...","[0.05497964, 0.056969024, -0.04139412, 0.01792...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[0.038503695, 0.093446776, -0.04756615, 0.0151...",{Style Transfer},2
3,124-color super-resolution imaging by engineer...,"[0.0032263368, 0.015332368, -0.0021535154, -0....","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.0019260754, -0.0055030133, 0.005773849, -0...",{Super-Resolution},3
4,1st place solution for ava-kinetics crossover ...,"[-0.06893435, -0.011644914, -0.035723064, 0.04...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.05613881, -0.00845654, -0.020145206, 0.050...",{Action Recognition},4
...,...,...,...,...,...,...
7860,zooming slowmo: an efficient one-stage framewo...,"[-0.030745365, 0.016652511, 0.02880512, 0.0043...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.028682087, 0.05198012, 0.033375114, 0.0397...",{Super-Resolution},7860
7861,α7nachr/nmdar coupling affects nmdar function ...,"[-0.013007551, 0.03416746, 0.004286104, 0.0203...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.017490184, 0.08755458, -0.020279747, 0.013...",{Object Recognition},7861
7862,“i've seen it all before”: explaining age-rela...,"[0.009059156, 0.027311925, 0.0058719674, 0.024...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[0.0071213795, -0.024961853, 0.0179813, 0.0040...",{Object Recognition},7862
7863,… graft augmentation for severe glenoid bone l...,"[0.03842942, 2.0709778e-05, -0.021613646, 0.02...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[0.013518965, -0.022561295, -0.035119504, 0.01...",{Image augmentation},7863


In [187]:
test_df = test_df.merge(papers[['title', 'docno']], on='title', how='left')

In [188]:
test = test_df.merge(data_df[['title', 'area']], on='title', how='left')

In [197]:
test = test.dropna()
test.drop_duplicates(subset=['query', 'title'], inplace=True)

In [199]:
test.groupby('query').head(20).to_csv('./data/test_set_20.csv', index=False)

In [192]:
queries = test[['query', 'area', 'docno']].groupby(['query', 'area']).count().sort_values(by='docno', ascending=False).head(30).reset_index()

In [193]:
queries = queries[['query', 'area']]

In [194]:
queries['qid'] = queries.index.to_series().apply(lambda x: str(x))

In [201]:
test = test.merge(queries[['qid', 'query']], on='query', how='left')

In [195]:
queries.to_csv('data/queries.csv', index=False)

In [202]:
test.to_csv('data/test_set.csv', index=False)

In [203]:
test

Unnamed: 0,title,query,docno,area,qid
0,infinitam v3: a framework for large-scale 3d r...,I would like to reconstruct 3D models from ima...,3979,3D Reconstruction,13
1,image2mesh: a learning framework for single im...,I would like to reconstruct 3D models from ima...,3809,3D Reconstruction,13
2,video based reconstruction of 3d people models,I would like to reconstruct 3D models from ima...,7620,3D Reconstruction,13
3,differentiable volumetric rendering: learning ...,I would like to reconstruct 3D models from ima...,2269,3D Reconstruction,13
4,implicit functions in feature space for 3d sha...,I would like to reconstruct 3D models from ima...,3823,3D Reconstruction,13
...,...,...,...,...,...
2968,example-based super-resolution,I would like to imprve the resolution of an im...,2708,Super-Resolution,23
2969,super-resolution image reconstruction: a techn...,I would like to imprve the resolution of an im...,6906,Super-Resolution,23
2970,space-time super-resolution,I would like to imprve the resolution of an im...,6685,Super-Resolution,23
2971,super-resolution in medical imaging,I would like to imprve the resolution of an im...,6909,Super-Resolution,23


In [260]:
data_df = data_df.merge(papers[['title', 'docno']], on='title', how='left')

In [210]:
data_df

Unnamed: 0,area,rank_gs,title,rate_gs,rank_pc,rate_pc,rank_ss,rate_ss,agg_rate,agg_rank,...,pdf_link,abstract,file_name,citationCount,referenceCount,influentialCitationCount,title_embeddings,abstract_embeddings,total_embeddings,docno
0,3D Reconstruction,9.0,occupancy networks: learning 3d reconstruction...,5.0,18.0,5.0,1.0,5.0,5.0,10.2,...,http://openaccess.thecvf.com/content_CVPR_2019...,"With the advent of deep neural networks, learn...",3ocnele3dreinfusp,623.0,86.0,136.0,"[-0.05088244, 0.053124595, 0.0023723403, 0.022...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.060328517, 0.06280401, 0.026933515, 0.0345...",5295
1,3D Reconstruction,68.0,weakly supervised 3d reconstruction with adver...,4.0,4.0,5.0,29.0,5.0,4.7,30.7,...,https://arxiv.org/pdf/1705.10904,Supervised 3D reconstruction has witnessed a s...,3wesu3drewiadco,85.0,60.0,3.0,"[0.041443087, 0.09971384, 0.034612227, 0.05238...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.014805883, 0.093130484, 0.03753042, 0.0462...",7753
2,3D Reconstruction,95.0,infinitam v3: a framework for large-scale 3d r...,4.0,17.0,5.0,32.0,5.0,4.7,44.9,...,https://arxiv.org/pdf/1708.00783,Volumetric models have become a popular repres...,3inv3afrfola3drewilocl,59.0,13.0,6.0,"[-0.027944634, 0.058888804, 0.016118925, 0.007...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[0.007331516, -0.0009151098, 0.054103583, 0.03...",3979
3,3D Reconstruction,30.0,pix2vox: context-aware 3d reconstruction from ...,5.0,45.0,4.0,5.0,5.0,4.6,28.5,...,http://openaccess.thecvf.com/content_ICCV_2019...,Recovering the 3D representation of an object ...,3pico3drefrsianmuim,70.0,49.0,8.0,"[-0.03636022, 0.038363226, 0.06699201, 0.05293...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.021163054, 0.089947246, 0.057269774, 0.069...",5646
4,3D Reconstruction,33.0,image2mesh: a learning framework for single im...,5.0,94.0,4.0,15.0,5.0,4.6,52.0,...,https://arxiv.org/pdf/1711.10669,One challenge that remains open in 3D deep lea...,3imalefrfosiim3dre,70.0,53.0,3.0,"[-0.046021443, 0.087248586, 0.038778853, 0.037...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.08701608, 0.06889409, 0.028026856, 0.01509...",3809
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8250,Super-Resolution,401.0,hyperspectral image super-resolution using dee...,1.0,201.0,1.0,193.0,3.0,1.6,258.6,...,http://arxiv.org/pdf/1905.09211v1,Abstract Limited by the existed imagery hardwa...,Shyimsuusdeconene,69.0,52.0,3.0,"[-0.01713901, 0.027642196, 0.012370186, 0.0398...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[0.012876433, 0.04741985, 0.0039633377, -0.001...",3608
8251,Super-Resolution,401.0,quantitative super-resolution imaging with qpa...,1.0,201.0,1.0,195.0,3.0,1.6,259.2,...,http://arxiv.org/abs/2011.15052v1,Counting molecules in complexes is challenging...,Squsuimwiqpustrbian,173.0,40.0,0.0,"[-0.04307452, -0.010083245, -0.025073582, 0.00...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.02646856, 0.003393217, -0.017404953, -0.00...",5833
8252,Super-Resolution,401.0,robust single image super-resolution via deep ...,1.0,201.0,1.0,197.0,3.0,1.6,259.8,...,http://arxiv.org/pdf/2006.08857v1,Single image super-resolution (SR) is an ill-p...,Srosiimsuvidenewisppr,112.0,51.0,9.0,"[-0.027699452, 0.06728266, 0.03679345, 0.02347...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.0063791964, 0.095465, 0.034922574, 0.00885...",6238
8253,Super-Resolution,401.0,support recovery for sparse super-resolution o...,1.0,201.0,1.0,199.0,3.0,1.6,260.4,...,http://arxiv.org/pdf/1003.0888v1,We study sparse spikes super-resolution over t...,Ssurefospsuofpome,62.0,24.0,2.0,"[-0.094009355, 0.013627753, 0.017686302, -0.00...","[0.012619468, 0.06828206, -0.0018017464, 0.002...","[-0.034272734, 0.008217079, 0.0050220564, 0.05...",6936


In [261]:
rank_df = data_df[['title', 'area', 'agg_rate', 'agg_rank', 'docno']].copy()

In [262]:
queries = pd.read_csv('data/queries.csv')

In [263]:
queries

Unnamed: 0,query,area,qid
0,I would like to remove the noise from images.,Denoising,0
1,How to add captions to the images?,Image Captioning,1
2,I want to detect the act of people.,Action Recognition,2
3,What is the action of people in the image.,Action Recognition,3
4,Object Tracking with Neural Network,Object Tracking,4
5,I would like to track a moving item in a video,Object Tracking,5
6,How do self-driving vehicles work?,Autonomous vehicles,6
7,Change time and season of an image,Domain adaptation,7
8,Can the computer recognize characters in the i...,Optical Character Recognition,8
9,I would like to find all the faces in the image.,Face Recognition,9


In [264]:
rank_df

Unnamed: 0,title,area,agg_rate,agg_rank,docno
0,occupancy networks: learning 3d reconstruction...,3D Reconstruction,5.0,10.2,5295
1,weakly supervised 3d reconstruction with adver...,3D Reconstruction,4.7,30.7,7753
2,infinitam v3: a framework for large-scale 3d r...,3D Reconstruction,4.7,44.9,3979
3,pix2vox: context-aware 3d reconstruction from ...,3D Reconstruction,4.6,28.5,5646
4,image2mesh: a learning framework for single im...,3D Reconstruction,4.6,52.0,3809
...,...,...,...,...,...
8250,hyperspectral image super-resolution using dee...,Super-Resolution,1.6,258.6,3608
8251,quantitative super-resolution imaging with qpa...,Super-Resolution,1.6,259.2,5833
8252,robust single image super-resolution via deep ...,Super-Resolution,1.6,259.8,6238
8253,support recovery for sparse super-resolution o...,Super-Resolution,1.6,260.4,6936


In [265]:
rank_df = rank_df.merge(queries[['qid', 'area']], on='area', how='left')

In [268]:
temp_test = test.copy()

In [269]:
temp_test['rank'] = temp_test.index

In [270]:
temp_test[(temp_test['docno'] == '122') & (temp_test['qid'] == '27')]['rank'].values[0]

2491

In [275]:
temp_test['qid'] = temp_test['qid'].astype(int)

In [276]:
rank_df = rank_df.merge(temp_test[['qid', 'docno', 'rank']], on=['qid', 'docno'], how='left')

In [277]:
rank_df.fillna(3000, inplace=True)

In [278]:
rank_df.sort_values(by=['qid', 'rank', 'agg_rank', 'agg_rate'])

Unnamed: 0,title,area,agg_rate,agg_rank,docno,qid,rank
1620,unprocessing images for learned raw denoising,Denoising,5.0,13.4,7466,0,394.0
1621,denoising diffusion probabilistic models,Denoising,5.0,15.4,2125,0,395.0
1622,beyond a gaussian denoiser: residual learning ...,Denoising,5.0,20.6,1222,0,396.0
1623,toward convolutional blind denoising of real p...,Denoising,4.7,29.9,7207,0,397.0
1624,noise2self: blind denoising by self-supervision,Denoising,4.6,35.3,5119,0,398.0
...,...,...,...,...,...,...,...
10235,global context for convolutional pose machines,Semantic segmentation,1.8,258.2,3304,29,3000.0
10238,blenderproc,Semantic segmentation,1.8,259.0,1260,29,3000.0
10241,pyramid vision transformer: a versatile backbo...,Semantic segmentation,1.8,259.4,5822,29,3000.0
10529,semantic correlation promoted shape-variant co...,Semantic segmentation,1.6,259.8,6416,29,3000.0


In [279]:
final_test_df = rank_df[['qid', 'docno', 'title']].copy()

In [281]:
final_test_df.to_csv('data/final_test_set.csv', index=False)