## A. Filter out the A/A* conferences

In [4]:
from collections import defaultdict
import pickle

In [3]:
import pandas as pd

In [38]:
df = pd.read_csv('data/CORE.csv', names=['Id', 'Title', 'Acronym', 'Source', 
                                    'Rank', 'hasData', 
                                    'FieldOfResearch', 'Comments', 
                                    'Average Rating'])

In [43]:
field_of_research_codes = [4602, 4603, 4605, 4608, 4609, 4610, 4611]

In [44]:
df.head()

Unnamed: 0,Id,Title,Acronym,Source,Rank,hasData,FieldOfResearch,Comments,Average Rating
0,1629,National Conference of the American Associatio...,AAAI,CORE2020,A*,No,4602,4603.0,4611.0
1,922,International Joint Conference on Autonomous A...,AAMAS,CORE2020,A*,Yes,4602,,
2,196,Association of Computational Linguistics,ACL,CORE2020,A*,No,4602,,
3,37,ACM Multimedia,ACMMM,CORE2020,A*,No,4603,,
4,147,Architectural Support for Programming Language...,ASPLOS,CORE2020,A*,No,4612,4606.0,


In [45]:
core_conf_dfs = df.loc[(df['Rank'].isin(["A*", "A"])) & (df['FieldOfResearch'].isin(field_of_research_codes))]

In [46]:
len(core_conf_dfs)

84

In [47]:
core_conf_dfs['Acronym']

0       AAAI
1      AAMAS
2        ACL
3      ACMMM
7        CHI
       ...  
207     SSPR
211      TEI
216     FAST
221     ESWC
226    ICDAR
Name: Acronym, Length: 84, dtype: object

In [50]:
list(core_conf_dfs['Title'])

['National Conference of the American Association for Artificial Intelligence',
 'International Joint Conference on Autonomous Agents and Multiagent Systems (previously the International Conference on Multiagent Systems, ICMAS, changed in 2000)',
 'Association of Computational Linguistics',
 'ACM Multimedia',
 'International Conference on Human Factors in Computing Systems',
 'Conference on Learning Theory',
 'IEEE Conference on Computer Vision and Pattern Recognition',
 'Data Compression Conference',
 'ACM Conference on Economics and Computation',
 'Foundations of Genetic Algorithms',
 'International Conference on Automated Planning and Scheduling',
 'IEEE International Conference on Computer Vision',
 'International Conference on Data Engineering',
 'IEEE International Conference on Data Mining',
 'International Conference on Machine Learning',
 'IEEE Information Visualization Conference',
 'International Joint Conference on Artificial Intelligence',
 'International Joint Conference 

In [53]:
core_conf_dfs[['Title', 'Acronym']]

Unnamed: 0,Title,Acronym
0,National Conference of the American Associatio...,AAAI
1,International Joint Conference on Autonomous A...,AAMAS
2,Association of Computational Linguistics,ACL
3,ACM Multimedia,ACMMM
7,International Conference on Human Factors in C...,CHI
...,...,...
207,Structural and Syntactical Pattern Recognition,SSPR
211,"Tangible, Embedded, and Embodied Interaction",TEI
216,Conference on File and Storage Technologies,FAST
221,Extended Semantic Web Conference (was European...,ESWC


In [58]:
pwd

'/home/singh_shruti/workspace/meaningful_comparison/comparison_network'

In [60]:
core_conf_dfs[['Title', 'Acronym']].to_csv('utils/core_astar_confs.csv', index=False)

In [48]:
df[df['FieldOfResearch'] == 4602]

Unnamed: 0,Id,Title,Acronym,Source,Rank,hasData,FieldOfResearch,Comments,Average Rating
0,1629,National Conference of the American Associatio...,AAAI,CORE2020,A*,No,4602,4603.0,4611.0
1,922,International Joint Conference on Autonomous A...,AAMAS,CORE2020,A*,Yes,4602,,
2,196,Association of Computational Linguistics,ACL,CORE2020,A*,No,4602,,
12,14,ACM Conference on Economics and Computation,EC,CORE2020,A*,Yes,4602,4613.0,
16,546,Foundations of Genetic Algorithms,FOGA,CORE2020,A*,No,4602,,
...,...,...,...,...,...,...,...,...,...
833,1618,Modelling and Optimization: Theory and Applica...,MOPTA,CORE2020,National: USA,No,4602,,
848,2183,International Work-conference on the Interplay...,IWINAC,CORE2020,National:Spain,Yes,4602,,
852,1682,Portuguese Conference on Artificial Intelligence,EPIA,CORE2020,Regional,Yes,4602,,
866,2195,European Conference on Evolutionary Computatio...,EvoCOP,CORE2020,Unranked,Yes,4602,,


## Filter out the A/A* journals

In [63]:
df_journals = pd.read_csv('data/CORE_journals.csv')

In [64]:
df_journals.head()

Unnamed: 0,id,title,source,rank,has changed?,for1,for2,for3,ISSN1,ISSN2,ISSN3,ISSN4
0,356,ACM Computing Surveys,CORE2020,A*,No,803,,,0360-0300,1557-7341,,
1,518,ACM Transactions on Computer - Human Interaction,CORE2020,A*,No,806,,,1073-0516,1557-7325,,
2,357,ACM Transactions on Computer Systems,CORE2020,A*,No,803,806.0,,0734-2071,1557-7333,,
3,416,ACM Transactions on Database Systems,CORE2020,A*,No,804,806.0,,0362-5915,1557-4644,,
4,155,ACM Transactions on Graphics,CORE2020,A*,No,801,806.0,,0730-0301,1557-7368,,


In [73]:
journals_for = [801, 806, 807]
# ["080101", "080102", "080103", "080104", "080105", 
#                 "080106", "080107", "080108", "080109", "080110", 
#                 "080111", "080112", "080199", "080602", "080604",
#                 "080704", "080705", "080706", "080707"]

In [66]:
for i in range(1, 10):
    print("08010"+str(i))

080101
080102
080103
080104
080105
080106
080107
080108
080109


In [74]:
core_journals_df = df_journals.loc[(df_journals['rank'].isin(["A*", "A"])) & (df_journals['for1'].isin(journals_for))]

In [75]:
core_journals_df

Unnamed: 0,id,title,source,rank,has changed?,for1,for2,for3,ISSN1,ISSN2,ISSN3,ISSN4
1,518,ACM Transactions on Computer - Human Interaction,CORE2020,A*,No,806,,,1073-0516,1557-7325,,
4,155,ACM Transactions on Graphics,CORE2020,A*,No,801,806.0,,0730-0301,1557-7368,,
9,659,Annual Review of Information Science and Techn...,CORE2020,A*,No,807,,,0066-4200,1550-8382,,
10,156,Artificial Intelligence,CORE2020,A*,No,801,1702.0,,0004-3702,1872-7921,,
11,160,Cognitive Science,CORE2020,A*,No,801,1701.0,1702.0,0364-0213,1551-6709,,
12,161,Computational Linguistics,CORE2020,A*,No,801,2004.0,1702.0,0891-2017,1530-9312,,
16,163,IEEE Transactions on Evolutionary Computation,CORE2020,A*,No,801,1702.0,906.0,1089-778X,1941-0026,,
17,164,IEEE Transactions on Fuzzy Systems,CORE2020,A*,No,801,906.0,,1063-6706,1941-0034,,
18,165,IEEE Transactions on Image Processing,CORE2020,A*,No,801,1702.0,906.0,1057-7149,1941-0042,,
19,166,IEEE Transactions on Information Theory,CORE2020,A*,No,801,1702.0,906.0,0018-9448,1557-9654,,


In [77]:
core_journals_df[['title']].to_csv('utils/core_astar_journals.csv', index=False)

## B. Create patterns from the conf and journal names

### B.1. Conferences

In [2]:
!head utils/core_astar_confs.csv

Title,Acronym
National Conference of the American Association for Artificial Intelligence,AAAI
"International Joint Conference on Autonomous Agents and Multiagent Systems (previously the International Conference on Multiagent Systems, ICMAS, changed in 2000)",AAMAS
Association of Computational Linguistics,ACL
ACM Multimedia,ACMMM
International Conference on Human Factors in Computing Systems,CHI
Conference on Learning Theory,COLT
IEEE Conference on Computer Vision and Pattern Recognition,CVPR
Data Compression Conference,DCC
ACM Conference on Economics and Computation,EC


In [28]:
acr_conf = defaultdict(list)
title_conf = defaultdict(list)


with open('utils/core_astar_confs.csv', 'r') as f:
    lines = f.readlines()
    
    for _, line in enumerate(lines[1:]):
        splittext = line.rsplit(",", 1)
        title_conf[_].append(splittext[0].strip())
        acr_conf[_].append(splittext[1].strip())
    

In [29]:
len(acr_conf), acr_conf

(84,
 defaultdict(list,
             {0: ['AAAI'],
              1: ['AAMAS'],
              2: ['ACL'],
              3: ['ACMMM'],
              4: ['CHI'],
              5: ['COLT'],
              6: ['CVPR'],
              7: ['DCC'],
              8: ['EC'],
              9: ['FOGA'],
              10: ['ICAPS'],
              11: ['ICCV'],
              12: ['ICDE'],
              13: ['ICDM'],
              14: ['ICML'],
              15: ['IEEE InfoVis'],
              16: ['IJCAI'],
              17: ['IJCAR'],
              18: ['ISWC'],
              19: ['KDD'],
              20: ['KR'],
              21: ['NeurIPS'],
              22: ['PERCOM'],
              23: ['PODS'],
              24: ['SIGIR'],
              25: ['SIGMOD'],
              26: ['UAI'],
              27: ['VLDB'],
              28: ['WSDM'],
              29: ['CSCW'],
              30: ['CIKM'],
              31: ['SIGCSE'],
              32: ['UIST'],
              33: ['CogSci'],
              34: 

In [12]:
for c in [(1, "ICMAS"), (21, "NIPS"), (42,"PKDD"), (42,"ECML"), (43, "EuroSpeech"), (43,"ICSLP"), 
          (59,"ICARCV"), (80,"TEI"), (82,"ESWC"), (73,"ISS")]:
    acr_conf[c[0]].append(c[1])
    acr_conf[c[0]] = list(set(acr_conf[c[0]]))
    
# acr_conf = list(set(acr_conf))

In [13]:
len(acr_conf)

89

In [30]:
len(title_conf), title_conf

(84,
 defaultdict(list,
             {0: ['National Conference of the American Association for Artificial Intelligence'],
              1: ['"International Joint Conference on Autonomous Agents and Multiagent Systems (previously the International Conference on Multiagent Systems, ICMAS, changed in 2000)"'],
              2: ['Association of Computational Linguistics'],
              3: ['ACM Multimedia'],
              4: ['International Conference on Human Factors in Computing Systems'],
              5: ['Conference on Learning Theory'],
              6: ['IEEE Conference on Computer Vision and Pattern Recognition'],
              7: ['Data Compression Conference'],
              8: ['ACM Conference on Economics and Computation'],
              9: ['Foundations of Genetic Algorithms'],
              10: ['International Conference on Automated Planning and Scheduling'],
              11: ['IEEE International Conference on Computer Vision'],
              12: ['International Conference

In [34]:
to_remove = ['"International Joint Conference on Autonomous Agents and Multiagent Systems (previously the International Conference on Multiagent Systems, ICMAS, changed in 2000)"',
            'Advances in Neural Information Processing Systems (was NIPS)',
            'European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Database (PKDD and ECML combined from 2008)',
            'Interspeech (combined EuroSpeech and ICSLP in 2000)',
            '"International Conference on Control, Automation, Robotics and Vision"',
            'ACM International Conference on Interactive Surfaces and Spaces (was International Workshop on Horizontal Interactive Human-Computer Systems: Tabletop)',
            '"Tangible, Embedded, and Embodied Interaction"',
            'Extended Semantic Web Conference (was European Semantic Web Conference)']

to_add = [(1 ,['International Joint Conference on Autonomous Agents and Multiagent Systems', 'International Conference on Multiagent Systems']), 
          (21, ['Neural Information Processing Systems']),
          (42, ['European Conference on Machine Learning', 'Principles and Practice of Knowledge Discovery in Database']),
          (43, ['Interspeech']), 
          (59, ['International Conference on Control, Automation, Robotics and Vision']),
          (73, ['International Conference on Interactive Surfaces and Spaces', 'International Workshop on Horizontal Interactive Human-Computer Systems']),
          (80, ['Tangible, Embedded, and Embodied Interaction']),
          (82, ['Extended Semantic Web Conference', 'European Semantic Web Conference'])]
          

In [35]:
for c in to_add:
    title_conf[c[0]] = c[1]

In [39]:
len(title_conf), len(acr_conf)

(84, 84)

### B.2 Fuzzy search for names

In [15]:
!pip3 install --user fuzzysearch
!pip3 install --user fuzzywuzzy

Collecting fuzzysearch
  Downloading https://files.pythonhosted.org/packages/f7/28/3e9e4e55fd35356f331a22976694e151eb0214b68d3cd471936f9c09deba/fuzzysearch-0.7.3.tar.gz (112kB)
[K    100% |████████████████████████████████| 122kB 1.5MB/s ta 0:00:01
Building wheels for collected packages: fuzzysearch
  Running setup.py bdist_wheel for fuzzysearch ... [?25ldone
[?25h  Stored in directory: /home/singh_shruti/.cache/pip/wheels/6b/28/23/87d8b2c68b65300ac30e13b019f0589dc38b88ed983010155f
Successfully built fuzzysearch
Installing collected packages: fuzzysearch
Successfully installed fuzzysearch-0.7.3
Collecting fuzzywuzzy
[33m  Cache entry deserialization failed, entry ignored[0m
  Downloading https://files.pythonhosted.org/packages/43/ff/74f23998ad2f93b945c0309f825be92e04e0348e062026998b5eefef4c33/fuzzywuzzy-0.18.0-py2.py3-none-any.whl
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [21]:
from fuzzywuzzy import fuzz



In [23]:
fuzz.partial_ratio("Proceedings of the Annual Meeting of the Cognitive Science Society".lower(), 
                                  "Annual Conference of the Cognitive Science Society".lower())

85

In [25]:
fuzz.partial_ratio("ECAI Workshop on Knowledge Representation and Reasoning".lower(), "International Conference on the Principles of Knowledge Representation and Reasoning".lower())

82

In [33]:
fuzz.partial_ratio("Conference on Speech and Language Processing".lower(), "ACM Transactions on Speech and Language Processing".lower())

88

### B.3. Journals

In [46]:
!wc -l utils/core_astar_journals.csv

51 utils/core_astar_journals.csv


In [38]:
!head -20 utils/core_astar_journals.csv

title
ACM Transactions on Computer - Human Interaction
ACM Transactions on Graphics
Annual Review of Information Science and Technology
Artificial Intelligence
Cognitive Science
Computational Linguistics
IEEE Transactions on Evolutionary Computation
IEEE Transactions on Fuzzy Systems
IEEE Transactions on Image Processing
IEEE Transactions on Information Theory
IEEE Transactions on Neural Networks and Learning Systems (was IEEE Transactions on Neural Networks)
IEEE Transactions on Pattern Analysis and Machine Intelligence
IEEE Transactions on Robotics
Information Systems
International Journal of Computer Vision
International Journal of Robotics Research
Journal of Information Technology
Journal of Machine Learning Research
Journal of the Association for Information Science and Technology (was Journal of the American Society for Information Science and Technology)


In [47]:
# Journal titles are exact matches 

title_journ = defaultdict(list)

with open('utils/core_astar_journals.csv', 'r') as f:
    lines = f.readlines()
    
    for _, line in enumerate(lines[1:]):
        line = line.strip()
        splittext = line.split(" (was ", )
#         print(splittext)
        title_journ[_+84].append(splittext[0].strip())
        if len(splittext) > 1:
            title_journ[_+84].append(splittext[1][:-1].strip())

In [48]:
title_journ

defaultdict(list,
            {84: ['ACM Transactions on Computer - Human Interaction'],
             85: ['ACM Transactions on Graphics'],
             86: ['Annual Review of Information Science and Technology'],
             87: ['Artificial Intelligence'],
             88: ['Cognitive Science'],
             89: ['Computational Linguistics'],
             90: ['IEEE Transactions on Evolutionary Computation'],
             91: ['IEEE Transactions on Fuzzy Systems'],
             92: ['IEEE Transactions on Image Processing'],
             93: ['IEEE Transactions on Information Theory'],
             94: ['IEEE Transactions on Neural Networks and Learning Systems',
              'IEEE Transactions on Neural Networks'],
             95: ['IEEE Transactions on Pattern Analysis and Machine Intelligence'],
             96: ['IEEE Transactions on Robotics'],
             97: ['Information Systems'],
             98: ['International Journal of Computer Vision'],
             99: ['Internatio

### C. Save the conf/jounral dicts

In [50]:
import pickle

In [52]:
with open('utils/conf_acr_dict.pkl', 'wb') as f:
    pickle.dump(acr_conf, f)
    
with open('utils/conf_title_dict.pkl', 'wb') as f:
    pickle.dump(title_conf, f)
    
with open('utils/jour_title_dict.pkl', 'wb') as f:
    pickle.dump(title_journ, f)

## D. Create unified dict

In [5]:
with open('utils/conf_acr_dict.pkl', 'rb') as f:
    conf_acr = pickle.load(f)
    
with open('utils/conf_title_dict.pkl', 'rb') as f:
    conf_title = pickle.load(f)
    
with open('utils/jour_title_dict.pkl', 'rb') as f:
    jour_title = pickle.load(f)

In [7]:
c_j_dict = {'max_counter': 0, 'confs': conf_title, 'jours': jour_title, 'acronyms': conf_acr}
c_j_dict['max_counter'] = len(c_j_dict['confs']) + len(c_j_dict['jours'])

In [8]:
with open('data/venue_ids.pkl', 'wb') as f:
    pickle.dump(c_j_dict, f)

In [9]:
c_j_dict['max_counter']

134

In [10]:
def add_new_venue(c_j_dict, venue_name, is_conf=True, acr=None):
    new_idx = c_j_dict['max_counter']
    try:
        if is_conf:
            c_j_dict['confs'].update({new_idx: venue_name})
        else:
            c_j_dict['jours'].update({new_idx: venue_name})
        
        if acr:
            c_j_dict['acronyms'].update({new_idx: acr})
        
        c_j_dict['max_counter'] = new_idx + 1
    except Exception as ex:
        return
    return c_j_dict

In [11]:
c_j_dict = add_new_venue(c_j_dict, "International Conference on Learning Representations", is_conf=True, acr='ICLR')

In [12]:
c_j_dict['max_counter']

135

In [13]:
with open('data/venue_ids.pkl', 'wb') as f:
    pickle.dump(c_j_dict, f)