In [1]:
import numpy as np
import pandas as pd
from source.wikidataquery import WikidataQuery

{'head': {'vars': ['item', 'itemLabel']}, 'results': {'bindings': [{'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q378619'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'CC'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q498787'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Muezza'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q677525'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Orangey'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q851190'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Mrs. Chippy'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q893453'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Unsinkable Sam'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q1050083'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Catmando'}}, {'item': {'type': 'uri', '

## Let's consider the total number of academic conferences in Wikidata (P31 / instance of = 'academic conference')

In [2]:
# SPARQL sent to Wikidata: selects every item with wdt:P31 wd:Q2020153 and,
# where the item has a series (wdt:P179), the optional series attributes bound
# to ?short_name, ?beginnings, ?WikiCFP_identifier and ?DBLP_identifier.
# The query text is kept verbatim, including its indentation.
conference_query = '''SELECT
    ?conferencesLabel
    ?seriesLabel
    ?short_name
    ?beginnings
    ?WikiCFP_identifier
    ?DBLP_identifier

    WHERE {
    ?conferences wdt:P31 wd:Q2020153.

    OPTIONAL {
    ?conferences wdt:P179 ?series.
    OPTIONAL { ?series wdt:P1813 ?short_name. }
    OPTIONAL { ?series wdt:P571 ?beginnings. }
    OPTIONAL { ?series wdt:P5127 ?WikiCFP_identifier. }
    OPTIONAL { ?series wdt:P8926 ?DBLP_identifier. }
    }

    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
    }
}'''

# Run the query through the project helper and keep the raw response.
temp = WikidataQuery.queryWikiData(conference_query)

In [3]:
# Flatten the query response into one array per selected variable so that the
# columns can later be combined into a DataFrame.

results = temp['results']['bindings']


def _optional_values(bindings, variable):
    """Collect the 'value' of `variable` from every binding as a NumPy array.

    A binding that does not contain `variable` contributes pd.NA instead.
    """
    return np.array([
        binding[variable]['value'] if variable in binding.keys() else pd.NA
        for binding in bindings
    ])


# The conference label is looked up unconditionally: a binding without it
# raises KeyError rather than yielding pd.NA.
conf_labels = np.array([binding['conferencesLabel']['value'] for binding in results])

# All series-related variables are optional in the query and may be absent.
series_labels = _optional_values(results, 'seriesLabel')
short_name = _optional_values(results, 'short_name')
beginnings = _optional_values(results, 'beginnings')
wikicfp_identifier = _optional_values(results, 'WikiCFP_identifier')
dblp_identifier = _optional_values(results, 'DBLP_identifier')

In [4]:
# Combine the per-variable arrays row by row into a single DataFrame.

column_names = [
    'conf_labels',
    'series_labels',
    'short_name',
    'beginnings',
    'WikiCFP_identifier',
    'DBLP_identifier',
]

# One tuple per query result, in the same order as column_names.
together = zip(
    conf_labels,
    series_labels,
    short_name,
    beginnings,
    wikicfp_identifier,
    dblp_identifier,
)

df = pd.DataFrame(together, columns=column_names)

# Fixed random_state so the displayed 200-row sample is reproducible.
df.sample(200, random_state=42)

Unnamed: 0,conf_labels,series_labels,short_name,beginnings,WikiCFP_identifier,DBLP_identifier
4575,"Advances in Artificial Life, 6th European Conf...",European Conference on Artificial Life,ECAL,,794,conf/ecal
1488,Data Security and Security Data - 27th British...,British International Conference on Databases,BICOD,1981-01-01T00:00:00Z,317,conf/bncod
4970,14th International Conference on Semantic Syst...,International Conference on Semantic Systems,SEMANTICS,,1705,conf/i-semantics
7333,Colloquium Mosbach 1978,Colloquium Mosbach,,1950-01-01T00:00:00Z,,
3027,7th International Conference on Hardware/Softw...,International Conference on Hardware/Software ...,CODES+ISSS,,,conf/codesisss
...,...,...,...,...,...,...
6916,The 2006 Conference on Empirical Methods in Na...,Empirical Methods in Natural Language Processing,EMNLP,1996-01-01T00:00:00Z,883,conf/emnlp
5993,Equation-Based Object-Oriented Modeling Langua...,International Workshop on Equation-Based Objec...,EOOLT,,894,conf/eoolt
5664,Seventh International Conference on Computatio...,International Conference on Computational Crea...,ICCC,2010-01-01T00:00:00Z,,conf/icccrea
718,12th IEEE/ACM Working Conference on Mining Sof...,IEEE Working Conference on Mining Software Rep...,MSR,,2148,conf/msr


In [5]:
# Number of missing entries in each column.
missing_per_column = df.isna().sum()

# Show the (rows, columns) shape first, then the per-column missing counts.
print(df.shape)
missing_per_column

(8911, 6)


conf_labels              0
series_labels         1040
short_name            1558
beginnings            6364
WikiCFP_identifier    5239
DBLP_identifier       1541
dtype: int64

# Let's check an example and its possible queries for the different databases:

example: conference series IWCMC

In [6]:
# All rows whose short name is exactly 'IWCMC'.
is_iwcmc = df['short_name'] == 'IWCMC'
df.loc[is_iwcmc]

Unnamed: 0,conf_labels,series_labels,short_name,beginnings,WikiCFP_identifier,DBLP_identifier
3607,7th International Wireless Communications and ...,International Conference on Wireless Communica...,IWCMC,,1824,conf/iwcmc
5427,2016 International Wireless Communications and...,International Conference on Wireless Communica...,IWCMC,,1824,conf/iwcmc
5428,International Wireless Communications and Mobi...,International Conference on Wireless Communica...,IWCMC,,1824,conf/iwcmc
5430,International Conference on Wireless Communica...,International Conference on Wireless Communica...,IWCMC,,1824,conf/iwcmc
5431,6th International Wireless Communications and ...,International Conference on Wireless Communica...,IWCMC,,1824,conf/iwcmc
5432,International Conference on Wireless Communica...,International Conference on Wireless Communica...,IWCMC,,1824,conf/iwcmc
7546,International Conference on Wireless Communica...,International Conference on Wireless Communica...,IWCMC,,1824,conf/iwcmc


## In Wikidata:
events for the years: 2007, 2009, 2010, 2011, 2014, 2016 and 2022
--> I also could not find any further event entries that exist in Wikidata but are simply not linked to the series.
_________________________________________________________________________________________________________

## In WikiCFP:
events for the years: <b>2008</b>, 2009, 2010, <b>2012</b>, <b>2013</b>, 2016 and <b>2017</b>
http://www.wikicfp.com/cfp/program?id=1824&s=IWCMC&f=International%20Conference%20on%20Wireless%20Communications%20and%20Mobile%20Computing

## In DBLP:
events for the years: <b>2006</b>, 2007, 2009, 2010, 2011, <b>2012</b>, <b>2013</b>, 2014, <b>2015</b>, 2016, <b>2017</b>, <b>2018</b>, <b>2019</b>, <b>2020</b>, <b>2021</b>, 2022, <b>2023</b>
https://dblp.org/db/conf/iwcmc/index.html
every series is well organized

## In Crossref:
https://search.crossref.org/search/works?q=iwcmc&from_ui=yes
There is a lot of unorganized information about this conference series. There is no single entry for the conference series as a whole that enumerates all of its events. Structuring this information could be a lot of work.

## In CORE:
https://core.ac.uk/search?q=iwcmc
There seems to be information about papers only. Maybe we can use the properties of the papers to deduce information about the conference events.

## In BASE:
https://www.base-search.net/Search/Results?type=all&lookfor=IWCMC&ling=1&oaboost=1&keepFilters=1&filter%5B%5D=f_dctypenorm%3A%2213%22&name=&thes=&refid=dcresde&newsearch=1
Information seems to be a bit more organized than in Crossref. All information seems to be imported from Crossref though.
The references to the events point to IEEE, where we also get a list of all conference events:

## In IEEE:
https://ieeexplore.ieee.org/xpl/conhome/1002410/all-proceedings
events for the years: <b>2008</b>, 2011, <b>2012</b>, <b>2013</b>, 2014, <b>2015</b>, 2016, <b>2017</b>, <b>2018</b>, <b>2019</b>, <b>2020</b>, <b>2021</b>, 2022 and <b>2023</b>

# Also there are duplicates: Should we merge those?

In [7]:
# Group the rows by DBLP identifier and collapse every other column into the
# set of values seen for that identifier.
groupedby_dblp_df = df.groupby(by='DBLP_identifier').aggregate(set)
groupedby_dblp_df

Unnamed: 0_level_0,conf_labels,series_labels,short_name,beginnings,WikiCFP_identifier
DBLP_identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
conf/3dica,"{Scaling Topic Maps, Third International Confe...","{Three-Dimensional Image Processing, Measureme...",{3DIPM},{1998-01-01T00:00:00Z},{<NA>}
conf/3dim,"{Fourth International Conference on 3D Vision,...",{International Conference on 3D Vision},{3DV},{<NA>},{<NA>}
conf/3dui,"{3DUI 2007, 3DUI 2015, 3DUI 2010, 3DUI 2016, 3...",{IEEE Symposium on 3D User Interfaces},{3DUI},{<NA>},{<NA>}
conf/5gwf,"{2nd IEEE 5G World Forum, 5GWF 2019, Dresden, ...",{5G World Forum},{5GWF},{2018-01-01T00:00:00Z},{<NA>}
conf/5gwn,{5G for Future Wireless Networks - 2nd EAI Int...,{International Conference on 5G for Future Wir...,{5GWN},{2017-01-01T00:00:00Z},{<NA>}
...,...,...,...,...,...
conf/www,"{23rd International World Wide Web Conference,...",{The Web Conference},{WWW},{1994-01-01T00:00:00Z},{3182}
conf/xps,{Digital Bildverarbeitung - Digital Image Proc...,{German Conference on Knowledge-Based Systems},{XPS},{<NA>},{<NA>}
conf/xpu,{Agile Processes in Software Engineering and E...,{International Conference on Agile Software De...,{XP},{<NA>},{3186}
conf/xsede,{Practice and Experience in Advanced Research ...,{Practice and Experience in Advanced Research ...,{PEARC},{<NA>},{<NA>}


In [8]:
# Size of the series_labels set for each DBLP identifier.
series_labels_per_dblp_id = groupedby_dblp_df['series_labels'].apply(len)
groupedby_dblp_df = groupedby_dblp_df.assign(length=series_labels_per_dblp_id)

In [9]:
# DBLP identifiers whose series_labels set holds more than one entry.
dblp_multiple_series = groupedby_dblp_df['length'] > 1
groupedby_dblp_df.loc[dblp_multiple_series]

Unnamed: 0_level_0,conf_labels,series_labels,short_name,beginnings,WikiCFP_identifier,length
DBLP_identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
conf/ecml,"{Machine Learning: EMCL 2001, 12th European Co...","{European Conference on Machine Learning, Euro...","{ECML PKDD, ECML}",{<NA>},{<NA>},2


In [10]:
# Group the rows by short name and collapse every other column into the set
# of values seen for that short name.
groupedby_shortname_df = df.groupby(by='short_name').aggregate(set)
groupedby_shortname_df

Unnamed: 0_level_0,conf_labels,series_labels,beginnings,WikiCFP_identifier,DBLP_identifier
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
*SEM,{The Eighth Joint Conference on Lexical and Co...,{Joint Conference on Lexical and Computational...,{<NA>},{<NA>},{conf/starsem}
3DIPM,"{Scaling Topic Maps, Third International Confe...","{Three-Dimensional Image Processing, Measureme...",{1998-01-01T00:00:00Z},{<NA>},{conf/3dica}
3DUI,"{3DUI 2007, 3DUI 2015, 3DUI 2010, 3DUI 2016, 3...",{IEEE Symposium on 3D User Interfaces},{<NA>},{<NA>},{conf/3dui}
3DV,"{Fourth International Conference on 3D Vision,...",{International Conference on 3D Vision},{<NA>},{<NA>},{conf/3dim}
3GSE,"{28th USENIX Security Symposium, USENIX Securi...",{USENIX Security Symposium},{<NA>},{<NA>},{conf/uss}
...,...,...,...,...,...
iNetSeC,{Geospatial Free and Open Source Software in t...,{Open Research Problems in Network Security},{<NA>},{<NA>},{conf/ifip11-4}
iTAG,{2014 International Conference on Interactive ...,{International Conference on Interactive Techn...,{<NA>},{<NA>},{conf/itag}
icSPORTS,{4th International Congress on Sport Sciences ...,{International Congress on Sport Sciences Rese...,{<NA>},{<NA>},{conf/icsports}
iiWAS,{15th International Conference on Information ...,{International Conference on Information Integ...,{<NA>},{<NA>},{conf/iiwas}


In [11]:
# Size of the series_labels set for each short name.
series_labels_per_short_name = groupedby_shortname_df['series_labels'].apply(len)
groupedby_shortname_df = groupedby_shortname_df.assign(length=series_labels_per_short_name)

In [12]:
# Short names whose series_labels set holds more than one entry.
short_name_multiple_series = groupedby_shortname_df['length'] > 1
groupedby_shortname_df.loc[short_name_multiple_series]

Unnamed: 0_level_0,conf_labels,series_labels,beginnings,WikiCFP_identifier,DBLP_identifier,length
short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ACE,{Sixth Australasian Computing Education Confer...,"{Australasian Computing Education Conference, ...","{2004-01-01T00:00:00Z, <NA>}","{24, 23}","{conf/ACMace, conf/ace}",2
ACL,{The 57th Annual Meeting of the Association fo...,{Proceedings of the conference. Association fo...,{<NA>},"{37, <NA>}","{conf/acl, <NA>}",2
ACSAC,{ACSAC '20: Annual Computer Security Applicati...,{Annual Computer Security Applications Confere...,{<NA>},"{45, 46}","{conf/acsac, conf/aPcsac}",2
AH,{AH '20: 11th Augmented Human International Co...,"{Augmented Human International Conference, Int...",{<NA>},"{90, 89}","{conf/aughuman, conf/ah}",2
AIM,{2ème Journée AIM de recherche Serious Games e...,{Conference of the Association Information and...,{<NA>},{<NA>},"{conf/aim, conf/aimech}",2
AIPR,{2nd International Conference on Artificial In...,{International Conference on Artificial Intell...,{<NA>},"{117, <NA>}","{conf/aiprf, conf/aipr2}",2
BDA,{Big Data Analytics - 4th International Confer...,"{Journées Bases de Données Avancées, Internati...","{1985-01-01T00:00:00Z, <NA>}",{<NA>},"{conf/bda, conf/bigda}",2
CCS,{17th ACM Conference on Computer and Communica...,{Conference on Computer and Communications Sec...,"{1993-01-01T00:00:00Z, <NA>}","{399, <NA>}","{conf/ccs, conf/iastedCCS}",2
CIC,"{1st Color and Imaging Conference, CIC 1993, S...",{International Conference on Collaboration and...,"{2015-01-01T00:00:00Z, <NA>}","{449, <NA>}","{conf/imaging, conf/coinco}",2
CISIS,{International Joint Conference - CISIS'15 and...,"{International Conference on Complex, Intellig...",{<NA>},"{473, 474}","{conf/cisis-spain, conf/cisis}",2
