# Creating Venue Table

In [0]:
import pyspark.sql.functions as F

### Reading in the data

In [0]:
# TODO: replace with reading parquet files in the future.
# The "multiline" option allows a single JSON record to span several lines.
df = (
    spark.read
    .option("multiline", True)
    .json('dbfs:/user/dblpv13/dblpv13.*.json.gz')
)

In [0]:
# Print the schema of the loaded DataFrame to inspect its nested columns.
df.printSchema()

root
 |-- _id: string (nullable = true)
 |-- abstract: string (nullable = true)
 |-- authors: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _id: string (nullable = true)
 |    |    |-- avatar: string (nullable = true)
 |    |    |-- bio: string (nullable = true)
 |    |    |-- email: string (nullable = true)
 |    |    |-- gid: string (nullable = true)
 |    |    |-- homepage: string (nullable = true)
 |    |    |-- name: string (nullable = true)
 |    |    |-- name_zh: string (nullable = true)
 |    |    |-- oid: string (nullable = true)
 |    |    |-- oid_zh: string (nullable = true)
 |    |    |-- orcid: string (nullable = true)
 |    |    |-- org: string (nullable = true)
 |    |    |-- org_zh: string (nullable = true)
 |    |    |-- orgid: string (nullable = true)
 |    |    |-- orgs: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |    |    |-- orgs_zh: array (nullable = true)
 |    |    |    |-- element:

### Preprocessing
Replace venues whose fields are all empty with null.

In [0]:
# Venue sub-fields that must ALL be null for the venue to count as empty.
# Note: `_id` is not part of this check, so a venue that carries only an id
# is treated as empty as well.
venue_content_fields = (
    "issn",
    "name",
    "name_d",
    "name_s",
    "online_issn",
    "publisher",
    "raw",
    "raw_zh",
    "t",
)

# AND the per-field null checks together, left to right.
null_checks = [F.col(f"venue.{field}").isNull() for field in venue_content_fields]
venue_is_empty = null_checks[0]
for null_check in null_checks[1:]:
    venue_is_empty = venue_is_empty & null_check

# Replace empty venue structs with null; keep every other venue unchanged.
df_unified_none = df.withColumn(
    "venue",
    F.when(venue_is_empty, None).otherwise(F.col("venue")),
)

### Creating new ids for venues.

We use the ISSN as the venue id when `_id` is null. If both are null, the venue is ignored. We lose venues for <5% of rows.

In [0]:
# Venue identifier: the existing _id when present, otherwise the ISSN.
resolved_venue_id = F.coalesce(F.col("venue._id"), F.col("venue.issn"))

# Write the resolved identifier back into the venue struct's _id field.
df_updated_ids = df_unified_none.withColumn(
    "venue",
    F.col("venue").withField("_id", resolved_venue_id),
)

### Creating the venues table

Creating the venues table with venues that have ids.

Dropping unneeded columns: src, sid, and the original type. Column t is renamed to type, since it holds useful information: J = Journal, C = Workshop or Conference.

In [0]:
# Flatten the venue struct into top-level columns, keep only venues that have
# an id, expose the value of `t` under the column name `type`, remove the
# columns that are not needed, and deduplicate identical rows.
raw_venues_df = (
    df_updated_ids
    .select("venue.*")
    .where(F.col("_id").isNotNull())
    .withColumn("type", F.col("t"))
    .drop("src", "t", "sid")
    .distinct()
)
display(raw_venues_df.limit(10))

_id,issn,name,name_d,name_s,online_issn,publisher,raw,raw_zh,sid,type
1877-0509,1877-0509,,,,,Elsevier,Procedia Computer Science,,procedia-computer-science,J
5390885520f70186a0d8e8b8,,,,,,,Games,,,
1053-8119,1053-8119,,,,,Academic Press,NeuroImage,,neuroimage,J
0165-0114,0165-0114,,,,,North-Holland,Fuzzy Sets and Systems,,fuzzy-sets-and-systems,J
53a7258b20f7420be8b51bd6,,,,,,,ICCS,,,
2169-3536,2169-3536,,,,,,IEEE ACCESS,,IEEE ACCESS,J
0018-9545,0018-9545,,,,1939-9359,,IEEE Transactions on Vehicular Technology,,25,J
0097-8493,0097-8493,,,,,Pergamon,Computers & Graphics,,computers-and-graphics,J
53a72e1520f7420be8c7e9a4,,,,,,,SDM,,,
555037cd7cea80f95419a56b,,,,,,,PRICAI,,,


### Combining rows with the same id

Some rows with matching id have fields filled and some do not. Make the rows distinct by _id and fill the fields with the first non-null value.

In [0]:
# Columns that are consolidated into a single value per venue id.
venue_columns = (
    "issn",
    "name",
    "name_d",
    "name_s",
    "raw",
    "raw_zh",
    "online_issn",
    "publisher",
    "type"
)

# Collapse all rows sharing an _id into one row, filling each column with a
# non-null value from the group (null only if every row in the group is null).
#
# F.max is used instead of F.first(..., ignorenulls=True): `first` returns
# whichever non-null value happens to come first, and row order is not stable
# after the shuffle introduced by the preceding distinct(), so the resulting
# table could differ from run to run. F.max also skips nulls but always picks
# the same value for the same input, which keeps the table reproducible.
distinct_raw_venues_df = (
    raw_venues_df
    .groupBy(F.col("_id"))
    .agg(*(F.max(F.col(col)).alias(col) for col in venue_columns))
)

### Creating a name column

Combining the various columns that may contain the name of the venue into a single column.

In [0]:
# Candidate name columns in order of preference; the first non-null one wins.
name_candidates = ("name", "name_d", "raw", "name_s", "raw_zh")

venue_df = (
    distinct_raw_venues_df
    .withColumn("name", F.coalesce(*(F.col(candidate) for candidate in name_candidates)))
    # Every candidate except "name" itself is redundant once it has been merged.
    .drop(*name_candidates[1:])
)

# Show ten venues in random order as a spot check.
display(venue_df.orderBy(F.rand()).limit(10))

_id,issn,name,online_issn,publisher,type
57d08aa00a3ac5db49921a90,,GamifIR@ECIR,,ACM,
5390a44320f70186a0e70f71,,ISPASS '08 Proceedings of the ISPASS 2008 - IEEE International Symposium on Performance Analysis of Systems and software,,,
5736ae6bd39c4f40a7978b44,,OSS4MDE@MoDELS,,,
53a732cf20f7420be8db745d,,Parallel Computing,,,
53a7289920f7420be8bb2ab9,,MESOCA,,,
53907b7f20f770854f5ed389,,Predictive data mining: a practical guide,,,
53a7313620f7420be8d2d604,,Journal of Machine Learning Research,,,
53e18c1f20f7dfbc07e8ffb2,,Symposium on Applications and the Internet,,,
55f95973c35f4fb0d21ce642,,Asia and South Pacific Design Automation Conference,,,
0038-9056,0038-9056,STARKE,1521-379X,,J


In [0]:
# Number of rows in venue_df (one row per venue _id after the groupBy step).
venue_df.count()

Out[94]: 50560

### Saving the table

In [0]:
# Persist the venues as a Delta table named "venues"; "overwrite" mode replaces
# the contents of an existing table with the same name.
(
    venue_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("venues")
)

### Testing

In [0]:
# Load a single input file and replace its venue struct with the same key used
# for the venues table: the venue _id when present, otherwise the ISSN.
main_table = (
    spark.read
    .option("multiline", True)
    .json('dbfs:/user/dblpv13/dblpv13.4.json.gz')
    .withColumn("venue", F.coalesce(F.col("venue._id"), F.col("venue.issn")))
)

# Read back the table that was saved under the name "venues".
venue_table = spark.table("venues")

In [0]:
# Left outer join: every row of main_table is kept, and venue columns are null
# where no venue with a matching _id exists.
papers_with_venues = main_table.join(
    venue_table,
    on=main_table.venue == venue_table._id,
    how="leftouter",
)
display(papers_with_venues.limit(10))

_id,abstract,authors,doi,fos,isbn,issn,issue,keywords,lang,n_citation,page_end,page_start,pdf,references,title,url,venue,volume,year,_id.1,issn.1,name,online_issn,publisher,type
53e9a751b7602d970308796b,"In this paper I investigate the problem of tagging elements of a set, and the elements of those elements, uniquely, when they admit an order, and two boundary elements are tagged. A heuristic sorting algorithm is also investigated. (Updated grammar and spellings.)","List(List(53f431cddabfaec09f14c449, null, null, null, Sayandeep Khan, null, null, null, null, null, null, null, null, null, null))",,"List(Heuristic, Cave, Yet another, Numbering scheme, Treasure, Computer science, Algorithm, Pallet, Theoretical computer science, Sorting problem, Sorting algorithm)",,,,List(),en,0,,,https://static.aminer.cn/storage/pdf/arxiv/12/1206/1206.5335.pdf,,The McDougal Cave and Counting issues,"List(http://arxiv.org/abs/1206.5335, https://arxiv.org/abs/1206.5335)",555036d37cea80f95415b0ba,abs/1206.5335,2012,555036d37cea80f95415b0ba,,arXiv: Learning,,,
53e9a751b7602d9703087a5c,,"List(List(53f4e88adabfaeb1a7cd45fb, null, null, null, Wen-mei W. Hwu, null, null, null, null, null, null, null, null, null, null), List(5489313adabfaed7b5fa39db, null, null, null, Thomas M. Conte, null, null, null, null, null, null, null, null, null, null))",,"List(Computer science, Parallel computing, Multiprocessing, Instruction prefetch)",,,,List(),en,0,,,,,A Simulation Study of Simultaneous Vector Prefetch Performance in Multiprocessor Memory Subsystems (Extended Abstract),List(),53a72ca420f7420be8c4aef0,,1989,53a72ca420f7420be8c4aef0,,Measurement and Modeling of Computer Systems,,,C
53e9a751b7602d9703087aa9,"In order to efficiently encrypt multimedia streams deliv- ered in real-time environments, a Fast Encryption Al- gorithm for Multimedia (FEA-M) was proposed (8, 9). Cryptanalyses of this technique (1, 3, 4, 5) have iden- tified its weaknesses and an improved variant has been suggested in (5). In this paper, we identify further weak- nesses in the original FEA-M and also in the improved variant. Our solution provides message integrity, guaran- tees zero packet loss and protects against specific known plaintext attacks.","List(List(53f4783adabfaee4dc89484a, null, null, null, Depeng Li, null, null, null, null, null, null, null, null, null, null), List(53f4391bdabfaee4dc79cecf, null, null, null, Srinivas Sampalli, null, null, null, null, null, null, null, null, null, null))",,"List(Message integrity, Computer science, Known-plaintext attack, Computer network, Cryptanalysis, Packet loss, Encryption, Probabilistic encryption, Multimedia, Ciphertext-only attack, Plaintext)",,,2.0,"List(multimedia encryption, cryptanalysis, reliable transportation, real time, packet loss, known plaintext attack)",en,1,192.0,187.0,,"List(53e99a20b7602d9702277bc5, 53e9a501b7602d9702e209aa, 53e9bb29b7602d9704762958, 53e99885b7602d97020bd403, 558a40f1e4b0b32fcb35b90c, 53e99e8cb7602d9702754f1d, 53e99d37b7602d97025ef9ac)",Further Improvements of Fast Encryption Algoirthm for Multimedia,List(http://ijns.femto.com.tw/contents/ijns-v7-n2/ijns-2008-v7-n2-p187-192.pdf),555036e47cea80f954164b11,7,2008,555036e47cea80f954164b11,,International Journal of Network Security,,,
53e9a751b7602d9703087af4,,"List(List(54303191dabfaeca69bd4998, null, null, null, Ed Dawson, null, null, null, null, null, null, null, null, null, 617262), List(54409306dabfae7d84b84d7f, null, null, null, Duncan S. Wong, null, null, null, null, null, null, null, null, null, 640091))",,,isbn,,,List(),en,25,,,,,"Information Security Practice and Experience, Third International Conference, ISPEC 2007, Hong Kong, China, May 7-9, 2007, Proceedings",,555037437cea80f95417d301,4464,2007,555037437cea80f95417d301,,Information Security Practice and Experience,,,
53e9a751b7602d9703087997,"In this paper, we present a new framework for shape modelling and analysis: we suggest to look at the problem from a pattern recognition point of view, and claim that under this prospective several advantages are achieved. The modelling of a surface with a point distribution model is seen as an unsupervised clustering problem, and tackled by using growing cell structures. The adaptation of a model to new shapes is studied as a classification task, and provides a straightforward solution to the point correspondence problem in active shape modelling. The method is illustrated and tested in 3D synthetic datasets and applied to the modelling of brain ventricles in an elderly population.","List(List(53f43020dabfaeb22f42aa65, null, L.Ferrarini@lumc.nl, 5b86a6dce1cd8e14a3e00973, Luca Ferrarini, null, null, null, null, LKEB - Leiden University Medical Center, The Netherlands, null, 5f71b2ab1c455f439fe3d64a, null, null, null), List(53f43411dabfaedd74d89ad9, null, null, 5b86a6dce1cd8e14a3e00973, Hans Olofsen, null, null, null, null, LKEB - Leiden University Medical Center, The Netherlands, null, 5f71b2ab1c455f439fe3d64a, null, null, null), List(53f45a75dabfaee02ad682a2, null, null, 5b86a6dce1cd8e14a3e00973, Mark A. van Buchem, null, null, null, null, LKEB - Leiden University Medical Center, The Netherlands, null, 5f71b2ab1c455f439fe3d64a, null, null, null), List(53f7c3d6dabfae938c6d9513, null, null, 5b86a6dce1cd8e14a3e00973, Johan H. C. 
Reiber, null, null, null, null, LKEB - Leiden University Medical Center, The Netherlands, null, 5f71b2ab1c455f439fe3d64a, null, null, null), List(53f44ae3dabfaee43ec8fb51, null, null, 5b86a6dce1cd8e14a3e00973, Faiza Admiraal-Behloul, null, null, null, null, LKEB - Leiden University Medical Center, The Netherlands, null, 5f71b2ab1c455f439fe3d64a, null, null, null))",10.1007/11566489_56,"List(Point distribution model, Point correspondence, Elderly population, Pattern recognition, Computer science, Unsupervised clustering, Fully automatic, Artificial intelligence, Artificial neural network, Machine learning)",3-540-29326-4,0302-9743,,"List(automatic shape, shape modelling, unsupervised clustering problem, cell structure, new shape, point distribution model, new framework, point correspondence problem, active shape modelling, pattern recognition point, cell neural network, brain ventricle, neural network, pattern recognition, correspondence problem)",en,4,458.0,451.0,,"List(53e9a5beb7602d9702eeaed4, 53e9a7e4b7602d97031261a6, 53e9aad1b7602d9703450479, 53e99e99b7602d970275eb77, 53e9bc26b7602d970489105d, 53e99ab3b7602d970232ef9d, 53e9b089b7602d9703af104c, 558abfce84ae84d265bf69b3, 53e9b1a3b7602d9703c2f8eb, 53e9a5e1b7602d9702f09370, 55a528ff612c6b12ab056ecd)",Fully automatic shape modelling using growing cell neural networks,"List(http://dx.doi.org/10.1007/11566489_56, http://www.webofknowledge.com/)",53a72b6c20f7420be8c25381,3750,2005,53a72b6c20f7420be8c25381,,Medical Image Computing and Computer-Assisted Intervention,,,C
53e9a751b7602d970308799c,"Gmat 2.1 is a program able to compute the rovibrational G matrix in different molecule-fixed axes extending the capabilities of Gmat 1.0. The present version is able to select optimal molecule-fixed axes minimizing the pure rotational kinetic elements, the rovibrational kinetic elements or both simultaneously. To such an end, it uses a hybrid minimization approach. Thus, it combines a global search heuristic based in simulated annealing with a gradient-free local minimization. As the previous version, the program handles the structural results of potential energy hypersurface mappings computed in computer clusters or computational Grid environments. However, since now more general molecule-fixed axes can be defined, a procedure is implemented to ensure the same minimum of the cost function is used in all the molecular structures. In addition, an algorithm for the unambiguous definition of the molecule-fixed axes orientation is used.","List(List(53f4d05fdabfaeeee2f81b0d, null, null, 5b8695b7e1cd8e14a36e36f6, M.E. Castro, null, null, null, null, Grupo de Química Computacional y Computación de Alto Rendimiento, Escuela Superior de Informática, Universidad de Castilla-La Mancha, Paseo de la Universidad 4, 13071 Ciudad Real, Spain, null, 5f71b2901c455f439fe3cb6d, List(Grupo de Química Computacional y Computación de Alto Rendimiento, Escuela Superior de Informática, Universidad de Castilla-La Mancha, Paseo de la Universidad 4, 13071 Ciudad Real, Spain), null, null), List(53f431c2dabfaee4dc75104d, null, null, 5b8695b7e1cd8e14a36e36f6, A. 
Niño, null, null, null, null, Grupo de Química Computacional y Computación de Alto Rendimiento, Escuela Superior de Informática, Universidad de Castilla-La Mancha, Paseo de la Universidad 4, 13071 Ciudad Real, Spain, null, 5f71b2901c455f439fe3cb6d, List(Grupo de Química Computacional y Computación de Alto Rendimiento, Escuela Superior de Informática, Universidad de Castilla-La Mancha, Paseo de la Universidad 4, 13071 Ciudad Real, Spain), null, null), List(53f476e4dabfaee4dc890078, null, camelia.munoz@uclm.es, null, C. Muñoz-Caro, null, null, null, null, Corresponding author. Tel.: +34 926295300; fax: +34 926295354., null, null, List(Corresponding author. Tel.: +34 926295300; fax: +34 926295354., Grupo de Química Computacional y Computación de Alto Rendimiento, Escuela Superior de Informática, Universidad de Castilla-La Mancha, Paseo de la Universidad 4, 13071 Ciudad Real, Spain), null, null))",10.1016/j.cpc.2010.04.016,"List(Simulated annealing, Mathematical optimization, Heuristic, Rotation matrix, Matrix (mathematics), Rotational–vibrational spectroscopy, Algorithm, Minification, Mathematics, Grid, Computation)",,0010-4655,8.0,"List(Rovibrational G matrix, Rotation matrix, Optimal Euler angles, Unambiguous molecule-fixed axes)",en,6,1475.0,1471.0,,"List(53e9b5a2b7602d97040ea50d, 53e9a50fb7602d9702e32ee4)",Heuristic computation of the rovibrational G matrix in optimized molecule-fixed axes. Gmat 2.1,"List(http://dx.doi.org/10.1016/j.cpc.2010.04.016, https://www.sciencedirect.com/science/article/pii/S0010465510001396, http://www.webofknowledge.com/)",0010-4655,181,2010,0010-4655,0010-4655,Computer Physics Communications,,North-Holland,J
53e9a751b7602d9703087b1a,"Purpose The purpose of this paper is to develop an interval method for vehicle allocation and route planning in case of an evacuation. Design/methodology/approach - First, the evacuation route planning system is described and the notations are defined. An inexact programming model is proposed. The goal of the model is to achieve optimal planning of vehicles allocation with a minimized system time under the condition of inexact information. The constraints of the model include four types: number of vehicles constraint, passengers balance constraints, maximum capacity of links constraints and no negative constraints. The model is solved through the decomposition of the inexact model A hypothetical case is developed to illustrate the proposed model. Findings - The paper finds that the interval solutions are feasible and stable for evacuation model in the given decision space, and this may reduce the negative effects of uncertainty, thereby improving evacuation managers' estimates under different conditions. Originality/value - This method entails incorporation of uncertainties existing as interval values into model formulation and solution procedure, and application of the developed model and the related solution algorithm in a hypothetical case study.","List(List(53f44631dabfaeb22f4bec22, null, null, null, Chaozhong Wu, null, null, null, null, Wuhan Univ Technol, Intelligent Transportat Syst Ctr, Wuhan 430070, Hubei, Peoples R China, null, null, List(Wuhan Univ Technol, Intelligent Transportat Syst Ctr, Wuhan 430070, Hubei, Peoples R China), null, null), List(53f43614dabfaee4dc780707, null, null, null, Gordon H. 
Huang, null, null, null, null, Univ Regina, Fac Engn, Regina, SK S4S 0A2, Canada, null, null, List(Univ Regina, Fac Engn, Regina, SK S4S 0A2, Canada), null, null), List(548a2818dabfae9b40134f0d, null, null, null, Xinping Yan, null, null, null, 0000-0002-2265-2689, Wuhan Univ Technol, Intelligent Transportat Syst Ctr, Wuhan 430070, Hubei, Peoples R China, null, null, List(Wuhan Univ Technol, Intelligent Transportat Syst Ctr, Wuhan 430070, Hubei, Peoples R China), null, null), List(53f42ff6dabfaeb1a7babc36, null, null, null, Yanpeng Cai, null, null, null, null, Univ Regina, Fac Engn, Regina, SK S4S 0A2, Canada, null, null, List(Univ Regina, Fac Engn, Regina, SK S4S 0A2, Canada), null, null), List(54299ee0dabfaec70819be2d, null, null, null, Yongping Li, null, null, null, 0000-0002-3253-4088, Peking Univ, Coll Urban & Environm Sci, Beijing 100871, Peoples R China, null, null, List(Peking Univ, Coll Urban & Environm Sci, Beijing 100871, Peoples R China), null, null), List(53f389cfdabfae4b34a2960c, null, null, null, Nengchao Lv, null, null, null, null, Wuhan Univ Technol, Intelligent Transportat Syst Ctr, Wuhan 430070, Hubei, Peoples R China, null, null, List(Wuhan Univ Technol, Intelligent Transportat Syst Ctr, Wuhan 430070, Hubei, Peoples R China), null, null))",10.1108/03684920910994033,,,0368-492X,10.0,"List(Cybernetics, Traffic flow, Decision support systems, Uncertainty management)",en,4,1683.0,1676.0,,,An inexact optimization model for evacuation planning.,"List(http://dx.doi.org/10.1108/03684920910994033, http://www.webofknowledge.com/)",0368-492X,38,2009,0368-492X,0368-492X,KYBERNETES,1758-7883,,J
53e9a751b7602d9703087b38,"This paper introduces a new security problem in which individuals movement traces (in terms of accurate routes) can be inferred from just a series of mutual contact records and the map of the area in which they roam around. Such contact records may be obtained through the bluetooth communication on mobile phones. We present an approach that solve the trace inference problem in reasonable time, and analyze some properties of the inference algorithm.","List(List(542a8806dabfae61d498c33a, null, wpc009@gmail.com, 5b86b6c0e1cd8e14a34c9c74, Pengcheng Wang, null, null, null, null, Shanghai Jiao Tong University, Shanghai, China, null, 5f71b54b1c455f439fe502b0, List(Shanghai Jiao Tong Univ, Shanghai 200030, Peoples R China), null, null), List(53f435a0dabfaee02acc39e5, null, null, 5b86b6c0e1cd8e14a34c9c74, Zhaoyu Gao, null, null, null, null, Shanghai Jiao Tong University, Shanghai, China, null, 5f71b54b1c455f439fe502b0, List(Shanghai Jiao Tong Univ, Shanghai 200030, Peoples R China), null, null), List(53f4347fdabfaedf4356dacb, null, null, 5b86b6c0e1cd8e14a34c9c74, Xinhui Xu, null, null, null, null, Shanghai Jiao Tong University, Shanghai, China, null, 5f71b54b1c455f439fe502b0, List(Shanghai Jiao Tong Univ, Shanghai 200030, Peoples R China), null, null), List(53f436fedabfaeb22f476e32, null, null, 5b86b6c0e1cd8e14a34c9c74, Yujiao Zhou, null, null, null, null, Shanghai Jiao Tong University, Shanghai, China, null, 5f71b54b1c455f439fe502b0, List(Shanghai Jiao Tong Univ, Shanghai 200030, Peoples R China), null, null), List(53f47612dabfaeee22a8e448, null, null, 5b86b6c0e1cd8e14a34c9c74, Haojin Zhu, null, null, null, null, Shanghai Jiao Tong University, Shanghai, China, null, 5f71b54b1c455f439fe502b0, List(Shanghai Jiao Tong Univ, Shanghai 200030, Peoples R China), null, null), List(53f449d4dabfaee43ec8b75f, null, null, 5b86b6c0e1cd8e14a34c9c74, Kenny Q. 
Zhu, null, null, null, null, Shanghai Jiao Tong University, Shanghai, China, null, 5f71b54b1c455f439fe502b0, List(Shanghai Jiao Tong Univ, Shanghai 200030, Peoples R China), null, null))",10.1145/2018436.2018481,"List(Data mining, Inference, Computer science, Artificial intelligence, Automatic inference, Bluetooth communication)",,0146-4833,4.0,"List(bluetooth communication, automatic inference, reasonable time, contact record, individuals movement trace, mutual contact record, inference algorithm, contact history, accurate route, trace inference problem, mobile phone, new security problem)",en,13,387.0,386.0,,"List(53e9b3e9b7602d9703edb09d, 53e9ba94b7602d97046bb055, 53e9b44bb7602d9703f46d5d, 53e9b6cbb7602d9704256fca, 53e99e0bb7602d97026cc935)",Automatic inference of movements from contact histories,"List(http://dx.doi.org/10.1145/2018436.2018481, http://doi.acm.org/10.1145/2018436.2018481, http://dx.doi.org/10.1145/2043164.2018481, http://www.webofknowledge.com/)",53a7264620f7420be8b6f44f,41,2011,53a7264620f7420be8b6f44f,,ACM Special Interest Group on Data Communication,,ACM,
53e9a751b7602d97030879a9,In this paper we consider the inverse linear programming problem under the l2 norm and formulate it as a second order cone programming (SOCP) problem. This approach is applied to the enterprise's production planning problem and the optimal solution is obtained using the CVX software toolbox.,"List(List(5605cf3245cedb3396709c01, null, null, null, Hong-xia Yu, null, null, null, null, null, null, null, null, null, null), List(5444ce6adabfae87074e8654, null, null, null, Li Jin, null, null, null, null, null, null, null, null, null, null))",10.1109/ICNC.2010.5582992,"List(Second-order cone programming, Inverse, Mathematical optimization, System on a chip, Algorithm, Production planning, Software, Linear programming, Inverse problem, Norm (mathematics), Mathematics)",978-1-4244-5958-2,,,"List(second order cone programming problem, second order cone programming, inverse lp problem, production planning, inverse optimization method, cvx software toolbox, linear programming, inverse linear programming problem, production planning problem, mathematical model, linear program, system on a chip, optimization, inverse problems, programming)",en,0,3798.0,3796.0,,"List(53e9adb0b7602d97037ad50f, 53e9b5e0b7602d9704134269, 53e9b85bb7602d97044242dc, 53e9a87eb7602d97031cd184, 53e9b2ffb7602d9703dc12f7, 53e9a905b7602d9703254e70)",An inverse optimization method for production planning,"List(http://dx.doi.org/10.1109/ICNC.2010.5582992, http://ieeexplore.ieee.org/xpl/abstractAuthors.jsp?tp=&arnumber=5582992)",5550370c7cea80f9541705a0,7,2010,5550370c7cea80f9541705a0,,International Conference on Natural Computation,,,
53e9a751b7602d9703087bad,"The Naval Ocean Systems Center (NOSC) has conducted the third in a series of evaluations of English text analysis systems. These evaluations are intended to advance our understanding of the merits of current text analysis techniques, as applied to the performance of a realistic information extraction task. The latest one is also intended to provide insight into information retrieval technology (document retrieval and categorization) used instead of or in concert with language understanding technology. The inputs to the analysis/extraction process consist of naturally-occurring texts that were obtained in the form of electronic messages. The outputs of the process are a set of templates or semantic frames resembling the contents of a partially formatted database.","List(List(53f4371cdabfaedce55358a1, null, null, 5b8693ffe1cd8e14a362f29d, Beth Sundheim, null, null, null, null, Naval Ocean Systems Center, San Diego, CA, null, null, null, null, null))",10.3115/1071958.1071960,"List(Categorization, Text mining, Information retrieval, Computer science, Electronic mail, Information extraction, Electronic equipment, Document retrieval, Language understanding, Semantics)",1-55860-236-4,,,"List(English text analysis system, current text analysis technique, document retrieval, extraction process, information retrieval technology, language understanding technology, naturally-occurring text, realistic information extraction task, Naval Ocean Systems Center, electronic message, message understanding evaluation)",en,66,16.0,3.0,//static.aminer.org/pdf/20160902/aclanthology/MUC/MUC-1991-2717.pdf,"List(53e9ad26b7602d9703705412, 53e9a5fdb7602d9702f29cbb)",Overview of the third message understanding evaluation and conference,"List(http://dx.doi.org/10.3115/1071958.1071960, http://doi.acm.org/10.1145/1071958.1071960, http://dx.doi.org/10.1145/1071958.1071960, https://static.aminer.org/pdf/20160902/aclanthology/index.txt, 
https://static.aminer.org/pdf/20170130/aclanthology/index.txt)",53a727d320f7420be8b9ddb9,,1991,53a727d320f7420be8b9ddb9,,MUC,,,C
