In [1]:
import requests
import boto3
import json
import os
import sys
import argparse
import importlib
import transformers
import torch
import pathlib
import awswrangler as wr
from IPython.display import display
from sagemaker.huggingface.processing import HuggingFaceProcessor
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import FrameworkProcessor
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.session import get_execution_role


# Make the project's shared modules importable by adding ../01_modules and/or
# ./01_modules (relative to this script/notebook) to sys.path when they exist.
# `__file__` is only defined when this code runs as a script; inside a notebook
# kernel it is missing, so the current working directory is used instead.
if '__file__' in globals():
    script_dir = pathlib.Path(__file__).parent.resolve()
else:
    script_dir = pathlib.Path().absolute()
modules_path_in_dev = os.path.abspath(os.path.join(script_dir, '..', '01_modules'))
modules_path_in_prod = os.path.abspath(os.path.join(script_dir, '01_modules'))
for modules_path in (modules_path_in_dev, modules_path_in_prod):
    # Skip paths that are already registered: re-running this cell in a live
    # kernel must not keep appending duplicate entries to sys.path.
    if os.path.exists(modules_path) and modules_path not in sys.path:
        sys.path.append(modules_path)


# Jupyter only reads a local module the first time it is imported after a
# kernel start. Re-running a cell with "from mymodulename import *" would not
# change anything, even if the imported module has since changed on disk.
# As a workaround, each local module is imported directly and then reloaded
# with importlib.reload. No `import *` is performed here; the modules are
# used through their qualified names (e.g. utils.TimeLogger).
# The reload result is assigned to `_`, presumably so the returned module
# object is not echoed as cell output.
# NOTE(review): the recorded output of this cell suggests that utils imports
# config. If so, reloading utils *before* config may leave utils holding
# stale config values — confirm the intended reload order.
import utils
_ = importlib.reload(utils)
import config
_ = importlib.reload(config) 


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
config.py loaded: v0.1
utils.py loaded: v0.2.12
utils.py loaded: v0.2.12
config.py loaded: v0.1


In [2]:
# One-off SageMaker processing job that runs
# src/03_transformation/03_11_transformation_openalex_works_reduction.py.
# It is disabled by default because a run takes about a day; set the flag to
# True only when the job really has to be executed again.
RUN_OPENALEX_WORKS_REDUCTION = False  # do not run this again, takes a day
# Hyphenated form of "openalex_works_reduction" (underscores replaced by '-').
OPENALEX_WORKS_REDUCTION_JOB_NAME = 'openalex-works-reduction'

if RUN_OPENALEX_WORKS_REDUCTION:
    execution_role = get_execution_role()
    # Two directory levels above the working directory; `code` below is a path
    # given relative to this directory.
    source_dir = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
    print('source_dir:', source_dir)
    sklearn_processor = FrameworkProcessor(
        estimator_cls=SKLearn,
        framework_version='1.2-1',  # The newest supported version by sagemaker
        instance_type='ml.c7i.16xlarge',
        instance_count=1,
        base_job_name=OPENALEX_WORKS_REDUCTION_JOB_NAME,
        role=execution_role,
    )

    step_args = sklearn_processor.run(
        code='src/03_transformation/03_11_transformation_openalex_works_reduction.py',
        source_dir=source_dir,
        # We are not using automatic input-output mapping, instead we handle
        # everything in the script directly on S3
        inputs=[],
        outputs=[],
        arguments=[
            '--runtype', 'prod',
            '--file-max-limit', '10000',
        ],
        wait=True,
    )

In [3]:
# (Re)create the staging table "02_stg".base_openalex_works_reduced through
# utils.create_table_from_sql_file and log the elapsed time.
# overwrite_strategy='overwrite' replaces a table that already exists.
table_name = 'base_openalex_works_reduced'
timelogger = utils.TimeLogger()
utils.create_table_from_sql_file(
    database_name='02_stg',
    table_name=table_name,
    overwrite_strategy='overwrite',  # options: fail, overwrite, ignore
    wait=True,
)
# timelogger.log prints the message and also returns it; assigning the return
# value to `_` keeps the notebook from echoing the same line a second time as
# the cell result.
_ = timelogger.log(f'"02_stg".{table_name} created')

 :: :: TIMELOGGER STARTED :: | since_start: 0.00 seconds | since_last: 0.00 seconds :: 
Table 02_stg.base_openalex_works_reduced already exists. Overwriting since overwrite_strategy=="overwrite".
Deleting table from Glue Catalog 02_stg base_openalex_works_reduced
Deleting S3 objects from s3://sagemaker-research-methodology-extraction/01_data/02_stg/base_openalex_works_reduced/
s3_parent_target_path:  s3://sagemaker-research-methodology-extraction/01_data/02_stg
 :: "02_stg".base_openalex_works_reduced created | since_start: 1.0 minute, 42.04 seconds | since_last: 1.0 minute, 42.04 seconds :: 


' :: "02_stg".base_openalex_works_reduced created | since_start: 1.0 minute, 42.04 seconds | since_last: 1.0 minute, 42.04 seconds :: '

In [4]:
# Quick inspection of "02_stg".base_openalex_works_reduced: display a five-row
# sample first, then the total row count.
utils.pd_set_options()
openalex_sample_sql = """SELECT * FROM "02_stg".base_openalex_works_reduced LIMIT 5 """
openalex_count_sql = """SELECT COUNT(*) AS c FROM "02_stg".base_openalex_works_reduced """
for openalex_sql in (openalex_sample_sql, openalex_count_sql):
    display(wr.athena.read_sql_query(openalex_sql, '02_stg'))

Unnamed: 0,id_openalex,id_doi,title,language,primary_topic_id,primary_topic_display_name,primary_topic_subfield_id,primary_topic_subfield_display_name,primary_topic_field_id,primary_topic_field_display_name,primary_topic_domain_id,primary_topic_domain_display_name
0,269677805,,2. Profil type du détenu politique à Eysses,fr,T10153,"Education, sociology, and vocational training",3312,Sociology and Political Science,33,Social Sciences,2,Social Sciences
1,2696780303,,Analisa Struktur Dan Material Speed Bump Dengan Bahan Concrete Foam Untuk Penggerak Tenaga Listrik,id,T13674,Computer Science and Engineering,1702,Artificial Intelligence,17,Computer Science,3,Physical Sciences
2,2696784097,,Strategies of survival during the holocaust,en,T11203,Jewish and Middle Eastern Studies,3312,Sociology and Political Science,33,Social Sciences,2,Social Sciences
3,2696788076,,Business intelligence and Marketing analytics/Inteligencia de negocio y análisis de datos,,T11891,Big Data and Business Intelligence,1404,Management Information Systems,14,"Business, Management and Accounting",2,Social Sciences
4,2696777682,,El papel transversal de la lectura en el currículo,es,T13061,Literacy and Educational Practices,3304,Education,33,Social Sciences,2,Social Sciences


Unnamed: 0,c
0,270051911


In [5]:
# (Re)create the staging table "02_stg".base_semanticscholar_s2orcv2 through
# utils.create_table_from_sql_file and log the elapsed time.
# overwrite_strategy='overwrite' replaces a table that already exists.
table_name = 'base_semanticscholar_s2orcv2'
timelogger = utils.TimeLogger()
utils.create_table_from_sql_file(
    database_name='02_stg',
    table_name=table_name,
    overwrite_strategy='overwrite',  # options: fail, overwrite, ignore
    wait=True,
)
# timelogger.log prints the message and also returns it; assigning the return
# value to `_` keeps the notebook from echoing the same line a second time as
# the cell result.
_ = timelogger.log(f'"02_stg".{table_name} created')

 :: :: TIMELOGGER STARTED :: | since_start: 0.00 seconds | since_last: 0.00 seconds :: 
Table 02_stg.base_semanticscholar_s2orcv2 already exists. Overwriting since overwrite_strategy=="overwrite".
Deleting table from Glue Catalog 02_stg base_semanticscholar_s2orcv2
Deleting S3 objects from s3://sagemaker-research-methodology-extraction/01_data/02_stg/base_semanticscholar_s2orcv2/
s3_parent_target_path:  s3://sagemaker-research-methodology-extraction/01_data/02_stg
 :: "02_stg".base_semanticscholar_s2orcv2 created | since_start: 11.0 minutes, 4.77 seconds | since_last: 11.0 minutes, 4.77 seconds :: 


' :: "02_stg".base_semanticscholar_s2orcv2 created | since_start: 11.0 minutes, 4.77 seconds | since_last: 11.0 minutes, 4.77 seconds :: '

In [6]:
# Quick inspection of "02_stg".base_semanticscholar_s2orcv2: display a
# three-row sample first, then the total row count.
utils.pd_set_options(cols=500)
s2orc_sample_sql = """SELECT * FROM "02_stg".base_semanticscholar_s2orcv2 LIMIT 3 """
s2orc_count_sql = """SELECT COUNT(*) AS c FROM "02_stg".base_semanticscholar_s2orcv2 """
for s2orc_sql in (s2orc_sample_sql, s2orc_count_sql):
    display(wr.athena.read_sql_query(s2orc_sql, '02_stg'))

Unnamed: 0,id_semanticscholar,id_mag,id_doi,id_arxiv,title,source_url,openaccess_status,content_text,annotations_paragraph,annotations_section_header,license
0,85913114,2142459664.0,10.5897/AJB2013.12952,,Biochemical and cytological analysis of five cultivars of Cicer (chickpea),https://doi.org/10.5897/AJB2013.12952,GREEN,"\nINTRODUCTION\n\nThe genus Cicer include 33 perennial, eight annual, one unspecified wild species as well as the cultivated ones ( Van der Maesen, 1987). Chickpea is the second most important cool season pulse crop in the world and is grown in at least 33 countries including central and west Asia, South Europe, Ethiopia, North Africa, North and South America and Australia (Ladizinsky and Adler, 1976;Singh and Ocampo, 1997). It is native to South Europe and is the most important pulse crop o...","[{""attributes"":null,""end"":1267,""start"":15},{""attributes"":null,""end"":2151,""start"":1269},{""attributes"":null,""end"":2673,""start"":2153},{""attributes"":null,""end"":2963,""start"":2698},{""attributes"":null,""end"":3235,""start"":2985},{""attributes"":null,""end"":3322,""start"":3237},{""attributes"":null,""end"":3875,""start"":3344},{""attributes"":null,""end"":4521,""start"":3896},{""attributes"":null,""end"":4856,""start"":4558},{""attributes"":null,""end"":5177,""start"":4873},{""attributes"":null,""end"":5594,""start"":5208},{""attributes""...","[{""attributes"":null,""end"":13,""start"":1},{""attributes"":null,""end"":2696,""start"":2675},{""attributes"":null,""end"":2983,""start"":2965},{""attributes"":null,""end"":3342,""start"":3324},{""attributes"":null,""end"":3894,""start"":3877},{""attributes"":null,""end"":4556,""start"":4523},{""attributes"":null,""end"":4871,""start"":4858},{""attributes"":null,""end"":5186,""start"":5179},{""attributes"":null,""end"":5206,""start"":5188},{""attributes"":null,""end"":5613,""start"":5596},{""attributes"":null,""end"":7337,""start"":7320},{""attributes"":nu...",CCBY
1,268714736,,10.3389/fmicb.2024.1359263,,Genomic and phenotypic analyses reveal Paenibacillus polymyxa PJH16 is a potential biocontrol agent against cucumber fusarium wilt,https://pmc.ncbi.nlm.nih.gov/articles/PMC11000672,GOLD,"\nIntroduction\n\nCucumber is an important economic crop. China is the main producer of cucumber, and its planting area and scale have ranked first in the world for many years. One cucumber disease that poses a serious threat to yield and quality is cucumber fusarium wilt caused by the fungus Fusarium oxysporum f. sp. cucumerinum which belongs to Ascomycota (Gao et al., 2014). This fungus is a soil-borne pathogen that can infect plants at any stage of growth. It begins invading the wounds an...","[{""attributes"":null,""end"":822,""start"":15},{""attributes"":null,""end"":3347,""start"":824},{""attributes"":null,""end"":4114,""start"":3349},{""attributes"":null,""end"":4376,""start"":4116},{""attributes"":null,""end"":4578,""start"":4378},{""attributes"":null,""end"":4604,""start"":4580},{""attributes"":null,""end"":5287,""start"":4673},{""attributes"":null,""end"":6002,""start"":5289},{""attributes"":null,""end"":7390,""start"":6042},{""attributes"":null,""end"":7708,""start"":7446},{""attributes"":null,""end"":8282,""start"":7710},{""attributes"":n...","[{""attributes"":{""n"":""1""},""end"":13,""start"":1},{""attributes"":{""n"":""2.1""},""end"":4671,""start"":4606},{""attributes"":{""n"":""2.2""},""end"":6040,""start"":6004},{""attributes"":{""n"":""2.3""},""end"":7444,""start"":7392},{""attributes"":{""n"":""2.4""},""end"":11415,""start"":11356},{""attributes"":{""n"":""2.5""},""end"":11951,""start"":11884},{""attributes"":{""n"":""2.6""},""end"":14741,""start"":14678},{""attributes"":{""n"":""2.7""},""end"":15795,""start"":15762},{""attributes"":{""n"":""2.8""},""end"":17658,""start"":17615},{""attributes"":{""n"":""2.9""},""end"":1...",CCBY
2,787028,2160387886.0,10.1159/000345413,,Laparoscopic Splenectomy in Colorectal Cancer Patients with Chemotherapy-Associated Thrombocytopenia due to Hypersplenism,https://pmc.ncbi.nlm.nih.gov/articles/PMC3531924,GOLD,"\nIntroduction\n\nMetastatic colorectal cancer (mCRC) affects approximately 50,000 people a year [1]. The cornerstone of treatment for these patients is systemic chemotherapy, especially with oxaliplatin-based regimens. Acquired thrombocytopenia is a condition that complicates treatment with many chemotherapy regimens. Of the known mechanisms of chemotherapy-induced thrombocytopenia, bone marrow suppression is the most common. Oxaliplatin is also associated with two other etiologies of throm...","[{""attributes"":null,""end"":1177,""start"":15},{""attributes"":null,""end"":2286,""start"":1179},{""attributes"":null,""end"":2856,""start"":2297},{""attributes"":null,""end"":3528,""start"":2858},{""attributes"":null,""end"":4223,""start"":3530},{""attributes"":null,""end"":4599,""start"":4225},{""attributes"":null,""end"":4998,""start"":4610},{""attributes"":null,""end"":5480,""start"":5000},{""attributes"":null,""end"":6490,""start"":5482},{""attributes"":null,""end"":7621,""start"":6504},{""attributes"":null,""end"":8226,""start"":7623},{""attributes""...","[{""attributes"":null,""end"":13,""start"":1},{""attributes"":null,""end"":2295,""start"":2288},{""attributes"":null,""end"":4608,""start"":4601},{""attributes"":null,""end"":6502,""start"":6492},{""attributes"":null,""end"":10853,""start"":10833}]",CCBYNC


Unnamed: 0,c
0,11609787


In [5]:
# (Re)create the staging table "02_stg".stg_semanticscholar_combined_works,
# then display a three-row sample and the total row count, logging the elapsed
# time after each stage.
# overwrite_strategy='overwrite' replaces a table that already exists.
table_name = 'stg_semanticscholar_combined_works'
timelogger = utils.TimeLogger()
utils.create_table_from_sql_file(
    database_name='02_stg',
    table_name=table_name,
    overwrite_strategy='overwrite',  # options: fail, overwrite, ignore
    wait=True,
)
timelogger.log(f'"02_stg".{table_name} created')

utils.pd_set_options(cols=500)
display(wr.athena.read_sql_query(f"""SELECT * FROM "02_stg".{table_name} LIMIT 3 """, '02_stg'))
display(wr.athena.read_sql_query(f"""SELECT COUNT(*) AS c FROM "02_stg".{table_name} """, '02_stg'))
# timelogger.log prints the message and also returns it; as the last
# expression of the cell the return value would be echoed again, so it is
# assigned to `_` instead.
_ = timelogger.log(f'"02_stg".{table_name} queries finished')

 :: :: TIMELOGGER STARTED :: | since_start: 0.00 seconds | since_last: 0.00 seconds :: 
Table 02_stg.stg_semanticscholar_combined_works already exists. Overwriting since overwrite_strategy=="overwrite".
Deleting table from Glue Catalog 02_stg stg_semanticscholar_combined_works
Deleting S3 objects from s3://sagemaker-research-methodology-extraction/01_data/02_stg/stg_semanticscholar_combined_works/
s3_parent_target_path:  s3://sagemaker-research-methodology-extraction/01_data/02_stg
 :: "02_stg".stg_semanticscholar_combined_works created | since_start: 13.0 minutes, 57.99 seconds | since_last: 13.0 minutes, 57.99 seconds :: 


Unnamed: 0,id_semanticscholar,id_mag,id_doi,id_arxiv,title,source_url,openaccess_status,content_text,annotations_paragraph,annotations_section_header,license,content_abstract,publication_year,publication_date
0,268748136,,10.12928/jhsr.v5i1.8951,,Concentration of isolated DNA face masks made of gelatin for halal authentication,https://doi.org/10.12928/jhsr.v5i1.8951,HYBRID,"\nINTRODUCTION\n\nIndonesia is one of the countries with a Muslim majority. The large Muslim population in Indonesia affects the halal lifestyle, which becomes the basis for product selection (Nur, 2014). Therefore, the government issued a regulation, Law Number 33 of 2014, concerning the Halal Product Guarantee (UUJPH), which can protect consumers' selection of products. Halal status has become a global issue, including non-food products such as medicines and cosmetics (Widayat et al., 2019...","[{""attributes"":null,""end"":496,""start"":15},{""attributes"":null,""end"":2564,""start"":498},{""attributes"":null,""end"":2976,""start"":2566},{""attributes"":null,""end"":3352,""start"":3005},{""attributes"":null,""end"":4369,""start"":3393},{""attributes"":null,""end"":5763,""start"":4440},{""attributes"":null,""end"":6571,""start"":5807},{""attributes"":null,""end"":7057,""start"":6573},{""attributes"":null,""end"":8433,""start"":7082},{""attributes"":null,""end"":9548,""start"":8435},{""attributes"":null,""end"":9942,""start"":9562},{""attributes"":n...","[{""attributes"":null,""end"":13,""start"":1},{""attributes"":null,""end"":3003,""start"":2978},{""attributes"":null,""end"":3361,""start"":3354},{""attributes"":{""n"":""1.""},""end"":3391,""start"":3363},{""attributes"":{""n"":""2.""},""end"":4438,""start"":4371},{""attributes"":{""n"":""3.""},""end"":5805,""start"":5765},{""attributes"":null,""end"":7080,""start"":7059},{""attributes"":null,""end"":9560,""start"":9550}]",CCBYNCSA,"The function of using a face mask is to remove dirt and dead skin cells found in the pores of the skin, open clogged pores and clean the remaining cosmetics that cannot remove with ordinary cleansers, repair and tighten the skin, provide nutrition, smooth, soften and maintain skin moisture. 
People are aware of halal-labeled cosmetics, one of which is face masks because some face masks contain gelatin. Some face masks on the market contain gelatin. Preparation of 5 reference samples with diff...",2024,2024-02-21
1,270039448,,10.3390/ani14111518,,Laying Hens: Why Smothering and Not Surviving?—A Literature Review,https://pmc.ncbi.nlm.nih.gov/articles/PMC11171085,GOLD,"\nIntroduction\n\nSince the Industrial Revolution, there has been progressive growth in poultry production systems, primarily aimed at maximizing economic returns.In parallel, global awareness of the importance of animal welfare practices has emerged, reflected in increasingly stringent policies aimed at prohibiting poultry breeding systems that neglect ethical principles and respect for animal integrity.Such policies are increasingly valued by consumers, directly influencing market selectiv...","[{""attributes"":null,""end"":893,""start"":15},{""attributes"":null,""end"":1321,""start"":895},{""attributes"":null,""end"":1845,""start"":1323},{""attributes"":null,""end"":2290,""start"":1847},{""attributes"":null,""end"":3415,""start"":2292},{""attributes"":null,""end"":3900,""start"":3417},{""attributes"":null,""end"":4513,""start"":3902},{""attributes"":null,""end"":4941,""start"":4578},{""attributes"":null,""end"":5666,""start"":4943},{""attributes"":null,""end"":6130,""start"":5668},{""attributes"":null,""end"":6985,""start"":6132},{""attributes"":n...","[{""attributes"":{""n"":""1.""},""end"":13,""start"":1},{""attributes"":{""n"":""2.""},""end"":4576,""start"":4515},{""attributes"":{""n"":""3.""},""end"":8773,""start"":8719},{""attributes"":{""n"":""4.""},""end"":13914,""start"":13871},{""attributes"":null,""end"":15516,""start"":15410},{""attributes"":null,""end"":15563,""start"":15518},{""attributes"":null,""end"":15880,""start"":15850},{""attributes"":null,""end"":16042,""start"":15995},{""attributes"":null,""end"":16296,""start"":16280},{""attributes"":null,""end"":16690,""start"":16645},{""attributes"":null,""en...",CCBY,"Simple Summary Simple Summary: Recent studies have delved into the adverse phenomenon of smothering in cage-free laying hen-rearing systems, challenging the traditional notion of this 
behavior as “natural” or the result of hysteria among birds in the flock. This work identifies smothering as a detrimental, abnormal behavior with significant economic repercussions for poultry farming. Through a comprehensive literature review and bibliographic mapping, combined with consultations with poultry...",2024,2024-05-21
2,101543261,2338423426.0,10.1080/10610278.2016.1167210,,"Design, synthesis and structure determination of new inherently chiral para-bromoalkoxycalix[4]arenes",https://doi.org/10.1080/10610278.2016.1167210,GREEN,"\nIntroduction\n\nCalix [4]arenes (1) due to the unique macrocyclic capeshaped structure are widely used as specific receptors for recognition and separation of anions, cations or neutral molecules (2). The inherently chiral calix [4]arenes whose chirality derives from the asymmetrical arrangement of achiral substituents on the macrocyclic platform are of particular interest for the desing of chiral receptors (3). 'Host' molecules based on these compounds were shown to recognise, bind and se...","[{""attributes"":null,""end"":809,""start"":15},{""attributes"":null,""end"":1279,""start"":811},{""attributes"":null,""end"":2543,""start"":1335},{""attributes"":null,""end"":3054,""start"":2621},{""attributes"":null,""end"":3332,""start"":3105},{""attributes"":null,""end"":4052,""start"":3334},{""attributes"":null,""end"":4639,""start"":4054},{""attributes"":null,""end"":5146,""start"":4664},{""attributes"":null,""end"":5335,""start"":5148},{""attributes"":null,""end"":5648,""start"":5421},{""attributes"":null,""end"":6199,""start"":5727},{""attributes"":n...","[{""attributes"":null,""end"":13,""start"":1},{""attributes"":null,""end"":1333,""start"":1281},{""attributes"":null,""end"":2619,""start"":2545},{""attributes"":null,""end"":3103,""start"":3056},{""attributes"":null,""end"":4662,""start"":4641},{""attributes"":null,""end"":5419,""start"":5337},{""attributes"":null,""end"":5725,""start"":5650},{""attributes"":null,""end"":6234,""start"":6201},{""attributes"":null,""end"":6395,""start"":6315},{""attributes"":null,""end"":7160,""start"":7108},{""attributes"":null,""end"":7273,""start"":7221},{""attributes"":nu...",CCBY,"Abstract The preparative method for the synthesis of inherently chiral para-bromoalkoxycalix[4]arenes based on para-bromination, stepwise 
regioselective debenzoylation and the following alkylation of the readily available 25-propoxy-26,27-dibenzoyloxycalix[4]arene with propyl bromide or (R)-N-(1-phenylethyl)bromoacetamide has been developed. Three types of the inherently chiral calix[4]arenes in cone or partial cone conformations with asymmetrical (AHHHHBHH, AAHHHBHH, AHBHHCHH) substitution ...",2017,2017-01-02


Unnamed: 0,c
0,11609787


 :: "02_stg".stg_semanticscholar_combined_works queries finished | since_start: 14.0 minutes, 5.93 seconds | since_last: 7.94 seconds :: 


' :: "02_stg".stg_semanticscholar_combined_works queries finished | since_start: 14.0 minutes, 5.93 seconds | since_last: 7.94 seconds :: '

In [2]:
# (Re)create the staging table "02_stg".base_arxiv_metadata through
# utils.create_table_from_sql_file and log the elapsed time.
# overwrite_strategy='overwrite' replaces a table that already exists.
table_name = 'base_arxiv_metadata'
timelogger = utils.TimeLogger()
utils.create_table_from_sql_file(
    database_name='02_stg',
    table_name=table_name,
    overwrite_strategy='overwrite',  # options: fail, overwrite, ignore
    wait=True,
)
# timelogger.log prints the message and also returns it; assigning the return
# value to `_` keeps the notebook from echoing the same line a second time as
# the cell result.
_ = timelogger.log(f'"02_stg".{table_name} created')

 :: :: TIMELOGGER STARTED :: | since_start: 0.00 seconds | since_last: 0.00 seconds :: 
Table 02_stg.base_arxiv_metadata already exists. Overwriting since overwrite_strategy=="overwrite".
Deleting table from Glue Catalog 02_stg base_arxiv_metadata
Deleting S3 objects from s3://sagemaker-research-methodology-extraction/01_data/02_stg/base_arxiv_metadata/
s3_parent_target_path:  s3://sagemaker-research-methodology-extraction/01_data/02_stg
 :: "02_stg".base_arxiv_metadata created | since_start: 17.13 seconds | since_last: 17.13 seconds :: 


' :: "02_stg".base_arxiv_metadata created | since_start: 17.13 seconds | since_last: 17.13 seconds :: '

In [3]:
# Quick inspection of "02_stg".base_arxiv_metadata: display a five-row sample
# first, then the total row count.
utils.pd_set_options(cols=500)
arxiv_sample_sql = """SELECT * FROM "02_stg".base_arxiv_metadata LIMIT 5 """
arxiv_count_sql = """SELECT COUNT(*) AS c FROM "02_stg".base_arxiv_metadata """
for arxiv_sql in (arxiv_sample_sql, arxiv_count_sql):
    display(wr.athena.read_sql_query(arxiv_sql, '02_stg'))

Unnamed: 0,id_arxiv,id_doi,title,abstract,license
0,1810.00965,,Natural measures of alignment,Natural coordinate system will be proposed. In this coordinate system alignment procedure of a device and a detector can be easily performed. This approach is generalization of previous specific formulas in the field of calibration and provide top level description of the procedure. A basic example application to linac therapy plan is also provided.,ArXiv nonexclusive-distrib
1,1810.00967,,Efficient and Accurate Abnormality Mining from Radiology Reports with Customized False Positive Reduction,"Obtaining datasets labeled to facilitate model development is a challenge for most machine learning tasks. The difficulty is heightened for medical imaging, where data itself is limited in accessibility and labeling requires costly time and effort by trained medical specialists. Medical imaging studies, however, are often accompanied by a medical report produced by a radiologist, identifying important features on the corresponding scan for other physicians not specifically trained in radiolo...",ArXiv nonexclusive-distrib
2,1810.00952,10.1145/3211346.3211348,Relay: A New IR for Machine Learning Frameworks,"Machine learning powers diverse services in industry including search, translation, recommendation systems, and security. The scale and importance of these models require that they be efficient, expressive, and portable across an array of heterogeneous hardware devices. These constraints are often at odds; in order to better accommodate them we propose a new high-level intermediate representation (IR) called Relay. Relay is being designed as a purely-functional, statically-typed language wit...",ArXiv nonexclusive-distrib
3,1810.00956,,Challenges of Using Text Classifiers for Causal Inference,"Causal understanding is essential for many kinds of decision-making, but causal inference from observational data has typically only been applied to structured, low-dimensional datasets. While text classifiers produce low-dimensional outputs, their use in causal inference has not previously been studied. To facilitate causal analyses based on language data, we consider the role that text classifiers can play in causal inference through established modeling mechanisms from the causality liter...",ArXiv nonexclusive-distrib
4,1810.00958,,Measurement of the neutron lifetime using a magneto-gravitational trap,"Precision measurements of the free neutron lifetime $\tau_n$, when combined with measurements of the axial vector form factor, can be used to test unitarity of the CKM matrix. Non-unitarity is a signal for physics Beyond the Standard Model (BSM). Sensitivity to BSM physics requires measurements of $\tau_n$ to a precision of 0.1~s. However, the two dominant techniques to measure $\tau_n$ (colloquially beam and bottle measurements) disagree by nearly 10~s. UCN$\tau$ is a neutron lifetime exper...",ArXiv nonexclusive-distrib


Unnamed: 0,c
0,2816721


In [None]:
# TODO(review): placeholder for the next step — presumably the table
# stg_unified_works_01_joined_to_arxiv is to be created here. Kept as a
# comment because the bare identifier is undefined and raised NameError
# whenever this cell was executed (e.g. under Restart & Run All).