In [1]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
from sqlalchemy import create_engine, text
import pandas as pd
import tarfile
import shutil
import re
import unicodedata
from tqdm import tqdm 

In [2]:
engine = create_engine(
    'postgresql+psycopg2://rg5073:rg5073pass@meta_data_postgres:5432/cleaned_meta_data_db',
    pool_size=10,
    max_overflow=0,
    pool_timeout=30,
)

In [61]:
query_preview = "SELECT * FROM arxiv_training LIMIT 5;"
preview = pd.read_sql(query_preview, engine)
print(" Preview of data:")
print(preview)

 Preview of data:
      paper_id  chunk_id     txt_filename                              query  \
0  0801.1875v3         5  0801.1875v3.txt                               None   
1  0801.3931v1         1  0801.3931v1.txt  What is the name of the astro ph?   
2  0801.3931v1         2  0801.3931v1.txt                               None   
3  0801.3931v1         3  0801.3931v1.txt                               None   
4  0801.4446v1         2  0801.4446v1.txt                               None   

                                          chunk_data  
0  the statistical dis tribution of the concentra...  
1  arXiv 0801.3931v1 astro ph 25 Jan 2008 Dynamic...  
2  . In general, two different initial devi ation...  
3  xpy ypx . Dynamical study of 2D and 3D barred ...  
4  been applied. The rms scatter of these measure...  


In [62]:
query_preview = "SELECT paper_id, chunk_id, chunk_data FROM arxiv_training LIMIT 400;"
preview = pd.read_sql(query_preview, engine)
print(" Preview of data:")
print(preview)

 Preview of data:
        paper_id  chunk_id                                         chunk_data
0    0801.1875v3         5  the statistical dis tribution of the concentra...
1    0801.3931v1         1  arXiv 0801.3931v1 astro ph 25 Jan 2008 Dynamic...
2    0801.3931v1         2  . In general, two different initial devi ation...
3    0801.3931v1         3  xpy ypx . Dynamical study of 2D and 3D barred ...
4    0801.4446v1         2  been applied. The rms scatter of these measure...
..           ...       ...                                                ...
395  0801.2351v1         5  , E and the elliptic Harnack inequality hold. ...
396  0801.2098v1         1  arXiv 0801.2098v1 physics.flu dyn 14 Jan 2008 ...
397  0801.2956v2         1  arXiv 0801.2956v2 quant ph 24 Mar 2008 Multi p...
398  0801.2956v2         2  implies that one can always find the equal amp...
399  0801.0768v1         1  The strongest size in the inverse Hall Petch r...

[400 rows x 3 columns]


In [5]:
pip install ipykernel


Note: you may need to restart the kernel to use updated packages.


In [7]:

!pip install transformers accelerate
!pip install torch
!pip install pandas
!pip install sqlalchemy
!pip install psycopg2-binary




In [7]:
!pip install optimum




In [17]:
query_preview = "SELECT paper_id,query, chunk_id, chunk_data FROM arxiv_training LIMIT 400;"
preview = pd.read_sql(query_preview, engine)
print(" Preview of data:")
print(preview)

 Preview of data:
        paper_id query  chunk_id  \
0    0801.0508v1               4   
1    0801.0508v1               5   
2    0801.0508v1               6   
3    0801.0508v1               7   
4    0801.0508v1               8   
..           ...   ...       ...   
395  0801.2387v1              13   
396  0801.2387v1              14   
397  0801.2387v1              15   
398  0801.2387v1              16   
399  0801.2387v1              17   

                                            chunk_data  
0    donc e tre de fini comme celui de la solution ...  
1    z Et r,z 2j 2 r,z , la solution conduit a la p...  
2    z ka 1 2 avec ka 1 2 1. Dans ce cas l inte gra...  
3    al 9 6. Conclusion Nous avons montre dans cet ...  
4    Am. 34 355 356 D. A. Hutchins, H. D. Mair, P. ...  
..                                                 ...  
395  have companions. We did not use these obser va...  
396  tion, mass ratio distribution, and total fract...  
397  so we will limit our analysis t

In [14]:
query_preview = "SELECT paper_id,query, chunk_id, chunk_data FROM arxiv_training LIMIT 400;"
preview = pd.read_sql(query_preview, engine)
print(" Preview of data:")
print(preview)

 Preview of data:
        paper_id query  chunk_id  \
0    0801.1663v3               1   
1    0801.1663v3               2   
2    0801.1663v3               3   
3    0801.1663v3               4   
4    0801.1663v3               5   
..           ...   ...       ...   
395  0801.3285v2              14   
396  0801.3285v2              15   
397  0801.3285v2              16   
398  0801.3285v2              17   
399  0801.3285v2              18   

                                            chunk_data  
0    arXiv 0801.1663v3 math.SG 18 Jul 2008 Courant ...  
1    from a common starting point, namely a Manin p...  
2    E if there is no risk of confusion. We use the...  
3    compatible with the anchor, that is, K TQ d2 f...  
4    algebroid morphism E1 E2 , such that the image...  
..                                                 ...  
395  radius for further details on these issues see...  
396  heating to a much more realistic value of 10 ,...  
397  with CR bubbles. The Compton y 

In [18]:
print(f"Total records fetched: {len(df)}")
print(df.head())


Total records fetched: 0
Empty DataFrame
Columns: [paper_id, chunk_id, chunk_data]
Index: []


In [23]:
from sqlalchemy import inspect

inspector = inspect(engine)
columns = inspector.get_columns("arxiv_training")
for col in columns:
    print(col["name"], col["type"])


paper_id TEXT
chunk_id INTEGER
txt_filename TEXT
query TEXT
chunk_data TEXT


In [24]:
print("=== FULL RAW DECODED OUTPUT ===")
print(repr(decoded))


=== FULL RAW DECODED OUTPUT ===
'What is the radius of a cluster?'


In [27]:
query_preview = "SELECT paper_id,query, chunk_id, chunk_data FROM arxiv_training LIMIT 40;"
preview = pd.read_sql(query_preview, engine)
print(" Preview of data:")
print(preview)

 Preview of data:
       paper_id query  chunk_id  \
0   0801.0714v1              10   
1   0801.1954v3               2   
2   0801.1954v3               3   
3   0801.2232v2               2   
4   0801.2232v2               3   
5   0801.2232v2               4   
6   0801.2232v2  \n\n         1   
7   0801.1954v3  \n\n         1   
8   0801.2232v2               5   
9   0801.2232v2               6   
10  0801.2232v2               7   
11  0801.2232v2               8   
12  0801.2232v2               9   
13  0801.2232v2              10   
14  0801.2232v2              11   
15  0801.2232v2              12   
16  0801.2232v2              13   
17  0801.3277v4              12   
18  0801.2232v2              14   
19  0801.2232v2              15   
20  0801.2232v2              16   
21  0801.2843v2               2   
22  0801.2843v2               3   
23  0801.3220v1               2   
24  0801.3220v1               3   
25  0801.3220v1               4   
26  0801.3220v1               5   
27

In [40]:
query = """
    SELECT chunk_data, query
    FROM arxiv_training
    WHERE query IS NOT NULL
    LIMIT 20;
"""

df = pd.read_sql(query, engine)
print(df.head(10))  


                                          chunk_data  \
0  arXiv 0801.3072v1 math.DS 21 Jan 2008 HAMILTON...   
1  ASYMPTOTICS OF SEMIGROUPS GENERATED BY OPERATO...   
2  arXiv 0801.2671v1 gr qc 17 Jan 2008 A NOTE ON ...   

                                               query  
0              What is the name of the sys tems?\n\n  
1  What is the name of the generator of the gener...  
2                  What is the name of the text?\n\n  


In [46]:
query_count = """
    SELECT
        COUNT(*) FILTER (WHERE query IS NOT NULL) AS filled,
        COUNT(*) FILTER (WHERE query IS NULL) AS empty
    FROM arxiv_training;
"""

df_count = pd.read_sql(query_count, engine)
print(df_count)


   filled    empty
0       1  2008086


In [44]:
query = """
    SELECT chunk_data, query
    FROM arxiv_training
    WHERE query IS NOT NULL AND LENGTH(TRIM(query)) > 0
    LIMIT 20;
"""
df = pd.read_sql(query, engine)
print(df.head(10))


                                          chunk_data  \
0  ASYMPTOTICS OF SEMIGROUPS GENERATED BY OPERATO...   

                                               query  
0  What is the name of the generator of the gener...  


In [47]:
query = """
    SELECT chunk_data, query
    FROM arxiv_training
    WHERE query IS NOT NULL AND LENGTH(TRIM(query)) > 0
    LIMIT 20;
"""
df = pd.read_sql(query, engine)
print(df.head(10))


                                          chunk_data  \
0  ASYMPTOTICS OF SEMIGROUPS GENERATED BY OPERATO...   

                                               query  
0  What is the name of the generator of the gener...  


In [48]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sqlalchemy import create_engine, text
from tqdm import tqdm
import pandas as pd
import json
import torch

model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model.to("cpu")

engine = create_engine(
    'postgresql+psycopg2://rg5073:rg5073pass@meta_data_postgres:5432/cleaned_meta_data_db',
    pool_size=10, max_overflow=0, pool_timeout=30
)

query = """
    SELECT paper_id, chunk_id, chunk_data
    FROM arxiv_training
    WHERE query IS NULL
    ORDER BY chunk_id
    LIMIT 200
"""
df = pd.read_sql(query, engine)

for idx, row in tqdm(df.iterrows(), total=len(df)):
    chunk = row["chunk_data"][:700]
    prompt = f"Generate 1 user question from this scientific text:\n\n{chunk}"

    try:
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to("cpu")
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=100)

        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"\n=== {row['paper_id']} | chunk_id {row['chunk_id']} ===")
        print(repr(decoded))  

        query_text = ""
        for line in decoded.split('\n'):
            line = line.strip()
            if line.startswith("1.") or line.startswith("•") or "?" in line:
                query_text = line.lstrip("123.• ").strip()
                break
            
        update_query = text("""
            UPDATE arxiv_training
            SET query = :query_data
            WHERE paper_id = :pid AND chunk_id = :cid
        """)
        
        with engine.begin() as connection:
            connection.execute(update_query, {
                "query_data": query_text,
                "pid": row["paper_id"],
                "cid": row["chunk_id"]
            })

    except Exception as e:
        print(f" Error for {row['paper_id']} chunk_id {row['chunk_id']}: {e}")

print(" records prcessed and 1 query per chunk has beensaved.")


  0%|          | 0/200 [00:00<?, ?it/s]


=== 0801.2402v3 | chunk_id 1 ===
'What is the name of the particle that is a molecule of the molecule?'


  0%|          | 1/200 [00:01<06:34,  1.98s/it]


=== 0801.3490v1 | chunk_id 1 ===
'What is the best method for estimating an unknown deterministic signal?'


  1%|          | 2/200 [00:03<06:19,  1.92s/it]


=== 0801.1298v1 | chunk_id 1 ===
'What is the ionized H II region?'


  2%|▏         | 3/200 [00:05<06:13,  1.90s/it]


=== 0801.4190v2 | chunk_id 1 ===
'What is the name of the first phylogenetic reconstruction algorithm?'


  2%|▏         | 4/200 [00:08<06:43,  2.06s/it]


=== 0801.1490v2 | chunk_id 1 ===
'What is the polarized and unpolarized CP asymmetries?'


  2%|▎         | 5/200 [00:10<06:38,  2.04s/it]


=== 0801.2838v3 | chunk_id 1 ===
'What is the problem of synchronizing coloring of a di?'


  3%|▎         | 6/200 [00:11<06:25,  1.99s/it]


=== 0801.3319v1 | chunk_id 1 ===
'What is the name of the i.i.d. sample from the random design regression model X?'


  4%|▎         | 7/200 [00:13<06:23,  1.99s/it]


=== 0801.3711v1 | chunk_id 1 ===
'What is the name of the ad hoc laboratory?'


  4%|▍         | 8/200 [00:15<06:10,  1.93s/it]


=== 0801.2229v2 | chunk_id 1 ===
'What is the MC NLO event gen?'


  4%|▍         | 9/200 [00:17<05:57,  1.87s/it]


=== 0801.1307v1 | chunk_id 1 ===
'What is the name of the sub hierarchy that is contained in the linear time hierarchy?'


  5%|▌         | 10/200 [00:19<05:51,  1.85s/it]


=== 0801.4506v2 | chunk_id 1 ===
'What is the name of the neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-neo-n'


  6%|▌         | 11/200 [00:23<07:44,  2.46s/it]


=== 0801.2295v1 | chunk_id 1 ===
'What is the cause of the solitary wave?'


  6%|▌         | 12/200 [00:25<07:10,  2.29s/it]


=== 0801.0059v3 | chunk_id 1 ===
'What is the maximum probability that k wise independent distributions are all 1?'


  6%|▋         | 13/200 [00:26<06:42,  2.15s/it]


=== 0801.2039v1 | chunk_id 1 ===
'What is the name of the focal plane instruments on board WSO UV?'


  7%|▋         | 14/200 [00:28<06:22,  2.06s/it]


=== 0801.0768v1 | chunk_id 1 ===
'What is the inverse Hall Petch relationship?'


  8%|▊         | 15/200 [00:30<06:00,  1.95s/it]


=== 0801.3001v3 | chunk_id 1 ===
'What is the name of the research team that focuses on the arithmetic of a trader?'


  8%|▊         | 16/200 [00:32<06:01,  1.97s/it]


=== 0801.2244v1 | chunk_id 1 ===
'What is the name of the e6 model?'


  8%|▊         | 17/200 [00:34<05:48,  1.91s/it]


=== 0801.4708v1 | chunk_id 1 ===
'What is the name of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the resemblance of the res'


  9%|▉         | 18/200 [00:38<07:35,  2.50s/it]


=== 0801.4398v1 | chunk_id 1 ===
'What is the superconformal algebra of a Weyl algebra?'


 10%|▉         | 19/200 [00:40<07:03,  2.34s/it]


=== 0801.1291v2 | chunk_id 1 ===
'What is the renormalization of the phonon energy dispersion in graphene?'


 10%|█         | 20/200 [00:42<07:01,  2.34s/it]


=== 0801.1040v1 | chunk_id 1 ===
'What is the name of the device that is devoted especially to the thermal characterization of packages?'


 10%|█         | 21/200 [00:44<06:41,  2.24s/it]


=== 0801.4625v1 | chunk_id 1 ===
'What is the spectral properties of the quarter filled CDW systems?'


 11%|█         | 22/200 [00:46<06:19,  2.13s/it]


=== 0801.0568v1 | chunk_id 1 ===
'What is the purpose of this note?'


 12%|█▏        | 23/200 [00:48<06:01,  2.04s/it]


=== 0801.1851v1 | chunk_id 1 ===
'What is the spectral index of the linear matter power spectrum P k ?'


 12%|█▏        | 24/200 [00:49<05:51,  2.00s/it]


=== 0801.4829v1 | chunk_id 1 ===
'What is the date of the final phase of inspiral of strange quark star binaries?'


 12%|█▎        | 25/200 [00:52<06:05,  2.09s/it]


=== 0801.2007v3 | chunk_id 1 ===
'What is the name of the Chinese Journal of Astronomy and Astrophysics?'


 13%|█▎        | 26/200 [00:54<05:48,  2.00s/it]


=== 0801.3349v1 | chunk_id 1 ===
'What is the result of the BFKL equation?'


 14%|█▎        | 27/200 [00:55<05:35,  1.94s/it]


=== 0801.2702v2 | chunk_id 1 ===
'What is the name of the typeset in JHEP style HYPER VERSION PH TH CERN 2007 243, UB ECM PF 07 36, Toho CP 0885 Space time Vector Supersymmetry and Massive Spinning Particle Roberto Casalbuoni Department of Physics, University of Florence, INFN Florence and Galileo Galilei Institute, Florence Italy E mail casalbuoni fi.infn.it'


 14%|█▍        | 28/200 [00:59<07:15,  2.53s/it]


=== 0801.3014v1 | chunk_id 1 ===
'What is the name of the f ur Physica D?'


 14%|█▍        | 29/200 [01:01<06:38,  2.33s/it]


=== 0801.0482v3 | chunk_id 1 ===
'What is the symmetry of the trans spherical solutions in higher dimensions?'


 15%|█▌        | 30/200 [01:03<06:18,  2.23s/it]


=== 0801.3877v1 | chunk_id 1 ===
'What is the name of the hep ex?'


 16%|█▌        | 31/200 [01:05<06:14,  2.22s/it]


=== 0801.2722v2 | chunk_id 1 ===
'What is the density wave state in the underdoped cuprates?'


 16%|█▌        | 32/200 [01:07<05:52,  2.10s/it]


=== 0801.4724v1 | chunk_id 1 ===
'What is the name of the equatorial coronal hole CH?'


 16%|█▋        | 33/200 [01:09<05:36,  2.01s/it]


=== 0801.0866v1 | chunk_id 1 ===
'What is the hhZ of the Higgs self coupling?'


 17%|█▋        | 34/200 [01:11<05:25,  1.96s/it]


=== 0801.4859v1 | chunk_id 1 ===
'What is the model of population dynamics on lattices?'


 18%|█▊        | 35/200 [01:13<05:14,  1.91s/it]


=== 0801.4673v2 | chunk_id 1 ===
'What is the gamma ra y and neutrino uxes from a osmologi al dark matter sim ulation?'


 18%|█▊        | 36/200 [01:15<05:43,  2.09s/it]


=== 0801.4839v1 | chunk_id 1 ===
'What is the name of the laboratory of nuclear problems in the tetrafluid?'


 18%|█▊        | 37/200 [01:17<05:43,  2.11s/it]


=== 0801.1557v2 | chunk_id 1 ===
"What is the name of the physicist who explains the physicist's theory of a cross hatched regime?"


 19%|█▉        | 38/200 [01:20<05:56,  2.20s/it]


=== 0801.1547v2 | chunk_id 1 ===
'What is the name of the text?'


 20%|█▉        | 39/200 [01:21<05:30,  2.05s/it]


=== 0801.4825v1 | chunk_id 1 ===
'What is the name of the microlensing event?'


 20%|██        | 40/200 [01:23<05:14,  1.96s/it]


=== 0801.1107v1 | chunk_id 1 ===
'What is the name of the white dwarf WD?'


 20%|██        | 41/200 [01:25<05:02,  1.90s/it]


=== 0801.0181v2 | chunk_id 1 ===
'What is the symmetry of the tri bimaximal mixing?'


 21%|██        | 42/200 [01:27<04:58,  1.89s/it]


=== 0801.1616v1 | chunk_id 1 ===
'What is the name of the physicist who is the lead author of the physicist?'


 22%|██▏       | 43/200 [01:29<05:21,  2.05s/it]


=== 0801.1245v2 | chunk_id 1 ===
"What is the name of the author of the book 'Atrix'?"


 22%|██▏       | 44/200 [01:31<05:14,  2.01s/it]


=== 0801.1217v1 | chunk_id 1 ===
'What is the name of the blazars that are the prime candidates for simultaneous multi wavelength observing campaigns?'


 22%|██▎       | 45/200 [01:33<05:08,  1.99s/it]


=== 0801.3968v1 | chunk_id 1 ===
'What is the code for Monte Carlo models for the dynamical evolution of the nearby globular cluster M4?'


 23%|██▎       | 46/200 [01:35<05:09,  2.01s/it]


=== 0801.3587v1 | chunk_id 1 ===
'What is the name of the molecule in the case of planar LiBeBCNOF?'


 24%|██▎       | 47/200 [01:37<05:05,  2.00s/it]


=== 0801.2138v2 | chunk_id 1 ===
'What is the kagome strip found previously by DMRG?'


 24%|██▍       | 48/200 [01:39<04:58,  1.96s/it]


=== 0801.4683v1 | chunk_id 1 ===
'What is the crystal and magnetic structure of La0.70Ca0.30 CryMn1 y O3?'


 24%|██▍       | 49/200 [01:41<05:04,  2.02s/it]


=== 0801.4424v2 | chunk_id 1 ===
'What is the name of the astronomical journal?'


 25%|██▌       | 50/200 [01:43<05:05,  2.04s/it]


=== 0801.3233v4 | chunk_id 1 ===
'What is the term for twisted associative resp. Poisson deformation of the structure sheaf OX?'


 26%|██▌       | 51/200 [01:45<05:07,  2.06s/it]


=== 0801.3236v1 | chunk_id 1 ===
'What is the name of the g of the cycle class map?'


 26%|██▌       | 52/200 [01:47<05:01,  2.04s/it]


=== 0801.1730v1 | chunk_id 1 ===
'What is the probability of the occurrence of large deviations of the largest smallest eigenvalue of random matrices belonging to the Gaussian orthogonal, unitary and symplectic ensembles?'


 26%|██▋       | 53/200 [01:50<05:41,  2.32s/it]


=== 0801.4855v1 | chunk_id 1 ===
'What is the inverse power of the distance in the spin glass model?'


 27%|██▋       | 54/200 [01:52<05:25,  2.23s/it]


=== 0801.4938v2 | chunk_id 1 ===
'What is the difference between the two atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms of the atoms'


 28%|██▊       | 55/200 [01:57<06:51,  2.84s/it]


=== 0801.2317v3 | chunk_id 1 ===
'What is the function of the functional renormalisation group for two body scattering?'


 28%|██▊       | 56/200 [01:59<06:13,  2.59s/it]


=== 0801.1611v1 | chunk_id 1 ===
'What is the name of the atomic number of the Raman scattering in heavily boron doped diamond?'


 28%|██▊       | 57/200 [02:01<05:47,  2.43s/it]


=== 0801.2688v4 | chunk_id 1 ===
'What is the generalized second law of thermodynamics in generalized theories of gravity?'


 29%|██▉       | 58/200 [02:03<05:25,  2.29s/it]


=== 0801.4459v4 | chunk_id 1 ===
'What is the name of the genus 2 hyperelliptic mode?'


 30%|██▉       | 59/200 [02:04<05:07,  2.18s/it]


=== 0801.0577v1 | chunk_id 1 ===
'What is the method for detecting a magnetic field?'


 30%|███       | 60/200 [02:07<05:06,  2.19s/it]


=== 0801.3742v1 | chunk_id 1 ===
'What is the effect of primordial fluctuations on neutrino oscillations in the early universe?'


 30%|███       | 61/200 [02:09<04:55,  2.13s/it]


=== 0801.1160v3 | chunk_id 1 ===
'What is the name of the topological membrane model?'


 31%|███       | 62/200 [02:10<04:39,  2.03s/it]


=== 0801.0789v1 | chunk_id 1 ===
'What is the atomic cloud?'


 32%|███▏      | 63/200 [02:12<04:24,  1.93s/it]


=== 0801.4382v2 | chunk_id 1 ===
'What is the name of the astronomical institute that is conducting a search for supermassive black holes?'


 32%|███▏      | 64/200 [02:14<04:25,  1.95s/it]


=== 0801.0368v1 | chunk_id 1 ===
'What is the name of the quantum Monte Carlo QMC , a plane wave density functional theory DFT , and interatomic pair potential calculations of the zero temperature equation of state EOS of solid neon?'


 32%|███▎      | 65/200 [02:17<04:57,  2.21s/it]


=== 0801.2098v1 | chunk_id 1 ===
'What is the name of the phe nomena involving shocks, interfaces or material surfaces?'


 33%|███▎      | 66/200 [02:19<04:49,  2.16s/it]


=== 0801.1682v1 | chunk_id 1 ===
'What is the nb2Se3 spectrometer?'


 34%|███▎      | 67/200 [02:21<04:33,  2.06s/it]


=== 0801.0662v1 | chunk_id 1 ===
'What is the function of the two dimensional nearest neighbour Ising model pertaining to square lattices?'


 34%|███▍      | 68/200 [02:23<04:38,  2.11s/it]


=== 0801.1576v1 | chunk_id 1 ===
'What is the shortest time period for the shortest time period for the shortest time period?'


 34%|███▍      | 69/200 [02:25<04:30,  2.06s/it]


=== 0801.3074v2 | chunk_id 1 ===
'What is the name of the Lie algebraic group G?'


 35%|███▌      | 70/200 [02:27<04:34,  2.11s/it]


=== 0801.2501v1 | chunk_id 1 ===
'What is the name of the research center for the study of the art of painting?'


 36%|███▌      | 71/200 [02:29<04:24,  2.05s/it]


=== 0801.3911v1 | chunk_id 1 ===
'What is the main function of Vect S1 of real vector fields on the circle S1 Lm?'


 36%|███▌      | 72/200 [02:31<04:20,  2.03s/it]


=== 0801.0400v1 | chunk_id 1 ===
'What is the ice X phase 55 380GPa?'


 36%|███▋      | 73/200 [02:33<04:11,  1.98s/it]


=== 0801.1685v2 | chunk_id 1 ===
'What is the name of the star that transits a 12th mag Southern hemisphere star?'


 37%|███▋      | 74/200 [02:35<04:12,  2.00s/it]


=== 0801.2171v1 | chunk_id 1 ===
'What is the tra e map?'


 38%|███▊      | 75/200 [02:37<04:03,  1.95s/it]


=== 0801.0471v1 | chunk_id 1 ===
'What is the name of the black hole for the Dirac fields?'


 38%|███▊      | 76/200 [02:39<04:02,  1.96s/it]


=== 0801.1041v1 | chunk_id 1 ===
'What is the method for detecting off plane surface displacement on two microelectronic devices?'


 38%|███▊      | 77/200 [02:41<04:15,  2.08s/it]


=== 0801.2027v1 | chunk_id 1 ===
'What is the name of the research center of the physics of the body?'


 39%|███▉      | 78/200 [02:43<04:09,  2.04s/it]


=== 0801.0587v1 | chunk_id 1 ===
'What is the symmetry of the spin spin coupling?'


 40%|███▉      | 79/200 [02:45<03:59,  1.98s/it]


=== 0801.3681v3 | chunk_id 1 ===
'What is the maximal entanglement deterministically distillable from a bipartition of a single specimen of that system?'


 40%|████      | 80/200 [02:47<04:05,  2.04s/it]


=== 0801.0481v1 | chunk_id 1 ===
'What is the name of the astronomical center of the universe?'


 40%|████      | 81/200 [02:49<03:58,  2.00s/it]


=== 0801.4544v1 | chunk_id 1 ===
'What is the name of the decoder that is used to transmit information over an unknown, possibly unreliable channel?'


 41%|████      | 82/200 [02:52<04:13,  2.15s/it]


=== 0801.1683v1 | chunk_id 1 ===
'What is the main purpose of the av-based approach?'


 42%|████▏     | 83/200 [02:54<04:04,  2.09s/it]


=== 0801.4913v2 | chunk_id 1 ===
'What is the name of the sex experiment?'


 42%|████▏     | 84/200 [02:55<03:52,  2.00s/it]


=== 0801.1453v1 | chunk_id 1 ===
'What is the name of the chemical formula that is used to calculate the zero temperature phase diagram of binary mixtures of particles interacting via a screened Coulomb pair potential?'


 42%|████▎     | 85/200 [02:58<04:11,  2.19s/it]


=== 0801.3090v2 | chunk_id 1 ===
'What is the mechanism of adjustment of vacuum energy down to the observed value from an initially huge one?'


 43%|████▎     | 86/200 [03:00<04:01,  2.12s/it]


=== 0801.1916v1 | chunk_id 1 ===
'What is the most striking effect in a low temperature interaction?'


 44%|████▎     | 87/200 [03:02<03:50,  2.04s/it]


=== 0801.2485v1 | chunk_id 1 ===
'What is the name of the element that has been identified by the Xenon?'


 44%|████▍     | 88/200 [03:04<03:45,  2.01s/it]


=== 0801.0918v2 | chunk_id 1 ===
'What is the influence of net quarks on the yields and rapidity spectra of various hadrons in central Au Au collisions at?'


 44%|████▍     | 89/200 [03:06<03:46,  2.04s/it]


=== 0801.3175v1 | chunk_id 1 ===
'What is the opulation of multiple stellar popula tions in Galactic globular clusters?'


 45%|████▌     | 90/200 [03:08<03:56,  2.15s/it]


=== 0801.0304v1 | chunk_id 1 ===
'What is the second order perturbation theory for gravity wave and the WMAP results of inhomogeneity and clustering in the early universe?'


 46%|████▌     | 91/200 [03:10<03:56,  2.17s/it]


=== 0801.3306v4 | chunk_id 1 ===
'What is the abelian sand pile model and rotor router model?'


 46%|████▌     | 92/200 [03:12<03:47,  2.10s/it]


=== 0801.0682v4 | chunk_id 1 ===
'What is the name of the s wave pairing?'


 46%|████▋     | 93/200 [03:14<03:35,  2.02s/it]


=== 0801.1680v1 | chunk_id 1 ===
'What is the name of the star that is discovered by EROS 2?'


 47%|████▋     | 94/200 [03:16<03:31,  1.99s/it]


=== 0801.0785v1 | chunk_id 1 ===
'What is the name of the acoustic field used to increase the critical heat flux CHF of a flat boiling heat transfer surface?'


 48%|████▊     | 95/200 [03:19<03:45,  2.15s/it]


=== 0801.2956v2 | chunk_id 1 ===
'What is the name of the physics department in the Grover algorithm?'


 48%|████▊     | 96/200 [03:21<03:37,  2.09s/it]


=== 0801.3868v1 | chunk_id 1 ===
'What is the nmr study based on?'


 48%|████▊     | 97/200 [03:22<03:27,  2.02s/it]


=== 0801.3318v1 | chunk_id 1 ===
'What is the name of the brush that is grafted to a surface acti?'


 49%|████▉     | 98/200 [03:24<03:24,  2.00s/it]


=== 0801.3629v1 | chunk_id 1 ===
'What is the name of the paper of Schwarz s Lemma?'


 50%|████▉     | 99/200 [03:26<03:18,  1.97s/it]


=== 0801.4096v2 | chunk_id 1 ===
'What is the function of the sym metric anisotropic 4 lattice model?'


 50%|█████     | 100/200 [03:29<03:29,  2.10s/it]


=== 0801.0075v4 | chunk_id 1 ===
'What is the z 7 f z 7?'


 50%|█████     | 101/200 [03:31<03:20,  2.03s/it]


=== 0801.1906v1 | chunk_id 1 ===
'What is the name of the Rieffel s deformation of locally compact quantum groups?'


 51%|█████     | 102/200 [03:33<03:15,  2.00s/it]


=== 0801.2053v2 | chunk_id 1 ===
'What is the name of the research center of the Peoples Republic of Ch?'


 52%|█████▏    | 103/200 [03:34<03:10,  1.96s/it]


=== 0801.2448v1 | chunk_id 1 ===
'What is the atomic distance between atoms and atoms?'


 52%|█████▏    | 104/200 [03:36<03:04,  1.93s/it]


=== 0801.0726v2 | chunk_id 1 ===
'What is the name of the text?'


 52%|█████▎    | 105/200 [03:38<02:58,  1.88s/it]


=== 0801.1429v1 | chunk_id 1 ===
'What is the dynamical state of RX J1347.5 1145?'


 53%|█████▎    | 106/200 [03:40<02:59,  1.91s/it]


=== 0801.4493v1 | chunk_id 1 ===
'What is the name of the single dish observations at sub millimeter wavelengths toward a sample of massive star forming regions?'


 54%|█████▎    | 107/200 [03:43<03:18,  2.14s/it]


=== 0801.4809v1 | chunk_id 1 ===
'What is the gravito radiative force?'


 54%|█████▍    | 108/200 [03:45<03:08,  2.05s/it]


=== 0801.3187v1 | chunk_id 1 ===
'What is the occurrence of triple helmet streamers even during solar minimum?'


 55%|█████▍    | 109/200 [03:46<03:01,  1.99s/it]


=== 0801.2996v1 | chunk_id 1 ===
'What is the function of Gasper?'


 55%|█████▌    | 110/200 [03:48<02:52,  1.92s/it]


=== 0801.4274v1 | chunk_id 1 ===
'What is the name of the paper?'


 56%|█████▌    | 111/200 [03:50<02:44,  1.85s/it]


=== 0801.0045v3 | chunk_id 1 ===
'What is the name of the physicist who is a member of the physicist?'


 56%|█████▌    | 112/200 [03:52<02:50,  1.94s/it]


=== 0801.4458v1 | chunk_id 1 ===
'What is the renormalization of matter?'


 56%|█████▋    | 113/200 [03:54<02:43,  1.87s/it]


=== 0801.4077v1 | chunk_id 1 ===
'What is the kinetic properties of PdCoO2 and PtCoO2?'


 57%|█████▋    | 114/200 [03:56<02:56,  2.05s/it]


=== 0801.2728v1 | chunk_id 1 ===
'What is the name of the particle that is tagged in a given time?'


 57%|█████▊    | 115/200 [03:58<02:51,  2.01s/it]


=== 0801.2311v1 | chunk_id 1 ===
'What is the density of the interfacial layers?'


 58%|█████▊    | 116/200 [04:00<02:44,  1.96s/it]


=== 0801.3705v1 | chunk_id 1 ===
'What is the energy momentum for a gravitating system?'


 58%|█████▊    | 117/200 [04:02<02:40,  1.94s/it]


=== 0801.1446v1 | chunk_id 1 ===
'What is the underlying degree of freedom prevalent in the hot plasma?'


 59%|█████▉    | 118/200 [04:04<02:38,  1.93s/it]


=== 0801.0190v1 | chunk_id 1 ===
'What is the name of the laboratory that is located in the Institute of Materials Str?'


 60%|█████▉    | 119/200 [04:06<02:35,  1.92s/it]


=== 0801.0084v2 | chunk_id 1 ===
'A with high contrast pe riodic coefficients with period in each coordinate?'


 60%|██████    | 120/200 [04:08<02:34,  1.93s/it]


=== 0801.4356v2 | chunk_id 1 ===
'What is the name of the model that focuses on the inter-band hybridization of two band Hubbard?'


 60%|██████    | 121/200 [04:10<02:44,  2.08s/it]


=== 0801.2386v1 | chunk_id 1 ===
'What is the name of the class of np np potentials?'


 61%|██████    | 122/200 [04:12<02:39,  2.04s/it]


=== 0801.4460v1 | chunk_id 1 ===
'What is the spectroscopic function of the Schr odinger operator?'


 62%|██████▏   | 123/200 [04:14<02:34,  2.01s/it]


=== 0801.1662v2 | chunk_id 1 ===
'What is the name of the symmetry of the Higgs boson?'


 62%|██████▏   | 124/200 [04:16<02:29,  1.97s/it]


=== 0801.1351v2 | chunk_id 1 ===
'What is the neotropy in the magnetic and electrical transport properties of Fe1 xCrxSb2?'


 62%|██████▎   | 125/200 [04:18<02:31,  2.01s/it]


=== 0801.1088v1 | chunk_id 1 ===
'What is the name of the model designed for Optimal Transport Theory see Vi?'


 63%|██████▎   | 126/200 [04:20<02:35,  2.10s/it]


=== 0801.2234v1 | chunk_id 1 ===
'What is the limiting case when the variance becomes so small that the Gaussian is its own Fourier transform?'


 64%|██████▎   | 127/200 [04:22<02:32,  2.08s/it]


=== 0801.3848v1 | chunk_id 1 ===
'What is the name of the accretion discs?'


 64%|██████▍   | 128/200 [04:24<02:25,  2.03s/it]


=== 0801.3368v4 | chunk_id 1 ===
'What is the name of the disc that is presented in the text?'


 64%|██████▍   | 129/200 [04:26<02:21,  2.00s/it]


=== 0801.1226v3 | chunk_id 1 ===
'What is the name of the extension of the character expansion method?'


 65%|██████▌   | 130/200 [04:28<02:16,  1.95s/it]


=== 0801.2592v2 | chunk_id 1 ===
'rms spin polarizations are a function of the number of channels in each lead in the limit of large spin orbit coupling?'


 66%|██████▌   | 131/200 [04:30<02:27,  2.14s/it]


=== 0801.1769v1 | chunk_id 1 ===
'What is the name of the astronomical institute of astronomy?'


 66%|██████▌   | 132/200 [04:32<02:21,  2.08s/it]


=== 0801.0887v1 | chunk_id 1 ===
'What is the model of information retrieval derived from the Kintsch Ericsson scheme?'


 66%|██████▋   | 133/200 [04:34<02:16,  2.04s/it]


=== 0801.2683v1 | chunk_id 1 ===
'What is the morphology of the electrowetting induced transition between the Cassie Baxter and Wenzel states?'


 67%|██████▋   | 134/200 [04:36<02:16,  2.07s/it]


=== 0801.0310v2 | chunk_id 1 ===
'What is the entanglement between a two level system and a quantum harmonic oscillator?'


 68%|██████▊   | 135/200 [04:38<02:13,  2.05s/it]


=== 0801.1365v1 | chunk_id 1 ===
'What is the name of the object that is a neutron star?'


 68%|██████▊   | 136/200 [04:41<02:17,  2.14s/it]


=== 0801.0286v2 | chunk_id 1 ===
'What is the name of the physics department of the Russian Academy of Sciences VINITI?'


 68%|██████▊   | 137/200 [04:43<02:10,  2.08s/it]


=== 0801.4245v1 | chunk_id 1 ===
'What is the neutron transversity from semi inclusive DIS off3He?'


 69%|██████▉   | 138/200 [04:45<02:03,  1.99s/it]


=== 0801.3694v1 | chunk_id 1 ===
'What is the name of the condensate flow?'


 70%|██████▉   | 139/200 [04:46<01:57,  1.92s/it]


=== 0801.4743v2 | chunk_id 1 ===
'What is the ring set S R of?'


 70%|███████   | 140/200 [04:48<01:50,  1.84s/it]


=== 0801.3033v1 | chunk_id 1 ===
'What is the study of rotor stator contact inducing vibration in rota?'


 70%|███████   | 141/200 [04:50<01:50,  1.87s/it]


=== 0801.1433v2 | chunk_id 1 ===
'What state does lose less quantum informa?'


 71%|███████   | 142/200 [04:52<01:45,  1.81s/it]


=== 0801.1706v1 | chunk_id 1 ===
'What is the name of the equivalence of quantum states under local unitary trans formations?'


 72%|███████▏  | 143/200 [04:54<01:46,  1.87s/it]


=== 0801.4016v2 | chunk_id 1 ===
'What is the axial charge of a nucleon in 2 1 flavor dynamical lattice QCD with domain wall fermions?'


 72%|███████▏  | 144/200 [04:56<01:48,  1.94s/it]


=== 0801.0796v1 | chunk_id 1 ===
'What is the name of the emergence of a predator prey system?'


 72%|███████▎  | 145/200 [04:58<01:51,  2.02s/it]


=== 0801.0428v2 | chunk_id 1 ===
'What is the ep ph of the ep ph?'


 73%|███████▎  | 146/200 [05:00<01:49,  2.03s/it]


=== 0801.1434v1 | chunk_id 1 ===
'What is the ratio of next neare to the ratio of next neare?'


 74%|███████▎  | 147/200 [05:02<01:46,  2.02s/it]


=== 0801.2377v1 | chunk_id 1 ===
'What is the shortest supersymmetric bulk plus boundary action?'


 74%|███████▍  | 148/200 [05:04<01:43,  1.99s/it]


=== 0801.2607v1 | chunk_id 1 ===
'What is the name of the laboratory that is the main laboratory of the CoO2?'


 74%|███████▍  | 149/200 [05:06<01:39,  1.96s/it]


=== 0801.3168v1 | chunk_id 1 ===
'What is the main idea of the concept of representative statistical ensembles?'


 75%|███████▌  | 150/200 [05:08<01:42,  2.05s/it]


=== 0801.1167v1 | chunk_id 1 ===
'A. C. A. Boogert3,4,5, K. M. Pontoppidan6,7, C. Knez8, F. Lahuis9,10, J. Kessler Silacci11, E. F. van Dishoeck9, G. A. Blake6, J. C. Augereau12, S. E. Bisschop9, S. Bottinelli9, T. Y. Brooke13'


 76%|███████▌  | 151/200 [05:12<02:07,  2.60s/it]


=== 0801.4634v2 | chunk_id 1 ===
'What is the diagonal cosets of the Heisenberg group Giuseppe D Appollonioa,b and Thomas Quellac?'


 76%|███████▌  | 152/200 [05:14<02:05,  2.61s/it]


=== 0801.2904v1 | chunk_id 1 ===
'What is the name of the method used to prepare isomerically clean samples of ions with a mass resolving power of more than 1 105?'


 76%|███████▋  | 153/200 [05:17<01:59,  2.54s/it]


=== 0801.2036v2 | chunk_id 1 ===
'What is the Y meson of charmonia ?'


 77%|███████▋  | 154/200 [05:19<01:47,  2.34s/it]


=== 0801.1484v1 | chunk_id 1 ===
'What is the name of the Institut f ur Kernphysik, TU Darmstadt, Schlossgartenstr. 9, 64289 Darmstadt, Germany P. Navr atil Lawrence Livermore National Laboratory, P.O. Box 808, L 414, Livermore, CA 94551, USA'


 78%|███████▊  | 155/200 [05:22<02:01,  2.70s/it]


=== 0801.1444v3 | chunk_id 1 ===
'What is the resemblance of FIOs of order zero?'


 78%|███████▊  | 156/200 [05:24<01:49,  2.48s/it]


=== 0801.0198v2 | chunk_id 1 ===
'What is the name of the three dimensional gravity dual of a process in which two clouds of 1 1 dimensional conformal matter moving in opposite directions collide?'


 78%|███████▊  | 157/200 [05:27<01:44,  2.43s/it]


=== 0801.2871v2 | chunk_id 1 ===
'What is the inverse amplitude method for the scattering amplitude and dispersion theory?'


 79%|███████▉  | 158/200 [05:29<01:36,  2.30s/it]


=== 0801.2433v2 | chunk_id 1 ===
'What is the name of the metric evolution code?'


 80%|███████▉  | 159/200 [05:30<01:28,  2.15s/it]


=== 0801.4946v3 | chunk_id 1 ===
'What is the relationship between the two dimensional Lorentz and the symmetry b?'


 80%|████████  | 160/200 [05:32<01:24,  2.11s/it]


=== 0801.3128v1 | chunk_id 1 ===
'What is the name of the astronomical center in the astronomical center?'


 80%|████████  | 161/200 [05:34<01:21,  2.08s/it]


=== 0801.2725v7 | chunk_id 1 ===
'What is the name of the particle physics department?'


 81%|████████  | 162/200 [05:36<01:15,  1.98s/it]


=== 0801.0497v2 | chunk_id 1 ===
'What is the name of the algorithm that solves the problem of finding a desired item out of N items arranged on the sites of a two dimensional lattice of size N N?'


 82%|████████▏ | 163/200 [05:39<01:22,  2.23s/it]


=== 0801.0638v3 | chunk_id 1 ===
'What is the name of the journal of the astronomical observatory?'


 82%|████████▏ | 164/200 [05:41<01:17,  2.15s/it]


=== 0801.2572v3 | chunk_id 1 ===
'What is the name of the research center for the early universe?'


 82%|████████▎ | 165/200 [05:43<01:11,  2.06s/it]


=== 0801.4244v1 | chunk_id 1 ===
'What is the name of the tmfy MSE?'


 83%|████████▎ | 166/200 [05:45<01:08,  2.01s/it]


=== 0801.4293v2 | chunk_id 1 ===
'What is the name of the resulting phase bundle?'


 84%|████████▎ | 167/200 [05:47<01:05,  1.97s/it]


=== 0801.3191v1 | chunk_id 1 ===
'What is the intensity process of based on this information?'


 84%|████████▍ | 168/200 [05:49<01:05,  2.04s/it]


=== 0801.2653v3 | chunk_id 1 ===
'Quantum Noise in the Collective Abstraction Reaction A B2 AB B?'


 84%|████████▍ | 169/200 [05:51<01:02,  2.02s/it]


=== 0801.3761v1 | chunk_id 1 ===
'What is the influence of noise on magnetic properties of a su perconducting loop?'


 85%|████████▌ | 170/200 [05:53<01:00,  2.01s/it]


=== 0801.1192v1 | chunk_id 1 ===
'What is the date of the date of cond mat.mtrl sci?'


 86%|████████▌ | 171/200 [05:55<00:57,  2.00s/it]


=== 0801.1515v1 | chunk_id 1 ===
'What is the name of the institute of materials science and engineering?'


 86%|████████▌ | 172/200 [05:57<00:54,  1.94s/it]


=== 0801.1996v2 | chunk_id 1 ===
'What is the name of the particle that is con sidered?'


 86%|████████▋ | 173/200 [05:58<00:51,  1.90s/it]


=== 0801.2557v1 | chunk_id 1 ===
'What is the kinetic term for the Liouville wave function?'


 87%|████████▋ | 174/200 [06:00<00:49,  1.89s/it]


=== 0801.3334v1 | chunk_id 1 ===
'What is the spectral properties of the four neutralino states?'


 88%|████████▊ | 175/200 [06:02<00:50,  2.00s/it]


=== 0801.4436v1 | chunk_id 1 ===
'What is the name of the institute of Applied Physics and Computational Mathematics?'


 88%|████████▊ | 176/200 [06:04<00:48,  2.01s/it]


=== 0801.0511v1 | chunk_id 1 ===
'What is the epl draft Exact Haldane mapping for all S and super universality in spin chains?'


 88%|████████▊ | 177/200 [06:07<00:46,  2.02s/it]


=== 0801.0192v2 | chunk_id 1 ===
'What is the name of the symplectic fiber s?'


 89%|████████▉ | 178/200 [06:08<00:43,  1.99s/it]


=== 0801.1624v2 | chunk_id 1 ===
'What is the maximum patch method for Directional Dark Matter Detection?'


 90%|████████▉ | 179/200 [06:10<00:41,  1.97s/it]


=== 0801.2011v1 | chunk_id 1 ===
'What is the resemblance of the l i l j li?'


 90%|█████████ | 180/200 [06:13<00:42,  2.10s/it]


=== 0801.4055v1 | chunk_id 1 ===
'What is the name of the astronomical observatory?'


 90%|█████████ | 181/200 [06:15<00:38,  2.04s/it]


=== 0801.1185v2 | chunk_id 1 ===
'What is the maximum capacity of the Discrete Time AWGN Channel under output quantization?'


 91%|█████████ | 182/200 [06:17<00:36,  2.02s/it]


=== 0801.3158v1 | chunk_id 1 ===
'What is the name of the atomic number of the black body radiation?'


 92%|█████████▏| 183/200 [06:19<00:33,  1.98s/it]


=== 0801.0735v2 | chunk_id 1 ===
'What is the name of the university of wisconsin?'


 92%|█████████▏| 184/200 [06:20<00:31,  1.95s/it]


=== 0801.4415v3 | chunk_id 1 ===
'What is the name of the typeset in JHEP style HYPER VERSION Huge entropy production inside black holes?'


 92%|█████████▎| 185/200 [06:23<00:32,  2.18s/it]


=== 0801.1407v3 | chunk_id 1 ===
'What is the name of the physicist who is the author of the article?'


 93%|█████████▎| 186/200 [06:25<00:30,  2.15s/it]


=== 0801.3076v1 | chunk_id 1 ===
'What is the name of the scientific conference held at the Joint Institute for Nuclear Research in Dubna?'


 94%|█████████▎| 187/200 [06:27<00:27,  2.11s/it]


=== 0801.0853v1 | chunk_id 1 ===
'What is the epoch of the cosmology in which cold dark matter begins to decay into relativistic particles?'


 94%|█████████▍| 188/200 [06:30<00:26,  2.23s/it]


=== 0801.0501v1 | chunk_id 1 ===
'What is the symmetry of quark mass operators?'


 94%|█████████▍| 189/200 [06:32<00:23,  2.12s/it]


=== 0801.2257v4 | chunk_id 1 ===
'What is the role of the core mass function?'


 95%|█████████▌| 190/200 [06:33<00:20,  2.02s/it]


=== 0801.1393v1 | chunk_id 1 ===
'What is the name of the physics center that is used to charge time dependent density functional theory?'


 96%|█████████▌| 191/200 [06:35<00:17,  2.00s/it]


=== 0801.4739v4 | chunk_id 1 ===
'What is the axion mass of the axion?'


 96%|█████████▌| 192/200 [06:37<00:15,  1.93s/it]


=== 0801.2421v1 | chunk_id 1 ===
'What is the name of the p time proof net for MALL?'


 96%|█████████▋| 193/200 [06:39<00:13,  1.90s/it]


=== 0801.3108v1 | chunk_id 1 ===
'What is the function of the universal toric genus for the complex, almost complex and stable complex structures?'


 97%|█████████▋| 194/200 [06:41<00:11,  1.94s/it]


=== 0801.2809v1 | chunk_id 1 ===
'What is the potential of agegraphic dark energy?'


 98%|█████████▊| 195/200 [06:43<00:09,  1.89s/it]


=== 0801.2600v1 | chunk_id 1 ===
'What is the acoustic behaviour of the deconvolution kernel density estimator?'


 98%|█████████▊| 196/200 [06:45<00:07,  1.92s/it]


=== 0801.3931v1 | chunk_id 1 ===
'What is the name of the astro ph?'


 98%|█████████▊| 197/200 [06:47<00:06,  2.01s/it]


=== 0801.0504v1 | chunk_id 1 ===
'What is the lattice of ideals of a commutative ring?'


 99%|█████████▉| 198/200 [06:49<00:03,  1.99s/it]


=== 0801.4379v2 | chunk_id 1 ===
'What is the fraction of classical Broad Absorption Line quasars BALQSOs?'


100%|█████████▉| 199/200 [06:51<00:01,  1.99s/it]


=== 0801.3627v2 | chunk_id 1 ===
'What is the chiral limit of the SU V?'


100%|██████████| 200/200 [06:53<00:00,  2.07s/it]

 records prcessed and 1 query per chunk has beensaved.





In [51]:
query = """
SELECT chunk_data, query
FROM arxiv_training
WHERE LENGTH(TRIM(query)) > 0
LIMIT 210;
"""

df = pd.read_sql(query, engine)
print(df)


                                            chunk_data  \
0    arXiv 0801.3931v1 astro ph 25 Jan 2008 Dynamic...   
1    arXiv 0801.4077v1 cond mat.mtrl sci 26 Jan 200...   
2    arXiv 0801.2728v1 cond mat.stat mech 17 Jan 20...   
3    arXiv 0801.3705v1 gr qc 24 Jan 2008 Gravitatio...   
4    arXiv 0801.2592v2 cond mat.mes hall 12 Feb 200...   
..                                                 ...   
190  arXiv 0801.0190v1 cond mat.str el 31 Dec 2007 ...   
191  arXiv 0801.2377v1 hep th 15 Jan 2008 Preprint ...   
192  arXiv 0801.1192v1 cond mat.mtrl sci 8 Jan 2008...   
193  arXiv 0801.0511v1 cond mat.mes hall 3 Jan 2008...   
194  arXiv 0801.1407v3 astro ph 29 May 2008 USTC IC...   

                                                 query  
0                    What is the name of the astro ph?  
1    What is the kinetic properties of PdCoO2 and P...  
2    What is the name of the particle that is tagge...  
3    What is the energy momentum for a gravitating ...  
4    rms spin pola