In [1]:
import os
import gc
import random
import itertools
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import tokenizers
import transformers
from transformers import AutoTokenizer, AutoConfig, AutoModel, T5EncoderModel, get_linear_schedule_with_warmup
import datasets
from datasets import load_dataset, load_metric
import sentencepiece
import argparse
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
from torch.optim import AdamW
import pickle
import time
import math
from sklearn.preprocessing import MinMaxScaler
from datasets.utils.logging import disable_progress_bar
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
disable_progress_bar()

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, required=False)
#     parser.add_argument("--dataset_name", type=str, required=False)
    parser.add_argument("--pretrained_model_name_or_path", type=str, default="sagawa/ZINC-t5", required=False)
    parser.add_argument("--model_name_or_path", type=str, required=False)
    parser.add_argument("--scaler_path", type=str, default="/data2/sagawa/tcrp-regression-model-archive/10-23-1st-new-metric-reactant-product", required=False)
    parser.add_argument("--debug", action='store_true', default=False, required=False)
    parser.add_argument("--batch_size", type=int, default=5, required=False)
    parser.add_argument("--max_len", type=int, default=512, required=False)
    parser.add_argument("--num_workers", type=int, default=1, required=False)
    parser.add_argument("--fc_dropout", type=float, default=0.1, required=False)
    parser.add_argument("--output_dir", type=str, default='./', required=False)
    parser.add_argument("--seed", type=int, default=42, required=False)

    return parser.parse_args()

class CFG():
    data_path='../../all_ord_reaction_uniq_with_attr_v3.tsv'
#     pretrained_model_name_or_path = 'sagawa/ZINC-t5'
    model = 'sagawa/ZINC-t5'
    batch_size = 5 #max_lenを大きくしたらoomしたから15から5に
    seed = 42
    num_workers = 4
    output_dir = './'
    model_name_or_path = '/data2/sagawa/tcrp-regression-model-archive/10-23-1st-new-metric-reactant-product'
    scaler_path = '/data2/sagawa/tcrp-regression-model-archive/10-23-1st-new-metric-reactant-product'

In [2]:
import subprocess, os
os.mkdir('tokenizer')
subprocess.run('wget https://huggingface.co/spaces/sagawa/predictyield-t5/resolve/main/ZINC-t5_best.pth', shell=True)
subprocess.run('wget https://huggingface.co/spaces/sagawa/predictyield-t5/resolve/main/config.pth', shell=True)
subprocess.run('wget https://huggingface.co/spaces/sagawa/predictyield-t5/raw/main/special_tokens_map.json -P ./tokenizer', shell=True)
subprocess.run('wget https://huggingface.co/spaces/sagawa/predictyield-t5/raw/main/tokenizer.json -P ./tokenizer', shell=True)
subprocess.run('wget https://huggingface.co/spaces/sagawa/predictyield-t5/raw/main/tokenizer_config.json -P ./tokenizer', shell=True)

--2023-02-25 12:28:02--  https://huggingface.co/spaces/sagawa/predictyield-t5/resolve/main/ZINC-t5_best.pth
huggingface.co (huggingface.co) をDNSに問いあわせています... 54.235.118.239, 3.231.67.228, 2600:1f18:147f:e850:e203:c458:10cd:fc3c, ...
huggingface.co (huggingface.co)|54.235.118.239|:443 に接続しています... 接続しました。
HTTP による接続要求を送信しました、応答を待っています... 302 Found
場所: https://cdn-lfs.huggingface.co/repos/38/a9/38a98fb8d59558823e00ec41ed194d140d0d2b87721e14ea06afee1a1fe52930/880b98ca411a43e4917dcf50c3fc03226757513fc3d47a79400260d8eaab99bd?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27ZINC-t5_best.pth%3B+filename%3D%22ZINC-t5_best.pth%22%3B&Expires=1677554884&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZG4tbGZzLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzM4L2E5LzM4YTk4ZmI4ZDU5NTU4ODIzZTAwZWM0MWVkMTk0ZDE0MGQwZDJiODc3MjFlMTRlYTA2YWZlZTFhMWZlNTI5MzAvODgwYjk4Y2E0MTFhNDNlNDkxN2RjZjUwYzNmYzAzMjI2NzU3NTEzZmMzZDQ3YTc5NDAwMjYwZDhlYWFiOTliZD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoiLCJDb25kaXRpb24iOn

  5300K .......... .......... .......... .......... ..........  0%  570K 2m44s
  5350K .......... .......... .......... .......... ..........  0% 75.1M 2m43s
  5400K .......... .......... .......... .......... ..........  0% 64.7M 2m42s
  5450K .......... .......... .......... .......... ..........  0% 71.8M 2m40s
  5500K .......... .......... .......... .......... ..........  0% 63.1M 2m39s
  5550K .......... .......... .......... .......... ..........  0% 61.4M 2m38s
  5600K .......... .......... .......... .......... ..........  0% 86.3M 2m36s
  5650K .......... .......... .......... .......... ..........  0%  108M 2m35s
  5700K .......... .......... .......... .......... ..........  0%  113M 2m34s
  5750K .......... .......... .......... .......... ..........  0%  129M 2m32s
  5800K .......... .......... .......... .......... ..........  0% 14.7M 2m31s
  5850K .......... .......... .......... .......... ..........  0% 63.6M 2m30s
  5900K .......... .......... .......... .......... 

 13200K .......... .......... .......... .......... ..........  1%  496K 83s
 13250K .......... .......... .......... .......... ..........  1%  115M 83s
 13300K .......... .......... .......... .......... ..........  1% 93.9M 82s
 13350K .......... .......... .......... .......... ..........  1% 97.1M 82s
 13400K .......... .......... .......... .......... ..........  1%  115M 82s
 13450K .......... .......... .......... .......... ..........  1%  121M 81s
 13500K .......... .......... .......... .......... ..........  1%  114M 81s
 13550K .......... .......... .......... .......... ..........  1%  114M 81s
 13600K .......... .......... .......... .......... ..........  1%  117M 81s
 13650K .......... .......... .......... .......... ..........  1%  122M 80s
 13700K .......... .......... .......... .......... ..........  1%  115M 80s
 13750K .......... .......... .......... .......... ..........  1%  108M 80s
 13800K .......... .......... .......... .......... ..........  1%  117M 80s

 25050K .......... .......... .......... .......... ..........  3%  450K 61s
 25100K .......... .......... .......... .......... ..........  3% 94.8M 60s
 25150K .......... .......... .......... .......... ..........  3%  118M 60s
 25200K .......... .......... .......... .......... ..........  3%  127M 60s
 25250K .......... .......... .......... .......... ..........  3%  102M 60s
 25300K .......... .......... .......... .......... ..........  3%  105M 60s
 25350K .......... .......... .......... .......... ..........  3%  108M 60s
 25400K .......... .......... .......... .......... ..........  3%  112M 60s
 25450K .......... .......... .......... .......... ..........  3%  113M 60s
 25500K .......... .......... .......... .......... ..........  3%  122M 60s
 25550K .......... .......... .......... .......... ..........  3%  112M 59s
 25600K .......... .......... .......... .......... ..........  3% 86.4M 59s
 25650K .......... .......... .......... .......... ..........  3%  108M 59s

 33000K .......... .......... .......... .......... ..........  4%  464K 52s
 33050K .......... .......... .......... .......... ..........  4%  111M 52s
 33100K .......... .......... .......... .......... ..........  4% 93.9M 52s
 33150K .......... .......... .......... .......... ..........  4%  119M 52s
 33200K .......... .......... .......... .......... ..........  4%  117M 52s
 33250K .......... .......... .......... .......... ..........  4% 55.9M 52s
 33300K .......... .......... .......... .......... ..........  4%  109M 52s
 33350K .......... .......... .......... .......... ..........  4%  109M 52s
 33400K .......... .......... .......... .......... ..........  4%  118M 52s
 33450K .......... .......... .......... .......... ..........  4%  102M 51s
 33500K .......... .......... .......... .......... ..........  4%  112M 51s
 33550K .......... .......... .......... .......... ..........  4%  116M 51s
 33600K .......... .......... .......... .......... ..........  4%  123M 51s

 40600K .......... .......... .......... .......... ..........  5%  118M 46s
 40650K .......... .......... .......... .......... ..........  5%  114M 46s
 40700K .......... .......... .......... .......... ..........  5%  108M 45s
 40750K .......... .......... .......... .......... ..........  5%  116M 45s
 40800K .......... .......... .......... .......... ..........  5% 66.0M 45s
 40850K .......... .......... .......... .......... ..........  5%  101M 45s
 40900K .......... .......... .......... .......... ..........  5%  573K 47s
 40950K .......... .......... .......... .......... ..........  5% 66.2M 47s
 41000K .......... .......... .......... .......... ..........  5%  117M 47s
 41050K .......... .......... .......... .......... ..........  5%  108M 47s
 41100K .......... .......... .......... .......... ..........  5%  102M 47s
 41150K .......... .......... .......... .......... ..........  5%  107M 47s
 41200K .......... .......... .......... .......... ..........  5%  127M 47s

 52700K .......... .......... .......... .......... ..........  6%  453K 42s
 52750K .......... .......... .......... .......... ..........  6% 64.2M 42s
 52800K .......... .......... .......... .......... ..........  6%  111M 42s
 52850K .......... .......... .......... .......... ..........  6% 23.6M 42s
 52900K .......... .......... .......... .......... ..........  6%  103M 42s
 52950K .......... .......... .......... .......... ..........  6%  105M 42s
 53000K .......... .......... .......... .......... ..........  6%  119M 42s
 53050K .......... .......... .......... .......... ..........  6%  116M 42s
 53100K .......... .......... .......... .......... ..........  6%  115M 42s
 53150K .......... .......... .......... .......... ..........  6%  107M 42s
 53200K .......... .......... .......... .......... ..........  6% 95.5M 41s
 53250K .......... .......... .......... .......... ..........  6%  126M 41s
 53300K .......... .......... .......... .......... ..........  6%  117M 41s

 60500K .......... .......... .......... .......... ..........  7%  447K 39s
 60550K .......... .......... .......... .......... ..........  7%  120M 39s
 60600K .......... .......... .......... .......... ..........  7%  111M 39s
 60650K .......... .......... .......... .......... ..........  7% 98.6M 39s
 60700K .......... .......... .......... .......... ..........  7%  117M 39s
 60750K .......... .......... .......... .......... ..........  7%  113M 39s
 60800K .......... .......... .......... .......... ..........  7%  122M 39s
 60850K .......... .......... .......... .......... ..........  7%  116M 39s
 60900K .......... .......... .......... .......... ..........  7% 98.8M 39s
 60950K .......... .......... .......... .......... ..........  7%  128M 39s
 61000K .......... .......... .......... .......... ..........  7%  114M 39s
 61050K .......... .......... .......... .......... ..........  7% 97.0M 39s
 61100K .......... .......... .......... .......... ..........  7%  111M 39s

 68650K .......... .......... .......... .......... ..........  8%  518K 37s
 68700K .......... .......... .......... .......... ..........  8%  104M 37s
 68750K .......... .......... .......... .......... ..........  8%  102M 37s
 68800K .......... .......... .......... .......... ..........  8%  103M 37s
 68850K .......... .......... .......... .......... ..........  8% 93.8M 37s
 68900K .......... .......... .......... .......... ..........  8% 91.8M 37s
 68950K .......... .......... .......... .......... ..........  8%  113M 37s
 69000K .......... .......... .......... .......... ..........  8% 98.7M 37s
 69050K .......... .......... .......... .......... ..........  8%  104M 37s
 69100K .......... .......... .......... .......... ..........  8% 84.9M 37s
 69150K .......... .......... .......... .......... ..........  8%  103M 37s
 69200K .......... .......... .......... .......... ..........  8% 96.2M 37s
 69250K .......... .......... .......... .......... ..........  8% 89.2M 37s

 75650K .......... .......... .......... .......... ..........  9% 64.7M 36s
 75700K .......... .......... .......... .......... ..........  9%  101M 35s
 75750K .......... .......... .......... .......... ..........  9% 39.3M 35s
 75800K .......... .......... .......... .......... ..........  9% 60.7M 35s
 75850K .......... .......... .......... .......... ..........  9% 39.2M 35s
 75900K .......... .......... .......... .......... ..........  9% 30.3M 35s
 75950K .......... .......... .......... .......... ..........  9%  107M 35s
 76000K .......... .......... .......... .......... ..........  9% 64.9M 35s
 76050K .......... .......... .......... .......... ..........  9%  105M 35s
 76100K .......... .......... .......... .......... ..........  9% 41.0M 35s
 76150K .......... .......... .......... .......... ..........  9%  119M 35s
 76200K .......... .......... .......... .......... ..........  9% 61.1M 35s
 76250K .......... .......... .......... .......... ..........  9% 36.7M 35s

 87600K .......... .......... .......... .......... .......... 11%  117M 34s
 87650K .......... .......... .......... .......... .......... 11% 53.6M 34s
 87700K .......... .......... .......... .......... .......... 11% 78.0M 34s
 87750K .......... .......... .......... .......... .......... 11% 72.2M 34s
 87800K .......... .......... .......... .......... .......... 11% 57.4M 34s
 87850K .......... .......... .......... .......... .......... 11%  109M 33s
 87900K .......... .......... .......... .......... .......... 11% 61.9M 33s
 87950K .......... .......... .......... .......... .......... 11% 65.5M 33s
 88000K .......... .......... .......... .......... .......... 11% 27.9M 33s
 88050K .......... .......... .......... .......... .......... 11% 30.7M 33s
 88100K .......... .......... .......... .......... .......... 11% 51.4M 33s
 88150K .......... .......... .......... .......... .......... 11%  117M 33s
 88200K .......... .......... .......... .......... .......... 11% 91.7M 33s

 98750K .......... .......... .......... .......... .......... 12%  118M 32s
 98800K .......... .......... .......... .......... .......... 12% 71.0M 32s
 98850K .......... .......... .......... .......... .......... 12%  105M 32s
 98900K .......... .......... .......... .......... .......... 12% 52.6M 32s
 98950K .......... .......... .......... .......... .......... 12% 79.8M 32s
 99000K .......... .......... .......... .......... .......... 12%  118M 32s
 99050K .......... .......... .......... .......... .......... 12%  585K 33s
 99100K .......... .......... .......... .......... .......... 12% 53.9M 33s
 99150K .......... .......... .......... .......... .......... 12% 86.0M 33s
 99200K .......... .......... .......... .......... .......... 12% 78.0M 33s
 99250K .......... .......... .......... .......... .......... 12% 64.8M 33s
 99300K .......... .......... .......... .......... .......... 12% 87.0M 33s
 99350K .......... .......... .......... .......... .......... 12% 68.7M 33s

109900K .......... .......... .......... .......... .......... 14% 36.8M 31s
109950K .......... .......... .......... .......... .......... 14% 82.1M 31s
110000K .......... .......... .......... .......... .......... 14% 30.5M 31s
110050K .......... .......... .......... .......... .......... 14% 46.2M 31s
110100K .......... .......... .......... .......... .......... 14%  114M 31s
110150K .......... .......... .......... .......... .......... 14% 28.5M 31s
110200K .......... .......... .......... .......... .......... 14% 26.7M 31s
110250K .......... .......... .......... .......... .......... 14% 30.9M 31s
110300K .......... .......... .......... .......... .......... 14% 81.3M 31s
110350K .......... .......... .......... .......... .......... 14% 54.3M 31s
110400K .......... .......... .......... .......... .......... 14% 48.7M 31s
110450K .......... .......... .......... .......... .......... 14% 46.0M 31s
110500K .......... .......... .......... .......... .......... 14% 29.2M 31s

121000K .......... .......... .......... .......... .......... 15% 16.6M 30s
121050K .......... .......... .......... .......... .......... 15% 59.3M 30s
121100K .......... .......... .......... .......... .......... 15% 70.8M 30s
121150K .......... .......... .......... .......... .......... 15% 62.5M 30s
121200K .......... .......... .......... .......... .......... 15% 45.2M 30s
121250K .......... .......... .......... .......... .......... 15% 39.2M 30s
121300K .......... .......... .......... .......... .......... 15% 50.4M 30s
121350K .......... .......... .......... .......... .......... 15% 18.7M 30s
121400K .......... .......... .......... .......... .......... 15%  119M 30s
121450K .......... .......... .......... .......... .......... 15% 54.2M 30s
121500K .......... .......... .......... .......... .......... 15% 37.8M 30s
121550K .......... .......... .......... .......... .......... 15% 71.2M 30s
121600K .......... .......... .......... .......... .......... 15%  119M 30s

126500K .......... .......... .......... .......... .......... 16%  806K 30s
126550K .......... .......... .......... .......... .......... 16% 31.0M 30s
126600K .......... .......... .......... .......... .......... 16% 58.7M 30s
126650K .......... .......... .......... .......... .......... 16% 58.0M 30s
126700K .......... .......... .......... .......... .......... 16% 58.5M 30s
126750K .......... .......... .......... .......... .......... 16% 52.3M 30s
126800K .......... .......... .......... .......... .......... 16% 54.1M 30s
126850K .......... .......... .......... .......... .......... 16% 52.8M 30s
126900K .......... .......... .......... .......... .......... 16% 30.5M 30s
126950K .......... .......... .......... .......... .......... 16% 27.5M 30s
127000K .......... .......... .......... .......... .......... 16% 36.7M 30s
127050K .......... .......... .......... .......... .......... 16% 76.5M 30s
127100K .......... .......... .......... .......... .......... 16% 54.5M 30s

133100K .......... .......... .......... .......... .......... 17% 33.0M 29s
133150K .......... .......... .......... .......... .......... 17% 47.0M 29s
133200K .......... .......... .......... .......... .......... 17% 44.4M 29s
133250K .......... .......... .......... .......... .......... 17% 61.6M 29s
133300K .......... .......... .......... .......... .......... 17%  104M 29s
133350K .......... .......... .......... .......... .......... 17% 64.5M 29s
133400K .......... .......... .......... .......... .......... 17% 68.2M 29s
133450K .......... .......... .......... .......... .......... 17% 20.2M 29s
133500K .......... .......... .......... .......... .......... 17%  129M 29s
133550K .......... .......... .......... .......... .......... 17% 27.8M 29s
133600K .......... .......... .......... .......... .......... 17% 54.4M 29s
133650K .......... .......... .......... .......... .......... 17% 45.0M 29s
133700K .......... .......... .......... .......... .......... 17% 59.8M 29s

138700K .......... .......... .......... .......... .......... 17%  813K 29s
138750K .......... .......... .......... .......... .......... 17% 87.6M 29s
138800K .......... .......... .......... .......... .......... 17% 56.1M 29s
138850K .......... .......... .......... .......... .......... 17% 68.8M 29s
138900K .......... .......... .......... .......... .......... 17% 54.3M 29s
138950K .......... .......... .......... .......... .......... 17% 56.0M 29s
139000K .......... .......... .......... .......... .......... 17% 55.0M 29s
139050K .......... .......... .......... .......... .......... 17% 51.6M 29s
139100K .......... .......... .......... .......... .......... 17%  117M 29s
139150K .......... .......... .......... .......... .......... 17% 16.1M 29s
139200K .......... .......... .......... .......... .......... 17% 54.9M 29s
139250K .......... .......... .......... .......... .......... 17% 63.7M 29s
139300K .......... .......... .......... .......... .......... 17%  116M 29s

145400K .......... .......... .......... .......... .......... 18% 30.6M 28s
145450K .......... .......... .......... .......... .......... 18% 55.8M 28s
145500K .......... .......... .......... .......... .......... 18%  103M 28s
145550K .......... .......... .......... .......... .......... 18% 43.7M 28s
145600K .......... .......... .......... .......... .......... 18% 66.0M 28s
145650K .......... .......... .......... .......... .......... 18% 57.3M 28s
145700K .......... .......... .......... .......... .......... 18% 31.2M 28s
145750K .......... .......... .......... .......... .......... 18% 32.9M 28s
145800K .......... .......... .......... .......... .......... 18%  118M 28s
145850K .......... .......... .......... .......... .......... 18% 32.6M 28s
145900K .......... .......... .......... .......... .......... 18% 46.6M 28s
145950K .......... .......... .......... .......... .......... 18% 42.1M 28s
146000K .......... .......... .......... .......... .......... 18%  115M 28s

157200K .......... .......... .......... .......... .......... 20% 49.7M 27s
157250K .......... .......... .......... .......... .......... 20% 61.8M 27s
157300K .......... .......... .......... .......... .......... 20%  102M 27s
157350K .......... .......... .......... .......... .......... 20% 53.0M 27s
157400K .......... .......... .......... .......... .......... 20% 26.7M 27s
157450K .......... .......... .......... .......... .......... 20% 34.4M 27s
157500K .......... .......... .......... .......... .......... 20% 43.0M 27s
157550K .......... .......... .......... .......... .......... 20%  109M 27s
157600K .......... .......... .......... .......... .......... 20% 44.3M 27s
157650K .......... .......... .......... .......... .......... 20% 49.9M 27s
157700K .......... .......... .......... .......... .......... 20% 65.5M 27s
157750K .......... .......... .......... .......... .......... 20%  124M 27s
157800K .......... .......... .......... .......... .......... 20% 47.2M 27s

169400K .......... .......... .......... .......... .......... 21% 18.9M 26s
169450K .......... .......... .......... .......... .......... 21% 66.0M 26s
169500K .......... .......... .......... .......... .......... 21% 60.6M 26s
169550K .......... .......... .......... .......... .......... 21% 46.3M 26s
169600K .......... .......... .......... .......... .......... 21%  116M 26s
169650K .......... .......... .......... .......... .......... 21% 92.4M 26s
169700K .......... .......... .......... .......... .......... 21% 59.5M 26s
169750K .......... .......... .......... .......... .......... 21% 46.4M 26s
169800K .......... .......... .......... .......... .......... 21%  119M 26s
169850K .......... .......... .......... .......... .......... 21% 52.3M 26s
169900K .......... .......... .......... .......... .......... 21% 19.8M 26s
169950K .......... .......... .......... .......... .......... 21% 57.6M 26s
170000K .......... .......... .......... .......... .......... 21% 58.8M 26s

181250K .......... .......... .......... .......... .......... 23% 34.8M 26s
181300K .......... .......... .......... .......... .......... 23% 56.4M 26s
181350K .......... .......... .......... .......... .......... 23% 70.6M 26s
181400K .......... .......... .......... .......... .......... 23%  118M 26s
181450K .......... .......... .......... .......... .......... 23% 72.8M 26s
181500K .......... .......... .......... .......... .......... 23% 55.9M 26s
181550K .......... .......... .......... .......... .......... 23% 48.4M 26s
181600K .......... .......... .......... .......... .......... 23% 36.0M 26s
181650K .......... .......... .......... .......... .......... 23%  123M 26s
181700K .......... .......... .......... .......... .......... 23% 35.1M 26s
181750K .......... .......... .......... .......... .......... 23% 33.0M 26s
181800K .......... .......... .......... .......... .......... 23% 58.0M 26s
181850K .......... .......... .......... .......... .......... 23% 66.1M 26s

193050K .......... .......... .......... .......... .......... 24% 33.7M 25s
193100K .......... .......... .......... .......... .......... 24% 81.1M 25s
193150K .......... .......... .......... .......... .......... 24%  151M 25s
193200K .......... .......... .......... .......... .......... 24% 40.6M 25s
193250K .......... .......... .......... .......... .......... 24% 67.9M 25s
193300K .......... .......... .......... .......... .......... 24% 67.3M 25s
193350K .......... .......... .......... .......... .......... 24% 63.8M 25s
193400K .......... .......... .......... .......... .......... 24% 77.0M 25s
193450K .......... .......... .......... .......... .......... 24% 40.4M 25s
193500K .......... .......... .......... .......... .......... 24% 32.9M 25s
193550K .......... .......... .......... .......... .......... 24% 35.7M 25s
193600K .......... .......... .......... .......... .......... 24%  132M 25s
193650K .......... .......... .......... .......... .......... 24% 98.1M 25s

204250K .......... .......... .......... .......... .......... 26% 67.9M 24s
204300K .......... .......... .......... .......... .......... 26% 69.9M 24s
204350K .......... .......... .......... .......... .......... 26% 57.8M 24s
204400K .......... .......... .......... .......... .......... 26%  116M 24s
204450K .......... .......... .......... .......... .......... 26% 85.5M 24s
204500K .......... .......... .......... .......... .......... 26% 30.4M 24s
204550K .......... .......... .......... .......... .......... 26% 27.8M 24s
204600K .......... .......... .......... .......... .......... 26% 47.2M 24s
204650K .......... .......... .......... .......... .......... 26%  107M 24s
204700K .......... .......... .......... .......... .......... 26% 62.4M 24s
204750K .......... .......... .......... .......... .......... 26% 76.5M 24s
204800K .......... .......... .......... .......... .......... 26% 75.6M 24s
204850K .......... .......... .......... .......... .......... 26% 55.5M 24s

214000K .......... .......... .......... .......... .......... 27%  576K 24s
214050K .......... .......... .......... .......... .......... 27% 53.0M 24s
214100K .......... .......... .......... .......... .......... 27% 71.6M 24s
214150K .......... .......... .......... .......... .......... 27% 70.4M 24s
214200K .......... .......... .......... .......... .......... 27%  105M 24s
214250K .......... .......... .......... .......... .......... 27% 73.0M 24s
214300K .......... .......... .......... .......... .......... 27% 61.8M 24s
214350K .......... .......... .......... .......... .......... 27% 66.9M 24s
214400K .......... .......... .......... .......... .......... 27% 69.3M 24s
214450K .......... .......... .......... .......... .......... 27%  113M 24s
214500K .......... .......... .......... .......... .......... 27% 65.9M 24s
214550K .......... .......... .......... .......... .......... 27% 17.5M 24s
214600K .......... .......... .......... .......... .......... 27% 70.5M 24s

220550K .......... .......... .......... .......... .......... 28% 65.4M 24s
220600K .......... .......... .......... .......... .......... 28% 58.8M 24s
220650K .......... .......... .......... .......... .......... 28%  117M 24s
220700K .......... .......... .......... .......... .......... 28% 79.5M 24s
220750K .......... .......... .......... .......... .......... 28% 60.9M 24s
220800K .......... .......... .......... .......... .......... 28% 86.9M 24s
220850K .......... .......... .......... .......... .......... 28%  588K 24s
220900K .......... .......... .......... .......... .......... 28% 80.3M 24s
220950K .......... .......... .......... .......... .......... 28% 71.7M 24s
221000K .......... .......... .......... .......... .......... 28% 68.7M 24s
221050K .......... .......... .......... .......... .......... 28%  120M 24s
221100K .......... .......... .......... .......... .......... 28% 63.1M 24s
221150K .......... .......... .......... .......... .......... 28% 73.1M 24s

232050K .......... .......... .......... .......... .......... 29% 35.6M 23s
232100K .......... .......... .......... .......... .......... 29% 40.2M 23s
232150K .......... .......... .......... .......... .......... 29%  118M 23s
232200K .......... .......... .......... .......... .......... 29% 60.1M 23s
232250K .......... .......... .......... .......... .......... 29% 70.7M 23s
232300K .......... .......... .......... .......... .......... 29%  103M 23s
232350K .......... .......... .......... .......... .......... 29% 57.2M 23s
232400K .......... .......... .......... .......... .......... 29%  119M 23s
232450K .......... .......... .......... .......... .......... 29% 59.4M 23s
232500K .......... .......... .......... .......... .......... 29%  606K 23s
232550K .......... .......... .......... .......... .......... 29%  104M 23s
232600K .......... .......... .......... .......... .......... 29% 75.4M 23s
232650K .......... .......... .......... .......... .......... 29% 94.4M 23s

241900K .......... .......... .......... .......... .......... 30%  480K 23s
241950K .......... .......... .......... .......... .......... 30%  100M 23s
242000K .......... .......... .......... .......... .......... 30% 77.2M 23s
242050K .......... .......... .......... .......... .......... 30% 76.6M 23s
242100K .......... .......... .......... .......... .......... 30% 96.9M 23s
242150K .......... .......... .......... .......... .......... 30% 63.9M 23s
242200K .......... .......... .......... .......... .......... 30%  102M 23s
242250K .......... .......... .......... .......... .......... 30% 65.3M 23s
242300K .......... .......... .......... .......... .......... 30% 83.6M 23s
242350K .......... .......... .......... .......... .......... 30% 70.7M 23s
242400K .......... .......... .......... .......... .......... 30% 70.9M 23s
242450K .......... .......... .......... .......... .......... 30% 21.4M 23s
242500K .......... .......... .......... .......... .......... 30% 80.9M 23s

247950K .......... .......... .......... .......... .......... 31%  515K 23s
248000K .......... .......... .......... .......... .......... 31% 86.9M 23s
248050K .......... .......... .......... .......... .......... 31% 85.1M 23s
248100K .......... .......... .......... .......... .......... 31% 93.4M 23s
248150K .......... .......... .......... .......... .......... 31% 82.7M 23s
248200K .......... .......... .......... .......... .......... 31% 65.8M 23s
248250K .......... .......... .......... .......... .......... 31% 95.6M 23s
248300K .......... .......... .......... .......... .......... 31% 67.2M 23s
248350K .......... .......... .......... .......... .......... 31% 69.2M 23s
248400K .......... .......... .......... .......... .......... 31% 72.4M 23s
248450K .......... .......... .......... .......... .......... 31%  118M 23s
248500K .......... .......... .......... .......... .......... 31% 71.7M 23s
248550K .......... .......... .......... .......... .......... 31% 19.5M 23s

253850K .......... .......... .......... .......... .......... 32%  518K 23s
253900K .......... .......... .......... .......... .......... 32% 75.7M 23s
253950K .......... .......... .......... .......... .......... 32% 92.5M 23s
254000K .......... .......... .......... .......... .......... 32% 85.2M 23s
254050K .......... .......... .......... .......... .......... 32% 84.6M 23s
254100K .......... .......... .......... .......... .......... 32% 85.3M 23s
254150K .......... .......... .......... .......... .......... 32% 66.2M 23s
254200K .......... .......... .......... .......... .......... 32% 75.7M 23s
254250K .......... .......... .......... .......... .......... 32% 78.0M 23s
254300K .......... .......... .......... .......... .......... 32%  118M 23s
254350K .......... .......... .......... .......... .......... 32% 66.1M 23s
254400K .......... .......... .......... .......... .......... 32% 27.8M 23s
254450K .......... .......... .......... .......... .......... 32% 35.4M 23s

262750K .......... .......... .......... .......... .......... 33%  856K 22s
262800K .......... .......... .......... .......... .......... 33% 1.15M 22s
262850K .......... .......... .......... .......... .......... 33% 94.5M 22s
262900K .......... .......... .......... .......... .......... 33% 86.7M 22s
262950K .......... .......... .......... .......... .......... 33% 84.8M 22s
263000K .......... .......... .......... .......... .......... 33% 71.0M 22s
263050K .......... .......... .......... .......... .......... 33%  152M 22s
263100K .......... .......... .......... .......... .......... 33% 65.8M 22s
263150K .......... .......... .......... .......... .......... 33% 72.3M 22s
263200K .......... .......... .......... .......... .......... 33% 79.1M 22s
263250K .......... .......... .......... .......... .......... 33% 73.8M 22s
263300K .......... .......... .......... .......... .......... 33%  130M 22s
263350K .......... .......... .......... .......... .......... 33% 78.0M 22s

272050K .......... .......... .......... .......... .......... 34%  507K 22s
272100K .......... .......... .......... .......... .......... 34% 96.0M 22s
272150K .......... .......... .......... .......... .......... 34%  130M 22s
272200K .......... .......... .......... .......... .......... 34% 53.8M 22s
272250K .......... .......... .......... .......... .......... 34%  103M 22s
272300K .......... .......... .......... .......... .......... 34% 94.0M 22s
272350K .......... .......... .......... .......... .......... 34%  117M 22s
272400K .......... .......... .......... .......... .......... 34% 91.8M 22s
272450K .......... .......... .......... .......... .......... 34% 78.0M 22s
272500K .......... .......... .......... .......... .......... 34% 76.0M 22s
272550K .......... .......... .......... .......... .......... 34%  117M 22s
272600K .......... .......... .......... .......... .......... 34% 70.6M 22s
272650K .......... .......... .......... .......... .......... 34% 28.6M 22s

277700K .......... .......... .......... .......... .......... 35%  421K 22s
277750K .......... .......... .......... .......... .......... 35% 82.6M 22s
277800K .......... .......... .......... .......... .......... 35% 93.0M 22s
277850K .......... .......... .......... .......... .......... 35%  121M 22s
277900K .......... .......... .......... .......... .......... 35% 67.6M 22s
277950K .......... .......... .......... .......... .......... 35% 80.6M 22s
278000K .......... .......... .......... .......... .......... 35% 83.2M 22s
278050K .......... .......... .......... .......... .......... 35%  120M 22s
278100K .......... .......... .......... .......... .......... 35% 71.6M 22s
278150K .......... .......... .......... .......... .......... 35% 79.1M 22s
278200K .......... .......... .......... .......... .......... 35% 77.6M 22s
278250K .......... .......... .......... .......... .......... 35% 76.6M 22s
278300K .......... .......... .......... .......... .......... 35% 26.1M 22s

285300K .......... .......... .......... .......... .......... 36% 7.65M 22s
285350K .......... .......... .......... .......... .......... 36% 87.7M 22s
285400K .......... .......... .......... .......... .......... 36% 95.5M 22s
285450K .......... .......... .......... .......... .......... 36% 80.3M 22s
285500K .......... .......... .......... .......... .......... 36% 98.8M 22s
285550K .......... .......... .......... .......... .......... 36% 3.21M 22s
285600K .......... .......... .......... .......... .......... 36% 93.7M 22s
285650K .......... .......... .......... .......... .......... 36% 79.0M 22s
285700K .......... .......... .......... .......... .......... 36% 76.5M 22s
285750K .......... .......... .......... .......... .......... 36% 83.8M 22s
285800K .......... .......... .......... .......... .......... 36% 84.4M 22s
285850K .......... .......... .......... .......... .......... 36% 95.4M 22s
285900K .......... .......... .......... .......... .......... 36% 81.7M 22s

293800K .......... .......... .......... .......... .......... 37% 1.89M 21s
293850K .......... .......... .......... .......... .......... 37%  603K 22s
293900K .......... .......... .......... .......... .......... 37% 84.4M 22s
293950K .......... .......... .......... .......... .......... 37% 93.0M 22s
294000K .......... .......... .......... .......... .......... 37% 88.3M 22s
294050K .......... .......... .......... .......... .......... 37% 88.7M 22s
294100K .......... .......... .......... .......... .......... 37% 84.9M 22s
294150K .......... .......... .......... .......... .......... 37% 92.3M 22s
294200K .......... .......... .......... .......... .......... 37% 83.6M 22s
294250K .......... .......... .......... .......... .......... 37% 11.0M 22s
294300K .......... .......... .......... .......... .......... 37% 36.4M 22s
294350K .......... .......... .......... .......... .......... 37% 88.6M 22s
294400K .......... .......... .......... .......... .......... 37% 94.9M 22s

300900K .......... .......... .......... .......... .......... 38%  773K 21s
300950K .......... .......... .......... .......... .......... 38% 1.19M 21s
301000K .......... .......... .......... .......... .......... 38% 70.2M 21s
301050K .......... .......... .......... .......... .......... 38% 30.2M 21s
301100K .......... .......... .......... .......... .......... 38% 83.2M 21s
301150K .......... .......... .......... .......... .......... 38% 96.6M 21s
301200K .......... .......... .......... .......... .......... 38% 86.2M 21s
301250K .......... .......... .......... .......... .......... 38% 95.7M 21s
301300K .......... .......... .......... .......... .......... 38% 89.1M 21s
301350K .......... .......... .......... .......... .......... 38% 70.5M 21s
301400K .......... .......... .......... .......... .......... 38% 12.2M 21s
301450K .......... .......... .......... .......... .......... 38%  109M 21s
301500K .......... .......... .......... .......... .......... 38% 31.3M 21s

308150K .......... .......... .......... .......... .......... 39% 2.01M 21s
308200K .......... .......... .......... .......... .......... 39%  610K 21s
308250K .......... .......... .......... .......... .......... 39% 91.9M 21s
308300K .......... .......... .......... .......... .......... 39% 40.1M 21s
308350K .......... .......... .......... .......... .......... 39% 99.3M 21s
308400K .......... .......... .......... .......... .......... 39% 93.2M 21s
308450K .......... .......... .......... .......... .......... 39% 93.6M 21s
308500K .......... .......... .......... .......... .......... 39% 92.6M 21s
308550K .......... .......... .......... .......... .......... 39% 84.5M 21s
308600K .......... .......... .......... .......... .......... 39% 88.1M 21s
308650K .......... .......... .......... .......... .......... 39% 12.4M 21s
308700K .......... .......... .......... .......... .......... 39% 54.8M 21s
308750K .......... .......... .......... .......... .......... 39% 29.5M 21s

315500K .......... .......... .......... .......... .......... 40% 1.89M 21s
315550K .......... .......... .......... .......... .......... 40% 1.27M 21s
315600K .......... .......... .......... .......... .......... 40% 1.17M 21s
315650K .......... .......... .......... .......... .......... 40%  102M 21s
315700K .......... .......... .......... .......... .......... 40% 48.5M 21s
315750K .......... .......... .......... .......... .......... 40% 37.9M 21s
315800K .......... .......... .......... .......... .......... 40%  123M 21s
315850K .......... .......... .......... .......... .......... 40% 98.4M 21s
315900K .......... .......... .......... .......... .......... 40% 78.5M 21s
315950K .......... .......... .......... .......... .......... 40% 94.3M 21s
316000K .......... .......... .......... .......... .......... 40% 82.5M 21s
316050K .......... .......... .......... .......... .......... 40% 12.9M 21s
316100K .......... .......... .......... .......... .......... 40% 63.6M 21s

323050K .......... .......... .......... .......... .......... 41% 5.43M 21s
323100K .......... .......... .......... .......... .......... 41%  627K 21s
323150K .......... .......... .......... .......... .......... 41% 40.1M 21s
323200K .......... .......... .......... .......... .......... 41%  115M 21s
323250K .......... .......... .......... .......... .......... 41% 44.4M 21s
323300K .......... .......... .......... .......... .......... 41% 91.9M 21s
323350K .......... .......... .......... .......... .......... 41% 83.6M 21s
323400K .......... .......... .......... .......... .......... 41%  115M 21s
323450K .......... .......... .......... .......... .......... 41% 86.8M 21s
323500K .......... .......... .......... .......... .......... 41% 83.4M 21s
323550K .......... .......... .......... .......... .......... 41% 13.4M 21s
323600K .......... .......... .......... .......... .......... 41% 44.8M 21s
323650K .......... .......... .......... .......... .......... 41% 33.3M 21s

330600K .......... .......... .......... .......... .......... 42% 1.76M 21s
330650K .......... .......... .......... .......... .......... 42%  653K 21s
330700K .......... .......... .......... .......... .......... 42% 36.4M 21s
330750K .......... .......... .......... .......... .......... 42% 49.4M 21s
330800K .......... .......... .......... .......... .......... 42% 53.4M 21s
330850K .......... .......... .......... .......... .......... 42%  102M 21s
330900K .......... .......... .......... .......... .......... 42% 86.2M 21s
330950K .......... .......... .......... .......... .......... 42% 93.7M 21s
331000K .......... .......... .......... .......... .......... 42% 97.8M 21s
331050K .......... .......... .......... .......... .......... 42% 96.6M 21s
331100K .......... .......... .......... .......... .......... 42% 15.3M 21s
331150K .......... .......... .......... .......... .......... 42% 21.7M 21s
331200K .......... .......... .......... .......... .......... 42% 45.0M 21s

338300K .......... .......... .......... .......... .......... 43% 3.22M 20s
338350K .......... .......... .......... .......... .......... 43%  673K 20s
338400K .......... .......... .......... .......... .......... 43% 89.1M 20s
338450K .......... .......... .......... .......... .......... 43%  115M 20s
338500K .......... .......... .......... .......... .......... 43% 84.0M 20s
338550K .......... .......... .......... .......... .......... 43% 86.1M 20s
338600K .......... .......... .......... .......... .......... 43% 98.2M 20s
338650K .......... .......... .......... .......... .......... 43%  111M 20s
338700K .......... .......... .......... .......... .......... 43% 76.2M 20s
338750K .......... .......... .......... .......... .......... 43% 96.3M 20s
338800K .......... .......... .......... .......... .......... 43% 10.1M 20s
338850K .......... .......... .......... .......... .......... 43% 91.9M 20s
338900K .......... .......... .......... .......... .......... 43% 27.8M 20s

346050K .......... .......... .......... .......... .......... 44% 5.40M 20s
346100K .......... .......... .......... .......... .......... 44% 1.27M 20s
346150K .......... .......... .......... .......... .......... 44% 1.15M 20s
346200K .......... .......... .......... .......... .......... 44%  113M 20s
346250K .......... .......... .......... .......... .......... 44% 48.4M 20s
346300K .......... .......... .......... .......... .......... 44% 43.1M 20s
346350K .......... .......... .......... .......... .......... 44%  120M 20s
346400K .......... .......... .......... .......... .......... 44% 91.0M 20s
346450K .......... .......... .......... .......... .......... 44% 93.0M 20s
346500K .......... .......... .......... .......... .......... 44% 90.6M 20s
346550K .......... .......... .......... .......... .......... 44% 16.7M 20s
346600K .......... .......... .......... .......... .......... 44% 26.4M 20s
346650K .......... .......... .......... .......... .......... 44% 16.8M 20s

353850K .......... .......... .......... .......... .......... 45% 2.29M 20s
353900K .......... .......... .......... .......... .......... 45% 3.62M 20s
353950K .......... .......... .......... .......... .......... 45%  712K 20s
354000K .......... .......... .......... .......... .......... 45%  117M 20s
354050K .......... .......... .......... .......... .......... 45% 77.2M 20s
354100K .......... .......... .......... .......... .......... 45% 95.1M 20s
354150K .......... .......... .......... .......... .......... 45% 60.1M 20s
354200K .......... .......... .......... .......... .......... 45%  104M 20s
354250K .......... .......... .......... .......... .......... 45%  102M 20s
354300K .......... .......... .......... .......... .......... 45% 85.0M 20s
354350K .......... .......... .......... .......... .......... 45% 92.7M 20s
354400K .......... .......... .......... .......... .......... 45% 13.3M 20s
354450K .......... .......... .......... .......... .......... 45% 30.0M 20s

361750K .......... .......... .......... .......... .......... 46% 6.60M 19s
361800K .......... .......... .......... .......... .......... 46% 3.95M 19s
361850K .......... .......... .......... .......... .......... 46%  702K 20s
361900K .......... .......... .......... .......... .......... 46%  107M 20s
361950K .......... .......... .......... .......... .......... 46% 98.0M 20s
362000K .......... .......... .......... .......... .......... 46% 98.3M 20s
362050K .......... .......... .......... .......... .......... 46% 85.2M 20s
362100K .......... .......... .......... .......... .......... 46%  101M 20s
362150K .......... .......... .......... .......... .......... 46% 94.8M 20s
362200K .......... .......... .......... .......... .......... 46%  101M 20s
362250K .......... .......... .......... .......... .......... 46% 37.4M 20s
362300K .......... .......... .......... .......... .......... 46% 15.4M 20s
362350K .......... .......... .......... .......... .......... 46% 22.8M 20s

369650K .......... .......... .......... .......... .......... 47% 2.22M 19s
369700K .......... .......... .......... .......... .......... 47%  106M 19s
369750K .......... .......... .......... .......... .......... 47% 3.66M 19s
369800K .......... .......... .......... .......... .......... 47%  712K 19s
369850K .......... .......... .......... .......... .......... 47%  113M 19s
369900K .......... .......... .......... .......... .......... 47% 94.8M 19s
369950K .......... .......... .......... .......... .......... 47% 95.9M 19s
370000K .......... .......... .......... .......... .......... 47% 51.8M 19s
370050K .......... .......... .......... .......... .......... 47%  108M 19s
370100K .......... .......... .......... .......... .......... 47%  101M 19s
370150K .......... .......... .......... .......... .......... 47%  104M 19s
370200K .......... .......... .......... .......... .......... 47%  106M 19s
370250K .......... .......... .......... .......... .......... 47% 12.5M 19s

377650K .......... .......... .......... .......... .......... 48% 2.37M 19s
377700K .......... .......... .......... .......... .......... 48% 5.77M 19s
377750K .......... .......... .......... .......... .......... 48%  676K 19s
377800K .......... .......... .......... .......... .......... 48% 9.43M 19s
377850K .......... .......... .......... .......... .......... 48%  127M 19s
377900K .......... .......... .......... .......... .......... 48% 32.8M 19s
377950K .......... .......... .......... .......... .......... 48%  105M 19s
378000K .......... .......... .......... .......... .......... 48%  122M 19s
378050K .......... .......... .......... .......... .......... 48% 95.9M 19s
378100K .......... .......... .......... .......... .......... 48% 89.5M 19s
378150K .......... .......... .......... .......... .......... 48% 91.4M 19s
378200K .......... .......... .......... .......... .......... 48% 99.0M 19s
378250K .......... .......... .......... .......... .......... 48% 92.2M 19s

385650K .......... .......... .......... .......... .......... 49% 5.61M 19s
385700K .......... .......... .......... .......... .......... 49% 4.20M 19s
385750K .......... .......... .......... .......... .......... 49%  712K 19s
385800K .......... .......... .......... .......... .......... 49% 44.5M 19s
385850K .......... .......... .......... .......... .......... 49% 37.5M 19s
385900K .......... .......... .......... .......... .......... 49%  105M 19s
385950K .......... .......... .......... .......... .......... 49% 90.4M 19s
386000K .......... .......... .......... .......... .......... 49% 20.5M 19s
386050K .......... .......... .......... .......... .......... 49%  116M 19s
386100K .......... .......... .......... .......... .......... 49% 92.8M 19s
386150K .......... .......... .......... .......... .......... 49%  114M 19s
386200K .......... .......... .......... .......... .......... 49% 16.8M 19s
386250K .......... .......... .......... .......... .......... 49% 46.7M 19s

393650K .......... .......... .......... .......... .......... 50% 2.45M 18s
393700K .......... .......... .......... .......... .......... 50% 5.65M 18s
393750K .......... .......... .......... .......... .......... 50% 1.54M 18s
393800K .......... .......... .......... .......... .......... 50% 1.15M 18s
393850K .......... .......... .......... .......... .......... 50% 20.7M 18s
393900K .......... .......... .......... .......... .......... 50% 57.0M 18s
393950K .......... .......... .......... .......... .......... 50%  103M 18s
394000K .......... .......... .......... .......... .......... 50% 33.0M 18s
394050K .......... .......... .......... .......... .......... 50%  117M 18s
394100K .......... .......... .......... .......... .......... 50% 95.7M 18s
394150K .......... .......... .......... .......... .......... 50%  101M 18s
394200K .......... .......... .......... .......... .......... 50% 28.6M 18s
394250K .......... .......... .......... .......... .......... 50% 36.2M 18s

401700K .......... .......... .......... .......... .......... 51% 2.27M 18s
401750K .......... .......... .......... .......... .......... 51% 4.57M 18s
401800K .......... .......... .......... .......... .......... 51%  733K 18s
401850K .......... .......... .......... .......... .......... 51% 28.9M 18s
401900K .......... .......... .......... .......... .......... 51% 17.4M 18s
401950K .......... .......... .......... .......... .......... 51% 35.1M 18s
402000K .......... .......... .......... .......... .......... 51%  127M 18s
402050K .......... .......... .......... .......... .......... 51% 20.9M 18s
402100K .......... .......... .......... .......... .......... 51% 95.5M 18s
402150K .......... .......... .......... .......... .......... 51% 86.9M 18s
402200K .......... .......... .......... .......... .......... 51%  125M 18s
402250K .......... .......... .......... .......... .......... 51% 94.4M 18s
402300K .......... .......... .......... .......... .......... 51% 15.5M 18s

409750K .......... .......... .......... .......... .......... 52% 2.50M 18s
409800K .......... .......... .......... .......... .......... 52% 4.02M 18s
409850K .......... .......... .......... .......... .......... 52%  721K 18s
409900K .......... .......... .......... .......... .......... 52% 20.2M 18s
409950K .......... .......... .......... .......... .......... 52% 19.7M 18s
410000K .......... .......... .......... .......... .......... 52% 27.6M 18s
410050K .......... .......... .......... .......... .......... 52%  121M 18s
410100K .......... .......... .......... .......... .......... 52% 13.4M 18s
410150K .......... .......... .......... .......... .......... 52% 17.4M 18s
410200K .......... .......... .......... .......... .......... 52% 21.7M 18s
410250K .......... .......... .......... .......... .......... 52% 23.9M 18s
410300K .......... .......... .......... .......... .......... 52% 21.0M 18s
410350K .......... .......... .......... .......... .......... 52% 38.7M 18s

417850K .......... .......... .......... .......... .......... 53% 2.71M 17s
417900K .......... .......... .......... .......... .......... 53%  619K 17s
417950K .......... .......... .......... .......... .......... 53% 25.5M 17s
418000K .......... .......... .......... .......... .......... 53% 29.9M 17s
418050K .......... .......... .......... .......... .......... 53% 22.0M 17s
418100K .......... .......... .......... .......... .......... 53% 86.7M 17s
418150K .......... .......... .......... .......... .......... 53% 10.2M 17s
418200K .......... .......... .......... .......... .......... 53%  117M 17s
418250K .......... .......... .......... .......... .......... 53% 89.4M 17s
418300K .......... .......... .......... .......... .......... 53%  113M 17s
418350K .......... .......... .......... .......... .......... 53%  103M 17s
418400K .......... .......... .......... .......... .......... 53% 99.4M 17s
418450K .......... .......... .......... .......... .......... 53%  110M 17s

425900K .......... .......... .......... .......... .......... 54% 2.60M 17s
425950K .......... .......... .......... .......... .......... 54% 4.55M 17s
426000K .......... .......... .......... .......... .......... 54%  708K 17s
426050K .......... .......... .......... .......... .......... 54% 29.4M 17s
426100K .......... .......... .......... .......... .......... 54% 14.1M 17s
426150K .......... .......... .......... .......... .......... 54% 45.6M 17s
426200K .......... .......... .......... .......... .......... 54% 14.1M 17s
426250K .......... .......... .......... .......... .......... 54%  105M 17s
426300K .......... .......... .......... .......... .......... 54% 30.5M 17s
426350K .......... .......... .......... .......... .......... 54%  113M 17s
426400K .......... .......... .......... .......... .......... 54%  122M 17s
426450K .......... .......... .......... .......... .......... 54% 90.3M 17s
426500K .......... .......... .......... .......... .......... 54%  103M 17s

434000K .......... .......... .......... .......... .......... 55% 5.50M 17s
434050K .......... .......... .......... .......... .......... 55% 4.84M 17s
434100K .......... .......... .......... .......... .......... 55%  709K 17s
434150K .......... .......... .......... .......... .......... 55% 19.6M 17s
434200K .......... .......... .......... .......... .......... 55% 23.1M 17s
434250K .......... .......... .......... .......... .......... 55% 49.2M 17s
434300K .......... .......... .......... .......... .......... 55% 11.4M 17s
434350K .......... .......... .......... .......... .......... 55% 36.5M 17s
434400K .......... .......... .......... .......... .......... 55% 71.7M 17s
434450K .......... .......... .......... .......... .......... 55% 92.4M 17s
434500K .......... .......... .......... .......... .......... 55%  113M 17s
434550K .......... .......... .......... .......... .......... 55% 97.6M 17s
434600K .......... .......... .......... .......... .......... 55%  101M 17s

442100K .......... .......... .......... .......... .......... 56% 13.6M 16s
442150K .......... .......... .......... .......... .......... 56% 4.90M 16s
442200K .......... .......... .......... .......... .......... 56% 1.65M 16s
442250K .......... .......... .......... .......... .......... 56% 1.07M 16s
442300K .......... .......... .......... .......... .......... 56% 20.8M 16s
442350K .......... .......... .......... .......... .......... 56%  116M 16s
442400K .......... .......... .......... .......... .......... 56% 14.2M 16s
442450K .......... .......... .......... .......... .......... 56% 42.9M 16s
442500K .......... .......... .......... .......... .......... 56%  112M 16s
442550K .......... .......... .......... .......... .......... 56% 95.5M 16s
442600K .......... .......... .......... .......... .......... 56%  118M 16s
442650K .......... .......... .......... .......... .......... 56%  122M 16s
442700K .......... .......... .......... .......... .......... 56% 13.9M 16s

450150K .......... .......... .......... .......... .......... 57% 2.70M 16s
450200K .......... .......... .......... .......... .......... 57% 7.15M 16s
450250K .......... .......... .......... .......... .......... 57% 11.9M 16s
450300K .......... .......... .......... .......... .......... 57%  697K 16s
450350K .......... .......... .......... .......... .......... 57% 18.2M 16s
450400K .......... .......... .......... .......... .......... 57% 24.2M 16s
450450K .......... .......... .......... .......... .......... 57% 16.7M 16s
450500K .......... .......... .......... .......... .......... 57% 25.1M 16s
450550K .......... .......... .......... .......... .......... 57% 56.2M 16s
450600K .......... .......... .......... .......... .......... 57%  120M 16s
450650K .......... .......... .......... .......... .......... 57% 92.7M 16s
450700K .......... .......... .......... .......... .......... 57% 22.4M 16s
450750K .......... .......... .......... .......... .......... 57% 46.2M 16s

458250K .......... .......... .......... .......... .......... 58% 14.5M 15s
458300K .......... .......... .......... .......... .......... 58% 4.22M 15s
458350K .......... .......... .......... .......... .......... 58% 1.77M 15s
458400K .......... .......... .......... .......... .......... 58% 1.10M 16s
458450K .......... .......... .......... .......... .......... 58% 14.8M 16s
458500K .......... .......... .......... .......... .......... 58% 14.3M 16s
458550K .......... .......... .......... .......... .......... 58% 23.5M 16s
458600K .......... .......... .......... .......... .......... 58%  120M 15s
458650K .......... .......... .......... .......... .......... 58%  113M 15s
458700K .......... .......... .......... .......... .......... 58% 92.8M 15s
458750K .......... .......... .......... .......... .......... 58% 16.1M 15s
458800K .......... .......... .......... .......... .......... 58% 31.6M 15s
458850K .......... .......... .......... .......... .......... 58%  115M 15s

466300K .......... .......... .......... .......... .......... 59% 3.28M 15s
466350K .......... .......... .......... .......... .......... 59% 6.92M 15s
466400K .......... .......... .......... .......... .......... 59% 5.74M 15s
466450K .......... .......... .......... .......... .......... 59%  769K 15s
466500K .......... .......... .......... .......... .......... 59% 13.2M 15s
466550K .......... .......... .......... .......... .......... 59% 14.5M 15s
466600K .......... .......... .......... .......... .......... 59% 16.8M 15s
466650K .......... .......... .......... .......... .......... 59% 21.9M 15s
466700K .......... .......... .......... .......... .......... 59% 91.4M 15s
466750K .......... .......... .......... .......... .......... 59%  119M 15s
466800K .......... .......... .......... .......... .......... 59%  117M 15s
466850K .......... .......... .......... .......... .......... 59% 13.0M 15s
466900K .......... .......... .......... .......... .......... 59% 29.7M 15s

474350K .......... .......... .......... .......... .......... 60% 2.57M 15s
474400K .......... .......... .......... .......... .......... 60%  125M 15s
474450K .......... .......... .......... .......... .......... 60% 16.0M 15s
474500K .......... .......... .......... .......... .......... 60% 1.77M 15s
474550K .......... .......... .......... .......... .......... 60% 1.25M 15s
474600K .......... .......... .......... .......... .......... 60% 23.6M 15s
474650K .......... .......... .......... .......... .......... 60% 13.7M 15s
474700K .......... .......... .......... .......... .......... 60% 31.7M 15s
474750K .......... .......... .......... .......... .......... 60% 68.5M 15s
474800K .......... .......... .......... .......... .......... 60% 54.0M 15s
474850K .......... .......... .......... .......... .......... 60% 13.5M 15s
474900K .......... .......... .......... .......... .......... 60% 20.3M 15s
474950K .......... .......... .......... .......... .......... 60%  126M 15s

482450K .......... .......... .......... .......... .......... 61% 12.3M 14s
482500K .......... .......... .......... .......... .......... 61% 6.09M 14s
482550K .......... .......... .......... .......... .......... 61% 10.1M 14s
482600K .......... .......... .......... .......... .......... 61% 3.15M 14s
482650K .......... .......... .......... .......... .......... 61%  907K 14s
482700K .......... .......... .......... .......... .......... 61% 7.68M 14s
482750K .......... .......... .......... .......... .......... 61% 21.7M 14s
482800K .......... .......... .......... .......... .......... 61%  102M 14s
482850K .......... .......... .......... .......... .......... 61% 64.2M 14s
482900K .......... .......... .......... .......... .......... 61%  105M 14s
482950K .......... .......... .......... .......... .......... 61% 13.7M 14s
483000K .......... .......... .......... .......... .......... 61% 66.3M 14s
483050K .......... .......... .......... .......... .......... 61% 27.7M 14s

490550K .......... .......... .......... .......... .......... 62% 10.1M 14s
490600K .......... .......... .......... .......... .......... 62% 83.8M 14s
490650K .......... .......... .......... .......... .......... 62% 4.27M 14s
490700K .......... .......... .......... .......... .......... 62% 1.83M 14s
490750K .......... .......... .......... .......... .......... 62% 1.05M 14s
490800K .......... .......... .......... .......... .......... 62% 11.1M 14s
490850K .......... .......... .......... .......... .......... 62% 31.3M 14s
490900K .......... .......... .......... .......... .......... 62% 56.4M 14s
490950K .......... .......... .......... .......... .......... 62% 98.8M 14s
491000K .......... .......... .......... .......... .......... 62%  115M 14s
491050K .......... .......... .......... .......... .......... 62%  124M 14s
491100K .......... .......... .......... .......... .......... 62% 12.2M 14s
491150K .......... .......... .......... .......... .......... 62% 26.8M 14s

498650K .......... .......... .......... .......... .......... 63% 5.45M 14s
498700K .......... .......... .......... .......... .......... 63% 4.51M 14s
498750K .......... .......... .......... .......... .......... 63% 7.26M 14s
498800K .......... .......... .......... .......... .......... 63% 4.40M 14s
498850K .......... .......... .......... .......... .......... 63%  953K 14s
498900K .......... .......... .......... .......... .......... 63% 4.89M 14s
498950K .......... .......... .......... .......... .......... 63% 68.4M 14s
499000K .......... .......... .......... .......... .......... 63% 27.4M 14s
499050K .......... .......... .......... .......... .......... 63% 12.7M 14s
499100K .......... .......... .......... .......... .......... 63%  100M 14s
499150K .......... .......... .......... .......... .......... 63% 99.9M 14s
499200K .......... .......... .......... .......... .......... 63%  121M 14s
499250K .......... .......... .......... .......... .......... 63% 30.3M 14s

506700K .......... .......... .......... .......... .......... 64% 1.82M 13s
506750K .......... .......... .......... .......... .......... 64%  105M 13s
506800K .......... .......... .......... .......... .......... 64%  121M 13s
506850K .......... .......... .......... .......... .......... 64% 42.6M 13s
506900K .......... .......... .......... .......... .......... 64% 4.09M 13s
506950K .......... .......... .......... .......... .......... 64%  955K 13s
507000K .......... .......... .......... .......... .......... 64% 5.43M 13s
507050K .......... .......... .......... .......... .......... 64% 23.2M 13s
507100K .......... .......... .......... .......... .......... 64% 83.7M 13s
507150K .......... .......... .......... .......... .......... 64% 10.1M 13s
507200K .......... .......... .......... .......... .......... 64% 93.6M 13s
507250K .......... .......... .......... .......... .......... 64%  121M 13s
507300K .......... .......... .......... .......... .......... 64% 86.7M 13s

514750K .......... .......... .......... .......... .......... 65% 1.67M 13s
514800K .......... .......... .......... .......... .......... 65%  114M 13s
514850K .......... .......... .......... .......... .......... 65%  116M 13s
514900K .......... .......... .......... .......... .......... 65%  109M 13s
514950K .......... .......... .......... .......... .......... 65%  128M 13s
515000K .......... .......... .......... .......... .......... 65%  104M 13s
515050K .......... .......... .......... .......... .......... 65% 5.12M 13s
515100K .......... .......... .......... .......... .......... 65%  947K 13s
515150K .......... .......... .......... .......... .......... 65% 4.72M 13s
515200K .......... .......... .......... .......... .......... 65% 41.5M 13s
515250K .......... .......... .......... .......... .......... 65%  115M 13s
515300K .......... .......... .......... .......... .......... 65% 9.01M 13s
515350K .......... .......... .......... .......... .......... 65%  100M 13s

522650K .......... .......... .......... .......... .......... 66% 59.0M 13s
522700K .......... .......... .......... .......... .......... 66% 55.0M 13s
522750K .......... .......... .......... .......... .......... 66% 75.9M 13s
522800K .......... .......... .......... .......... .......... 66%  116M 13s
522850K .......... .......... .......... .......... .......... 66% 3.40M 13s
522900K .......... .......... .......... .......... .......... 66%  111M 13s
522950K .......... .......... .......... .......... .......... 66% 81.7M 13s
523000K .......... .......... .......... .......... .......... 66%  134M 13s
523050K .......... .......... .......... .......... .......... 66% 91.4M 13s
523100K .......... .......... .......... .......... .......... 66% 86.6M 13s
523150K .......... .......... .......... .......... .......... 66% 15.0M 13s
523200K .......... .......... .......... .......... .......... 66% 2.17M 13s
523250K .......... .......... .......... .......... .......... 66% 1.27M 13s

529700K .......... .......... .......... .......... .......... 67% 99.9M 12s
529750K .......... .......... .......... .......... .......... 67%  122M 12s
529800K .......... .......... .......... .......... .......... 67%  117M 12s
529850K .......... .......... .......... .......... .......... 67%  108M 12s
529900K .......... .......... .......... .......... .......... 67% 92.6M 12s
529950K .......... .......... .......... .......... .......... 67%  117M 12s
530000K .......... .......... .......... .......... .......... 67%  112M 12s
530050K .......... .......... .......... .......... .......... 67%  106M 12s
530100K .......... .......... .......... .......... .......... 67%  110M 12s
530150K .......... .......... .......... .......... .......... 67% 97.3M 12s
530200K .......... .......... .......... .......... .......... 67%  111M 12s
530250K .......... .......... .......... .......... .......... 67%  109M 12s
530300K .......... .......... .......... .......... .......... 67% 33.1M 12s

536950K .......... .......... .......... .......... .......... 68% 1.24M 12s
537000K .......... .......... .......... .......... .......... 68%  118M 12s
537050K .......... .......... .......... .......... .......... 68%  118M 12s
537100K .......... .......... .......... .......... .......... 68% 95.0M 12s
537150K .......... .......... .......... .......... .......... 68%  124M 12s
537200K .......... .......... .......... .......... .......... 68% 93.6M 12s
537250K .......... .......... .......... .......... .......... 68%  120M 12s
537300K .......... .......... .......... .......... .......... 68% 29.9M 12s
537350K .......... .......... .......... .......... .......... 68% 80.1M 12s
537400K .......... .......... .......... .......... .......... 68% 43.2M 12s
537450K .......... .......... .......... .......... .......... 68% 4.27M 12s
537500K .......... .......... .......... .......... .......... 68% 66.8M 12s
537550K .......... .......... .......... .......... .......... 68%  117M 12s

545150K .......... .......... .......... .......... .......... 69% 1.29M 12s
545200K .......... .......... .......... .......... .......... 69% 99.5M 12s
545250K .......... .......... .......... .......... .......... 69%  111M 12s
545300K .......... .......... .......... .......... .......... 69% 84.6M 12s
545350K .......... .......... .......... .......... .......... 69% 93.9M 12s
545400K .......... .......... .......... .......... .......... 69%  124M 12s
545450K .......... .......... .......... .......... .......... 69%  108M 12s
545500K .......... .......... .......... .......... .......... 69% 3.15M 12s
545550K .......... .......... .......... .......... .......... 69%  118M 12s
545600K .......... .......... .......... .......... .......... 69%  120M 12s
545650K .......... .......... .......... .......... .......... 69%  123M 12s
545700K .......... .......... .......... .......... .......... 69% 95.9M 12s
545750K .......... .......... .......... .......... .......... 69%  119M 12s

553450K .......... .......... .......... .......... .......... 70%  949K 11s
553500K .......... .......... .......... .......... .......... 70% 36.8M 11s
553550K .......... .......... .......... .......... .......... 70%  103M 11s
553600K .......... .......... .......... .......... .......... 70% 73.2M 11s
553650K .......... .......... .......... .......... .......... 70% 66.6M 11s
553700K .......... .......... .......... .......... .......... 70%  102M 11s
553750K .......... .......... .......... .......... .......... 70% 67.7M 11s
553800K .......... .......... .......... .......... .......... 70% 3.22M 11s
553850K .......... .......... .......... .......... .......... 70% 85.4M 11s
553900K .......... .......... .......... .......... .......... 70% 73.3M 11s
553950K .......... .......... .......... .......... .......... 70% 67.5M 11s
554000K .......... .......... .......... .......... .......... 70% 69.3M 11s
554050K .......... .......... .......... .......... .......... 70%  117M 11s

561700K .......... .......... .......... .......... .......... 71%  958K 11s
561750K .......... .......... .......... .......... .......... 71% 94.9M 11s
561800K .......... .......... .......... .......... .......... 71% 77.2M 11s
561850K .......... .......... .......... .......... .......... 71% 89.6M 11s
561900K .......... .......... .......... .......... .......... 71% 71.7M 11s
561950K .......... .......... .......... .......... .......... 71% 83.4M 11s
562000K .......... .......... .......... .......... .......... 71% 82.5M 11s
562050K .......... .......... .......... .......... .......... 71% 3.22M 11s
562100K .......... .......... .......... .......... .......... 71% 93.0M 11s
562150K .......... .......... .......... .......... .......... 71% 47.7M 11s
562200K .......... .......... .......... .......... .......... 71% 98.1M 11s
562250K .......... .......... .......... .......... .......... 71% 82.1M 11s
562300K .......... .......... .......... .......... .......... 71% 56.5M 11s

570100K .......... .......... .......... .......... .......... 72% 1.10M 10s
570150K .......... .......... .......... .......... .......... 72% 3.06M 10s
570200K .......... .......... .......... .......... .......... 72% 94.3M 10s
570250K .......... .......... .......... .......... .......... 72% 59.5M 10s
570300K .......... .......... .......... .......... .......... 72% 53.7M 10s
570350K .......... .......... .......... .......... .......... 72% 51.3M 10s
570400K .......... .......... .......... .......... .......... 72% 59.0M 10s
570450K .......... .......... .......... .......... .......... 72% 3.90M 10s
570500K .......... .......... .......... .......... .......... 72% 58.7M 10s
570550K .......... .......... .......... .......... .......... 72% 51.2M 10s
570600K .......... .......... .......... .......... .......... 72% 67.0M 10s
570650K .......... .......... .......... .......... .......... 72% 60.5M 10s
570700K .......... .......... .......... .......... .......... 72% 61.4M 10s

578650K .......... .......... .......... .......... .......... 73% 1.24M 10s
578700K .......... .......... .......... .......... .......... 73% 56.8M 10s
578750K .......... .......... .......... .......... .......... 73% 59.6M 10s
578800K .......... .......... .......... .......... .......... 73% 73.4M 10s
578850K .......... .......... .......... .......... .......... 73% 56.9M 10s
578900K .......... .......... .......... .......... .......... 73% 52.3M 10s
578950K .......... .......... .......... .......... .......... 73% 3.73M 10s
579000K .......... .......... .......... .......... .......... 73% 28.7M 10s
579050K .......... .......... .......... .......... .......... 73% 55.9M 10s
579100K .......... .......... .......... .......... .......... 73% 49.6M 10s
579150K .......... .......... .......... .......... .......... 73% 11.3M 10s
579200K .......... .......... .......... .......... .......... 74% 29.3M 10s
579250K .......... .......... .......... .......... .......... 74% 56.8M 10s

585900K .......... .......... .......... .......... .......... 74%  388K 10s
585950K .......... .......... .......... .......... .......... 74% 72.9M 10s
586000K .......... .......... .......... .......... .......... 74% 74.7M 10s
586050K .......... .......... .......... .......... .......... 74% 52.4M 10s
586100K .......... .......... .......... .......... .......... 74% 52.8M 10s
586150K .......... .......... .......... .......... .......... 74% 53.4M 10s
586200K .......... .......... .......... .......... .......... 74% 48.9M 10s
586250K .......... .......... .......... .......... .......... 74% 15.1M 10s
586300K .......... .......... .......... .......... .......... 74% 1.98M 10s
586350K .......... .......... .......... .......... .......... 74% 61.8M 10s
586400K .......... .......... .......... .......... .......... 74% 58.8M 10s
586450K .......... .......... .......... .......... .......... 74% 53.5M 10s
586500K .......... .......... .......... .......... .......... 74%  115M 10s

593200K .......... .......... .......... .......... .......... 75%  495K 9s
593250K .......... .......... .......... .......... .......... 75%  111M 9s
593300K .......... .......... .......... .......... .......... 75% 53.2M 9s
593350K .......... .......... .......... .......... .......... 75% 67.8M 9s
593400K .......... .......... .......... .......... .......... 75% 56.2M 9s
593450K .......... .......... .......... .......... .......... 75% 52.9M 9s
593500K .......... .......... .......... .......... .......... 75% 56.0M 9s
593550K .......... .......... .......... .......... .......... 75% 52.6M 9s
593600K .......... .......... .......... .......... .......... 75%  105M 9s
593650K .......... .......... .......... .......... .......... 75% 16.8M 9s
593700K .......... .......... .......... .......... .......... 75% 43.6M 9s
593750K .......... .......... .......... .......... .......... 75% 65.1M 9s
593800K .......... .......... .......... .......... .......... 75%  100M 9s
593850K ....

601850K .......... .......... .......... .......... .......... 76%  516K 9s
601900K .......... .......... .......... .......... .......... 76% 69.1M 9s
601950K .......... .......... .......... .......... .......... 76% 72.5M 9s
602000K .......... .......... .......... .......... .......... 76% 55.7M 9s
602050K .......... .......... .......... .......... .......... 76% 57.1M 9s
602100K .......... .......... .......... .......... .......... 76%  123M 9s
602150K .......... .......... .......... .......... .......... 76% 51.8M 9s
602200K .......... .......... .......... .......... .......... 76% 61.5M 9s
602250K .......... .......... .......... .......... .......... 76% 49.3M 9s
602300K .......... .......... .......... .......... .......... 76% 16.9M 9s
602350K .......... .......... .......... .......... .......... 76% 54.6M 9s
602400K .......... .......... .......... .......... .......... 76%  109M 9s
602450K .......... .......... .......... .......... .......... 76% 55.1M 9s
602500K ....

610900K .......... .......... .......... .......... .......... 78% 1.09M 8s
610950K .......... .......... .......... .......... .......... 78%  947K 8s
611000K .......... .......... .......... .......... .......... 78% 82.1M 8s
611050K .......... .......... .......... .......... .......... 78% 62.5M 8s
611100K .......... .......... .......... .......... .......... 78% 58.4M 8s
611150K .......... .......... .......... .......... .......... 78% 78.9M 8s
611200K .......... .......... .......... .......... .......... 78% 63.6M 8s
611250K .......... .......... .......... .......... .......... 78% 53.5M 8s
611300K .......... .......... .......... .......... .......... 78% 56.7M 8s
611350K .......... .......... .......... .......... .......... 78% 59.7M 8s
611400K .......... .......... .......... .......... .......... 78% 22.0M 8s
611450K .......... .......... .......... .......... .......... 78% 39.7M 8s
611500K .......... .......... .......... .......... .......... 78% 62.1M 8s
611550K ....

617950K .......... .......... .......... .......... .......... 78% 1.14M 8s
618000K .......... .......... .......... .......... .......... 78%  925K 8s
618050K .......... .......... .......... .......... .......... 78% 84.7M 8s
618100K .......... .......... .......... .......... .......... 78% 60.8M 8s
618150K .......... .......... .......... .......... .......... 78% 76.4M 8s
618200K .......... .......... .......... .......... .......... 78% 59.0M 8s
618250K .......... .......... .......... .......... .......... 78% 59.4M 8s
618300K .......... .......... .......... .......... .......... 78%  108M 8s
618350K .......... .......... .......... .......... .......... 79% 57.1M 8s
618400K .......... .......... .......... .......... .......... 79% 58.9M 8s
618450K .......... .......... .......... .......... .......... 79% 16.8M 8s
618500K .......... .......... .......... .......... .......... 79% 53.1M 8s
618550K .......... .......... .......... .......... .......... 79% 54.2M 8s
618600K ....

625250K .......... .......... .......... .......... .......... 79%  505K 8s
625300K .......... .......... .......... .......... .......... 79%  850K 8s
625350K .......... .......... .......... .......... .......... 79%  154M 8s
625400K .......... .......... .......... .......... .......... 79%  143M 8s
625450K .......... .......... .......... .......... .......... 79%  157M 8s
625500K .......... .......... .......... .......... .......... 79%  139M 8s
625550K .......... .......... .......... .......... .......... 79%  154M 8s
625600K .......... .......... .......... .......... .......... 79%  170M 8s
625650K .......... .......... .......... .......... .......... 79%  146M 8s
625700K .......... .......... .......... .......... .......... 79%  145M 8s
625750K .......... .......... .......... .......... .......... 79%  162M 8s
625800K .......... .......... .......... .......... .......... 79%  145M 8s
625850K .......... .......... .......... .......... .......... 79%  155M 8s
625900K ....

632650K .......... .......... .......... .......... .......... 80% 1.08M 7s
632700K .......... .......... .......... .......... .......... 80%  915K 7s
632750K .......... .......... .......... .......... .......... 80% 81.7M 7s
632800K .......... .......... .......... .......... .......... 80% 64.8M 7s
632850K .......... .......... .......... .......... .......... 80%  113M 7s
632900K .......... .......... .......... .......... .......... 80% 60.6M 7s
632950K .......... .......... .......... .......... .......... 80% 59.5M 7s
633000K .......... .......... .......... .......... .......... 80% 60.3M 7s
633050K .......... .......... .......... .......... .......... 80% 62.2M 7s
633100K .......... .......... .......... .......... .......... 80% 90.7M 7s
633150K .......... .......... .......... .......... .......... 80% 61.9M 7s
633200K .......... .......... .......... .......... .......... 80% 17.2M 7s
633250K .......... .......... .......... .......... .......... 80% 75.9M 7s
633300K ....

640250K .......... .......... .......... .......... .......... 81%  574K 7s
640300K .......... .......... .......... .......... .......... 81% 29.2M 7s
640350K .......... .......... .......... .......... .......... 81% 68.9M 7s
640400K .......... .......... .......... .......... .......... 81% 75.8M 7s
640450K .......... .......... .......... .......... .......... 81% 71.2M 7s
640500K .......... .......... .......... .......... .......... 81% 76.4M 7s
640550K .......... .......... .......... .......... .......... 81% 72.7M 7s
640600K .......... .......... .......... .......... .......... 81% 61.6M 7s
640650K .......... .......... .......... .......... .......... 81% 74.1M 7s
640700K .......... .......... .......... .......... .......... 81% 59.1M 7s
640750K .......... .......... .......... .......... .......... 81% 27.5M 7s
640800K .......... .......... .......... .......... .......... 81% 36.1M 7s
640850K .......... .......... .......... .......... .......... 81% 45.1M 7s
640900K ....

647950K .......... .......... .......... .......... .......... 82%  645K 7s
648000K .......... .......... .......... .......... .......... 82% 3.79M 7s
648050K .......... .......... .......... .......... .......... 82% 77.3M 7s
648100K .......... .......... .......... .......... .......... 82% 62.2M 7s
648150K .......... .......... .......... .......... .......... 82% 83.6M 7s
648200K .......... .......... .......... .......... .......... 82% 88.2M 7s
648250K .......... .......... .......... .......... .......... 82% 63.8M 7s
648300K .......... .......... .......... .......... .......... 82% 64.5M 7s
648350K .......... .......... .......... .......... .......... 82%  117M 7s
648400K .......... .......... .......... .......... .......... 82% 63.8M 7s
648450K .......... .......... .......... .......... .......... 82% 59.9M 7s
648500K .......... .......... .......... .......... .......... 82% 29.1M 7s
648550K .......... .......... .......... .......... .......... 82% 24.8M 7s
648600K ....

655850K .......... .......... .......... .......... .......... 83%  630K 6s
655900K .......... .......... .......... .......... .......... 83% 25.9M 6s
655950K .......... .......... .......... .......... .......... 83% 54.1M 6s
656000K .......... .......... .......... .......... .......... 83% 54.2M 6s
656050K .......... .......... .......... .......... .......... 83% 70.0M 6s
656100K .......... .......... .......... .......... .......... 83% 48.8M 6s
656150K .......... .......... .......... .......... .......... 83% 52.8M 6s
656200K .......... .......... .......... .......... .......... 83% 52.0M 6s
656250K .......... .......... .......... .......... .......... 83% 13.8M 6s
656300K .......... .......... .......... .......... .......... 83% 49.8M 6s
656350K .......... .......... .......... .......... .......... 83% 55.0M 6s
656400K .......... .......... .......... .......... .......... 83% 75.7M 6s
656450K .......... .......... .......... .......... .......... 83% 51.7M 6s
656500K ....

663850K .......... .......... .......... .......... .......... 84%  911K 6s
663900K .......... .......... .......... .......... .......... 84% 53.5M 6s
663950K .......... .......... .......... .......... .......... 84% 68.3M 6s
664000K .......... .......... .......... .......... .......... 84% 60.7M 6s
664050K .......... .......... .......... .......... .......... 84% 63.1M 6s
664100K .......... .......... .......... .......... .......... 84% 31.8M 6s
664150K .......... .......... .......... .......... .......... 84%  122M 6s
664200K .......... .......... .......... .......... .......... 84% 53.3M 6s
664250K .......... .......... .......... .......... .......... 84% 14.6M 6s
664300K .......... .......... .......... .......... .......... 84% 48.5M 6s
664350K .......... .......... .......... .......... .......... 84% 54.1M 6s
664400K .......... .......... .......... .......... .......... 84% 80.1M 6s
664450K .......... .......... .......... .......... .......... 84% 46.2M 6s
664500K ....

671950K .......... .......... .......... .......... .......... 85%  986K 6s
672000K .......... .......... .......... .......... .......... 85% 37.9M 6s
672050K .......... .......... .......... .......... .......... 85% 56.7M 6s
672100K .......... .......... .......... .......... .......... 85% 52.0M 6s
672150K .......... .......... .......... .......... .......... 85% 56.9M 6s
672200K .......... .......... .......... .......... .......... 85% 70.5M 6s
672250K .......... .......... .......... .......... .......... 85% 51.8M 6s
672300K .......... .......... .......... .......... .......... 85% 50.3M 6s
672350K .......... .......... .......... .......... .......... 85% 20.4M 6s
672400K .......... .......... .......... .......... .......... 85% 26.6M 6s
672450K .......... .......... .......... .......... .......... 85% 48.4M 6s
672500K .......... .......... .......... .......... .......... 85%  121M 6s
672550K .......... .......... .......... .......... .......... 85% 52.5M 6s
672600K ....

680050K .......... .......... .......... .......... .......... 86%  930K 5s
680100K .......... .......... .......... .......... .......... 86% 45.7M 5s
680150K .......... .......... .......... .......... .......... 86% 60.3M 5s
680200K .......... .......... .......... .......... .......... 86% 48.2M 5s
680250K .......... .......... .......... .......... .......... 86% 62.7M 5s
680300K .......... .......... .......... .......... .......... 86% 50.0M 5s
680350K .......... .......... .......... .......... .......... 86% 51.9M 5s
680400K .......... .......... .......... .......... .......... 86%  126M 5s
680450K .......... .......... .......... .......... .......... 86% 51.4M 5s
680500K .......... .......... .......... .......... .......... 86% 15.5M 5s
680550K .......... .......... .......... .......... .......... 86% 56.1M 5s
680600K .......... .......... .......... .......... .......... 86% 46.9M 5s
680650K .......... .......... .......... .......... .......... 86% 55.8M 5s
680700K ....

688300K .......... .......... .......... .......... .......... 87%  628K 5s
688350K .......... .......... .......... .......... .......... 87% 26.7M 5s
688400K .......... .......... .......... .......... .......... 87% 34.6M 5s
688450K .......... .......... .......... .......... .......... 87% 63.5M 5s
688500K .......... .......... .......... .......... .......... 87% 39.4M 5s
688550K .......... .......... .......... .......... .......... 87% 51.1M 5s
688600K .......... .......... .......... .......... .......... 87% 74.2M 5s
688650K .......... .......... .......... .......... .......... 87% 53.5M 5s
688700K .......... .......... .......... .......... .......... 87% 56.9M 5s
688750K .......... .......... .......... .......... .......... 87% 30.1M 5s
688800K .......... .......... .......... .......... .......... 88% 32.5M 5s
688850K .......... .......... .......... .......... .......... 88% 39.2M 5s
688900K .......... .......... .......... .......... .......... 88% 44.0M 5s
688950K ....

696600K .......... .......... .......... .......... .......... 88%  681K 4s
696650K .......... .......... .......... .......... .......... 89% 3.99M 4s
696700K .......... .......... .......... .......... .......... 89% 16.4M 4s
696750K .......... .......... .......... .......... .......... 89% 67.2M 4s
696800K .......... .......... .......... .......... .......... 89% 64.6M 4s
696850K .......... .......... .......... .......... .......... 89% 64.3M 4s
696900K .......... .......... .......... .......... .......... 89% 57.3M 4s
696950K .......... .......... .......... .......... .......... 89% 53.2M 4s
697000K .......... .......... .......... .......... .......... 89% 57.1M 4s
697050K .......... .......... .......... .......... .......... 89% 57.4M 4s
697100K .......... .......... .......... .......... .......... 89% 25.7M 4s
697150K .......... .......... .......... .......... .......... 89% 25.0M 4s
697200K .......... .......... .......... .......... .......... 89% 67.3M 4s
697250K ....

705000K .......... .......... .......... .......... .......... 90%  694K 4s
705050K .......... .......... .......... .......... .......... 90% 3.63M 4s
705100K .......... .......... .......... .......... .......... 90% 40.8M 4s
705150K .......... .......... .......... .......... .......... 90% 42.7M 4s
705200K .......... .......... .......... .......... .......... 90% 77.5M 4s
705250K .......... .......... .......... .......... .......... 90% 22.8M 4s
705300K .......... .......... .......... .......... .......... 90% 74.3M 4s
705350K .......... .......... .......... .......... .......... 90% 63.4M 4s
705400K .......... .......... .......... .......... .......... 90% 58.6M 4s
705450K .......... .......... .......... .......... .......... 90% 59.4M 4s
705500K .......... .......... .......... .......... .......... 90% 29.1M 4s
705550K .......... .......... .......... .......... .......... 90% 46.9M 4s
705600K .......... .......... .......... .......... .......... 90% 65.0M 4s
705650K ....

713500K .......... .......... .......... .......... .......... 91%  620K 3s
713550K .......... .......... .......... .......... .......... 91% 14.5M 3s
713600K .......... .......... .......... .......... .......... 91% 44.0M 3s
713650K .......... .......... .......... .......... .......... 91% 59.4M 3s
713700K .......... .......... .......... .......... .......... 91% 40.8M 3s
713750K .......... .......... .......... .......... .......... 91% 80.2M 3s
713800K .......... .......... .......... .......... .......... 91% 86.8M 3s
713850K .......... .......... .......... .......... .......... 91% 69.5M 3s
713900K .......... .......... .......... .......... .......... 91% 52.7M 3s
713950K .......... .......... .......... .......... .......... 91% 56.9M 3s
714000K .......... .......... .......... .......... .......... 91% 24.7M 3s
714050K .......... .......... .......... .......... .......... 91% 29.5M 3s
714100K .......... .......... .......... .......... .......... 91% 62.0M 3s
714150K ....

721900K .......... .......... .......... .......... .......... 92%  631K 3s
721950K .......... .......... .......... .......... .......... 92% 10.9M 3s
722000K .......... .......... .......... .......... .......... 92% 81.5M 3s
722050K .......... .......... .......... .......... .......... 92% 37.8M 3s
722100K .......... .......... .......... .......... .......... 92% 51.4M 3s
722150K .......... .......... .......... .......... .......... 92% 54.5M 3s
722200K .......... .......... .......... .......... .......... 92% 77.6M 3s
722250K .......... .......... .......... .......... .......... 92% 41.1M 3s
722300K .......... .......... .......... .......... .......... 92% 52.7M 3s
722350K .......... .......... .......... .......... .......... 92% 64.5M 3s
722400K .......... .......... .......... .......... .......... 92% 66.1M 3s
722450K .......... .......... .......... .......... .......... 92% 33.2M 3s
722500K .......... .......... .......... .......... .......... 92% 33.3M 3s
722550K ....

730450K .......... .......... .......... .......... .......... 93%  573K 3s
730500K .......... .......... .......... .......... .......... 93% 74.5M 3s
730550K .......... .......... .......... .......... .......... 93% 83.6M 3s
730600K .......... .......... .......... .......... .......... 93% 59.3M 3s
730650K .......... .......... .......... .......... .......... 93% 60.3M 3s
730700K .......... .......... .......... .......... .......... 93% 59.4M 3s
730750K .......... .......... .......... .......... .......... 93%  109M 3s
730800K .......... .......... .......... .......... .......... 93% 63.5M 3s
730850K .......... .......... .......... .......... .......... 93% 61.3M 3s
730900K .......... .......... .......... .......... .......... 93% 55.2M 3s
730950K .......... .......... .......... .......... .......... 93% 16.3M 3s
731000K .......... .......... .......... .......... .......... 93%  109M 3s
731050K .......... .......... .......... .......... .......... 93% 88.3M 3s
731100K ....

736150K .......... .......... .......... .......... .......... 94%  585K 2s
736200K .......... .......... .......... .......... .......... 94% 40.5M 2s
736250K .......... .......... .......... .......... .......... 94% 25.4M 2s
736300K .......... .......... .......... .......... .......... 94% 90.2M 2s
736350K .......... .......... .......... .......... .......... 94% 86.0M 2s
736400K .......... .......... .......... .......... .......... 94% 61.1M 2s
736450K .......... .......... .......... .......... .......... 94% 64.1M 2s
736500K .......... .......... .......... .......... .......... 94% 70.4M 2s
736550K .......... .......... .......... .......... .......... 94% 60.2M 2s
736600K .......... .......... .......... .......... .......... 94% 33.7M 2s
736650K .......... .......... .......... .......... .......... 94% 32.5M 2s
736700K .......... .......... .......... .......... .......... 94%  104M 2s
736750K .......... .......... .......... .......... .......... 94% 34.2M 2s
736800K ....

741850K .......... .......... .......... .......... .......... 94% 84.4M 2s
741900K .......... .......... .......... .......... .......... 94%  554K 2s
741950K .......... .......... .......... .......... .......... 94% 97.5M 2s
742000K .......... .......... .......... .......... .......... 94% 55.3M 2s
742050K .......... .......... .......... .......... .......... 94% 68.6M 2s
742100K .......... .......... .......... .......... .......... 94% 92.2M 2s
742150K .......... .......... .......... .......... .......... 94% 69.6M 2s
742200K .......... .......... .......... .......... .......... 94% 56.9M 2s
742250K .......... .......... .......... .......... .......... 94% 63.8M 2s
742300K .......... .......... .......... .......... .......... 94% 64.4M 2s
742350K .......... .......... .......... .......... .......... 94%  119M 2s
742400K .......... .......... .......... .......... .......... 94% 17.7M 2s
742450K .......... .......... .......... .......... .......... 94% 50.3M 2s
742500K ....

747600K .......... .......... .......... .......... .......... 95%  560K 2s
747650K .......... .......... .......... .......... .......... 95% 74.2M 2s
747700K .......... .......... .......... .......... .......... 95% 82.8M 2s
747750K .......... .......... .......... .......... .......... 95% 63.0M 2s
747800K .......... .......... .......... .......... .......... 95% 72.6M 2s
747850K .......... .......... .......... .......... .......... 95% 96.4M 2s
747900K .......... .......... .......... .......... .......... 95% 59.4M 2s
747950K .......... .......... .......... .......... .......... 95% 61.4M 2s
748000K .......... .......... .......... .......... .......... 95% 73.6M 2s
748050K .......... .......... .......... .......... .......... 95% 64.1M 2s
748100K .......... .......... .......... .......... .......... 95%  114M 2s
748150K .......... .......... .......... .......... .......... 95% 16.0M 2s
748200K .......... .......... .......... .......... .......... 95% 75.9M 2s
748250K ....

753300K .......... .......... .......... .......... .......... 96%  564K 1s
753350K .......... .......... .......... .......... .......... 96% 86.3M 1s
753400K .......... .......... .......... .......... .......... 96% 67.5M 1s
753450K .......... .......... .......... .......... .......... 96% 66.7M 1s
753500K .......... .......... .......... .......... .......... 96% 60.4M 1s
753550K .......... .......... .......... .......... .......... 96%  118M 1s
753600K .......... .......... .......... .......... .......... 96% 57.1M 1s
753650K .......... .......... .......... .......... .......... 96% 77.1M 1s
753700K .......... .......... .......... .......... .......... 96% 65.1M 1s
753750K .......... .......... .......... .......... .......... 96%  122M 1s
753800K .......... .......... .......... .......... .......... 96% 65.1M 1s
753850K .......... .......... .......... .......... .......... 96% 17.6M 1s
753900K .......... .......... .......... .......... .......... 96% 52.1M 1s
753950K ....

759050K .......... .......... .......... .......... .......... 96%  572K 1s
759100K .......... .......... .......... .......... .......... 96% 76.7M 1s
759150K .......... .......... .......... .......... .......... 96% 84.3M 1s
759200K .......... .......... .......... .......... .......... 96% 94.3M 1s
759250K .......... .......... .......... .......... .......... 97% 67.2M 1s
759300K .......... .......... .......... .......... .......... 97% 58.8M 1s
759350K .......... .......... .......... .......... .......... 97% 73.4M 1s
759400K .......... .......... .......... .......... .......... 97% 68.1M 1s
759450K .......... .......... .......... .......... .......... 97%  117M 1s
759500K .......... .......... .......... .......... .......... 97% 61.4M 1s
759550K .......... .......... .......... .......... .......... 97% 67.8M 1s
759600K .......... .......... .......... .......... .......... 97% 17.6M 1s
759650K .......... .......... .......... .......... .......... 97% 45.7M 1s
759700K ....

767600K .......... .......... .......... .......... .......... 98%  571K 1s
767650K .......... .......... .......... .......... .......... 98%  108M 1s
767700K .......... .......... .......... .......... .......... 98% 39.7M 1s
767750K .......... .......... .......... .......... .......... 98% 98.8M 1s
767800K .......... .......... .......... .......... .......... 98% 70.4M 1s
767850K .......... .......... .......... .......... .......... 98% 58.6M 1s
767900K .......... .......... .......... .......... .......... 98%  107M 1s
767950K .......... .......... .......... .......... .......... 98% 69.6M 1s
768000K .......... .......... .......... .......... .......... 98% 65.5M 1s
768050K .......... .......... .......... .......... .......... 98% 68.0M 1s
768100K .......... .......... .......... .......... .......... 98%  113M 1s
768150K .......... .......... .......... .......... .......... 98% 20.7M 1s
768200K .......... .......... .......... .......... .......... 98% 34.9M 1s
768250K ....

773350K .......... .......... .......... .......... .......... 98%  552K 0s
773400K .......... .......... .......... .......... .......... 98%  107M 0s
773450K .......... .......... .......... .......... .......... 98% 69.6M 0s
773500K .......... .......... .......... .......... .......... 98% 75.0M 0s
773550K .......... .......... .......... .......... .......... 98%  110M 0s
773600K .......... .......... .......... .......... .......... 98% 66.7M 0s
773650K .......... .......... .......... .......... .......... 98% 67.4M 0s
773700K .......... .......... .......... .......... .......... 98% 73.5M 0s
773750K .......... .......... .......... .......... .......... 98% 61.0M 0s
773800K .......... .......... .......... .......... .......... 98%  122M 0s
773850K .......... .......... .......... .......... .......... 98% 71.1M 0s
773900K .......... .......... .......... .......... .......... 98% 20.7M 0s
773950K .......... .......... .......... .......... .......... 98% 42.8M 0s
774000K ....

781900K .......... .......... .......... .......... .......... 99%  548K 0s
781950K .......... .......... .......... .......... .......... 99%  115M 0s
782000K .......... .......... .......... .......... .......... 99% 70.0M 0s
782050K .......... .......... .......... .......... .......... 99% 74.2M 0s
782100K .......... .......... .......... .......... .......... 99% 68.5M 0s
782150K .......... .......... .......... .......... .......... 99% 73.6M 0s
782200K .......... .......... .......... .......... .......... 99%  115M 0s
782250K .......... .......... .......... .......... .......... 99% 56.2M 0s
782300K .......... .......... .......... .......... .......... 99% 89.7M 0s
782350K .......... .......... .......... .......... .......... 99% 68.4M 0s
782400K .......... .......... .......... .......... .......... 99%  118M 0s
782450K .......... .......... .......... .......... .......... 99% 19.1M 0s
782500K .......... .......... .......... .......... .......... 99% 51.4M 0s
782550K ....

CompletedProcess(args='wget https://huggingface.co/spaces/sagawa/predictyield-t5/raw/main/tokenizer_config.json -P ./tokenizer', returncode=0)

In [1]:
train_ds = pd.read_csv('../../regression-input-train.csv')
train_ds

Unnamed: 0,REACTANT,REAGENT,PRODUCT,YIELD
0,Clc1ccccn1.Cc1ccc(N)cc1,COc1ccc(OC)c(P(C23CC4CC(CC(C4)C2)C3)(C23CC4CC(...,Cc1ccc(Nc2ccccn2)cc1,70.410458
1,Brc1ccccn1.Cc1ccc(N)cc1,COc1ccc(OC)c(P(C23CC4CC(CC(C4)C2)C3)(C23CC4CC(...,Cc1ccc(Nc2ccccn2)cc1,11.064457
2,CCc1ccc(I)cc1.Cc1ccc(N)cc1,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)(C2CCCCC...,CCc1ccc(Nc2ccc(C)cc2)cc1,10.223550
3,FC(F)(F)c1ccc(Cl)cc1.Cc1ccc(N)cc1,COc1ccc(OC)c(P(C(C)(C)C)(C(C)(C)C)->[Pd]2(OS(=...,Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1,20.083383
4,COc1ccc(Cl)cc1.Cc1ccc(N)cc1,COc1ccc(OC)c(P(C23CC4CC(CC(C4)C2)C3)(C23CC4CC(...,COc1ccc(Nc2ccc(C)cc2)cc1,0.492663
...,...,...,...,...
2762,FC(F)(F)c1ccc(Br)cc1.Cc1ccc(N)cc1,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C(C)(C)C)(C(C)(C)...,Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1,18.974171
2763,CCc1ccc(Br)cc1.Cc1ccc(N)cc1,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C(C)(C)C)(C(C)(C)...,CCc1ccc(Nc2ccc(C)cc2)cc1,19.256507
2764,Ic1cccnc1.Cc1ccc(N)cc1,COc1ccc(OC)c(P(C(C)(C)C)(C(C)(C)C)->[Pd]2(OS(=...,Cc1ccc(Nc2cccnc2)cc1,57.529603
2765,FC(F)(F)c1ccc(Br)cc1.Cc1ccc(N)cc1,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)(C2CCCCC...,Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1,27.473043


In [3]:
train_ds = train_ds[['input', 'YIELD']]
train_ds

Unnamed: 0,input,YIELD
0,REACTANT:CC(=O)Cl.CC(C)(C)OC(=O)N1CCC2(CC1)CC(...,96.0
1,REACTANT:CCCC[Sn](CCCC)(CCCC)c1cccs1.CCCCc1nc(...,75.0
2,REACTANT:CC(C)CCBr.[Li]c1cccs1PRODUCT:CCc1c(Cc...,88.0
3,REACTANT:CS(=O)(=O)O.O=P12OP3(=O)OP(=O)(O1)OP(...,95.0
4,REACTANT:C=C[Mg]Br.CC(C)CC=O.CCOC(=O)CC(=O)OCC...,30.0
...,...,...
545630,REACTANT:Nc1cccc2c1C(=O)N(C1CCC(=O)NC1=O)C2=O....,88.0
545631,REACTANT:CC(=O)O[BH-](OC(C)=O)OC(C)=O.O=C(CNC(...,90.0
545632,REACTANT:CN1CCOCC1.COC(=O)c1ccc(Cc2cn(C)c3ccc(...,74.0
545633,REACTANT:Cc1ccc(N)c(C#C[Si](C)(C)C)n1.[Na+].[O...,75.0


In [55]:
ori = pd.read_csv('../../all_ord_reaction_uniq_with_attr_v3.tsv').drop_duplicates().reset_index(drop=True)
ori

Unnamed: 0,CATALYST,REACTANT,REAGENT,SOLVENT,INTERNAL_STANDARD,NoData,PRODUCT,YIELD,TEMP
0,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.O=C(O)C1CCCN1C(=O)OCc1ccccc1,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,93.0,23.0
1,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCCCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,82.0,23.0
2,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCOCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,61.0,23.0
3,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)C(NC(=O)OC(C)(C)C)C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,72.0,23.0
4,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)NC(Cc1cn(C(=O...,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,83.0,23.0
...,...,...,...,...,...,...,...,...,...
2058485,,COc1ccccc1CCCCBr.Fc1ccc2c(C3CCNCC3)noc2c1.O=C(...,,CC#N.CCO,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058486,,CC(O)=S.CCOC(=O)N=NC(=O)OCC.O=C1C[C@@H](O)CN1....,,C1CCOC1,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058487,,C[C@@H](O[Si](C)(C)C(C)(C)C)[C@H]1C(=O)N2C(C(=...,,CC#N,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058488,CC(=O)[O-].O.[Cu+2],C=O.CC(=O)O.CC(=O)[O-].CCOC(=O)CC(=O)OCC.[K+],,,,,O=S(=O)(c1ccc(Cl)cc1)C(F)(F)F,,


In [59]:
len(ori['REACTANT'].unique()), len(ori['PRODUCT'].unique())

(1045802, 440212)

In [60]:
df = ori[~ori['YIELD'].isna()]
df['YIELD'] = df['YIELD'].clip(0, 100)
df

Unnamed: 0,CATALYST,REACTANT,REAGENT,SOLVENT,INTERNAL_STANDARD,NoData,PRODUCT,YIELD,TEMP
0,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.O=C(O)C1CCCN1C(=O)OCc1ccccc1,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,93.0,23.0
1,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCCCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,82.0,23.0
2,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCOCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,61.0,23.0
3,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)C(NC(=O)OC(C)(C)C)C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,72.0,23.0
4,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)NC(Cc1cn(C(=O...,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,83.0,23.0
...,...,...,...,...,...,...,...,...,...
2058408,[Na+].[OH-],COc1ccc(CC#N)cc1OC.O=Cc1ccc2ccccc2c1,,CCO,,,O=C(F)OCC(F)(F)F,82.0,
2058469,,CNCC[C@@H](O)c1ccccc1.FC(F)(F)c1ccc(Cl)cc1.[H-...,,CC(=O)N(C)C,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,86.0,90.0
2058470,,COc1cc(OC)c(Br)c(OC)c1.O.O=C1CCCCC1,,C1CCOC1,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,65.0,-30.0
2058473,,CN(C)Cc1ccccc1.OCC1CO1.OCCS,,CC(=O)CC(C)C,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,90.0,50.0


In [62]:
len(df['REACTANT'].unique()), len(df['PRODUCT'].unique())

(399405, 358)

In [44]:
df['REACTANT'].isna().sum(), df['PRODUCT'].isna().sum(), (df['REACTANT'].isna() & df['PRODUCT'].isna()).sum()

(144, 142, 142)

In [63]:
df = df[~(df['REACTANT'].isna() | df['PRODUCT'].isna())]

Unnamed: 0,CATALYST,REACTANT,REAGENT,SOLVENT,INTERNAL_STANDARD,NoData,PRODUCT,YIELD,TEMP
0,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.O=C(O)C1CCCN1C(=O)OCc1ccccc1,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,93.0,23.0
1,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCCCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,82.0,23.0
2,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCOCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,61.0,23.0
3,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)C(NC(=O)OC(C)(C)C)C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,72.0,23.0
4,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)NC(Cc1cn(C(=O...,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,83.0,23.0
...,...,...,...,...,...,...,...,...,...
2058408,[Na+].[OH-],COc1ccc(CC#N)cc1OC.O=Cc1ccc2ccccc2c1,,CCO,,,O=C(F)OCC(F)(F)F,82.0,
2058469,,CNCC[C@@H](O)c1ccccc1.FC(F)(F)c1ccc(Cl)cc1.[H-...,,CC(=O)N(C)C,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,86.0,90.0
2058470,,COc1cc(OC)c(Br)c(OC)c1.O.O=C1CCCCC1,,C1CCOC1,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,65.0,-30.0
2058473,,CN(C)Cc1ccccc1.OCC1CO1.OCCS,,CC(=O)CC(C)C,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,90.0,50.0


In [64]:
df['input'] = 'REACTANT:' + df['REACTANT'] + 'PRODUCT:' + df['PRODUCT']
df = df[['input', 'YIELD']].drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,input,YIELD
0,REACTANT:CC(=O)c1ccc(Br)cc1.O=C(O)C1CCCN1C(=O)...,93.0
1,REACTANT:CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CC...,82.0
2,REACTANT:CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CC...,61.0
3,REACTANT:CC(=O)c1ccc(Br)cc1.CC(C)C(NC(=O)OC(C)...,72.0
4,REACTANT:CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)NC(C...,83.0
...,...,...
598384,REACTANT:COc1ccc(CC#N)cc1OC.O=Cc1ccc2ccccc2c1P...,82.0
598385,REACTANT:CNCC[C@@H](O)c1ccccc1.FC(F)(F)c1ccc(C...,86.0
598386,REACTANT:COc1cc(OC)c(Br)c(OC)c1.O.O=C1CCCCC1PR...,65.0
598387,REACTANT:CN(C)Cc1ccccc1.OCC1CO1.OCCSPRODUCT:CC...,90.0


In [51]:
len(df['input'].unique())

589667

In [54]:
df[df['input']=='REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1cnc(C#N)c1-c1ccccc1F.Cn1cnc(C#N)c1-c1ccccc1F']

Unnamed: 0,input,YIELD
137311,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,29.0
137312,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,45.0
137313,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,50.0
137314,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,6.0
137315,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,4.0
...,...,...
312939,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,92.0
312940,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,98.0
312941,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,100.0
312942,REACTANT:Cn1cnc(C#N)c1.Fc1ccccc1BrPRODUCT:Cn1c...,96.0


In [67]:
dfagg = df.groupby('input')['YIELD'].agg('mean').reset_index()
dfagg

Unnamed: 0,input,YIELD
0,REACTANT:*C(F)(F)C(*)(F)F.C1=CCCC1.Cl[SiH](Cl)...,91.0
1,REACTANT:*C(F)(F)C(*)(F)F.C1CCNCC1.CC(C)(C)[O-...,77.0
2,REACTANT:*C(F)(F)C(*)(F)F.CC(=O)c1ccccc1.[BH4-...,95.0
3,REACTANT:*C(F)(F)C(*)(F)F.CC(=O)c1ccccc1.[H][H...,95.0
4,REACTANT:*C(F)(F)C(*)(F)F.CCCCC(N)N.CCOCC.Cc1c...,21.0
...,...,...
589661,REACTANT:c1cncc(C2CCCC2)c1PRODUCT:C[C@H](CO)CO...,84.0
589662,REACTANT:c1cncc(OCCOC2CCCCO2)c1PRODUCT:CC(C)(C...,86.0
589663,REACTANT:c1cncc(OCCOC2CCCCO2)c1PRODUCT:CC1CO1,86.0
589664,REACTANT:c1cncc(OCCOC2CCCCO2)c1PRODUCT:COC(=O)...,86.0


In [78]:
lens = dfagg['input'].apply(lambda x: len(x))
# remove data that have too long inputs
dfagg = dfagg[lens <= 512].reset_index(drop=True)
dfagg

Unnamed: 0,input,YIELD
0,REACTANT:*C(F)(F)C(*)(F)F.C1=CCCC1.Cl[SiH](Cl)...,91.0
1,REACTANT:*C(F)(F)C(*)(F)F.C1CCNCC1.CC(C)(C)[O-...,77.0
2,REACTANT:*C(F)(F)C(*)(F)F.CC(=O)c1ccccc1.[BH4-...,95.0
3,REACTANT:*C(F)(F)C(*)(F)F.CC(=O)c1ccccc1.[H][H...,95.0
4,REACTANT:*C(F)(F)C(*)(F)F.CCCCC(N)N.CCOCC.Cc1c...,21.0
...,...,...
589651,REACTANT:c1cncc(C2CCCC2)c1PRODUCT:C[C@H](CO)CO...,84.0
589652,REACTANT:c1cncc(OCCOC2CCCCO2)c1PRODUCT:CC(C)(C...,86.0
589653,REACTANT:c1cncc(OCCOC2CCCCO2)c1PRODUCT:CC1CO1,86.0
589654,REACTANT:c1cncc(OCCOC2CCCCO2)c1PRODUCT:COC(=O)...,86.0


In [2]:
# multiinput
ori = pd.read_csv('../../all_ord_reaction_uniq_with_attr_v3.tsv').drop_duplicates().reset_index(drop=True)
ori

Unnamed: 0,CATALYST,REACTANT,REAGENT,SOLVENT,INTERNAL_STANDARD,NoData,PRODUCT,YIELD,TEMP
0,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.O=C(O)C1CCCN1C(=O)OCc1ccccc1,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,93.0,23.0
1,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCCCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,82.0,23.0
2,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCOCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,61.0,23.0
3,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)C(NC(=O)OC(C)(C)C)C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,72.0,23.0
4,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)NC(Cc1cn(C(=O...,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,83.0,23.0
...,...,...,...,...,...,...,...,...,...
2058485,,COc1ccccc1CCCCBr.Fc1ccc2c(C3CCNCC3)noc2c1.O=C(...,,CC#N.CCO,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058486,,CC(O)=S.CCOC(=O)N=NC(=O)OCC.O=C1C[C@@H](O)CN1....,,C1CCOC1,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058487,,C[C@@H](O[Si](C)(C)C(C)(C)C)[C@H]1C(=O)N2C(C(=...,,CC#N,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058488,CC(=O)[O-].O.[Cu+2],C=O.CC(=O)O.CC(=O)[O-].CCOC(=O)CC(=O)OCC.[K+],,,,,O=S(=O)(c1ccc(Cl)cc1)C(F)(F)F,,


In [3]:
df = ori[~ori['PRODUCT'].isna()]
df

Unnamed: 0,CATALYST,REACTANT,REAGENT,SOLVENT,INTERNAL_STANDARD,NoData,PRODUCT,YIELD,TEMP
0,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.O=C(O)C1CCCN1C(=O)OCc1ccccc1,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,93.0,23.0
1,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCCCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,82.0,23.0
2,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCOCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,61.0,23.0
3,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)C(NC(=O)OC(C)(C)C)C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,72.0,23.0
4,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)NC(Cc1cn(C(=O...,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,83.0,23.0
...,...,...,...,...,...,...,...,...,...
2058485,,COc1ccccc1CCCCBr.Fc1ccc2c(C3CCNCC3)noc2c1.O=C(...,,CC#N.CCO,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058486,,CC(O)=S.CCOC(=O)N=NC(=O)OCC.O=C1C[C@@H](O)CN1....,,C1CCOC1,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058487,,C[C@@H](O[Si](C)(C)C(C)(C)C)[C@H]1C(=O)N2C(C(=...,,CC#N,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058488,CC(=O)[O-].O.[Cu+2],C=O.CC(=O)O.CC(=O)[O-].CCOC(=O)CC(=O)OCC.[K+],,,,,O=S(=O)(c1ccc(Cl)cc1)C(F)(F)F,,


In [4]:
len(df['REACTANT'].unique()), len(df['PRODUCT'].unique())

(1045802, 440211)

In [6]:
dfr = df[~df['REACTANT'].isna()]
dfr

Unnamed: 0,CATALYST,REACTANT,REAGENT,SOLVENT,INTERNAL_STANDARD,NoData,PRODUCT,YIELD,TEMP
0,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.O=C(O)C1CCCN1C(=O)OCc1ccccc1,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,93.0,23.0
1,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCCCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,82.0,23.0
2,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)N1CCOCC1C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,61.0,23.0
3,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)C(NC(=O)OC(C)(C)C)C(=O)O,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,72.0,23.0
4,CC(C)(C)c1ccn2->[Ir+]34(<-n5cc(C(F)(F)F)ccc5-c...,CC(=O)c1ccc(Br)cc1.CC(C)(C)OC(=O)NC(Cc1cn(C(=O...,O=C([O-])[O-].[Cs+],CN(C)C=O,,,CC(=O)c1ccc(C2CCCN2C(=O)OCc2ccccc2)cc1,83.0,23.0
...,...,...,...,...,...,...,...,...,...
2058485,,COc1ccccc1CCCCBr.Fc1ccc2c(C3CCNCC3)noc2c1.O=C(...,,CC#N.CCO,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058486,,CC(O)=S.CCOC(=O)N=NC(=O)OCC.O=C1C[C@@H](O)CN1....,,C1CCOC1,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058487,,C[C@@H](O[Si](C)(C)C(C)(C)C)[C@H]1C(=O)N2C(C(=...,,CC#N,,,CC(O)CC(=O)[O-].O=C([O-])CCCO,,
2058488,CC(=O)[O-].O.[Cu+2],C=O.CC(=O)O.CC(=O)[O-].CCOC(=O)CC(=O)OCC.[K+],,,,,O=S(=O)(c1ccc(Cl)cc1)C(F)(F)F,,


In [7]:
len(dfr['REACTANT'].unique()), len(dfr['PRODUCT'].unique())

(1045801, 447)

In [11]:
import os
import gc
import random
import itertools
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import tokenizers
import transformers
from transformers import AutoTokenizer, AutoConfig, AutoModel, T5EncoderModel, get_linear_schedule_with_warmup
import datasets
from datasets import load_dataset, load_metric
import sentencepiece
import argparse
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
from torch.optim import AdamW
import pickle
import time
import math
from sklearn.preprocessing import MinMaxScaler
from datasets.utils.logging import disable_progress_bar
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
disable_progress_bar()


CFG = dict(
    data_path='../../all_ord_reaction_uniq_with_attr_v3.tsv',
    pretrained_model_name_or_path='sagawa/ZINC-t5',
    model = 'sagawa/ZINC-t5',
    debug = True,
    epochs = 5,
    batch_size = 5, #max_lenを大きくしたらoomしたから15から5に
    max_len = 512,
    seed = 42,
    num_workers = 4,
    fc_dropout = 0.1,
    eps = 1e-6,
    max_grad_norm=1000,
    gradient_accumulation_steps=3,
    num_warmup_steps=0,
    n_trials=100,
    batch_scheduler=True,
    print_freq=100,
    use_apex=False,
    output_dir = './')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

OUTPUT_DIR = CFG['output_dir']
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(seed=CFG['seed'])  
    

df = pd.read_csv(CFG['data_path']).drop_duplicates().reset_index(drop=True)
df = df[~df['YIELD'].isna()].reset_index(drop=True)
df['YIELD'] = df['YIELD'].clip(0, 100)/100
df = df[~(df['REACTANT'].isna() | df['PRODUCT'].isna())]
for col in ['CATALYST', 'REACTANT', 'REAGENT', 'SOLVENT', 'INTERNAL_STANDARD', 'NoData','PRODUCT']:
    df[col] = df[col].fillna(' ')
    
    
###############################################
def clean(row):
    row = row.replace('. ', '').replace(' .', '').replace('  ', ' ')
    return row
df['REAGENT'] = df['CATALYST'] + '.' + df['REAGENT']
df['REAGENT'] = df['REAGENT'].apply(lambda x: clean(x))

from rdkit import Chem
def canonicalize(mol):
    mol = Chem.MolToSmiles(Chem.MolFromSmiles(mol),True)
    return mol

df['REAGENT'] = df['REAGENT'].apply(lambda x: canonicalize(x) if x != ' ' else ' ')
###############################################
    

df['input'] = 'REACTANT:' + df['REACTANT']  + 'REAGENT:' + df['REAGENT'] + 'PRODUCT:' + df['PRODUCT']
df = df[['input', 'YIELD']].drop_duplicates().reset_index(drop=True)

lens = df['input'].apply(lambda x: len(x))
# remove data that have too long inputs
df = df[lens <= 512].reset_index(drop=True)

train_ds, test_ds = train_test_split(df, test_size=int(len(df)*0.1))
train_ds, valid_ds = train_test_split(train_ds, test_size=int(len(df)*0.1))



In [13]:
!nvidia-smi

Sat Nov 26 16:32:36 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 470.42.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:19:00.0 Off |                  N/A |
| 52%   84C    P2   221W / 250W |   8792MiB / 11019MiB |     99%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  On   | 00000000:1A:00.0 Off |                  N/A |
| 32%   50C    P8    11W / 250W |   5131MiB / 11019MiB |      0%      Default |
|       

In [4]:
class CFG():
    data_path='../../all_ord_reaction_uniq_with_attr_v3.tsv'
    pretrained_model_name_or_path = 'sagawa/ZINC-t5'
    model = 'sagawa/ZINC-t5'
    debug = True
    epochs = 5
    lr = 2e-5
    batch_size = 5 #max_lenを大きくしたらoomしたから15から5に
    max_len = 512
    weight_decay = 0.01
    seed = 42
    num_workers = 4
    fc_dropout = 0.1
    eps = 1e-6
    max_grad_norm=1000
    gradient_accumulation_steps=3
    num_warmup_steps=0
    batch_scheduler=True
    print_freq=100
    use_apex=False
    output_dir = './'
    
class RegressionModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.pretrained_model_name_or_path, output_hidden_states=True)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            if 't5' in cfg.pretrained_model_name_or_path:
                self.model = T5EncoderModel.from_pretrained(CFG.pretrained_model_name_or_path)
            else:
                self.model = AutoModel.from_pretrained(CFG.pretrained_model_name_or_path)
        else:
            if 't5' in cfg.model_name_or_path:
                self.model = T5EncoderModel.from_pretrained('sagawa/ZINC-t5')
            else:
                self.model = AutoModel.from_config(self.config)
#         self.model.resize_token_embeddings(len(cfg.tokenizer))
        self.fc_dropout1 = nn.Dropout(cfg.fc_dropout)
        self.fc1 = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        self.fc_dropout2 = nn.Dropout(cfg.fc_dropout)
        self.fc2 = nn.Linear(self.config.hidden_size, 1)
        
    def forward(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        output = self.fc1(self.fc_dropout1(last_hidden_states)[:, 0, :].view(-1, self.config.hidden_size))
        output = self.fc2(self.fc_dropout2(output))
        return output
    
model = RegressionModel(CFG, config_path=None, pretrained=True)
model

Some weights of the model checkpoint at sagawa/ZINC-t5 were not used when initializing T5EncoderModel: ['decoder.block.11.layer.1.EncDecAttention.v.weight', 'decoder.block.6.layer.2.DenseReluDense.wi_0.weight', 'decoder.block.6.layer.1.layer_norm.weight', 'decoder.block.1.layer.1.EncDecAttention.k.weight', 'decoder.block.10.layer.1.EncDecAttention.k.weight', 'decoder.block.1.layer.0.SelfAttention.q.weight', 'decoder.block.2.layer.1.EncDecAttention.o.weight', 'decoder.block.10.layer.2.DenseReluDense.wi_1.weight', 'decoder.block.4.layer.1.EncDecAttention.q.weight', 'decoder.block.0.layer.0.SelfAttention.q.weight', 'decoder.block.11.layer.1.EncDecAttention.q.weight', 'decoder.block.3.layer.0.layer_norm.weight', 'decoder.block.11.layer.1.layer_norm.weight', 'decoder.block.5.layer.1.layer_norm.weight', 'decoder.block.0.layer.2.DenseReluDense.wo.weight', 'decoder.block.5.layer.0.SelfAttention.o.weight', 'decoder.block.6.layer.2.layer_norm.weight', 'decoder.block.5.layer.1.EncDecAttention.q.w

RegressionModel(
  (model): T5EncoderModel(
    (shared): Embedding(221, 768)
    (encoder): T5Stack(
      (embed_tokens): Embedding(221, 768)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=768, out_features=768, bias=False)
                (k): Linear(in_features=768, out_features=768, bias=False)
                (v): Linear(in_features=768, out_features=768, bias=False)
                (o): Linear(in_features=768, out_features=768, bias=False)
                (relative_attention_bias): Embedding(32, 12)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseGatedActDense(
                (wi_0): Linear(in_features=768, out_features=2048, bias=False)
                (wi_1): Linear(in_features=7

In [21]:
for name, param in model.named_parameters():
    if ('model' in name) or ('block.11' noin name):
        print(name)
        param.requires_grad = False

All Flax model weights were used when initializing T5ForConditionalGeneration.

Some weights of T5ForConditionalGeneration were not initialized from the Flax model and are newly initialized: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5ForConditionalGeneration(
  (shared): Embedding(221, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(221, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (wo): Lin

In [22]:
del model.lm_head

In [25]:
model.lm = nn.Linear(config.hidden_size, config.hidden_size)
model

T5ForConditionalGeneration(
  (shared): Embedding(221, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(221, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (wo): Lin

T5Stack(
  (embed_tokens): Embedding(221, 768)
  (block): ModuleList(
    (0): T5Block(
      (layer): ModuleList(
        (0): T5LayerSelfAttention(
          (SelfAttention): T5Attention(
            (q): Linear(in_features=768, out_features=768, bias=False)
            (k): Linear(in_features=768, out_features=768, bias=False)
            (v): Linear(in_features=768, out_features=768, bias=False)
            (o): Linear(in_features=768, out_features=768, bias=False)
            (relative_attention_bias): Embedding(32, 12)
          )
          (layer_norm): T5LayerNorm()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (1): T5LayerCrossAttention(
          (EncDecAttention): T5Attention(
            (q): Linear(in_features=768, out_features=768, bias=False)
            (k): Linear(in_features=768, out_features=768, bias=False)
            (v): Linear(in_features=768, out_features=768, bias=False)
            (o): Linear(in_features=768, out_features=768, bias=Fal

In [40]:
for name, param in model.decoder.block.named_parameters():
#     if ('model' in name) or ('block.11' noin name):
    print(name)
#         param.requires_grad = False

0.layer.0.SelfAttention.q.weight
0.layer.0.SelfAttention.k.weight
0.layer.0.SelfAttention.v.weight
0.layer.0.SelfAttention.o.weight
0.layer.0.SelfAttention.relative_attention_bias.weight
0.layer.0.layer_norm.weight
0.layer.1.EncDecAttention.q.weight
0.layer.1.EncDecAttention.k.weight
0.layer.1.EncDecAttention.v.weight
0.layer.1.EncDecAttention.o.weight
0.layer.1.layer_norm.weight
0.layer.2.DenseReluDense.wi_0.weight
0.layer.2.DenseReluDense.wi_1.weight
0.layer.2.DenseReluDense.wo.weight
0.layer.2.layer_norm.weight
1.layer.0.SelfAttention.q.weight
1.layer.0.SelfAttention.k.weight
1.layer.0.SelfAttention.v.weight
1.layer.0.SelfAttention.o.weight
1.layer.0.layer_norm.weight
1.layer.1.EncDecAttention.q.weight
1.layer.1.EncDecAttention.k.weight
1.layer.1.EncDecAttention.v.weight
1.layer.1.EncDecAttention.o.weight
1.layer.1.layer_norm.weight
1.layer.2.DenseReluDense.wi_0.weight
1.layer.2.DenseReluDense.wi_1.weight
1.layer.2.DenseReluDense.wo.weight
1.layer.2.layer_norm.weight
2.layer.0.SelfA

ModuleList(
  (0): T5Block(
    (layer): ModuleList(
      (0): T5LayerSelfAttention(
        (SelfAttention): T5Attention(
          (q): Linear(in_features=768, out_features=768, bias=False)
          (k): Linear(in_features=768, out_features=768, bias=False)
          (v): Linear(in_features=768, out_features=768, bias=False)
          (o): Linear(in_features=768, out_features=768, bias=False)
          (relative_attention_bias): Embedding(32, 12)
        )
        (layer_norm): T5LayerNorm()
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): T5LayerCrossAttention(
        (EncDecAttention): T5Attention(
          (q): Linear(in_features=768, out_features=768, bias=False)
          (k): Linear(in_features=768, out_features=768, bias=False)
          (v): Linear(in_features=768, out_features=768, bias=False)
          (o): Linear(in_features=768, out_features=768, bias=False)
        )
        (layer_norm): T5LayerNorm()
        (dropout): Dropout(p=0.1, inplace=Fals

In [4]:
import pandas as pd
df = pd.read_csv('/data2/sagawa/t5chem/data/C_N_yield/MFF_FullCV_01/train.csv').drop_duplicates().reset_index(drop=True)
df['input'] = 'REACTANT:' + df['REACTANT']  + 'REAGENT:' + df['REAGENT'] + 'PRODUCT:' + df['PRODUCT']
# df = df[['input', 'YIELD']].drop_duplicates().reset_index(drop=True)

lens = df['input'].apply(lambda x: len(x))
# remove data that have too long inputs
# df = df[lens <= 512].reset_index(drop=True)

In [5]:
len(df), len(df[lens <= 512].reset_index(drop=True))

(2767, 2767)

In [8]:
from torch.nn import MSELoss

loss = MSELoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)
output = loss(input, target)
output

tensor(2.7675, grad_fn=<MseLossBackward0>)

In [9]:
output = loss(input.reshape(-1, 1), target.reshape(-1, 1))
output

tensor(2.7675, grad_fn=<MseLossBackward0>)

In [1]:
import torch
torch.load('config.pth')

T5Config {
  "_name_or_path": "sagawa/ZINC-t5",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_hidden_states": true,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "transformers_version": "4.21.0.dev0",
  "use_cache": true,
  "vocab_size": 221
}