In [1]:
import os
import gc
import json
import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
import scipy
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import textstat
from tqdm import tqdm
from typing import Dict, List, Tuple, NamedTuple, Callable, Any
import scml
import mylib

In [2]:
class Conf(NamedTuple):
    device: torch.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    pretrained_dir: str = "pretrained/"
    data_dir: str = "data/"
    dtfy_model_max_length: int = 512
    dtfy_batch_size: int = 256
    dtfy_models: Dict[str, str] = {
        "dto_": f"{pretrained_dir}unitaryai/detoxify/toxic_original-c1212f89.ckpt",
        "dtu_": f"{pretrained_dir}unitaryai/detoxify/toxic_debiased-c7548aa0.ckpt",
        "dtm_": f"{pretrained_dir}unitaryai/detoxify/multilingual_debiased-0b549669.ckpt"
    }
    dtfy_configs: Dict[str, str] = {
        "dto_": f"{pretrained_dir}bert-base-uncased",
        "dtu_": f"{pretrained_dir}roberta-base",
        "dtm_": f"{pretrained_dir}xlm-roberta-base"
    }
    tweeteval_model_max_length: int = 512
    tweeteval_batch_size: int = 64
    tweeteval_models: Dict[str, str] = {
        "te_roberta_off": f"{pretrained_dir}cardiffnlp/twitter-roberta-base-offensive",
        "te_roberta_emo_anger": f"{pretrained_dir}cardiffnlp/twitter-roberta-base-emotion",
        "te_roberta_snt_neg": f"{pretrained_dir}cardiffnlp/twitter-roberta-base-sentiment",
        "te_roberta_iro": f"{pretrained_dir}cardiffnlp/twitter-roberta-base-irony",
        "te_xlm_roberta_snt_neg": f"{pretrained_dir}cardiffnlp/twitter-xlm-roberta-base-sentiment",
    }
    tweeteval_label_index: Dict[str, int] = {
        "te_roberta_off": 1,
        "te_roberta_emo_anger": 0,
        "te_roberta_snt_neg": 0,
        "te_roberta_iro": 1,
        "te_xlm_roberta_snt_neg": 0,
    }
    hatebert_model_max_length: int = 512
    hatebert_batch_size: int = 128
    hatebert_models: Dict[str, str] = {
        "hb_bert_off": f"{pretrained_dir}hatebert/bert-offenseval",
        "hb_bert_abu" : f"{pretrained_dir}hatebert/bert-abuseval",
        "hb_hatebert_off": f"{pretrained_dir}hatebert/hatebert-offenseval",
        "hb_hatebert_abu" : f"{pretrained_dir}hatebert/hatebert-abuseval",
    }
    em_max_seq_length: int = 128
    em_batch_size: int = 1000
    em_models: Dict[str, str] = {
        "paraphrase-MiniLM-L6-v2": f"{pretrained_dir}sentence-transformers/paraphrase-MiniLM-L6-v2"
    }
    vocab_file: str = f"{data_dir}vocab.json"
        
        
conf = Conf()
print(conf)
if conf.device.type == 'cuda':
    for i in range(torch.cuda.device_count()):
        print(f"device={i}, {torch.cuda.get_device_name(i)}")
        print('Mem Allocated:', round(torch.cuda.memory_allocated(i)/1024**3,1), 'GB')
        print('Mem Cached:   ', round(torch.cuda.memory_reserved(i)/1024**3,1), 'GB')

Conf(device=device(type='cuda'), pretrained_dir='pretrained/', data_dir='data/', dtfy_model_max_length=512, dtfy_batch_size=256, dtfy_models={'dto_': 'pretrained/unitaryai/detoxify/toxic_original-c1212f89.ckpt', 'dtu_': 'pretrained/unitaryai/detoxify/toxic_debiased-c7548aa0.ckpt', 'dtm_': 'pretrained/unitaryai/detoxify/multilingual_debiased-0b549669.ckpt'}, dtfy_configs={'dto_': 'pretrained/bert-base-uncased', 'dtu_': 'pretrained/roberta-base', 'dtm_': 'pretrained/xlm-roberta-base'}, tweeteval_model_max_length=512, tweeteval_batch_size=64, tweeteval_models={'te_roberta_off': 'pretrained/cardiffnlp/twitter-roberta-base-offensive', 'te_roberta_emo_anger': 'pretrained/cardiffnlp/twitter-roberta-base-emotion', 'te_roberta_snt_neg': 'pretrained/cardiffnlp/twitter-roberta-base-sentiment', 'te_roberta_iro': 'pretrained/cardiffnlp/twitter-roberta-base-irony', 'te_xlm_roberta_snt_neg': 'pretrained/cardiffnlp/twitter-xlm-roberta-base-sentiment'}, tweeteval_label_index={'te_roberta_off': 1, 'te_r

In [3]:
percentiles=[.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99]
os.environ["TOKENIZERS_PARALLELISM"] = "false"
pd.set_option("use_inf_as_na", True)
pd.set_option("max_info_columns", 9999)
pd.set_option("display.max_columns", 9999)
pd.set_option("display.max_rows", 9999)
pd.set_option('max_colwidth', 9999)
tqdm.pandas()

In [4]:
df = pd.read_parquet("input/pre_ruddit.parquet")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5710 entries, 0 to 5709
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   label   5710 non-null   int32  
 1   bws     5710 non-null   float32
 2   worker  5710 non-null   int8   
 3   text    5710 non-null   object 
 4   text1   5710 non-null   object 
 5   text2   5710 non-null   object 
 6   text3   5710 non-null   object 
dtypes: float32(1), int32(1), int8(1), object(4)
memory usage: 228.7+ KB


# Character level features

In [5]:
%%time
col = "length"
df[col] = df["text1"].str.len()
df[col] = df[col].astype(np.int16)

Wall time: 4 ms


In [6]:
def digit_frac(row) -> float:
    return mylib.digit_frac(row["text1"])


def letter_frac(row) -> float:
    return mylib.letter_frac(row["text1"])


def space_frac(row) -> float:
    return mylib.space_frac(row["text1"])


def punc_frac(row) -> float:
    return mylib.punc_frac(row["text1"])


def upper_frac(row) -> float:
    return mylib.upper_frac(row["text1"])


def repeat_char_frac(row) -> float:
    return mylib.repeat_char_frac(row["text1"])


def repeat_substring_frac(row) -> float:
    return mylib.repeat_substring_frac(row["text1"])


char_fns: Dict[str, Callable] = {
    "digit_frac": digit_frac,
    "letter_frac": letter_frac,
    "space_frac": space_frac,
    "punc_frac": punc_frac,
    "upper_frac": upper_frac,
    "repeat_char_frac": repeat_char_frac,
    "repeat_substring_frac": repeat_substring_frac,
}

In [7]:
for col, fn in char_fns.items():
    print(col)
    df[col] = df.progress_apply(fn, axis=1)
    df[col] = df[col].astype(np.float32)

digit_frac


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 38574.62it/s]


letter_frac


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 38985.47it/s]


space_frac


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 39563.22it/s]


punc_frac


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 35727.41it/s]


upper_frac


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 45196.56it/s]


repeat_char_frac


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 22507.38it/s]


repeat_substring_frac


100%|██████████████████████████████████████████| 5710/5710 [00:09<00:00, 579.82it/s]


# Textstat features

In [8]:
def syllable_count(row) -> int:
    return textstat.syllable_count(row["text1"])


def lexicon_count(row) -> int:
    return textstat.lexicon_count(row["text1"])


def sentence_count(row) -> int:
    return textstat.sentence_count(row["text1"])


def syllables_per_word(row) -> float:
    return row["syllable_count"] / (row["lexicon_count"] + 1)


def syllables_per_sent(row) -> float:
    return row["syllable_count"] / (row["sentence_count"] + 1)


def words_per_sent(row) -> float:
    return row["lexicon_count"] / (row["sentence_count"] + 1)


def flesch_reading_ease(row) -> float:
    return textstat.flesch_reading_ease(row["text1"])


def flesch_kincaid_grade(row) -> float:
    return textstat.flesch_kincaid_grade(row["text1"])


def gunning_fog(row) -> float:
    return textstat.gunning_fog(row["text1"])


def smog_index(row) -> float:
    return textstat.smog_index(row["text1"])


def automated_readability_index(row) -> float:
    return textstat.automated_readability_index(row["text1"])


def coleman_liau_index(row) -> float:
    return textstat.coleman_liau_index(row["text1"])


def linsear_write_formula(row) -> float:
    return textstat.linsear_write_formula(row["text1"])


def dale_chall_readability_score(row) -> float:
    return textstat.dale_chall_readability_score(row["text1"])


preprocess_fns: List[Tuple[str, Callable, Any]] = [
    ("syllable_count", syllable_count, np.int32),
    ("lexicon_count", lexicon_count, np.int32),
    ("sentence_count", sentence_count, np.int32),
]
textstat_fns: List[Tuple[str, Callable, Any]] = [
    ("syllables_per_word", syllables_per_word, np.float32),
    ("syllables_per_sent", syllables_per_sent, np.float32),
    ("words_per_sent", words_per_sent, np.float32),
    ("flesch_reading_ease", flesch_reading_ease, np.float32),
    ("flesch_kincaid_grade", flesch_kincaid_grade, np.float32),
    ("gunning_fog", gunning_fog, np.float32),
    ("smog_index", smog_index, np.float32),
    ("automated_readability_index", automated_readability_index, np.float32),
    ("coleman_liau_index", coleman_liau_index, np.float32),
    ("linsear_write_formula", linsear_write_formula, np.float32),
    ("dale_chall_readability_score", dale_chall_readability_score, np.float32),
]

In [9]:
for col, fn, dtype in preprocess_fns:
    print(col)
    df[col] = df.progress_apply(fn, axis=1)
    df[col] = df[col].astype(dtype)
for col, fn, dtype in textstat_fns:
    print(col)
    df[col] = df.progress_apply(fn, axis=1)
    df[col] = df[col].astype(dtype)

syllable_count


100%|█████████████████████████████████████████| 5710/5710 [00:00<00:00, 7870.35it/s]


lexicon_count


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 82239.00it/s]


sentence_count


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 43522.42it/s]


syllables_per_word


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 87848.98it/s]


syllables_per_sent


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 95077.60it/s]


words_per_sent


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 92348.86it/s]


flesch_reading_ease


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 11916.63it/s]


flesch_kincaid_grade


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 12779.69it/s]


gunning_fog


100%|█████████████████████████████████████████| 5710/5710 [00:00<00:00, 9878.86it/s]


smog_index


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 15693.75it/s]


automated_readability_index


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 26773.70it/s]


coleman_liau_index


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 23620.82it/s]


linsear_write_formula


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 11976.19it/s]


dale_chall_readability_score


100%|████████████████████████████████████████| 5710/5710 [00:00<00:00, 10421.97it/s]


# TF-IDF features

In [10]:
vocabulary, idf = [], []
with open(conf.vocab_file) as f:
    tmp = json.load(f)
    vocabulary = tmp["term"]
    idf = tmp["idf"]
print(f"len(vocab)={len(vocabulary)}\n{vocabulary}")
assert len(vocabulary) == len(idf)

len(vocab)=576
['african', 'african american', 'alabama hot pocket', 'alaskan pipeline', 'american', 'anal', 'analplug', 'analsex', 'anilingus', 'anus', 'apeshit', 'arse', 'arsehole', 'asian', 'ass', 'assassin', 'asshole', 'assmunch', 'atheist', 'auto erotic', 'autoerotic', 'babeland', 'baby batter', 'baby juice', 'ball gag', 'ball gravy', 'ball kicking', 'ball licking', 'ball sack', 'ball sucking', 'balls', 'bangbros', 'bangbus', 'bareback', 'barely legal', 'barenaked', 'bastard', 'bastardo', 'bastinado', 'bbw', 'bdsm', 'beaner', 'beaners', 'beastiality', 'beaver cleaver', 'beaver lips', 'bestiality', 'bewb', 'big black', 'big breasts', 'big knockers', 'big tits', 'bimbo', 'bimbos', 'birdlock', 'bisexual', 'bitch', 'bitches', 'black', 'black cock', 'blind', 'blonde action', 'blonde on blonde action', 'bloody', 'bloodyhell', 'blow', 'blow job', 'blow your load', 'blowjob', 'blue waffle', 'blumpkin', 'bollocks', 'bondage', 'boner', 'boob', 'boobies', 'boobs', 'booty call', 'boy', 'brown

In [11]:
%%time
vec = TfidfVectorizer(vocabulary=vocabulary, ngram_range=(1, 3), analyzer="word")
vec.idf_ = idf
x = vec.transform(df["text3"])
print(f"x.shape={x.shape}\n{x[0]}")
ti_features = [f"ti{i:04d}" for i in range(x.shape[1])]
df[ti_features] = scipy.sparse.csr_matrix(x).todense()
df[ti_features] = df[ti_features].astype(np.float32)
df = df.copy()  # defragment

x.shape=(5710, 576)
  (0, 563)	0.7175763759163235
  (0, 319)	0.6964798236322393


  self[col] = igetitem(value, i)


Wall time: 2.86 s


# TweetEval labels

In [12]:
sentences = list(df["text2"])
for col, model_dir in conf.tweeteval_models.items():
    tokenizer = AutoTokenizer.from_pretrained(
        model_dir, 
        model_max_length=conf.tweeteval_model_max_length
    )
    #print(f"{repr(tokenizer)}\nmodel_input_names={tokenizer.model_input_names}")
    x = tokenizer(sentences, truncation=True, padding="max_length")
    batches = torch.utils.data.DataLoader(mylib.Dataset(x), batch_size=conf.tweeteval_batch_size, shuffle=False)
    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
    model.eval()
    model.to(conf.device)
    logits = None
    with torch.no_grad():
        for batch in tqdm(batches):
            for k, v in batch.items():
                batch[k] = v.to(conf.device)
            outputs = model(**batch)
            tmp = outputs.logits.detach().cpu()
            if logits is None:
                logits = tmp
            else:
                logits = torch.cat((logits, tmp), 0)
    logits = torch.nn.functional.softmax(logits, dim=1)
    print(f"{col} {logits.size()}\nlogits[:10]={logits[:10]}")
    df[col] = logits[:,conf.tweeteval_label_index[col]]
    df[col] = df[col].astype(np.float32)
    del tokenizer, model
    gc.collect()

100%|███████████████████████████████████████████████| 90/90 [04:08<00:00,  2.76s/it]


te_roberta_off torch.Size([5710, 2])
logits[:10]=tensor([[0.9025, 0.0975],
        [0.5804, 0.4196],
        [0.6679, 0.3321],
        [0.8045, 0.1955],
        [0.7880, 0.2120],
        [0.7823, 0.2177],
        [0.8272, 0.1728],
        [0.6837, 0.3163],
        [0.7598, 0.2402],
        [0.9035, 0.0965]])


100%|███████████████████████████████████████████████| 90/90 [04:07<00:00,  2.75s/it]


te_roberta_emo_anger torch.Size([5710, 4])
logits[:10]=tensor([[0.0376, 0.0565, 0.8628, 0.0431],
        [0.1369, 0.1012, 0.2558, 0.5061],
        [0.8166, 0.0105, 0.0391, 0.1338],
        [0.9419, 0.0041, 0.0231, 0.0308],
        [0.6684, 0.0184, 0.1338, 0.1794],
        [0.8703, 0.0090, 0.0602, 0.0605],
        [0.8556, 0.0071, 0.0948, 0.0425],
        [0.8475, 0.0155, 0.0260, 0.1111],
        [0.8054, 0.0086, 0.0704, 0.1156],
        [0.1417, 0.3192, 0.3703, 0.1687]])


100%|███████████████████████████████████████████████| 90/90 [04:07<00:00,  2.75s/it]


te_roberta_snt_neg torch.Size([5710, 3])
logits[:10]=tensor([[0.0407, 0.7561, 0.2032],
        [0.4268, 0.5488, 0.0244],
        [0.7662, 0.2259, 0.0080],
        [0.6066, 0.3729, 0.0206],
        [0.5111, 0.4282, 0.0607],
        [0.5525, 0.4095, 0.0379],
        [0.3226, 0.6394, 0.0380],
        [0.7787, 0.2147, 0.0066],
        [0.7998, 0.1863, 0.0139],
        [0.1055, 0.6091, 0.2854]])


100%|███████████████████████████████████████████████| 90/90 [04:05<00:00,  2.73s/it]


te_roberta_iro torch.Size([5710, 2])
logits[:10]=tensor([[0.7999, 0.2001],
        [0.4219, 0.5781],
        [0.3392, 0.6608],
        [0.8240, 0.1760],
        [0.8902, 0.1098],
        [0.6376, 0.3624],
        [0.7453, 0.2547],
        [0.0874, 0.9126],
        [0.8681, 0.1319],
        [0.6555, 0.3445]])


100%|███████████████████████████████████████████████| 90/90 [04:05<00:00,  2.73s/it]


te_xlm_roberta_snt_neg torch.Size([5710, 3])
logits[:10]=tensor([[0.1049, 0.7652, 0.1299],
        [0.1865, 0.7562, 0.0573],
        [0.4178, 0.4738, 0.1084],
        [0.7074, 0.2630, 0.0296],
        [0.7878, 0.1841, 0.0281],
        [0.5432, 0.4069, 0.0500],
        [0.5321, 0.4271, 0.0408],
        [0.5248, 0.4357, 0.0396],
        [0.9013, 0.0858, 0.0129],
        [0.3988, 0.4830, 0.1182]])


# HateBert labels

In [13]:
# all Hatebert models use the same tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    conf.hatebert_models["hb_hatebert_off"], 
    model_max_length=conf.hatebert_model_max_length
)
print(f"{repr(tokenizer)}\nmodel_input_names={tokenizer.model_input_names}")

PreTrainedTokenizerFast(name_or_path='pretrained/hatebert/hatebert-offenseval', vocab_size=30522, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})
model_input_names=['input_ids', 'token_type_ids', 'attention_mask']


In [14]:
%%time
x = tokenizer(sentences, truncation=True, padding="max_length")
print(f"{repr(x.keys())}\nlen={len(x['input_ids'])}")

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])
len=5710
Wall time: 1.03 s


In [15]:
batches = torch.utils.data.DataLoader(mylib.Dataset(x), batch_size=conf.hatebert_batch_size, shuffle=False)
for col, model_dir in conf.hatebert_models.items():    
    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
    model.eval()
    model.to(conf.device)
    logits = None
    with torch.no_grad():
        for batch in tqdm(batches):
            for k, v in batch.items():
                batch[k] = v.to(conf.device)
            outputs = model(**batch)
            tmp = outputs.logits.detach().cpu()
            if logits is None:
                logits = tmp
            else:
                logits = torch.cat((logits, tmp), 0)
    logits = torch.nn.functional.softmax(logits, dim=1)
    print(f"{col} {logits.size()}\nlogits[:10]={logits[:10]}")
    df[col] = logits[:,1]
    df[col] = df[col].astype(np.float32)

100%|███████████████████████████████████████████████| 45/45 [04:08<00:00,  5.53s/it]


hb_bert_off torch.Size([5710, 2])
logits[:10]=tensor([[0.9598, 0.0402],
        [0.7718, 0.2282],
        [0.7567, 0.2433],
        [0.9064, 0.0936],
        [0.8318, 0.1682],
        [0.7811, 0.2189],
        [0.5952, 0.4048],
        [0.8502, 0.1498],
        [0.7770, 0.2230],
        [0.9258, 0.0742]])


100%|███████████████████████████████████████████████| 45/45 [04:08<00:00,  5.52s/it]


hb_bert_abu torch.Size([5710, 2])
logits[:10]=tensor([[0.9910, 0.0090],
        [0.9717, 0.0283],
        [0.9545, 0.0455],
        [0.9631, 0.0369],
        [0.9714, 0.0286],
        [0.8686, 0.1314],
        [0.9001, 0.0999],
        [0.8544, 0.1456],
        [0.8708, 0.1292],
        [0.9881, 0.0119]])


100%|███████████████████████████████████████████████| 45/45 [04:08<00:00,  5.52s/it]


hb_hatebert_off torch.Size([5710, 2])
logits[:10]=tensor([[0.9377, 0.0623],
        [0.5526, 0.4474],
        [0.7553, 0.2447],
        [0.7828, 0.2172],
        [0.8787, 0.1213],
        [0.9267, 0.0733],
        [0.8304, 0.1696],
        [0.7886, 0.2114],
        [0.8038, 0.1962],
        [0.8965, 0.1035]])


100%|███████████████████████████████████████████████| 45/45 [04:08<00:00,  5.52s/it]

hb_hatebert_abu torch.Size([5710, 2])
logits[:10]=tensor([[0.9846, 0.0154],
        [0.9717, 0.0283],
        [0.9570, 0.0430],
        [0.9699, 0.0301],
        [0.9698, 0.0302],
        [0.9780, 0.0220],
        [0.9800, 0.0200],
        [0.9733, 0.0267],
        [0.9407, 0.0593],
        [0.9795, 0.0205]])





# Detoxify labels

In [16]:
gc.collect()
dtfy_fs = []
for prefix, checkpoint in tqdm(conf.dtfy_models.items()):
    res = mylib.detoxify_labels(
        sentences,
        checkpoint=checkpoint,
        config_dir=conf.dtfy_configs[prefix],
        model_max_length=conf.dtfy_model_max_length,
        device=conf.device,
        batch_size=conf.dtfy_batch_size
    )
    for k, v in res.items():
        col = prefix + k
        df[col] = v
        df[col] = df[col].astype(np.float32)
        dtfy_fs.append(col)
    gc.collect()

100%|█████████████████████████████████████████████████| 3/3 [04:00<00:00, 80.12s/it]


In [17]:
print(dtfy_fs)

['dto_toxicity', 'dto_severe_toxicity', 'dto_obscene', 'dto_threat', 'dto_insult', 'dto_identity_attack', 'dtu_toxicity', 'dtu_severe_toxicity', 'dtu_obscene', 'dtu_identity_attack', 'dtu_insult', 'dtu_threat', 'dtu_sexual_explicit', 'dtm_toxicity', 'dtm_severe_toxicity', 'dtm_obscene', 'dtm_identity_attack', 'dtm_insult', 'dtm_threat', 'dtm_sexual_explicit']


# Embeddings

In [18]:
model = SentenceTransformer(conf.em_models["paraphrase-MiniLM-L6-v2"], device=conf.device)
model.max_seq_length = conf.em_max_seq_length
em = model.encode(sentences=sentences, batch_size=conf.em_batch_size, show_progress_bar=True, convert_to_numpy=True)
print(f"em.shape={em.shape}")

[INFO|SentenceTransformer.py:60] 2022-02-04 13:09:49,940 >> Load pretrained SentenceTransformer: pretrained/sentence-transformers/paraphrase-MiniLM-L6-v2
[INFO|SentenceTransformer.py:60] 2022-02-04 13:09:49,940 >> Load pretrained SentenceTransformer: pretrained/sentence-transformers/paraphrase-MiniLM-L6-v2
[INFO|SentenceTransformer.py:60] 2022-02-04 13:09:49,940 >> Load pretrained SentenceTransformer: pretrained/sentence-transformers/paraphrase-MiniLM-L6-v2
[INFO|SentenceTransformer.py:60] 2022-02-04 13:09:49,940 >> Load pretrained SentenceTransformer: pretrained/sentence-transformers/paraphrase-MiniLM-L6-v2


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

em.shape=(5710, 384)


In [19]:
%%time
em_size = em.shape[1]
em_cols = [f"zz{i:04d}" for i in range(em_size)]
df[em_cols] = em
df[em_cols] = df[em_cols].astype(np.float32)
del sentences

Wall time: 258 ms


# Review data

In [20]:
cols = ["label", "bws", "worker", "length"]
cols += list(char_fns.keys())
cols += [x[0] for x in textstat_fns]
cols += dtfy_fs
cols += list(conf.hatebert_models.keys()) 
cols += list(conf.tweeteval_models.keys()) 
cols += ti_features
df[cols].describe(percentiles=percentiles)

Unnamed: 0,label,bws,worker,length,digit_frac,letter_frac,space_frac,punc_frac,upper_frac,repeat_char_frac,repeat_substring_frac,syllables_per_word,syllables_per_sent,words_per_sent,flesch_reading_ease,flesch_kincaid_grade,gunning_fog,smog_index,automated_readability_index,coleman_liau_index,linsear_write_formula,dale_chall_readability_score,dto_toxicity,dto_severe_toxicity,dto_obscene,dto_threat,dto_insult,dto_identity_attack,dtu_toxicity,dtu_severe_toxicity,dtu_obscene,dtu_identity_attack,dtu_insult,dtu_threat,dtu_sexual_explicit,dtm_toxicity,dtm_severe_toxicity,dtm_obscene,dtm_identity_attack,dtm_insult,dtm_threat,dtm_sexual_explicit,hb_bert_off,hb_bert_abu,hb_hatebert_off,hb_hatebert_abu,te_roberta_off,te_roberta_emo_anger,te_roberta_snt_neg,te_roberta_iro,te_xlm_roberta_snt_neg,ti0000,ti0001,ti0002,ti0003,ti0004,ti0005,ti0006,ti0007,ti0008,ti0009,ti0010,ti0011,ti0012,ti0013,ti0014,ti0015,ti0016,ti0017,ti0018,ti0019,ti0020,ti0021,ti0022,ti0023,ti0024,ti0025,ti0026,ti0027,ti0028,ti0029,ti0030,ti0031,ti0032,ti0033,ti0034,ti0035,ti0036,ti0037,ti0038,ti0039,ti0040,ti0041,ti0042,ti0043,ti0044,ti0045,ti0046,ti0047,ti0048,ti0049,ti0050,ti0051,ti0052,ti0053,ti0054,ti0055,ti0056,ti0057,ti0058,ti0059,ti0060,ti0061,ti0062,ti0063,ti0064,ti0065,ti0066,ti0067,ti0068,ti0069,ti0070,ti0071,ti0072,ti0073,ti0074,ti0075,ti0076,ti0077,ti0078,ti0079,ti0080,ti0081,ti0082,ti0083,ti0084,ti0085,ti0086,ti0087,ti0088,ti0089,ti0090,ti0091,ti0092,ti0093,ti0094,ti0095,ti0096,ti0097,ti0098,ti0099,ti0100,ti0101,ti0102,ti0103,ti0104,ti0105,ti0106,ti0107,ti0108,ti0109,ti0110,ti0111,ti0112,ti0113,ti0114,ti0115,ti0116,ti0117,ti0118,ti0119,ti0120,ti0121,ti0122,ti0123,ti0124,ti0125,ti0126,ti0127,ti0128,ti0129,ti0130,ti0131,ti0132,ti0133,ti0134,ti0135,ti0136,ti0137,ti0138,ti0139,ti0140,ti0141,ti0142,ti0143,ti0144,ti0145,ti0146,ti0147,ti0148,ti0149,ti0150,ti0151,ti0152,ti0153,ti0154,ti0155,ti0156,ti0157,ti0158,ti0159,ti0160,ti0161,ti0162,ti0163,ti0164,ti0165,ti0166,ti0167,ti0168,ti0169,ti0170,ti0171,ti0172,ti0173,ti0174,ti0175,ti0176,ti0177,ti0178,ti0179,ti0180,ti0181,ti0182,ti0183,ti0184,ti0185,ti0186,ti0187,ti0188,ti0189,ti0190,ti0191,ti0192,ti0193,ti0194,ti0195,ti0196,ti0197,ti0198,ti0199,ti0200,ti0201,ti0202,ti0203,ti0204,ti0205,ti0206,ti0207,ti0208,ti0209,ti0210,ti0211,ti0212,ti0213,ti0214,ti0215,ti0216,ti0217,ti0218,ti0219,ti0220,ti0221,ti0222,ti0223,ti0224,ti0225,ti0226,ti0227,ti0228,ti0229,ti0230,ti0231,ti0232,ti0233,ti0234,ti0235,ti0236,ti0237,ti0238,ti0239,ti0240,ti0241,ti0242,ti0243,ti0244,ti0245,ti0246,ti0247,ti0248,ti0249,ti0250,ti0251,ti0252,ti0253,ti0254,ti0255,ti0256,ti0257,ti0258,ti0259,ti0260,ti0261,ti0262,ti0263,ti0264,ti0265,ti0266,ti0267,ti0268,ti0269,ti0270,ti0271,ti0272,ti0273,ti0274,ti0275,ti0276,ti0277,ti0278,ti0279,ti0280,ti0281,ti0282,ti0283,ti0284,ti0285,ti0286,ti0287,ti0288,ti0289,ti0290,ti0291,ti0292,ti0293,ti0294,ti0295,ti0296,ti0297,ti0298,ti0299,ti0300,ti0301,ti0302,ti0303,ti0304,ti0305,ti0306,ti0307,ti0308,ti0309,ti0310,ti0311,ti0312,ti0313,ti0314,ti0315,ti0316,ti0317,ti0318,ti0319,ti0320,ti0321,ti0322,ti0323,ti0324,ti0325,ti0326,ti0327,ti0328,ti0329,ti0330,ti0331,ti0332,ti0333,ti0334,ti0335,ti0336,ti0337,ti0338,ti0339,ti0340,ti0341,ti0342,ti0343,ti0344,ti0345,ti0346,ti0347,ti0348,ti0349,ti0350,ti0351,ti0352,ti0353,ti0354,ti0355,ti0356,ti0357,ti0358,ti0359,ti0360,ti0361,ti0362,ti0363,ti0364,ti0365,ti0366,ti0367,ti0368,ti0369,ti0370,ti0371,ti0372,ti0373,ti0374,ti0375,ti0376,ti0377,ti0378,ti0379,ti0380,ti0381,ti0382,ti0383,ti0384,ti0385,ti0386,ti0387,ti0388,ti0389,ti0390,ti0391,ti0392,ti0393,ti0394,ti0395,ti0396,ti0397,ti0398,ti0399,ti0400,ti0401,ti0402,ti0403,ti0404,ti0405,ti0406,ti0407,ti0408,ti0409,ti0410,ti0411,ti0412,ti0413,ti0414,ti0415,ti0416,ti0417,ti0418,ti0419,ti0420,ti0421,ti0422,ti0423,ti0424,ti0425,ti0426,ti0427,ti0428,ti0429,ti0430,ti0431,ti0432,ti0433,ti0434,ti0435,ti0436,ti0437,ti0438,ti0439,ti0440,ti0441,ti0442,ti0443,ti0444,ti0445,ti0446,ti0447,ti0448,ti0449,ti0450,ti0451,ti0452,ti0453,ti0454,ti0455,ti0456,ti0457,ti0458,ti0459,ti0460,ti0461,ti0462,ti0463,ti0464,ti0465,ti0466,ti0467,ti0468,ti0469,ti0470,ti0471,ti0472,ti0473,ti0474,ti0475,ti0476,ti0477,ti0478,ti0479,ti0480,ti0481,ti0482,ti0483,ti0484,ti0485,ti0486,ti0487,ti0488,ti0489,ti0490,ti0491,ti0492,ti0493,ti0494,ti0495,ti0496,ti0497,ti0498,ti0499,ti0500,ti0501,ti0502,ti0503,ti0504,ti0505,ti0506,ti0507,ti0508,ti0509,ti0510,ti0511,ti0512,ti0513,ti0514,ti0515,ti0516,ti0517,ti0518,ti0519,ti0520,ti0521,ti0522,ti0523,ti0524,ti0525,ti0526,ti0527,ti0528,ti0529,ti0530,ti0531,ti0532,ti0533,ti0534,ti0535,ti0536,ti0537,ti0538,ti0539,ti0540,ti0541,ti0542,ti0543,ti0544,ti0545,ti0546,ti0547,ti0548,ti0549,ti0550,ti0551,ti0552,ti0553,ti0554,ti0555,ti0556,ti0557,ti0558,ti0559,ti0560,ti0561,ti0562,ti0563,ti0564,ti0565,ti0566,ti0567,ti0568,ti0569,ti0570,ti0571,ti0572,ti0573,ti0574,ti0575
count,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0,5710.0
mean,2855.5,-0.027706,0.0,196.915587,0.00349,0.787317,0.173301,0.035892,0.03172,0.020466,0.003176,1.30703,13.23911,9.545128,74.9795,6.491471,8.701601,3.175884,7.604343,7.027691,8.01527,8.399787,0.177269,0.012918,0.116036,0.008211,0.05858,0.01103,0.19788,0.004827425,0.116517,0.015205,0.082358,0.01082,0.041887,0.204815,0.006113,0.109037,0.013888,0.083703,0.014809,0.046989,0.334841,0.166219,0.328586,0.141803,0.346535,0.503257,0.504242,0.303546,0.579387,0.000285,0.0,0.0,0.0,0.009425,0.000622,0.0,0.0,0.0,0.000311,0.0,0.000175,0.00035,0.000894,0.00836,0.0,0.002155,0.0,0.002409,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000513,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000409,0.001825,0.0,0.004322,0.0,0.000823,0.0,0.0,0.001339,0.0,0.004759,0.000135,0.0,0.000175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004058,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003347,0.0,0.0,0.0,0.000605,0.0,0.00031,0.000175,0.0,0.0,0.0,0.001415,0.0,0.0,0.002112,0.0,0.002562,0.0,0.0,0.0,0.001887,0.0,0.000155,0.0,0.0,0.0,0.0,0.0,0.000587,0.0,0.0,0.001917,0.0,0.0,0.0,0.0,0.0,0.0,0.001871,0.00015,0.000175,0.0,0.0,0.0,0.000175,0.000155,0.0,0.000468,0.00316,0.0,0.0,0.0,0.000496,0.0,0.0,0.0,0.004661,0.000175,6.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000126,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000301,0.00011,0.0,0.000337,0.0,0.001789,0.0,0.0,0.000838,0.0,0.0,0.000714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004463,0.0,0.0,0.001843,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044429,0.0,0.000633,0.000175,0.000493,0.020386,0.0,0.0,0.0,0.0,0.0,0.0,0.000175,0.0,0.0,0.0,0.005563,0.0,9.4e-05,0.000144,0.0,0.0,0.01243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000283,0.0,0.0,0.0,0.0,0.000826,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000139,0.0,0.0,0.0,0.0,0.0,0.000953,0.014349,0.000148,0.000232,0.000147,0.001341,0.000137,0.0,0.000428,0.0,0.000257,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00247,0.000282,0.000376,0.00059,0.000428,0.0,0.0,0.00142,0.0,0.000258,0.0,0.001449,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025879,0.001901,0.0,0.000394,0.0,0.0,0.000304,0.0,0.0,0.0,0.000115,5e-05,0.0,0.0,0.0,0.0,0.000645,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006154,0.0,0.029401,0.000281,0.0,0.000324,0.0,0.000582,0.000175,0.0,0.0,0.000745,0.0,0.00026,0.0,0.001486,0.0,0.000692,0.0,0.0,0.0,0.0,0.0,0.0,0.006377,0.000934,0.001591,0.0,0.0,0.000856,0.0,0.0,0.0,0.0,0.0,0.000175,0.0,0.000485,0.0,0.000119,0.0,0.0,0.0,0.000325,0.0,0.0,0.0,0.0,0.0,0.0,0.015183,0.0,0.0,0.0,0.0,0.000628,0.000491,0.0,0.000603,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000419,0.0,0.0,0.000353,0.0,0.0,0.000704,0.000525,0.0,6.3e-05,0.001471,0.0,0.000175,0.0,0.000438,0.0,0.0,0.0,0.000136,0.0,0.0,0.000159,0.0,0.0,0.0,0.0,0.004536,0.000497,0.0,0.0,0.001716,0.0,0.0,0.0,0.0,0.0,0.000391,0.0,0.0,0.000822,0.0,0.0,0.0,0.006223,0.0,0.0,0.000273,0.001311,0.0,7.4e-05,0.0,0.0,0.00011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001799,0.0,0.000328,0.026821,0.0,0.0,0.007283,0.002705,0.003016,0.001483,0.0,0.0,0.0,0.0,0.0,0.022077,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000257,0.0,0.0,0.000106,0.0,0.0,0.0,0.0,0.0,0.000153,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003621,0.0,0.0,0.0,0.0,0.000295,0.0,0.002373,0.0,0.0,0.0,0.000152,0.0,0.000142,0.0,0.0,0.000175,0.0,0.0,0.000458,0.004837,0.0003,0.0,0.0,0.0,0.0,0.0,0.0,0.000144,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001185,0.00089,0.0,0.0,0.0,0.0,0.0,0.000157,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000868,0.0,0.0,0.0,0.0,0.0,0.0,0.000175,0.0,0.0,0.0,0.0,0.0,0.000694,0.000167,0.0,0.006647,0.0,0.0,0.000386,0.0,0.032128,0.0,0.0,0.0,0.0,6.4e-05,0.0,0.0,0.0,0.0,0.006963,0.0,0.0
std,1648.47935,0.334195,0.0,171.109023,0.01254,0.034357,0.02064,0.02553,0.048117,0.018529,0.016198,0.199335,7.826234,5.379773,19.854204,4.23719,4.464397,4.698273,5.243802,3.855352,5.516964,2.254551,0.327141,0.052013,0.285578,0.055493,0.17248,0.06257,0.344887,0.02284678,0.290073,0.074058,0.209154,0.068646,0.154251,0.348054,0.028899,0.278013,0.076619,0.215425,0.078792,0.161162,0.33387,0.279103,0.284754,0.219147,0.250738,0.358158,0.318861,0.271424,0.299713,0.012755,0.0,0.0,0.0,0.090842,0.024057,0.0,0.0,0.0,0.016741,0.0,0.013234,0.018714,0.024681,0.086064,0.0,0.042544,0.0,0.047575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.038556,0.0,0.05932,0.0,0.026521,0.0,0.0,0.034298,0.0,0.064782,0.010167,0.0,0.013234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.059863,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.053607,0.0,0.0,0.0,0.023213,0.0,0.016709,0.013234,0.0,0.0,0.0,0.035099,0.0,0.0,0.043372,0.0,0.046509,0.0,0.0,0.0,0.040285,0.0,0.011742,0.0,0.0,0.0,0.0,0.0,0.022344,0.0,0.0,0.040558,0.0,0.0,0.0,0.0,0.0,0.0,0.040954,0.011365,0.013234,0.0,0.0,0.0,0.013234,0.011687,0.0,0.020735,0.051551,0.0,0.0,0.0,0.021712,0.0,0.0,0.0,0.063096,0.013234,0.005159,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016091,0.008338,0.0,0.015283,0.0,0.039654,0.0,0.0,0.026415,0.0,0.0,0.024683,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.053721,0.0,0.0,0.039654,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.193266,0.0,0.02403,0.013234,0.02158,0.13302,0.0,0.0,0.0,0.0,0.0,0.0,0.013234,0.0,0.0,0.0,0.063077,0.0,0.007139,0.010869,0.0,0.0,0.099099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015098,0.0,0.0,0.0,0.0,0.027992,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010473,0.0,0.0,0.0,0.0,0.0,0.028756,0.11438,0.01122,0.012429,0.011089,0.03606,0.010361,0.0,0.017676,0.0,0.013732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047027,0.012447,0.016497,0.019376,0.017289,0.0,0.0,0.036167,0.0,0.014646,0.0,0.036988,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.152293,0.040663,0.0,0.018116,0.0,0.0,0.016263,0.0,0.0,0.0,0.008681,0.003814,0.0,0.0,0.0,0.0,0.01968,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065054,0.0,0.149486,0.015446,0.0,0.017361,0.0,0.019814,0.013234,0.0,0.0,0.026164,0.0,0.014277,0.0,0.034592,0.0,0.022448,0.0,0.0,0.0,0.0,0.0,0.0,0.076133,0.029243,0.034523,0.0,0.0,0.02813,0.0,0.0,0.0,0.0,0.0,0.013234,0.0,0.021203,0.0,0.009021,0.0,0.0,0.0,0.014226,0.0,0.0,0.0,0.0,0.0,0.0,0.112568,0.0,0.0,0.0,0.0,0.024017,0.021514,0.0,0.023114,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018554,0.0,0.0,0.015626,0.0,0.0,0.021932,0.022917,0.0,0.004789,0.036388,0.0,0.013234,0.0,0.019318,0.0,0.0,0.0,0.010265,0.0,0.0,0.01201,0.0,0.0,0.0,0.0,0.06219,0.021759,0.0,0.0,0.03872,0.0,0.0,0.0,0.0,0.0,0.015244,0.0,0.0,0.023913,0.0,0.0,0.0,0.073757,0.0,0.0,0.0146,0.033645,0.0,0.005575,0.0,0.0,0.008309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.039673,0.0,0.017576,0.146912,0.0,0.0,0.076132,0.048353,0.047715,0.037516,0.0,0.0,0.0,0.0,0.0,0.133633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01128,0.0,0.0,0.007979,0.0,0.0,0.0,0.0,0.0,0.011561,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.053652,0.0,0.0,0.0,0.0,0.016029,0.0,0.045199,0.0,0.0,0.0,0.01148,0.0,0.010713,0.0,0.0,0.013234,0.0,0.0,0.020125,0.063934,0.016046,0.0,0.0,0.0,0.0,0.0,0.0,0.010864,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02943,0.02353,0.0,0.0,0.0,0.0,0.0,0.011865,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023715,0.0,0.0,0.0,0.0,0.0,0.0,0.013234,0.0,0.0,0.0,0.0,0.0,0.023673,0.012625,0.0,0.073228,0.0,0.0,0.018164,0.0,0.15798,0.0,0.0,0.0,0.0,0.00482,0.0,0.0,0.0,0.0,0.077496,0.0,0.0
min,1.0,-0.889,0.0,14.0,0.0,0.298246,0.040541,0.0,0.0,0.0,0.0,0.8,2.0,1.5,-48.98,-2.5,1.2,0.0,-6.8,-10.16,0.5,0.2,0.000508,8e-05,0.000141,8.6e-05,0.000164,0.000121,0.000286,9.469297e-07,1.7e-05,5.2e-05,6.1e-05,1.2e-05,9e-06,0.000186,9e-06,6.3e-05,4.5e-05,9.5e-05,1.4e-05,1.2e-05,0.008981,0.002733,0.006939,0.00705,0.023256,0.004901,0.000799,0.014366,0.009455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1%,58.09,-0.667,0.0,24.0,0.0,0.670577,0.1113,0.0,0.0,0.0,0.0,0.857143,2.666667,2.5,19.439399,-1.5,1.7272,0.0,-2.8,-2.55,1.25,0.35,0.000554,8.7e-05,0.000158,9.6e-05,0.000171,0.000134,0.00036,1.08221e-06,2.1e-05,6.3e-05,9.2e-05,1.5e-05,1.1e-05,0.000278,1.3e-05,9.4e-05,6.6e-05,0.000149,2.1e-05,1.7e-05,0.013535,0.003709,0.021296,0.010223,0.05036,0.011169,0.001821,0.022659,0.019474,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5%,286.45,-0.521,0.0,33.0,0.0,0.7298,0.137931,0.008264,0.007047,0.0,0.0,1.0,4.0,3.0,41.441999,0.5,2.4,0.0,0.4,0.76,2.0,5.689,0.000613,9.3e-05,0.000165,0.000104,0.000175,0.000138,0.00041,1.20137e-06,2.4e-05,7e-05,0.000103,1.7e-05,1.2e-05,0.000351,1.6e-05,0.000114,7.8e-05,0.000178,2.7e-05,2e-05,0.020864,0.005157,0.036119,0.013418,0.073817,0.022917,0.00892,0.034882,0.054882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10%,571.9,-0.426,0.0,42.0,0.0,0.747899,0.148148,0.013889,0.009524,0.0,0.0,1.0625,5.0,3.5,50.5,1.7,3.2,0.0,1.7,2.23,2.5,6.27,0.000663,9.7e-05,0.00017,0.000108,0.000177,0.00014,0.000454,1.270248e-06,2.6e-05,7.6e-05,0.000112,1.9e-05,1.3e-05,0.000412,1.8e-05,0.000129,8.5e-05,0.000199,3e-05,2.2e-05,0.028689,0.006384,0.049691,0.015893,0.091589,0.038438,0.032618,0.046842,0.114343,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20%,1142.8,-0.312,0.0,61.0,0.0,0.767677,0.158416,0.019417,0.012903,0.008333,0.0,1.142857,6.5,5.0,60.139999,3.1,4.8,0.0,3.6,4.14,3.75,7.01,0.000786,0.000103,0.000176,0.000115,0.000181,0.000144,0.000573,1.407515e-06,3.1e-05,8.9e-05,0.000133,2.3e-05,1.6e-05,0.00057,2.2e-05,0.000158,9.9e-05,0.000246,3.7e-05,2.6e-05,0.045573,0.009076,0.077647,0.020805,0.123787,0.087268,0.141338,0.070889,0.246842,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
30%,1713.7,-0.213,0.0,82.0,0.0,0.777778,0.164983,0.023392,0.01585,0.011905,0.0,1.206897,8.19,6.0,66.407003,4.2,6.14,0.0,5.0,5.297,4.666667,7.49,0.000983,0.000108,0.000182,0.000121,0.000187,0.00015,0.000852,1.642735e-06,4.1e-05,0.000113,0.000178,3e-05,2.1e-05,0.00085,2.7e-05,0.000199,0.000117,0.000324,4.5e-05,3.2e-05,0.071853,0.012562,0.112063,0.026856,0.160974,0.173743,0.285689,0.098981,0.390838,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40%,2284.6,-0.146,0.0,106.0,0.0,0.784656,0.169903,0.027027,0.018968,0.015,0.0,1.255814,10.0,7.333333,71.650002,5.2,7.612,0.0,6.1,6.25,5.666667,7.92,0.001464,0.000114,0.000194,0.000127,0.000204,0.000163,0.001596,2.273925e-06,6.7e-05,0.000175,0.000311,4.8e-05,3.3e-05,0.001615,3.3e-05,0.00027,0.000151,0.00051,6.4e-05,4.4e-05,0.109913,0.017949,0.156008,0.034969,0.204146,0.315728,0.410188,0.140475,0.520841,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
cols += em_cols
df[cols].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5710 entries, 0 to 5709
Data columns (total 1011 columns):
 #     Column                        Non-Null Count  Dtype  
---    ------                        --------------  -----  
 0     label                         5710 non-null   int32  
 1     bws                           5710 non-null   float32
 2     worker                        5710 non-null   int8   
 3     length                        5710 non-null   int16  
 4     digit_frac                    5710 non-null   float32
 5     letter_frac                   5710 non-null   float32
 6     space_frac                    5710 non-null   float32
 7     punc_frac                     5710 non-null   float32
 8     upper_frac                    5710 non-null   float32
 9     repeat_char_frac              5710 non-null   float32
 10    repeat_substring_frac         5710 non-null   float32
 11    syllables_per_word            5710 non-null   float32
 12    syllables_per_sent            

In [22]:
%%time
df[cols].to_parquet("output/tra.parquet", index=False)

Wall time: 363 ms
