In [1]:
import os
import gc
import json
import pathlib
from datetime import datetime
import optuna
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import roc_auc_score
import torch
from typing import List, Dict, Union, Tuple, NamedTuple
from tqdm import tqdm
import scml
from scml import pandasx as pdx
# Notebook-wide timer; it is stopped and reported in the last cell.
tim = scml.Timer()
tim.start()

# NOTE(review): presumably silences the HuggingFace tokenizers fork warning;
# none of the visible cells uses a tokenizer — confirm it is still needed.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Percentile grid (not referenced by any other visible cell).
percentiles = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]

# pandas behaviour/display options, applied in the same order as before.
# NOTE(review): "use_inf_as_na" is deprecated in recent pandas releases —
# confirm against the pinned pandas version before upgrading.
for option, value in (
    ("use_inf_as_na", True),
    ("max_info_columns", 9999),
    ("display.max_columns", 9999),
    ("display.max_rows", 9999),
    ("max_colwidth", 9999),
):
    pd.set_option(option, value)

# Register DataFrame/Series.progress_apply and friends.
tqdm.pandas()
# Seed the RNGs handled by scml (default seed).
scml.seed_everything()

In [2]:
# Target column and XGBoost learning objective.
label = "generated"
objective: str = "binary:logistic"

# NOTE(review): the values below look like (low, high) search ranges and a trial
# count for a tuning study, but the visible training cell passes literal
# hyper-parameters and never reads them — confirm whether they are still needed.
n_trials: int = 1
num_boost_round: int = 100
lr: Tuple[float, float] = (1e-3, 1e-3)
feature_fraction: Tuple[float, float] = (1, 1)
min_data_in_leaf: Tuple[int, int] = (20, 20)

# Each run writes its artefacts (model, importances) to its own timestamped directory.
ts = datetime.now().strftime('%Y%m%d_%H%M%S')
job_dir = f"models/xgb/{ts}"
os.makedirs(job_dir, exist_ok=True)

In [3]:
# Load the full feature table from parquet and print its schema / memory summary.
features_path = "input/features.parquet"
df = pd.read_parquet(features_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 221612 entries, 0 to 221611
Columns: 50047 entries, row_id to tf_Ġyoutube_Ġuploaded_Ġby
dtypes: float32(50033), int16(2), int32(5), int8(1), object(6)
memory usage: 41.3+ GB


In [4]:
# Model inputs are selected purely by column-name prefix; columns without one of
# these prefixes (e.g. row_id, the label, white_sim) are left out.
prefixes = ["ch_", "ws_", "ts_", "va_", "tf_"]
prefix_tuple = tuple(prefixes)
# str.startswith accepts a tuple, so every column is tested exactly once and can
# never be added twice (the previous nested loop had no `break`, so a column
# matching two prefixes would have been appended once per match).
features = sorted(col for col in df.columns if col.startswith(prefix_tuple))
# Fail early rather than training on an empty feature matrix.
assert features, f"no column starts with any of {prefixes}"
print(f"{len(features)} features\n{features[:100]}")

50036 features
['ch_digit_frac', 'ch_len', 'ch_letter_frac', 'ch_punc_frac', 'ch_repeat_char_frac', 'ch_space_frac', 'ch_upper_frac', 'tf_1_n_1', 'tf_a_Ġet_Ġal', 'tf_a_Ġis_Ġa', 'tf_ability_Ġof_Ġthe', 'tf_ality_Ġof_Ġthe', 'tf_an_Ġet_Ġal', 'tf_ance_Ġof_Ġthe', 'tf_ar_Ġet_Ġal', 'tf_as_Ġet_Ġal', 'tf_ated_Ġby_Ġthe', 'tf_ated_Ġin_Ġthe', 'tf_ation_Ġof_Ġthe', 'tf_ations_Ġof_Ġthe', 'tf_e_Ġet_Ġal', 'tf_ed_Ġat_Ġthe', 'tf_ed_Ġby_Ġthe', 'tf_ed_Ġfrom_Ġthe', 'tf_ed_Ġin_Ġthe', 'tf_ed_Ġinto_Ġthe', 'tf_ed_Ġon_Ġthe', 'tf_ed_Ġto_Ġthe', 'tf_ed_Ġwith_Ġthe', 'tf_el_Ġet_Ġal', 'tf_en_Ġet_Ġal', 'tf_er_Ġet_Ġal', 'tf_er_Ġin_Ġthe', 'tf_ers_Ġin_Ġthe', 'tf_ers_Ġof_Ġthe', 'tf_es_Ġet_Ġal', 'tf_es_Ġin_Ġthe', 'tf_es_Ġof_Ġthe', 'tf_i_Ġet_Ġal', 'tf_ia_Ġet_Ġal', 'tf_ification_Ġof_Ġthe', 'tf_in_Ġet_Ġal', 'tf_in_Ġin_Ġthe', 'tf_ing_Ġin_Ġthe', 'tf_ing_Ġof_Ġthe', 'tf_ing_Ġto_Ġthe', 'tf_is_Ġet_Ġal', 'tf_isation_Ġof_Ġthe', 'tf_ism_Ġand_Ġthe', 'tf_ism_Ġin_Ġthe', 'tf_ists_Ġin_Ġthe', 'tf_ities_Ġof_Ġthe', 'tf_ity_Ġof_Ġthe', 'tf_ivenes

In [5]:
# Hold-out split keyed on `white_sim`: rows at or above the threshold go to
# training, rows below it go to validation.
# NOTE(review): 0.29 is a magic number — consider moving it to the config cell
# and recording how it was chosen.
th = 0.29
is_tra = df["white_sim"] >= th
is_val = df["white_sim"] < th

# A missing `white_sim` fails both comparisons, so such rows would silently drop
# out of both splits. Insist that the two masks partition the frame.
n_unassigned = len(df) - int(is_tra.sum()) - int(is_val.sum())
assert n_unassigned == 0, (
    f"{n_unassigned:,} rows have no usable white_sim and fall in neither split"
)

# Boolean-mask indexing already returns new frames, so no extra .copy() is
# needed; on a frame of this size (~41 GB per df.info()) a second copy is costly.
tra = df[is_tra]
val = df[is_val]
t = len(tra)
v = len(val)
n = t+v
assert t > 0 and v > 0, f"empty split: len(tra)={t:,}, len(val)={v:,}"
print(f"val%={v/n:.4f}, len(tra)={t:,}, len(val)={v:,}")

# Release the full frame before the DMatrix objects are built.
del df, is_tra, is_val
gc.collect()

dtrain = xgb.DMatrix(tra[features], tra[label], enable_categorical=False)
dval = xgb.DMatrix(val[features], val[label], enable_categorical=False)

# Class balance of the validation split (displayed as the cell output).
pdx.value_counts(val[label])

val%=0.0473, len(tra)=211,120, len(val)=10,492


Unnamed: 0_level_0,count,percent
generated,Unnamed: 1_level_1,Unnamed: 2_level_1
0,10451,0.996092
1,41,0.003908


In [6]:
%%time
model = xgb.train(
   params={
       "objective": objective,
       "learning_rate": 0.2,
       "min_child_weight": 40,
       "colsample_bytree": 0.75,
       "max_depth": 8,
   },
   dtrain=dtrain,
   num_boost_round=2000,
   evals=[(dtrain, "train"), (dval, "val")],
   verbose_eval=40,
   early_stopping_rounds=100,
)
print(f"best score {model.best_score:.5f} at iteration {model.best_iteration}")
model.save_model(f"{job_dir}/model.json")

[0]	train-logloss:0.25120	val-logloss:0.14830
[40]	train-logloss:0.04680	val-logloss:0.00976
[80]	train-logloss:0.03365	val-logloss:0.00812
[120]	train-logloss:0.02828	val-logloss:0.00759
[160]	train-logloss:0.02463	val-logloss:0.00730
[200]	train-logloss:0.02218	val-logloss:0.00722
[240]	train-logloss:0.01995	val-logloss:0.00724
[280]	train-logloss:0.01817	val-logloss:0.00730
[302]	train-logloss:0.01725	val-logloss:0.00729
best score 0.00718 at iteration 203
CPU times: user 6h 19min 45s, sys: 19min 54s, total: 6h 39min 39s
Wall time: 30min 32s


In [7]:
%%time
y_true = val[label].tolist()
y_pred = model.predict(data=dval, iteration_range=(0, model.best_iteration+1))
auc = roc_auc_score(y_true, y_pred, average="macro")
print(f"auc={auc:.4f}")
print(f"y_pred={y_pred.shape}\n{y_pred[:5]}")

auc=0.9940
y_pred=(10492,)
[0.38793376 0.00083771 0.00339701 0.4036299  0.8062436 ]
CPU times: user 1.28 s, sys: 796 ms, total: 2.08 s
Wall time: 160 ms


In [8]:
%%time
scores = model.get_score(importance_type="gain")
assert len(scores)!=0
rows = []
for feature, score in scores.items():
    rows.append({'importance': score, 'feature': feature})
idf = pd.DataFrame.from_records(rows)
idf = idf.sort_values(["importance"], ascending=False, ignore_index=True)
fp = f"{job_dir}/importance.csv"
idf.to_csv(fp, index=True)
print(f"Saved {fp}")
idf.T.head()

Saved models/xgb/20240123_055726/importance.csv
CPU times: user 47.8 ms, sys: 99.9 ms, total: 148 ms
Wall time: 12.8 ms


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427
importance,524.765625,511.723145,463.868713,433.928864,420.624207,411.376556,376.831909,369.976868,353.973694,303.72998,300.026978,292.972626,273.50116,251.780762,244.906372,236.761963,233.246536,233.063965,221.298813,219.814575,210.735504,203.567871,203.105133,193.787521,192.337662,184.755463,183.179443,179.763107,175.643707,174.943481,173.67244,173.267426,170.513885,168.751465,159.221619,145.114731,130.477539,127.348206,127.241875,126.852661,120.087181,119.284294,118.40284,117.862144,116.257812,113.81414,113.317337,111.852013,110.475929,109.773201,107.882355,105.051758,103.165573,100.473457,99.323547,98.405632,95.234634,94.904297,93.865631,93.012596,91.941025,91.332726,91.285942,90.207336,88.597588,88.547523,88.102951,87.686523,86.764938,85.344482,84.515648,84.465714,82.509758,81.518547,80.563408,79.150604,78.165283,77.331863,76.980949,75.765114,75.130424,73.041496,72.34819,71.464958,71.115196,71.012543,70.588257,69.93084,69.667213,69.306229,69.281212,68.988899,67.249023,65.599655,65.349152,64.613182,64.534729,60.430954,59.935326,59.607166,58.929836,58.499546,58.268459,58.234737,56.726597,54.258781,54.163208,53.991737,52.952942,52.794891,51.508415,51.408188,51.156036,51.139626,50.724384,50.72052,50.342949,50.043159,50.038261,49.429996,49.394344,49.223888,48.869247,48.663818,48.474545,47.103096,46.62048,45.669006,45.539234,45.451225,44.604065,44.273502,43.827038,43.379868,43.36282,42.854084,42.773014,42.372299,42.226044,41.990639,41.539547,41.212307,41.129192,40.773094,40.742634,40.605923,39.848595,39.820763,38.893349,37.985046,37.832687,37.756157,37.26749,36.859478,36.625847,35.936344,35.843155,34.803265,34.671238,34.4146,34.061047,32.980579,32.794315,32.75325,32.717041,31.897552,31.12476,30.753656,30.680832,30.667295,30.456955,30.144304,30.054623,30.03529,29.197378,28.645521,28.385344,27.851171,27.672712,27.265966,26.963526,26.73008,25.991659,25.90309,25.696648,25.401455,25.369764,25.284649,25.201649,25.158321,24.689684,23.954996,23.879873,23.708439,23.152313,22.
931667,22.556648,22.513037,22.4916,22.263689,22.114256,22.103745,21.665476,21.634214,21.068371,20.703932,20.572317,20.049183,20.02701,19.925823,19.780899,19.773476,19.742691,19.655207,19.496063,19.416416,19.409914,19.35713,19.056726,18.957182,18.927912,18.691212,18.581039,18.419609,18.121786,17.723103,17.592213,17.568398,17.342649,17.34165,17.150335,17.149137,17.036081,16.875542,16.707352,16.546499,16.478489,16.430897,16.430199,16.107883,16.015011,15.940613,15.876711,15.816297,15.303955,15.27786,15.244934,15.236691,14.93186,14.776999,14.732787,14.611173,14.575792,14.490128,14.465597,14.415238,14.159584,14.143925,14.050949,13.571251,13.496379,13.299495,13.173915,13.155909,13.012894,12.984392,12.854327,12.808397,12.801032,12.792909,12.449802,12.33784,12.039426,12.029979,11.974125,11.61098,11.539689,11.329334,11.209668,10.959936,10.922009,10.909601,10.638885,10.62007,10.582934,10.24295,10.196796,10.101044,9.931144,9.865816,9.846513,9.834991,9.769633,9.607177,9.56495,9.425009,9.420202,9.410563,9.406868,9.386194,9.240388,9.196784,9.192579,9.098709,9.03267,8.88895,8.869873,8.79985,8.799465,8.7561,8.637238,8.597653,8.492411,8.464831,8.43734,8.292707,8.228883,8.192965,8.091846,8.089886,8.023416,7.945129,7.71711,7.716771,7.709085,7.606255,7.507761,7.504264,7.384538,7.348444,7.339626,7.334103,7.323523,7.302694,7.300293,7.269935,7.084666,7.026788,6.929798,6.842355,6.836263,6.775718,6.748163,6.703769,6.655221,6.605691,6.568414,6.512507,6.495388,6.469895,6.357089,6.311851,6.282691,6.278307,6.271485,6.162516,6.090238,6.07077,5.911883,5.810243,5.798433,5.776344,5.716887,5.705674,5.685127,5.636733,5.619372,5.616952,5.611627,5.562146,5.520109,5.445625,5.419775,5.385602,5.330821,5.29848,5.165844,5.136758,4.980499,4.919892,4.67168,4.52014,4.449415,4.397775,4.326114,4.290195,4.286813,4.238537,4.156235,4.038404,4.02773,3.993826,3.922204,3.919786,3.716046,3.549574,3.545495,3.52509,3.516882,3.502821,3.442323,3.39757,3.36084,3.345551,3.315289,3.301518,3.295673,3.275577,3.235929,3.234998,3.
18532,3.108246,2.992285,2.832838,2.623542,2.563537,2.51226,2.380589,2.330592,2.287354,2.126052,1.913824,1.893314,1.668793,1.565644,1.521978,1.137695,0.855891
feature,tf_Ġlimiting_Ġcar_Ġusage_Ġcan,tf_Ġam_Ġwriting_Ġto,tf_Ġit_Ġis_Ġlike,tf_Ġan_Ġ8_th,tf_Ġsincerely_Ġyour_Ġname,tf_Ġit_Ġis_Ġimportant_Ġto,ts_lexicon_count,tf_Ġthis_Ġessay_Ġi_Ġwill,ws_sent_len_std,tf_Ġas_Ġan_Ġ8_th,tf_Ġlet_Ġus_Ġtalk,tf_th_Ġgrade_Ġstudent,tf_Ġcan_Ġlead_Ġto,ch_digit_frac,tf_Ġdo_Ġyou_Ġagree,tf_Ġi_Ġam_Ġwriting_Ġto,tf_Ġit_Ġis_Ġimportant,tf_Ġaddress_Ġcity_Ġstate_Ġzip,tf_Ġi_Ġknow_Ġsome,tf_Ġaverage_Ġ8_th,tf_Ġit_Ġis_Ġnot_Ġjust,tf_Ġlimiting_Ġcar_Ġusage,tf_Ġlet_Ġus_Ġtalk_Ġabout,tf_Ġhey_Ġthere_Ġso,tf_Ġin_Ġconclusion_Ġwhile,tf_Ġcell_Ġphones_Ġin_Ġany,tf_Ġnumber_Ġof_Ġcars,tf_Ġ8_th_Ġgrader,tf_Ġto_Ġtake_Ġrisks,tf_Ġcan_Ġhelp_Ġus,tf_Ġto_Ġexpress_Ġmy,tf_Ġcan_Ġbe_Ġdifficult,tf_Ġa_Ġpositive_Ġimpact,tf_Ġthe_Ġnumber_Ġof_Ġcars,tf_Ġlearn_Ġand_Ġgrow,tf_Ġand_Ġgive_Ġyour,tf_Ġin_Ġthis_Ġessay_Ġi,tf_Ġare_Ġmore_Ġlikely_Ġto,tf_Ġit_Ġis_Ġup_Ġto,tf_Ġas_Ġan_Ġeighth,ts_sentence_count,tf_Ġstudies_Ġhave_Ġshown,tf_Ġthis_Ġis_Ġespecially,tf_Ġcan_Ġhelp_Ġto,tf_Ġgreenhouse_Ġgas_Ġemissions,tf_Ġa_Ġpositive_Ġimpact_Ġon,tf_Ġto_Ġwhat_Ġextent,tf_Ġachieve_Ġtheir_Ġgoals,tf_Ġgrade_Ġstudent_Ġi,tf_Ġoverall_Ġi_Ġbelieve_Ġthat,tf_Ġthere_Ġare_Ġa_Ġfew,tf_Ġmake_Ġit_Ġdifficult,tf_Ġa_Ġsignificant_Ġimpact,tf_Ġallows_Ġus_Ġto,tf_Ġnew_Ġthings_Ġand,ch_upper_frac,tf_Ġviews_Ġand_Ġgive,tf_Ġthe_Ġelectoral_Ġcollege,tf_Ġbe_Ġdifficult_Ġto,tf_Ġnot_Ġjust_Ġabout,ts_smog_index,tf_Ġhere_Ġis_Ġmy,tf_Ġthere_Ġare_Ġalso,tf_Ġcan_Ġbe_Ġa,tf_Ġcan_Ġalso_Ġhelp,tf_Ġthe_Ġface_Ġon_Ġmars,tf_Ġmake_Ġinformed_Ġdecisions,tf_Ġa_Ġsignificant_Ġimpact_Ġon,tf_Ġis_Ġalso_Ġimportant,tf_Ġto_Ġsum_Ġup,tf_Ġsome_Ġargue_Ġthat,tf_Ġon_Ġone_Ġhand,tf_Ġmore_Ġlikely_Ġto,tf_Ġare_Ġessential_Ġfor,tf_Ġsome_Ġpeople_Ġmight,tf_Ġthe_Ġrise_Ġof,tf_Ġhere_Ġare_Ġsome,tf_Ġprovide_Ġstudents_Ġwith,tf_Ġfor_Ġstudents_Ġwho,tf_Ġimportant_Ġto_Ġremember_Ġthat,tf_Ġpoint_Ġof_Ġview,tf_Ġand_Ġit_Ġis,tf_Ġi_Ġbelieve_Ġthat,tf_Ġmay_Ġnot_Ġhave,tf_Ġstudies_Ġhave_Ġshown_Ġthat,tf_Ġon_Ġthe_Ġone,tf_Ġis_Ġessential_Ġfor,tf_Ġthis_Ġessay_Ġwill,tf_Ġthere_Ġare_Ġseveral_Ġreasons,tf_Ġas_Ġa_Ġresult,tf_Ġi_Ġam_Ġwrit
ing,tf_Ġthis_Ġessay_Ġi,tf_Ġthe_Ġpresident_Ġof_Ġthe,tf_Ġare_Ġmany_Ġreasons,tf_Ġmost_Ġof_Ġthe,tf_Ġagree_Ġwith_Ġthis_Ġstatement,tf_Ġthe_Ġworld_Ġaround,tf_Ġbecause_Ġof_Ġthe,tf_Ġfor_Ġexample_Ġif,tf_Ġhowever_Ġi_Ġbelieve_Ġthat,tf_Ġagree_Ġor_Ġdisagree,tf_Ġall_Ġin_Ġall,tf_Ġthis_Ġcan_Ġbe,tf_Ġfor_Ġthese_Ġreasons,tf_Ġreasons_Ġfor_Ġthis,tf_Ġin_Ġa_Ġgroup,tf_Ġwriting_Ġto_Ġexpress,tf_Ġtry_Ġnew_Ġthings,tf_Ġthis_Ġcan_Ġhelp,tf_Ġin_Ġa_Ġway_Ġthat,tf_Ġthe_Ġseagoing_Ġcowboys_Ġprogram,tf_Ġdo_Ġcommunity_Ġservice,tf_Ġopportunity_Ġto_Ġlearn,tf_Ġfriends_Ġand_Ġfamily,tf_Ġmore_Ġtime_Ġto,tf_Ġin_Ġother_Ġwords,tf_Ġto_Ġconnect_Ġwith,tf_Ġhard_Ġwork_Ġand,tf_Ġconclusion_Ġi_Ġbelieve_Ġthat,tf_Ġa_Ġsense_Ġof,tf_Ġcan_Ġcreate_Ġa,tf_Ġthey_Ġmay_Ġnot,tf_Ġmay_Ġargue_Ġthat,tf_Ġthe_Ġpresident_Ġof,ws_sent_len_delta_std,tf_Ġwhile_Ġit_Ġis,ch_letter_frac,tf_Ġhas_Ġled_Ġto,tf_Ġto_Ġconsider_Ġthe,tf_Ġa_Ġgreat_Ġway_Ġto,tf_Ġand_Ġmake_Ġa,tf_Ġare_Ġa_Ġfew,tf_Ġthe_Ġway_Ġwe,tf_Ġcan_Ġalso_Ġbe,tf_Ġcould_Ġlead_Ġto,tf_Ġthink_Ġabout_Ġit,ts_coleman_liau_index,tf_Ġthe_Ġbenefits_Ġof,tf_Ġimportant_Ġfor_Ġstudents,tf_Ġthe_Ġfact_Ġthat,tf_Ġthe_Ġface_Ġon,tf_Ġhave_Ġaccess_Ġto,tf_Ġis_Ġvery_Ġimportant,tf_Ġface_Ġon_Ġmars,va_valence_mean,tf_Ġthe_Ġpotential_Ġto,tf_Ġmay_Ġnot_Ġbe,tf_Ġfor_Ġexample_Ġa,tf_Ġa_Ġbetter_Ġunderstanding_Ġof,tf_Ġpositive_Ġimpact_Ġon,tf_Ġessay_Ġi_Ġwill,tf_Ġmake_Ġa_Ġdifference,tf_Ġis_Ġthat_Ġit,tf_Ġthere_Ġare_Ġmany_Ġreasons,tf_Ġit_Ġcomes_Ġto,tf_Ġto_Ġbegin_Ġwith,tf_Ġcan_Ġbe_Ġbeneficial,tf_Ġthere_Ġis_Ġa,ch_len,tf_Ġthat_Ġis_Ġthe,tf_Ġit_Ġis_Ġessential,tf_Ġis_Ġan_Ġessential,tf_Ġall_Ġof_Ġthe,tf_Ġit_Ġis_Ġnot,tf_Ġin_Ġthis_Ġcase,tf_Ġis_Ġimportant_Ġthat,tf_Ġthe_Ġimportance_Ġof,tf_Ġanother_Ġreason_Ġwhy,tf_Ġdue_Ġto_Ġthe,tf_Ġthey_Ġcan_Ġalso,tf_Ġin_Ġthe_Ġfirst,tf_Ġthe_Ġone_Ġhand,tf_Ġit_Ġis_Ġnecessary,ch_punc_frac,tf_Ġthe_Ġcost_Ġof,tf_Ġat_Ġleast_Ġone,tf_Ġdear_Ġprincipal_Ġi,tf_Ġone_Ġof_Ġthe_Ġmain,tf_Ġa_Ġpart_Ġof,tf_Ġin_Ġconclusion_Ġi,tf_Ġto_Ġsucceed_Ġin,tf_Ġgreat_Ġway_Ġto,tf_Ġpros_Ġand_Ġcons,tf_Ġat_Ġthe_Ġend,tf_Ġto_Ġlearn_Ġfrom,tf_Ġin_Ġconclusio
n_Ġthe,tf_Ġthis_Ġmeans_Ġthat,tf_Ġthe_Ġneed_Ġfor,tf_Ġis_Ġnot_Ġalways,tf_Ġit_Ġcan_Ġalso,tf_Ġis_Ġmore_Ġimportant,tf_Ġis_Ġessential_Ġto,tf_Ġand_Ġthey_Ġare,ts_polysyllable_frac,tf_Ġhowever_Ġthere_Ġare,tf_Ġthe_Ġmajority_Ġof,tf_Ġthe_Ġsame_Ġtime,tf_Ġseagoing_Ġcowboys_Ġprogram,tf_Ġon_Ġthe_Ġroad,tf_Ġthe_Ġrest_Ġof,ts_dale_chall_readability_score,tf_Ġdo_Ġyou_Ġthink,tf_Ġof_Ġthe_Ġmost,ts_automated_readability_index,tf_Ġthat_Ġi_Ġhave,tf_Ġthis_Ġis_Ġbecause,tf_Ġto_Ġbe_Ġthe,tf_Ġin_Ġthis_Ġessay,tf_Ġyou_Ġhave_Ġto,tf_Ġhowever_Ġthere_Ġare_Ġalso,tf_Ġand_Ġit_Ġis_Ġnot,tf_Ġfor_Ġexample_Ġwhen,tf_Ġwhether_Ġor_Ġnot,tf_Ġthey_Ġhave_Ġto,tf_Ġin_Ġconclusion_Ġi_Ġbelieve,va_dominance_std,tf_Ġthis_Ġcan_Ġlead,tf_Ġhave_Ġto_Ġbe,tf_Ġis_Ġan_Ġimportant,tf_Ġwe_Ġneed_Ġto,tf_Ġfor_Ġstudents_Ġto,tf_Ġin_Ġmy_Ġopinion,tf_Ġpart_Ġof_Ġour,tf_Ġa_Ġvariety_Ġof,tf_Ġbut_Ġit_Ġis,tf_Ġall_Ġof_Ġthese,tf_Ġin_Ġrecent_Ġyears,tf_Ġfirst_Ġof_Ġall,tf_Ġi_Ġagree_Ġthat,tf_Ġin_Ġthe_Ġunited_Ġstates,tf_Ġto_Ġensure_Ġthat,tf_Ġcan_Ġmake_Ġa,tf_Ġthat_Ġis_Ġwhy,ch_space_frac,tf_Ġmore_Ġand_Ġmore,tf_Ġto_Ġlearn_Ġand,tf_Ġwill_Ġbe_Ġa,tf_Ġwhether_Ġit_Ġis,tf_Ġbased_Ġon_Ġthe,tf_Ġis_Ġone_Ġof,tf_Ġthe_Ġprocess_Ġof,tf_Ġare_Ġalso_Ġsome,tf_Ġin_Ġtoday_Ġs,tf_Ġthey_Ġare_Ġmore,tf_Ġthe_Ġunited_Ġstates,ts_difficult_words,tf_Ġit_Ġcan_Ġhelp,tf_Ġin_Ġaddition_Ġto,tf_Ġthat_Ġcan_Ġhelp,tf_Ġi_Ġwould_Ġlike,tf_Ġthe_Ġrisk_Ġof,tf_Ġit_Ġwill_Ġbe,tf_Ġi_Ġam_Ġnot,tf_Ġon_Ġthe_Ġother,tf_Ġthe_Ġother_Ġhand,tf_Ġthat_Ġthis_Ġis,tf_Ġunderstanding_Ġof_Ġthe,tf_Ġthat_Ġyou_Ġare,tf_Ġcan_Ġhave_Ġa,tf_Ġimpact_Ġon_Ġthe,tf_Ġthe_Ġtime_Ġto,tf_Ġthey_Ġare_Ġnot,ts_syllables_per_sent,tf_Ġthe_Ġlack_Ġof,tf_Ġand_Ġhow_Ġto,tf_Ġlead_Ġto_Ġa,tf_Ġit_Ġshould_Ġbe,tf_Ġis_Ġnot_Ġjust,tf_Ġthere_Ġare_Ġseveral,tf_Ġthe_Ġopportunity_Ġto,tf_Ġit_Ġdoes_Ġnot,tf_Ġwas_Ġable_Ġto,tf_Ġtake_Ġcare_Ġof,tf_Ġas_Ġwell_Ġas,tf_Ġa_Ġlot_Ġof,tf_Ġlearn_Ġmore_Ġabout,tf_Ġwith_Ġthis_Ġstatement,tf_Ġbelieve_Ġthat_Ġthe,tf_Ġi_Ġwant_Ġto,tf_Ġto_Ġbe_Ġa,tf_Ġin_Ġthe_Ġfuture,ts_linsear_write_formula,va_arousal_mean,tf_Ġdo_Ġnot_Ġwant,tf_Ġto_Ġtake_Ġa,va_dominan
ce_mean,tf_Ġi_Ġwill_Ġbe,tf_Ġit_Ġis_Ġalso,ts_monosyllable_frac,tf_Ġin_Ġmy_Ġlife,tf_Ġfor_Ġexample_Ġthe,tf_Ġfor_Ġme_Ġto,tf_Ġbecause_Ġit_Ġis,tf_Ġlikely_Ġto_Ġbe,tf_Ġon_Ġthe_Ġother_Ġhand,tf_Ġto_Ġlearn_Ġabout,tf_Ġin_Ġa_Ġway,tf_Ġif_Ġyou_Ġare,tf_Ġwhich_Ġcan_Ġlead,tf_Ġthere_Ġare_Ġa,ts_flesch_reading_ease,tf_Ġa_Ġstudent_Ġis,tf_Ġi_Ġagree_Ġwith,ts_flesch_kincaid_grade,tf_Ġit_Ġis_Ġclear,tf_Ġwould_Ġnot_Ġbe,ts_syllable_count,tf_Ġhas_Ġbeen_Ġa,tf_Ġwant_Ġto_Ġdo,ts_syllables_per_word,tf_Ġit_Ġcan_Ġbe,tf_Ġand_Ġi_Ġam,tf_Ġthat_Ġcan_Ġbe,tf_Ġto_Ġbe_Ġmore,tf_Ġwhen_Ġthey_Ġare,tf_Ġat_Ġthe_Ġsame,tf_Ġto_Ġfocus_Ġon,tf_Ġaccording_Ġto_Ġthe,tf_Ġto_Ġparticipate_Ġin,ch_repeat_char_frac,tf_Ġin_Ġorder_Ġto,tf_Ġwhich_Ġcan_Ġbe,tf_Ġthank_Ġyou_Ġfor,tf_Ġin_Ġthe_Ġunited,tf_Ġsuch_Ġas_Ġthe,tf_Ġis_Ġimportant_Ġfor,tf_Ġwhen_Ġit_Ġcomes_Ġto,tf_Ġpeople_Ġthink_Ġthat,tf_Ġcan_Ġbe_Ġused,tf_Ġthere_Ġare_Ġsome,tf_Ġto_Ġmake_Ġa,tf_Ġstudents_Ġshould_Ġbe,tf_Ġfor_Ġpeople_Ġto,tf_Ġfor_Ġexample_Ġin,tf_Ġam_Ġwriting_Ġto_Ġexpress,tf_Ġpeople_Ġwho_Ġare,tf_Ġto_Ġgo_Ġto,tf_Ġthe_Ġlong_Ġrun,tf_Ġthe_Ġdevelopment_Ġof,ws_sent_len_mean,tf_Ġthere_Ġare_Ġmany,tf_Ġcome_Ġup_Ġwith,tf_Ġare_Ġgoing_Ġto,ts_mcalpine_eflaw,ts_gunning_fog,tf_Ġthat_Ġis_Ġnot,tf_Ġthe_Ġseagoing_Ġcowboys,tf_Ġto_Ġdeal_Ġwith,tf_Ġthe_Ġmost_Ġimportant,tf_Ġpart_Ġof_Ġthe,tf_Ġone_Ġof_Ġthe,tf_Ġthat_Ġit_Ġis,va_arousal_std,tf_Ġto_Ġhave_Ġa,tf_Ġwill_Ġnot_Ġbe,tf_Ġbe_Ġable_Ġto,tf_Ġthey_Ġwant_Ġto,tf_Ġyou_Ġdo_Ġnot,tf_Ġnot_Ġbe_Ġable_Ġto,tf_Ġto_Ġdo_Ġit,tf_Ġi_Ġdo_Ġnot,tf_Ġthe_Ġpros_Ġand_Ġcons,va_valence_std,tf_Ġyou_Ġneed_Ġto,tf_Ġdo_Ġnot_Ġhave,tf_Ġpeople_Ġdo_Ġnot,tf_Ġi_Ġhave_Ġalways,tf_Ġare_Ġmore_Ġlikely,tf_Ġi_Ġthink_Ġit_Ġis,tf_Ġsome_Ġof_Ġthe,tf_Ġit_Ġis_Ġa,ts_spache_readability,tf_Ġthink_Ġit_Ġis,ws_sent_len_delta_mean,tf_Ġin_Ġthe_Ġpast,tf_Ġreason_Ġis_Ġthat,tf_Ġpeople_Ġbelieve_Ġthat,ts_words_per_sent,tf_Ġto_Ġget_Ġa,tf_Ġgoing_Ġto_Ġbe,tf_Ġwould_Ġlike_Ġto,tf_Ġsome_Ġpeople_Ġbelieve,tf_Ġin_Ġterms_Ġof,tf_Ġnot_Ġwant_Ġto,tf_Ġwhen_Ġyou_Ġare,tf_Ġshould_Ġnot_Ġbe,tf_Ġif_Ġthey_Ġare,tf_Ġa_Ġgreat_Ġway,tf_Ġthank_Ġyou_
Ġfor_Ġyour,tf_Ġdo_Ġnot_Ġknow,tf_Ġis_Ġa_Ġgreat,tf_Ġthe_Ġnumber_Ġof,tf_Ġsome_Ġpeople_Ġbelieve_Ġthat,tf_Ġwhen_Ġi_Ġwas,tf_Ġthere_Ġis_Ġno,tf_Ġthis_Ġis_Ġa,tf_Ġto_Ġcreate_Ġa,tf_Ġis_Ġnot_Ġa,tf_Ġit_Ġis_Ġthe,tf_Ġwe_Ġdo_Ġnot,tf_Ġis_Ġa_Ġgood,tf_Ġyou_Ġwant_Ġto,tf_Ġbelieve_Ġthat_Ġit,tf_Ġthey_Ġneed_Ġto,tf_Ġthey_Ġcan_Ġbe,tf_Ġbeing_Ġable_Ġto,tf_Ġi_Ġthink_Ġthat,tf_Ġthat_Ġthey_Ġare,tf_Ġcan_Ġhelp_Ġyou,tf_Ġthey_Ġdo_Ġnot,tf_Ġit_Ġwould_Ġbe,tf_Ġcan_Ġlead_Ġto_Ġa,tf_Ġnot_Ġhave_Ġto,tf_Ġthe_Ġamount_Ġof,tf_Ġthe_Ġuse_Ġof,tf_Ġwould_Ġbe_Ġa,tf_Ġin_Ġthe_Ġworld,tf_Ġfor_Ġthem_Ġto,tf_Ġto_Ġmake_Ġthe,tf_Ġof_Ġthe_Ġworld,tf_Ġon_Ġthe_Ġone_Ġhand,tf_Ġi_Ġthink_Ġit,tf_Ġi_Ġdo_Ġnot_Ġthink,tf_Ġconclusion_Ġi_Ġbelieve,tf_Ġto_Ġbe_Ġable,tf_Ġa_Ġnumber_Ġof,tf_Ġexamples_Ġof_Ġhow,tf_Ġi_Ġbelieve_Ġthat_Ġthe


In [9]:
# Stop the timer started in the first cell and report the elapsed time.
tim.stop()
print(f"Total time taken {tim.elapsed!s}")

Total time taken 0:33:27.300883
