# seq2seq

In [13]:
import gc
import sys
import os
import warnings
from tqdm import tqdm

# Put the parent directory on the import path; presumably this is where the
# project packages (`modules`, `fe_modules`, `seq2seq_modules`) live.
sys.path.append(os.path.abspath("../"))
# Restrict OpenBLAS to a single thread.
# NOTE(review): presumably required by a BLAS-backed dependency such as
# `implicit` - confirm it is still needed.
os.environ['OPENBLAS_NUM_THREADS'] = '1'
# Silences every warning for the rest of the session.
warnings.filterwarnings('ignore')
# Registers tqdm's progress-bar integration with pandas.
tqdm.pandas()

In [14]:
import pandas as pd
import numpy as np
import time
import polars as pl
import scipy
import implicit
import bisect
import sklearn.metrics as m
from catboost import CatBoostClassifier, CatBoostRegressor, Pool

from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve, CalibratedClassifierCV
from sklearn.utils.class_weight import compute_class_weight

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import nn

from transformers import get_constant_schedule

In [15]:
import importlib

import modules
import fe_modules
import seq2seq_modules

# Re-import the project packages so that code edited on disk is picked up
# without restarting the kernel.
# NOTE(review): `importlib.reload` on a package is not recursive, so already
# imported submodules (e.g. `modules.memory_utils`) may stay stale - confirm,
# or consider `%autoreload 2` instead.
importlib.reload(modules)
importlib.reload(fe_modules)
importlib.reload(seq2seq_modules)

from modules.memory_utils import pandas_reduce_mem_usage, pandas_string_to_cat
from seq2seq_modules.models import LSTMModel
from seq2seq_modules.weight_initialization import weights_init_uniform_rule
from seq2seq_modules.loops import cross_validation, single_model_training
from seq2seq_modules.utils import age_bucket
from seq2seq_modules.metrics import AGE_METRIC
from fe_modules.encoders import CatBoostEncoderWrapper

In [16]:
def my_reset(*varnames):
    """
    Wipe the interactive namespace, keeping only the requested globals.

    varnames are the names of the global variables you want to keep;
    ``my_reset`` itself is always kept. Everything else - including imports
    and constants - is removed, so list whatever later cells still need.

    Raises KeyError (before anything is deleted) if a requested name is not
    defined in the global namespace.

    The reset is forced (``%reset -f``) so the cell never blocks on a
    confirmation prompt, which would break "Restart & Run All".
    """
    namespace = globals()
    # Snapshot the objects to keep *before* the namespace is cleared.
    to_save = {name: namespace[name] for name in varnames}
    to_save['my_reset'] = my_reset  # lets keep this function by default
    del namespace
    # `InteractiveShell.magic` is deprecated; `run_line_magic` is the
    # supported way to invoke a line magic programmatically.
    get_ipython().run_line_magic("reset", "-f")
    globals().update(to_save)

## Read and process

In [23]:
# Data locations, relative to the notebook's working directory.
LOCAL_DATA_PATH = '../data/'  # `public_train.pqt` (labels) is read from here
SEQ2SEQ_DATA_PATH = '../seq2seq_data/'  # staged / encoded event-level frames

# NOTE(review): not referenced in the visible part of this notebook.
SPLIT_SEED = 42

In [6]:
# Load the stage-2 event-level frame and pass it through the project's
# memory-reduction helper (it reports memory usage before and after).
# SEQ2SEQ_DATA_PATH already ends with "/", so the path contains a doubled
# slash.
df = pandas_reduce_mem_usage(
    pd.read_parquet(f"{SEQ2SEQ_DATA_PATH}/stages/stage_2.parquet.gzip")
)
df.head()

Memory usage of dataframe is 26346.12 MB


  0%|          | 0/30 [00:00<?, ?it/s]

Memory usage after optimization is: 26346.12 MB
Decreased by 0.0%


Unnamed: 0,region_name,city_name,cpe_manufacturer_name,cpe_model_name,url_host,cpe_type_cd,cpe_model_os_type,price,part_of_day,request_cnt,...,timezone,geo_lat,geo_lon,population,dist_to_Moscow,dist_to_SaintP,dist_to_Novosibirsk,dist_to_Ekaterinburg,dist_to_Vladivostok,domain
0,21,409,1,589,5788,2,1,20368.0,2,1,...,3,45.040161,38.975964,744933,1195.817871,1755.62085,3275.137451,1992.558472,6999.525391,761
1,21,409,1,589,12900,2,1,20368.0,2,1,...,3,45.040161,38.975964,744933,1195.817871,1755.62085,3275.137451,1992.558472,6999.525391,549
2,21,409,1,589,17626,2,1,20368.0,0,1,...,3,45.040161,38.975964,744933,1195.817871,1755.62085,3275.137451,1992.558472,6999.525391,712
3,21,409,1,589,59366,2,1,20368.0,0,1,...,3,45.040161,38.975964,744933,1195.817871,1755.62085,3275.137451,1992.558472,6999.525391,712
4,21,409,1,589,59366,2,1,20368.0,0,1,...,3,45.040161,38.975964,744933,1195.817871,1755.62085,3275.137451,1992.558472,6999.525391,712


In [7]:
# Load the gender label per user and keep only users with a usable value.
# Built as one chain so that no column is assigned on a filtered slice
# (the pattern that triggers pandas' SettingWithCopyWarning).
target = (
    pd.read_parquet(
        f'{LOCAL_DATA_PATH}public_train.pqt', columns=["user_id", "is_male"]
    )
    .dropna()
    # The comparison implies that missing labels may also be stored as the
    # literal string "NA"; drop those rows as well.
    .loc[lambda labels: labels["is_male"] != "NA"]
    .assign(is_male=lambda labels: labels["is_male"].astype(np.int32))
)
target = pandas_reduce_mem_usage(target)

target.head()

Memory usage of dataframe is 5.04 MB


  0%|          | 0/2 [00:00<?, ?it/s]

Memory usage after optimization is: 3.28 MB
Decreased by 35.0%


Unnamed: 0,user_id,is_male
350459,350459,1
188276,188276,1
99002,99002,0
155506,155506,0
213873,213873,0


In [8]:
# Attach the label to every event row. The left join leaves NaN in `is_male`
# for users that are absent from `target`; those rows are dropped right away,
# so only events of labelled users remain.
df = df.merge(target, how="left", on="user_id").dropna(subset=["is_male"])
# Cast the label back to an integer dtype after the NaN rows are gone.
df["is_male"] = df["is_male"].astype(np.int32)
df = pandas_reduce_mem_usage(df)
df.head()

Memory usage of dataframe is 17680.39 MB


  0%|          | 0/31 [00:00<?, ?it/s]

Memory usage after optimization is: 17077.65 MB
Decreased by 3.4%


Unnamed: 0,region_name,city_name,cpe_manufacturer_name,cpe_model_name,url_host,cpe_type_cd,cpe_model_os_type,price,part_of_day,request_cnt,...,geo_lat,geo_lon,population,dist_to_Moscow,dist_to_SaintP,dist_to_Novosibirsk,dist_to_Ekaterinburg,dist_to_Vladivostok,domain,is_male
2272,60,732,27,211,5790,2,0,74259.0,3,1,...,59.939133,30.315901,4848742,635.758972,0.024454,3115.404541,1787.926147,6555.964355,761,1
2273,60,732,27,211,65865,2,0,74259.0,0,3,...,59.939133,30.315901,4848742,635.758972,0.024454,3115.404541,1787.926147,6555.964355,549,1
2274,60,732,27,211,111474,2,0,74259.0,0,3,...,59.939133,30.315901,4848742,635.758972,0.024454,3115.404541,1787.926147,6555.964355,761,1
2275,14,311,27,211,111474,2,0,74259.0,1,2,...,54.70747,20.507324,431491,1092.055054,828.233154,3870.873535,2490.750244,7378.328613,761,1
2276,60,732,27,211,125409,2,0,74259.0,0,1,...,59.939133,30.315901,4848742,635.758972,0.024454,3115.404541,1787.926147,6555.964355,549,1


## CatBoostEncoder

In [11]:
# Release what is no longer needed before the memory-hungry encoding step.
# A full namespace reset is deliberately avoided here: it would also delete
# the imports and constants that the following cells rely on
# (`CatBoostEncoderWrapper`, `SEQ2SEQ_DATA_PATH`, ...) and would stop on an
# interactive confirmation prompt.
del target  # the label is already merged into `df`
gc.collect();

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [17]:
# Column that identifies a user.
# NOTE(review): `ids` is not referenced in the visible part of this notebook.
ids = ["user_id"]

# Categorical columns; this list is handed to the CatBoost encoder below.
cat_features = [
    "region_name",
    "city_name",
    "cpe_manufacturer_name",
    "cpe_model_name",
    "url_host",
    "cpe_type_cd",
    "cpe_model_os_type",
    "part_of_day",
    "domain",
    "capital_marker"
]

# Numeric columns. The name is misspelled ("continous"), but it is referenced
# under this spelling elsewhere in the notebook, so it is kept as is.
continous_features = [
    "request_cnt",
    "price",
    "timestamp",
    "relative_timestamp",
    "geo_lat",
    "geo_lon",
    "population",
    "timezone",
    "dist_to_Moscow",
    "dist_to_SaintP",
    "dist_to_Novosibirsk",
    "dist_to_Ekaterinburg",
    "dist_to_Vladivostok",
]

In [18]:
# Project wrapper around a CatBoost-style target encoder for the categorical
# columns. `sort_col` presumably names the column the rows are ordered by
# before encoding - TODO confirm in `fe_modules.encoders`.
cbe = CatBoostEncoderWrapper(cat_features=cat_features, sort_col="timestamp")

In [19]:
# Fit the encoder against `is_male` and rebind `df` to the transformed frame.
# NOTE(review): the encoder is fitted on all labelled users before the
# cross-validation further down, and every row of a user carries the same
# label - check whether this leaks the target into the validation folds.
df = cbe.fit_transform(df, target_col="is_male")

In [20]:
# Quick look at the encoded frame (the categorical columns should now hold
# floats instead of integer codes).
df.head()

Unnamed: 0,region_name,city_name,cpe_manufacturer_name,cpe_model_name,url_host,cpe_type_cd,cpe_model_os_type,price,part_of_day,request_cnt,...,geo_lat,geo_lon,population,dist_to_Moscow,dist_to_SaintP,dist_to_Novosibirsk,dist_to_Ekaterinburg,dist_to_Vladivostok,domain,is_male
31997270,0.579374,0.579374,0.497506,0.432235,0.552377,0.520281,0.497506,62682.0,0.558678,1,...,55.754047,37.620403,11514330,0.264119,636.019714,2820.210938,1420.975342,6433.896484,0.516162,0
81207635,0.498263,0.509906,0.497506,0.503072,0.357407,0.520281,0.497506,46582.0,0.558678,1,...,58.010258,56.234203,1000679,1158.34314,1496.257812,1664.177246,292.776245,5297.291504,0.520648,1
146145825,0.498263,0.464282,0.497506,0.503072,0.51875,0.520281,0.497506,50924.0,0.558678,1,...,59.407993,56.803944,156350,1212.513672,1483.777954,1639.137207,363.076721,5223.483398,0.516162,0
135108815,0.486671,0.501678,0.497506,0.544257,0.918161,0.520281,0.497506,26516.0,0.558678,1,...,51.533558,46.034256,836900,727.582153,1354.061401,2461.177979,1115.767944,6176.562012,0.520648,1
81208091,0.498263,0.509906,0.497506,0.503072,0.172348,0.520281,0.497506,46582.0,0.558678,1,...,58.010258,56.234203,1000679,1158.34314,1496.257812,1664.177246,292.776245,5297.291504,0.520648,1


In [21]:
# Persist the fitted encoder under ../model_zoo with the name "is_male_cbe".
cbe.save("../model_zoo", "is_male_cbe")

In [24]:
# Store the encoded frame; the "Make torch Dataset" section reads this file
# back. SEQ2SEQ_DATA_PATH already ends with "/", hence the doubled slash.
df.to_parquet(f"{SEQ2SEQ_DATA_PATH}/version_5_is_male.parquet.gzip",
              compression='gzip')

## Make torch Dataset

In [1]:
import gc
import sys
import os
import warnings
from tqdm import tqdm

# Put the parent directory on the import path; presumably this is where the
# project packages (`modules`, `fe_modules`, `seq2seq_modules`) live.
sys.path.append(os.path.abspath("../"))
# Restrict OpenBLAS to a single thread.
# NOTE(review): presumably required by a BLAS-backed dependency such as
# `implicit` - confirm it is still needed.
os.environ['OPENBLAS_NUM_THREADS'] = '1'
# Silences every warning for the rest of the session.
warnings.filterwarnings('ignore')
# Registers tqdm's progress-bar integration with pandas.
tqdm.pandas()

In [2]:
import pandas as pd
import numpy as np
import time
import polars as pl
import scipy
import implicit
import bisect
import sklearn.metrics as m
from catboost import CatBoostClassifier, CatBoostRegressor, Pool

from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve, CalibratedClassifierCV
from sklearn.utils.class_weight import compute_class_weight

import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import nn

from torch.optim.lr_scheduler import ReduceLROnPlateau
from transformers import get_constant_schedule, get_cosine_schedule_with_warmup

In [3]:
import importlib

import modules
import fe_modules
import seq2seq_modules

# Re-import the project packages so that code edited on disk is picked up
# without restarting the kernel.
# NOTE(review): `importlib.reload` on a package is not recursive, so already
# imported submodules (e.g. `seq2seq_modules.trainer`) may stay stale -
# confirm, or consider `%autoreload 2` instead.
importlib.reload(modules)
importlib.reload(fe_modules)
importlib.reload(seq2seq_modules)

from modules.memory_utils import pandas_reduce_mem_usage, pandas_string_to_cat
from seq2seq_modules.data import TargetDataset
from seq2seq_modules.models import LSTMModel, StarterBERTModel, AttentionPoolingBERTModel
from seq2seq_modules.weight_initialization import weights_init_uniform_rule, weights_init_xavier
from seq2seq_modules.loops import cross_validation, single_model_training
from seq2seq_modules.trainer import CVTrainer
from seq2seq_modules.utils import age_bucket
from seq2seq_modules.metrics import GENDER_METRIC
from seq2seq_modules.utils import fix_random_state

In [4]:
def my_reset(*varnames):
    """
    Wipe the interactive namespace, keeping only the requested globals.

    varnames are the names of the global variables you want to keep;
    ``my_reset`` itself is always kept. Everything else - including imports
    and constants - is removed, so list whatever later cells still need.

    Raises KeyError (before anything is deleted) if a requested name is not
    defined in the global namespace.

    The reset is forced (``%reset -f``) so the cell never blocks on a
    confirmation prompt, which would break "Restart & Run All".
    """
    namespace = globals()
    # Snapshot the objects to keep *before* the namespace is cleared.
    to_save = {name: namespace[name] for name in varnames}
    to_save['my_reset'] = my_reset  # lets keep this function by default
    del namespace
    # `InteractiveShell.magic` is deprecated; `run_line_magic` is the
    # supported way to invoke a line magic programmatically.
    get_ipython().run_line_magic("reset", "-f")
    globals().update(to_save)

In [5]:
# Directory the encoded frame is read from in this section.
# NOTE: the first section of this notebook binds the same name to '../data/'.
LOCAL_DATA_PATH = '../seq2seq_data/'
SEED = 42
# Project helper; presumably seeds the Python / NumPy / torch RNGs -
# TODO confirm in `seq2seq_modules.utils`.
fix_random_state(SEED)

In [6]:
# Column that identifies a user.
# NOTE(review): `ids` is not referenced in the visible part of this notebook.
ids = ["user_id"]

# Originally categorical columns. In this section they are passed to the
# dataset together with the numeric columns (see the `cont_features`
# argument of `TargetDataset` below).
cat_features = [
    "region_name",
    "city_name",
    "cpe_manufacturer_name",
    "cpe_model_name",
    "url_host",
    "cpe_type_cd",
    "cpe_model_os_type",
    "part_of_day",
    "domain",
    "capital_marker"
]

# Numeric columns. The name is misspelled ("continous"), but it is referenced
# under this spelling elsewhere in the notebook, so it is kept as is.
continous_features = [
    "request_cnt",
    "price",
    "timestamp",
    "relative_timestamp",
    "geo_lat",
    "geo_lon",
    "population",
    "timezone",
    "dist_to_Moscow",
    "dist_to_SaintP",
    "dist_to_Novosibirsk",
    "dist_to_Ekaterinburg",
    "dist_to_Vladivostok",
]

In [7]:
# Load the encoded event-level frame written by the CatBoostEncoder section
# and pass it through the project's memory-reduction helper.
# LOCAL_DATA_PATH already ends with "/", so the path contains a doubled slash.
df = pandas_reduce_mem_usage(
    pd.read_parquet(f"{LOCAL_DATA_PATH}/version_5_is_male.parquet.gzip")
)
df.head()

Memory usage of dataframe is 29184.38 MB


  0%|          | 0/31 [00:00<?, ?it/s]

Memory usage after optimization is: 29184.38 MB
Decreased by 0.0%


Unnamed: 0,region_name,city_name,cpe_manufacturer_name,cpe_model_name,url_host,cpe_type_cd,cpe_model_os_type,price,part_of_day,request_cnt,...,geo_lat,geo_lon,population,dist_to_Moscow,dist_to_SaintP,dist_to_Novosibirsk,dist_to_Ekaterinburg,dist_to_Vladivostok,domain,is_male
31997270,0.579374,0.579374,0.497506,0.432235,0.552377,0.520281,0.497506,62682.0,0.558678,1,...,55.754047,37.620403,11514330,0.264119,636.019714,2820.210938,1420.975342,6433.896484,0.516162,0
81207635,0.498263,0.509906,0.497506,0.503072,0.357407,0.520281,0.497506,46582.0,0.558678,1,...,58.010258,56.234203,1000679,1158.34314,1496.257812,1664.177246,292.776245,5297.291504,0.520648,1
146145825,0.498263,0.464282,0.497506,0.503072,0.51875,0.520281,0.497506,50924.0,0.558678,1,...,59.407993,56.803944,156350,1212.513672,1483.777954,1639.137207,363.076721,5223.483398,0.516162,0
135108815,0.486671,0.501678,0.497506,0.544257,0.918161,0.520281,0.497506,26516.0,0.558678,1,...,51.533558,46.034256,836900,727.582153,1354.061401,2461.177979,1115.767944,6176.562012,0.520648,1
81208091,0.498263,0.509906,0.497506,0.503072,0.172348,0.520281,0.497506,46582.0,0.558678,1,...,58.010258,56.234203,1000679,1158.34314,1496.257812,1664.177246,292.776245,5297.291504,0.520648,1


In [8]:
# Force a garbage-collection pass before building the dataset; the displayed
# number is the count of unreachable objects found.
gc.collect()

21

In [9]:
# Build the torch dataset from the event-level frame, presumably one
# sequence per `user_id` ordered by `timestamp` - TODO confirm in
# `seq2seq_modules.data`.
# No categorical features are declared: the originally categorical columns
# are passed together with the numeric ones as continuous inputs.
dataset = TargetDataset(
         df,
         agg_column="user_id", 
         time_column="timestamp",
         target_column="is_male",
         cat_features=[],
         cont_features=continous_features + cat_features,
         max_len=1024,
         padding_side="left",
)
# Values of the aggregation column, later handed to the trainer as `user_ids`.
# NOTE(review): assumed to be aligned with the dataset item order - confirm.
user_ids = dataset.get_agg_col()
dataset

  0%|          | 0/209603031 [00:00<?, ?it/s]

<seq2seq_modules.data.TargetDataset at 0x7f81dd6433a0>

In [10]:
# Preview the frame again after the dataset has been built.
# NOTE(review): `df` is not used by any later live cell in the visible part
# of the notebook; if the dataset keeps no reference to it, it could be freed.
df.head()

Unnamed: 0,region_name,city_name,cpe_manufacturer_name,cpe_model_name,url_host,cpe_type_cd,cpe_model_os_type,price,part_of_day,request_cnt,...,geo_lat,geo_lon,population,dist_to_Moscow,dist_to_SaintP,dist_to_Novosibirsk,dist_to_Ekaterinburg,dist_to_Vladivostok,domain,is_male
31997270,0.579374,0.579374,0.497506,0.432235,0.552377,0.520281,0.497506,62682.0,0.558678,1,...,55.754047,37.620403,11514330,0.264119,636.019714,2820.210938,1420.975342,6433.896484,0.516162,0
81207635,0.498263,0.509906,0.497506,0.503072,0.357407,0.520281,0.497506,46582.0,0.558678,1,...,58.010258,56.234203,1000679,1158.34314,1496.257812,1664.177246,292.776245,5297.291504,0.520648,1
146145825,0.498263,0.464282,0.497506,0.503072,0.51875,0.520281,0.497506,50924.0,0.558678,1,...,59.407993,56.803944,156350,1212.513672,1483.777954,1639.137207,363.076721,5223.483398,0.516162,0
135108815,0.486671,0.501678,0.497506,0.544257,0.918161,0.520281,0.497506,26516.0,0.558678,1,...,51.533558,46.034256,836900,727.582153,1354.061401,2461.177979,1115.767944,6176.562012,0.520648,1
81208091,0.498263,0.509906,0.497506,0.503072,0.172348,0.520281,0.497506,46582.0,0.558678,1,...,58.010258,56.234203,1000679,1158.34314,1496.257812,1664.177246,292.776245,5297.291504,0.520648,1


## Feed to the model

In [11]:
# No inputs are treated as categorical in this setup, so there are no
# categorical indexes and no vocabularies to size.
# (To use categorical inputs instead, list their positions in
# `cat_feature_indexes` and set `vocab_sizes[i]` to the column's max code + 1.)
cat_feature_indexes = []
vocab_sizes = {}

# Every input position 0..N-1 is continuous, where N is the number of columns
# passed to the dataset as `cont_features`.
# NOTE(review): assumes the dataset emits the features in that same order.
cont_feature_indexes = list(range(len(continous_features + cat_features)))

100%|███████████████████████████████████████| 23/23 [00:00<00:00, 742069.17it/s]


In [12]:
# Collect the label of every dataset item into a single tensor by stacking
# the per-item values along a new leading dimension.
# NOTE(review): position 3 of a dataset item is assumed to be the target -
# confirm against `TargetDataset.__getitem__`.
targets = torch.stack([sample[3] for sample in tqdm(dataset)], dim=0)
targets

100%|████████████████████████████████| 264324/264324 [00:17<00:00, 15309.89it/s]


tensor([0, 0, 0,  ..., 0, 1, 0])

In [13]:
# model = AttentionPoolingBERTModel(
#         cat_feature_indexes=cat_feature_indexes,
#         vocab_sizes=vocab_sizes,
#         cont_feature_indexes=cont_feature_indexes,
#         encoder_hidden_dim=16,
#         hidden_dim=256,
#         dim_feedforward=512,
#         output_dim=7,
#         pe_type="trainable",
#         use_mask=False,
#         max_len=1024,
#         use_key_padding_mask=True,
# )

# model = StarterBERTModel(
#         cat_feature_indexes=cat_feature_indexes,
#         vocab_sizes=vocab_sizes,
#         cont_feature_indexes=cont_feature_indexes,
#         encoder_hidden_dim=16,
#         hidden_dim=256,
#         dim_feedforward=512,
#         output_dim=2,
#         pe_type="trainable",
#         use_mask=False,
#         max_len=1024,
#         use_key_padding_mask=False,
#         starter="randn",
#         shared=False
# )

# Active model: the project's LSTM model with two output units, fed with the
# index lists built above (all inputs continuous, no vocabularies).
model = LSTMModel(
        cat_feature_indexes=cat_feature_indexes,
        vocab_sizes=vocab_sizes,
        cont_feature_indexes=cont_feature_indexes,
        encoder_hidden_dim=16,
        hidden_dim=256,
        num_layers=3,
        output_dim=2,
)

# NOTE(review): init helpers of this kind are often written to be used as
# `model.apply(fn)`; confirm that calling it on the whole model really
# initialises every submodule.
weights_init_xavier(model)

# Metric passed to the trainer; reported as 'Gender GINI' in the training log.
metric = GENDER_METRIC

In [14]:
# Cross-validation trainer configured for 10 folds. `model_name` is reused
# further down to build the path the model is saved under.
trainer = CVTrainer(
            model_name="is_male_rnn_with_cbe_10_folds",
            model=model,
            n_folds=10,
)

In [15]:
# Run the cross-validated training. The loss, optimizer and scheduler are
# passed as a class / factory rather than as instances, so the trainer is
# expected to instantiate them itself.
# `strat_array=targets` is presumably used to stratify the folds by label -
# TODO confirm in `seq2seq_modules.trainer`.
# NOTE(review): `random_state=69` differs from `SEED` (42) set at the top of
# this section, and the device is hard-coded to "cuda".
# NOTE(review): the recorded output of this cell ends with a
# KeyboardInterrupt during fold 3, so the frames saved below presumably come
# from another, complete run - re-run before trusting the saved artifacts.
train_fold_metrics, eval_fold_metrics, embeddings_df, logits_df = trainer.fit_transform(
                          dataset=dataset,
                          loss_function=nn.CrossEntropyLoss,
                          metric_func=metric,
                          optimizer=torch.optim.AdamW,
                          get_scheduler=get_cosine_schedule_with_warmup,
                          strat_array=targets,
                          target_name="is_male",
                          user_ids=user_ids,
                          shuffle=True,
                          epochs=10,
                          lr=1e-3,
                          weight_decay=1e-2,
                          num_warmup_steps=0,
                          batch_size=256,
                          random_state=69,
                          device= "cuda"
)

FOLD 0
--------------------------------


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 0
{'Gender GINI': 0.4632098738267212, 'loss': 0.5985584716611766}
{'Gender GINI': 0.6977002297570625, 'loss': 0.5117003824405079}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 1
{'Gender GINI': 0.6965556707413376, 'loss': 0.4833877591859913}
{'Gender GINI': 0.7140056834943236, 'loss': 0.4773499072803918}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 2
{'Gender GINI': 0.7068812312769943, 'loss': 0.47594735530111515}
{'Gender GINI': 0.720817679665612, 'loss': 0.4704619215537931}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 3
{'Gender GINI': 0.7129886111191375, 'loss': 0.4714787296906292}
{'Gender GINI': 0.7235367808921862, 'loss': 0.4691978346945529}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 4
{'Gender GINI': 0.7180579409773269, 'loss': 0.46776016050518804}
{'Gender GINI': 0.7271890074048106, 'loss': 0.46412409081917944}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 5
{'Gender GINI': 0.7217095084902798, 'loss': 0.46504213700004393}
{'Gender GINI': 0.7249302967136892, 'loss': 0.4658839577614093}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 6
{'Gender GINI': 0.7253553695253006, 'loss': 0.4622712247054046}
{'Gender GINI': 0.7299834513050785, 'loss': 0.46069231833940033}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 7
{'Gender GINI': 0.728700966100384, 'loss': 0.4596754708723746}
{'Gender GINI': 0.7314618013847289, 'loss': 0.4593635270162257}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 8
{'Gender GINI': 0.7312351447811798, 'loss': 0.4577524673099622}
{'Gender GINI': 0.7308161616994804, 'loss': 0.4602958660539263}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 9
{'Gender GINI': 0.7326533065636056, 'loss': 0.4566286309195048}
{'Gender GINI': 0.731253782742959, 'loss': 0.45897450007405244}
FOLD 1
--------------------------------


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 0
{'Gender GINI': 0.43433102862879513, 'loss': 0.6106256825804466}
{'Gender GINI': 0.700181966906047, 'loss': 0.48302247869968323}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 1
{'Gender GINI': 0.6968881955534978, 'loss': 0.48280685286966957}
{'Gender GINI': 0.6988793743917396, 'loss': 0.50743553878232}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 2
{'Gender GINI': 0.7052422061180914, 'loss': 0.4769587343624866}
{'Gender GINI': 0.7109100579290246, 'loss': 0.485076797537585}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 3
{'Gender GINI': 0.7116913256214459, 'loss': 0.47220852112639494}
{'Gender GINI': 0.7188107737789398, 'loss': 0.47129062675917555}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 4
{'Gender GINI': 0.71643362229233, 'loss': 0.46878772626078846}
{'Gender GINI': 0.7176785715308602, 'loss': 0.4693659570579687}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 5
{'Gender GINI': 0.721066749059432, 'loss': 0.46536012757860307}
{'Gender GINI': 0.7200412772224312, 'loss': 0.46868663097113955}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 6
{'Gender GINI': 0.7246939533791088, 'loss': 0.4626487988615614}
{'Gender GINI': 0.7228865529394986, 'loss': 0.4659544325788754}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 7
{'Gender GINI': 0.7281181599426809, 'loss': 0.46000254702331517}
{'Gender GINI': 0.7243880620269603, 'loss': 0.46538254268027274}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 8
{'Gender GINI': 0.7306579140537612, 'loss': 0.4581211753425296}
{'Gender GINI': 0.7246773691871207, 'loss': 0.46567022382037404}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 9
{'Gender GINI': 0.7321271515329348, 'loss': 0.45702818287285385}
{'Gender GINI': 0.7252323662457032, 'loss': 0.46405619388000147}
FOLD 2
--------------------------------


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 0
{'Gender GINI': 0.4752730437030521, 'loss': 0.5933205491178635}
{'Gender GINI': 0.6965681936260317, 'loss': 0.49083006659456935}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 1
{'Gender GINI': 0.6981639739611156, 'loss': 0.4821954515120505}
{'Gender GINI': 0.7069243142086785, 'loss': 0.4873516873239549}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 2
{'Gender GINI': 0.7055187208263562, 'loss': 0.47696662645772503}
{'Gender GINI': 0.7127683635236748, 'loss': 0.4800207404544053}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 3
{'Gender GINI': 0.7139192340693679, 'loss': 0.47086988995192036}
{'Gender GINI': 0.7174070732639191, 'loss': 0.47204907674551155}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 4
{'Gender GINI': 0.7189825171359292, 'loss': 0.46708177951442587}
{'Gender GINI': 0.7199721668021382, 'loss': 0.4676721677791142}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 5
{'Gender GINI': 0.7232849207308134, 'loss': 0.46383617310854897}
{'Gender GINI': 0.7216393289558627, 'loss': 0.46741577127536615}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 6
{'Gender GINI': 0.7271760284206019, 'loss': 0.46086481886820635}
{'Gender GINI': 0.7231203311801129, 'loss': 0.4658772262795}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 7
{'Gender GINI': 0.7310029204294217, 'loss': 0.45790475344941123}
{'Gender GINI': 0.7247734306664673, 'loss': 0.46436490562939503}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 8
{'Gender GINI': 0.7337095290301774, 'loss': 0.4557658904081361}
{'Gender GINI': 0.7249426867542277, 'loss': 0.4646193922574232}


  0%|          | 0/930 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

EPOCH: 9
{'Gender GINI': 0.735236710008055, 'loss': 0.45460101407341375}
{'Gender GINI': 0.725091000637546, 'loss': 0.4640373668482255}
FOLD 3
--------------------------------


  0%|          | 0/930 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Output locations, relative to the notebook's working directory.
CV_FEATURES = "../cv_data/"  # out-of-fold embeddings / logits are written here
MODEL_ZOO = "../model_zoo/"  # trained models are written here

In [20]:
# Save the trained model under MODEL_ZOO, using the trainer's model name as
# the file / directory name.
trainer.save_model(model_name=MODEL_ZOO + trainer.model_name)

In [21]:
# Persist the frames returned by `trainer.fit_transform` (embeddings and
# logits) as gzip-compressed parquet files.
# CV_FEATURES already ends with "/", so the paths contain a doubled slash.
embeddings_df.to_parquet(f"{CV_FEATURES}/is_male_cv_embeddings_10_folds.parquet.gzip",
                          compression='gzip')
logits_df.to_parquet(f"{CV_FEATURES}/is_male_cv_logits_10_folds.parquet.gzip",
                          compression='gzip')