In [5]:
# Imports

import os
import pandas as pd
import sys
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from loguru import logger
from pathlib import Path

# Local imports
sys.path.append(r"./utils")
from utils import utils

In [6]:
# Constants

SAMPLE_DATA = True
DEBUG_MODEL = True

GROUP_NAME = "ethnicity_groups"  # TODO : Changed this for ethnicity segments. 
MODEL_DICT = \
            {"NN": {'epochs': 25} , 
             "LR": {'max_iter': 100}
            }
SELF_REPORTED_COLS = \
           ['age',
            # 'country_canada', 
            # 'country_united kingdom',
            # 'country_united states',
            # 'database_dating',
            # 'database_fb',
            # 'gender',
           ]
DATA_DROP_COLS = \
           ['Unnamed: 0', # index columns
            # 'pol',  # label column
            'gender', # self reported and filtered already
            # 'age', # self-reported  
            'country', # self reported and filtered already
            'userid', # index equivalent column 
            'pol_dat_us', # redundant columns with label
            'pol_dat_ca', # redundant columns with label
            'pol_dat_uk', # redundant columns with label
            'pol_fb_us', # redundant columns with label
            'database', # filtered already 
            'ethnicity.value' # filtered already
            ]
RESULTS_COLS = \
            ["Group Name", 
             "Model", 
             "Feature Set", 
             "Test AUC", 
             "Test Accuracy",
            ]
DATA_DIR = "./data/full/"
RESULTS_DIR = f"./results/full/{GROUP_NAME}/" #TODO: ensure that the folder exists
RESULTS_STATS_FILENAME = GROUP_NAME + '.csv'
RESULTS_MODEL_FILENAME_PREFIX = GROUP_NAME

if SAMPLE_DATA:
  DATA_DIR = "./data/sample/"
  RESULTS_DIR = f"./results/sample/{GROUP_NAME}/"
  ASSERT_DATA_SHAPE_0 = 31742
  ASSERT_DATA_SHAPE_1 = 2092
  DATA_DROP_COLS = DATA_DROP_COLS \
                  + ['Unnamed: 0.1'] #TODO: Regenerate sample with index=False and remove this

if DEBUG_MODEL:
  MODEL_DICT = \
            {"NN": {'epochs': 1} , 
             "LR": {'max_iter': 1}
            }

logger.debug(f"Started the script for {GROUP_NAME}.")

2021-08-18 19:00:18.361 | DEBUG    | __main__:<module>:60 - Started the script for ethnicity_groups.


In [7]:
# Read datasets

folders = os.listdir(DATA_DIR)
dataset_paths = []
for folder in tqdm(folders):
  logger.debug(f"In folder {folder}.")
  csv_files = os.listdir(DATA_DIR + folder)
  for csv in csv_files:
    if '.csv' in csv:
      logger.debug(DATA_DIR + folder + "/" + csv)
      dataset_paths.append(DATA_DIR + folder + "/" + csv)


  0%|          | 0/9 [00:00<?, ?it/s]2021-08-18 19:00:18.395 | DEBUG    | __main__:<module>:6 - In folder UK_0_dating.
2021-08-18 19:00:18.396 | DEBUG    | __main__:<module>:10 - ./data/sample/UK_0_dating/segment_united kingdom_0_dating_black.csv
2021-08-18 19:00:18.397 | DEBUG    | __main__:<module>:10 - ./data/sample/UK_0_dating/segment_united kingdom_0_dating_asian.csv
2021-08-18 19:00:18.398 | DEBUG    | __main__:<module>:10 - ./data/sample/UK_0_dating/segment_united kingdom_0_dating_india.csv
2021-08-18 19:00:18.398 | DEBUG    | __main__:<module>:10 - ./data/sample/UK_0_dating/segment_united kingdom_0_dating_white.csv
2021-08-18 19:00:18.398 | DEBUG    | __main__:<module>:6 - In folder Canada_1_dating.
2021-08-18 19:00:18.399 | DEBUG    | __main__:<module>:10 - ./data/sample/Canada_1_dating/segment_canada_1_dating_asian.csv
2021-08-18 19:00:18.399 | DEBUG    | __main__:<module>:10 - ./data/sample/Canada_1_dating/segment_canada_1_dating_india.csv
2021-08-18 19:00:18.400 | DEBUG    

In [8]:
results = []

for dataset_path in dataset_paths:
  data = pd.read_csv(dataset_path)
  
  # Clean the data
  data = data.drop(DATA_DROP_COLS, axis=1)
  data = utils.get_clean_data(data)
  logger.debug(f"Data size after cleaning is {data.shape}")
  data_y = data['pol'].replace({"liberal": 1, "conservative": 0})
  data = data.drop('pol', axis = 1)
  all_features = data.columns

  # Split the data
  X_train, X_test, y_train, y_test = train_test_split(data, data_y, test_size = 0.2, random_state = 2021) 
  del data, data_y

  # which group it is being processed on 
  SEGMENT_NAME = utils.get_dataframe_name(dataset_path)
  logger.debug(f"Started the script for {SEGMENT_NAME}.")
  
  ## Define features for the various settings
  image_cols = list(map(str, range(1, 2049)))
  image_and_self_reported_cols = image_cols + SELF_REPORTED_COLS
  image_and_extracted_cols = [x for x in all_features if x not in SELF_REPORTED_COLS]
  image_and_self_reported_and_extracted_cols = all_features

  data_dict = {
                "Image Features" : image_cols,
                # "Image and Self Reported Features" : image_and_self_reported_cols,
                "Image and Extracted Features": image_and_extracted_cols,
                "Image, Self-reported and Extracted Features": image_and_self_reported_and_extracted_cols
                }

  if SEGMENT_NAME == "canada_1_dating_india": # this segment does not have any "age" values associated with it.
    ## Define features for the various settings
    image_cols = list(map(str, range(1, 2049)))
    # image_and_self_reported_cols = image_cols + SELF_REPORTED_COLS 
    image_and_extracted_cols = [x for x in all_features if x not in SELF_REPORTED_COLS]
    image_and_self_reported_and_extracted_cols = all_features

  
    data_dict = {
                "Image Features" : image_cols,
                # "Image and Self Reported Features" : image_and_self_reported_cols,
                "Image and Extracted Features": image_and_extracted_cols,
                "Image, Self-reported and Extracted Features": image_and_self_reported_and_extracted_cols
                }


  # Fit models and log results
  for data_name, data_set_features in tqdm(data_dict.items()):
    for model_name, model_params in MODEL_DICT.items():
      try:
        save_model_filepath = Path(RESULTS_DIR \
                        + SEGMENT_NAME\
                        + "_" + model_name \
                        + "_" + data_name.replace(" ","_").replace(",","").replace("-","_") \
                        + '.mdl')

        if save_model_filepath.is_file() or save_model_filepath.is_dir():
          logger.debug(f"{SEGMENT_NAME}, {model_name}, {data_name}: model already exists.")
          model = utils.read_model(model_name, save_model_filepath)
          logger.debug(f"{SEGMENT_NAME}, {model_name}, {data_name}: model read from disk.")
          
        else:
          logger.debug(f"{SEGMENT_NAME}, {model_name}, {data_name}: model training started.")
          model = utils.fit_model(model_name,
                                X_train[data_set_features],
                                y_train,
                                model_params = model_params)

          utils.save_model(model, model_name, save_model_filepath)
          logger.debug(f"{GROUP_NAME}, {model_name}, {data_name}: model training ended and model saved.")
        
        auc, acc = utils.get_metrics(model_name,
                                     model,
                                     X_test[data_set_features],
                                     y_test)
                                     
        results.append([SEGMENT_NAME, model_name, data_name, auc, acc])
        logger.debug(f"{SEGMENT_NAME}, {model_name}, {data_name}: model training ended. AUC: {auc}, accuracy: {acc}")
      
      except Exception as error:
        logger.exception(error)
        logger.error(f"{SEGMENT_NAME}, {model_name}, {data_name}: Error occured!")

2021-08-18 19:00:19.499 | DEBUG    | __main__:<module>:9 - Data size after cleaning is (1000, 2076)
2021-08-18 19:00:19.505 | DEBUG    | __main__:<module>:20 - Started the script for united kingdom_0_dating_black.
  0%|          | 0/3 [00:00<?, ?it/s]2021-08-18 19:00:19.506 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_black, NN, Image Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:20.496 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:20.610 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_black, NN, Image Features: model training ended. AUC: 51.74, accuracy: 61.5
2021-08-18 19:00:20.611 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_black, LR, Image Features: model training started.
2021-08-18 19:00:20.731 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:20.758 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_black, LR, Image Features: model training ended. AUC: 65.86, accuracy: 62.5
 33%|███▎      | 1/3 [00:01<00:02,  1.25s/it]2021-08-18 19:00:20.759 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_black, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:21.707 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:21.832 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_black, NN, Image and Extracted Features: model training ended. AUC: 37.27, accuracy: 61.0
2021-08-18 19:00:21.833 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_black, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:21.964 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:21.988 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_black, LR, Image and Extracted Features: model training ended. AUC: 52.51, accuracy: 59.5
 67%|██████▋   | 2/3 [00:02<00:01,  1.24s/it]2021-08-18 19:00:21.989 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_black, NN, Image, Self-reported and Extracted Features: model training sta

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:22.983 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:23.097 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_black, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 53.53, accuracy: 42.5
2021-08-18 19:00:23.097 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_black, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:23.221 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:23.244 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_black, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 61.42, accuracy: 61.5
100%|██████████| 3/3 [00:03<00:00,  1.25s/it]
2021-08-18 19:00:23.643 | DEBUG    | __main__:<module>:9 - Data size after cleanin

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:24.643 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:24.761 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_asian, NN, Image Features: model training ended. AUC: 46.18, accuracy: 52.0
2021-08-18 19:00:24.762 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_asian, LR, Image Features: model training started.
2021-08-18 19:00:24.882 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:24.904 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_asian, LR, Image Features: model training ended. AUC: 66.91, accuracy: 63.0
 33%|███▎      | 1/3 [00:01<00:02,  1.25s/it]2021-08-18 19:00:24.905 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_asian, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:25.839 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:25.956 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_asian, NN, Image and Extracted Features: model training ended. AUC: 58.89, accuracy: 56.5
2021-08-18 19:00:25.957 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_asian, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:26.080 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:26.104 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_asian, LR, Image and Extracted Features: model training ended. AUC: 65.28, accuracy: 63.0
 67%|██████▋   | 2/3 [00:02<00:01,  1.22s/it]2021-08-18 19:00:26.105 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_asian, NN, Image, Self-reported and Extracted Features: model training sta

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:27.125 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:27.243 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_asian, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 43.9, accuracy: 52.5
2021-08-18 19:00:27.244 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_asian, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:27.367 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:27.390 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_asian, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 64.54, accuracy: 61.5
100%|██████████| 3/3 [00:03<00:00,  1.25s/it]
2021-08-18 19:00:27.793 | DEBUG    | __main__:<module>:9 - Data size after cleaning

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:28.709 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:28.825 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_india, NN, Image Features: model training ended. AUC: 50.74, accuracy: 63.5
2021-08-18 19:00:28.825 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_india, LR, Image Features: model training started.
2021-08-18 19:00:28.949 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:28.971 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_india, LR, Image Features: model training ended. AUC: 67.31, accuracy: 72.0
 33%|███▎      | 1/3 [00:01<00:02,  1.17s/it]2021-08-18 19:00:28.972 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_india, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:29.997 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:30.108 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_india, NN, Image and Extracted Features: model training ended. AUC: 52.73, accuracy: 72.0
2021-08-18 19:00:30.109 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_india, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:30.233 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:30.257 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_india, LR, Image and Extracted Features: model training ended. AUC: 61.72, accuracy: 69.0
 67%|██████▋   | 2/3 [00:02<00:01,  1.24s/it]2021-08-18 19:00:30.259 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_india, NN, Image, Self-reported and Extracted Features: model training sta

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:31.197 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:31.309 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_india, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 44.26, accuracy: 72.0
2021-08-18 19:00:31.310 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_india, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:31.432 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:31.455 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_india, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 62.46, accuracy: 71.0
100%|██████████| 3/3 [00:03<00:00,  1.22s/it]
2021-08-18 19:00:31.903 | DEBUG    | __main__:<module>:9 - Data size after cleanin

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:32.940 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:33.058 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_white, NN, Image Features: model training ended. AUC: 53.86, accuracy: 50.0
2021-08-18 19:00:33.058 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_white, LR, Image Features: model training started.
2021-08-18 19:00:33.181 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:33.204 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_white, LR, Image Features: model training ended. AUC: 61.46, accuracy: 60.5
 33%|███▎      | 1/3 [00:01<00:02,  1.29s/it]2021-08-18 19:00:33.206 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_white, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:34.156 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:34.271 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_white, NN, Image and Extracted Features: model training ended. AUC: 51.83, accuracy: 58.0
2021-08-18 19:00:34.271 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_white, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:34.393 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:34.417 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_white, LR, Image and Extracted Features: model training ended. AUC: 57.45, accuracy: 59.5
 67%|██████▋   | 2/3 [00:02<00:01,  1.25s/it]2021-08-18 19:00:34.418 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_white, NN, Image, Self-reported and Extracted Features: model training sta

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:35.372 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:35.492 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_white, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 48.59, accuracy: 42.0
2021-08-18 19:00:35.493 | DEBUG    | __main__:<module>:67 - united kingdom_0_dating_white, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:35.615 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:35.639 | DEBUG    | __main__:<module>:82 - united kingdom_0_dating_white, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 56.48, accuracy: 57.5
100%|██████████| 3/3 [00:03<00:00,  1.24s/it]
2021-08-18 19:00:36.070 | DEBUG    | __main__:<module>:9 - Data size after cleanin

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:37.142 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:37.255 | DEBUG    | __main__:<module>:82 - canada_1_dating_asian, NN, Image Features: model training ended. AUC: 50.84, accuracy: 32.0
2021-08-18 19:00:37.256 | DEBUG    | __main__:<module>:67 - canada_1_dating_asian, LR, Image Features: model training started.
2021-08-18 19:00:37.378 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:37.401 | DEBUG    | __main__:<module>:82 - canada_1_dating_asian, LR, Image Features: model training ended. AUC: 55.0, accuracy: 68.0
 33%|███▎      | 1/3 [00:01<00:02,  1.32s/it]2021-08-18 19:00:37.402 | DEBUG    | __main__:<module>:67 - canada_1_dating_asian, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:38.337 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:38.452 | DEBUG    | __main__:<module>:82 - canada_1_dating_asian, NN, Image and Extracted Features: model training ended. AUC: 49.8, accuracy: 68.5
2021-08-18 19:00:38.452 | DEBUG    | __main__:<module>:67 - canada_1_dating_asian, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:38.577 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:38.600 | DEBUG    | __main__:<module>:82 - canada_1_dating_asian, LR, Image and Extracted Features: model training ended. AUC: 46.76, accuracy: 67.5
 67%|██████▋   | 2/3 [00:02<00:01,  1.25s/it]2021-08-18 19:00:38.601 | DEBUG    | __main__:<module>:67 - canada_1_dating_asian, NN, Image, Self-reported and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:39.639 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:39.754 | DEBUG    | __main__:<module>:82 - canada_1_dating_asian, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 44.0, accuracy: 68.5
2021-08-18 19:00:39.755 | DEBUG    | __main__:<module>:67 - canada_1_dating_asian, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:39.879 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:39.903 | DEBUG    | __main__:<module>:82 - canada_1_dating_asian, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 47.68, accuracy: 68.5
100%|██████████| 3/3 [00:03<00:00,  1.27s/it]
2021-08-18 19:00:40.334 | DEBUG    | __main__:<module>:9 - Data size after cleaning is (1000, 2075)
2021-08

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:41.294 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:41.405 | DEBUG    | __main__:<module>:82 - canada_1_dating_india, NN, Image Features: model training ended. AUC: 56.24, accuracy: 70.0
2021-08-18 19:00:41.406 | DEBUG    | __main__:<module>:67 - canada_1_dating_india, LR, Image Features: model training started.
2021-08-18 19:00:41.525 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:41.548 | DEBUG    | __main__:<module>:82 - canada_1_dating_india, LR, Image Features: model training ended. AUC: 64.95, accuracy: 69.5
 33%|███▎      | 1/3 [00:01<00:02,  1.21s/it]2021-08-18 19:00:41.549 | DEBUG    | __main__:<module>:67 - canada_1_dating_india, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:42.467 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:42.582 | DEBUG    | __main__:<module>:82 - canada_1_dating_india, NN, Image and Extracted Features: model training ended. AUC: 54.21, accuracy: 73.0
2021-08-18 19:00:42.582 | DEBUG    | __main__:<module>:67 - canada_1_dating_india, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:42.715 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:42.738 | DEBUG    | __main__:<module>:82 - canada_1_dating_india, LR, Image and Extracted Features: model training ended. AUC: 61.04, accuracy: 73.0
 67%|██████▋   | 2/3 [00:02<00:01,  1.20s/it]2021-08-18 19:00:42.739 | DEBUG    | __main__:<module>:67 - canada_1_dating_india, NN, Image, Self-reported and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:43.810 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:43.926 | DEBUG    | __main__:<module>:82 - canada_1_dating_india, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 45.75, accuracy: 73.0
2021-08-18 19:00:43.926 | DEBUG    | __main__:<module>:67 - canada_1_dating_india, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:44.048 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:44.071 | DEBUG    | __main__:<module>:82 - canada_1_dating_india, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 60.78, accuracy: 73.5
100%|██████████| 3/3 [00:03<00:00,  1.24s/it]
2021-08-18 19:00:44.519 | DEBUG    | __main__:<module>:9 - Data size after cleaning is (1000, 2076)
2021-0

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:45.488 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:45.610 | DEBUG    | __main__:<module>:82 - canada_1_dating_white, NN, Image Features: model training ended. AUC: 50.15, accuracy: 45.5
2021-08-18 19:00:45.611 | DEBUG    | __main__:<module>:67 - canada_1_dating_white, LR, Image Features: model training started.
2021-08-18 19:00:45.729 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:45.753 | DEBUG    | __main__:<module>:82 - canada_1_dating_white, LR, Image Features: model training ended. AUC: 54.41, accuracy: 67.5
 33%|███▎      | 1/3 [00:01<00:02,  1.23s/it]2021-08-18 19:00:45.755 | DEBUG    | __main__:<module>:67 - canada_1_dating_white, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:46.752 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:46.866 | DEBUG    | __main__:<module>:82 - canada_1_dating_white, NN, Image and Extracted Features: model training ended. AUC: 56.22, accuracy: 69.0
2021-08-18 19:00:46.867 | DEBUG    | __main__:<module>:67 - canada_1_dating_white, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:46.993 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:47.016 | DEBUG    | __main__:<module>:82 - canada_1_dating_white, LR, Image and Extracted Features: model training ended. AUC: 52.09, accuracy: 67.5
 67%|██████▋   | 2/3 [00:02<00:01,  1.25s/it]2021-08-18 19:00:47.017 | DEBUG    | __main__:<module>:67 - canada_1_dating_white, NN, Image, Self-reported and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:47.970 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:48.084 | DEBUG    | __main__:<module>:82 - canada_1_dating_white, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 45.57, accuracy: 32.0
2021-08-18 19:00:48.085 | DEBUG    | __main__:<module>:67 - canada_1_dating_white, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:48.214 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:48.238 | DEBUG    | __main__:<module>:82 - canada_1_dating_white, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 48.04, accuracy: 67.0
100%|██████████| 3/3 [00:03<00:00,  1.24s/it]
2021-08-18 19:00:48.677 | DEBUG    | __main__:<module>:9 - Data size after cleaning is (1000, 2076)
2021-0

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:49.784 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.
2021-08-18 19:00:49.914 | DEBUG    | __main__:<module>:82 - canada_1_dating_black, NN, Image Features: model training ended. AUC: 49.81, accuracy: 69.5
2021-08-18 19:00:49.915 | DEBUG    | __main__:<module>:67 - canada_1_dating_black, LR, Image Features: model training started.
2021-08-18 19:00:50.041 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image Features: model training ended and model saved.
2021-08-18 19:00:50.064 | DEBUG    | __main__:<module>:82 - canada_1_dating_black, LR, Image Features: model training ended. AUC: 59.32, accuracy: 71.5
 33%|███▎      | 1/3 [00:01<00:02,  1.38s/it]2021-08-18 19:00:50.065 | DEBUG    | __main__:<module>:67 - canada_1_dating_black, NN, Image and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:51.042 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:51.156 | DEBUG    | __main__:<module>:82 - canada_1_dating_black, NN, Image and Extracted Features: model training ended. AUC: 46.47, accuracy: 75.0
2021-08-18 19:00:51.157 | DEBUG    | __main__:<module>:67 - canada_1_dating_black, LR, Image and Extracted Features: model training started.
2021-08-18 19:00:51.282 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image and Extracted Features: model training ended and model saved.
2021-08-18 19:00:51.307 | DEBUG    | __main__:<module>:82 - canada_1_dating_black, LR, Image and Extracted Features: model training ended. AUC: 55.15, accuracy: 75.0
 67%|██████▋   | 2/3 [00:02<00:01,  1.30s/it]2021-08-18 19:00:51.309 | DEBUG    | __main__:<module>:67 - canada_1_dating_black, NN, Image, Self-reported and Extracted Features: model training started.


Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:52.299 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:52.419 | DEBUG    | __main__:<module>:82 - canada_1_dating_black, NN, Image, Self-reported and Extracted Features: model training ended. AUC: 49.65, accuracy: 75.0
2021-08-18 19:00:52.419 | DEBUG    | __main__:<module>:67 - canada_1_dating_black, LR, Image, Self-reported and Extracted Features: model training started.
2021-08-18 19:00:52.542 | DEBUG    | __main__:<module>:74 - ethnicity_groups, LR, Image, Self-reported and Extracted Features: model training ended and model saved.
2021-08-18 19:00:52.565 | DEBUG    | __main__:<module>:82 - canada_1_dating_black, LR, Image, Self-reported and Extracted Features: model training ended. AUC: 60.68, accuracy: 52.5
100%|██████████| 3/3 [00:03<00:00,  1.29s/it]
2021-08-18 19:00:53.014 | DEBUG    | __main__:<module>:9 - Data size after cleaning is (1000, 2076)
2021-0

Train on 640 samples, validate on 160 samples
Epoch 1/1


2021-08-18 19:00:54.080 | DEBUG    | __main__:<module>:74 - ethnicity_groups, NN, Image Features: model training ended and model saved.


In [None]:
# Save results summary to disk

save_results_filepath = Path(RESULTS_DIR + RESULTS_STATS_FILENAME)
utils.save_results(results_array = results, 
                     location = save_results_filepath,
                     columns = RESULTS_COLS)
print(pd.DataFrame(results,columns = RESULTS_COLS))
logger.debug(f"Script for {GROUP_NAME} finished.")

2021-08-18 18:55:23.956 | DEBUG    | utils.utils:save_results:263 - Results Saved.
2021-08-18 18:55:23.963 | DEBUG    | __main__:<module>:8 - Script for ethnicity_groups finished.


           Group Name Model                                  Feature Set  \
0    ethnicity_groups    NN                               Image Features   
1    ethnicity_groups    LR                               Image Features   
2    ethnicity_groups    NN             Image and Self Reported Features   
3    ethnicity_groups    LR             Image and Self Reported Features   
4    ethnicity_groups    NN                 Image and Extracted Features   
..                ...   ...                                          ...   
251  ethnicity_groups    LR             Image and Self Reported Features   
252  ethnicity_groups    NN                 Image and Extracted Features   
253  ethnicity_groups    LR                 Image and Extracted Features   
254  ethnicity_groups    NN  Image, Self-reported and Extracted Features   
255  ethnicity_groups    LR  Image, Self-reported and Extracted Features   

     Test AUC  Test Accuracy  
0       58.07           63.5  
1       57.80           5