### Setup

In [1]:
import numpy as np
import pandas as pd
import gc
import time
import random
from contextlib import contextmanager

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

DATA_PATH = "../data/home_default/"

### Helper functions

In [2]:
@contextmanager
def timer(title):
    """Context manager that reports how long the wrapped block took.

    When the ``with`` body finishes normally it prints
    ``"<title> - done in <N>s"``, where N is the elapsed wall-clock time in
    whole seconds. Nothing is printed if the body raises.
    """
    started_at = time.time()
    yield
    elapsed = time.time() - started_at
    print("{} - done in {:.0f}s".format(title, elapsed))

# One-hot encoding for categorical columns with get_dummies
def one_hot_encoder(df, nan_as_category = True):
    """One-hot encode every object-dtype column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    nan_as_category : bool, default True
        Passed to ``pd.get_dummies`` as ``dummy_na`` (adds a ``*_nan``
        indicator column per encoded column).

    Returns
    -------
    (pandas.DataFrame, list)
        The encoded frame and the names of the columns that were not present
        in the input, in the order they appear in the encoded frame.
    """
    names_before = set(df.columns)
    object_columns = []
    for name in df.columns:
        if df[name].dtype == 'object':
            object_columns.append(name)
    encoded = pd.get_dummies(df, columns=object_columns, dummy_na=nan_as_category)
    added_columns = [name for name in encoded.columns if name not in names_before]
    return encoded, added_columns

### Train and Test

In [3]:
# Run-wide switches read by every loader cell below.
feather = False   # True -> loaders call pd.read_feather on the *_feathered.csv files
debug   = False   # True -> loaders read only the first 1000 rows of each CSV
if debug:
    num_rows = 1000
else:
    num_rows = None

# Preprocess application_train.csv and application_test.csv
def application_train_test(num_rows, feather, nan_as_category = False):
    """Load the application train/test tables, stack them and add engineered features.

    Parameters
    ----------
    num_rows : int or None
        Row limit passed to ``pd.read_csv`` for each file. It is not applied
        when ``feather`` is true.
    feather : bool
        If true, read the ``*_feathered.csv`` files with ``pd.read_feather``
        instead of the plain CSV files.
    nan_as_category : bool, default False
        Forwarded to ``one_hot_encoder`` for the remaining object columns.

    Returns
    -------
    pandas.DataFrame
        Train rows followed by test rows (rows whose ``CODE_GENDER`` is
        ``'XNA'`` removed), with the ``NEW_*`` features added, three binary
        columns factorized to integer codes and all other object columns
        one-hot encoded. Row labels are not reset after concatenation.
    """
    # Read data and merge
    if feather:
        df      = pd.read_feather(DATA_PATH + 'train_feathered.csv')
        test_df = pd.read_feather(DATA_PATH + 'test_feathered.csv')
    else:
        df      = pd.read_csv(DATA_PATH + 'train.csv', nrows= num_rows)
        test_df = pd.read_csv(DATA_PATH + 'test.csv',  nrows= num_rows)
    print("Train samples: {}, test samples: {}".format(len(df), len(test_df)))

    df = pd.concat([df, test_df])
    del test_df
    gc.collect()

    # Optional: Remove applications with XNA CODE_GENDER
    df = df[df['CODE_GENDER'] != 'XNA']

    # Column groups used for the row-wise document / contact-flag summaries.
    docs = [_f for _f in df.columns if 'FLAG_DOC' in _f]
    live = [_f for _f in df.columns
            if 'FLAG_' in _f and 'FLAG_DOC' not in _f and '_FLAG_' not in _f]

    # DAYS_EMPLOYED uses 365243 as a placeholder: turn it into NaN.
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the column selection: with pandas Copy-on-Write the chained in-place call
    # updates a temporary object and leaves ``df`` unchanged.
    df['DAYS_EMPLOYED'] = df['DAYS_EMPLOYED'].replace(365243, np.nan)

    # Median income per organization type, computed over train and test rows together.
    inc_by_org = df[['AMT_INCOME_TOTAL', 'ORGANIZATION_TYPE']].groupby('ORGANIZATION_TYPE').median()['AMT_INCOME_TOTAL']

    ext_sources = ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']

    df['NEW_CREDIT_TO_ANNUITY_RATIO'] = df['AMT_CREDIT'] / df['AMT_ANNUITY']
    df['NEW_CREDIT_TO_GOODS_RATIO'] = df['AMT_CREDIT'] / df['AMT_GOODS_PRICE']
    df['NEW_DOC_IND_KURT'] = df[docs].kurtosis(axis=1)
    df['NEW_LIVE_IND_SUM'] = df[live].sum(axis=1)
    df['NEW_INC_PER_CHLD'] = df['AMT_INCOME_TOTAL'] / (1 + df['CNT_CHILDREN'])
    df['NEW_INC_BY_ORG'] = df['ORGANIZATION_TYPE'].map(inc_by_org)
    df['NEW_EMPLOY_TO_BIRTH_RATIO'] = df['DAYS_EMPLOYED'] / df['DAYS_BIRTH']
    df['NEW_ANNUITY_TO_INCOME_RATIO'] = df['AMT_ANNUITY'] / (1 + df['AMT_INCOME_TOTAL'])
    df['NEW_SOURCES_PROD'] = df['EXT_SOURCE_1'] * df['EXT_SOURCE_2'] * df['EXT_SOURCE_3']
    df['NEW_EXT_SOURCES_MEAN'] = df[ext_sources].mean(axis=1)
    df['NEW_SCORES_STD'] = df[ext_sources].std(axis=1)
    # Rows where the std is undefined get the column mean.
    df['NEW_SCORES_STD'] = df['NEW_SCORES_STD'].fillna(df['NEW_SCORES_STD'].mean())
    df['NEW_CAR_TO_BIRTH_RATIO'] = df['OWN_CAR_AGE'] / df['DAYS_BIRTH']
    df['NEW_CAR_TO_EMPLOY_RATIO'] = df['OWN_CAR_AGE'] / df['DAYS_EMPLOYED']
    df['NEW_PHONE_TO_BIRTH_RATIO'] = df['DAYS_LAST_PHONE_CHANGE'] / df['DAYS_BIRTH']
    df['NEW_PHONE_TO_EMPLOY_RATIO'] = df['DAYS_LAST_PHONE_CHANGE'] / df['DAYS_EMPLOYED']
    df['NEW_CREDIT_TO_INCOME_RATIO'] = df['AMT_CREDIT'] / df['AMT_INCOME_TOTAL']

    # Categorical features with Binary encode (integer codes from pd.factorize)
    for bin_feature in ['CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY']:
        df[bin_feature] = pd.factorize(df[bin_feature])[0]
    # Categorical features with One-Hot encode
    df, _ = one_hot_encoder(df, nan_as_category)

    return df

# Base table: every later cell left-joins its aggregates onto this frame by SK_ID_CURR.
df = application_train_test(num_rows=num_rows, feather=feather)

Train samples: 307511, test samples: 48744


### Bureau

In [4]:
# Preprocess bureau.csv and bureau_balance.csv
def bureau_and_balance(num_rows, feather, nan_as_category = True):
    """Aggregate bureau and bureau_balance records to one row per SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Row limit passed to ``pd.read_csv``; not applied when ``feather`` is true.
    feather : bool
        If true, read the ``*_feathered.csv`` files with ``pd.read_feather``.
    nan_as_category : bool, default True
        Forwarded to ``one_hot_encoder`` for both tables.

    Returns
    -------
    pandas.DataFrame
        ``SK_ID_CURR`` plus ``BURO_*`` aggregates over all bureau rows and
        ``ACTIVE_*`` / ``CLOSED_*`` numeric aggregates over the rows flagged
        ``CREDIT_ACTIVE_Active`` / ``CREDIT_ACTIVE_Closed``.
    """
    if feather:
        bureau = pd.read_feather(DATA_PATH + 'bureau_feathered.csv')
        bb     = pd.read_feather(DATA_PATH + 'bureau_balance_feathered.csv')
    else:
        bureau = pd.read_csv(DATA_PATH + 'bureau.csv', nrows = num_rows)
        bb     = pd.read_csv(DATA_PATH + 'bureau_balance.csv', nrows = num_rows)
    bb, bb_cat = one_hot_encoder(bb, nan_as_category)
    bureau, bureau_cat = one_hot_encoder(bureau, nan_as_category)

    def flat_names(columns, prefix=''):
        # Collapse (column, statistic) pairs into "<prefix><column>_<STATISTIC>".
        return pd.Index([prefix + name + "_" + stat.upper() for name, stat in columns.tolist()])

    # Monthly balance rows -> one row per SK_ID_BUREAU, then attach to bureau.
    monthly_spec = {'MONTHS_BALANCE': ['min', 'max', 'size']}
    monthly_spec.update({col: ['mean'] for col in bb_cat})
    monthly = bb.groupby('SK_ID_BUREAU').agg(monthly_spec)
    monthly.columns = flat_names(monthly.columns)
    bureau = bureau.join(monthly, how='left', on='SK_ID_BUREAU')
    bureau = bureau.drop(columns=['SK_ID_BUREAU'])
    del bb, monthly
    gc.collect()

    # Numeric columns (bureau and joined bureau_balance) and their statistics.
    numeric_spec = {
        'DAYS_CREDIT': ['min', 'max', 'mean', 'var'],
        'DAYS_CREDIT_ENDDATE': ['min', 'max', 'mean'],
        'DAYS_CREDIT_UPDATE': ['mean'],
        'CREDIT_DAY_OVERDUE': ['max', 'mean'],
        'AMT_CREDIT_MAX_OVERDUE': ['mean'],
        'AMT_CREDIT_SUM': ['max', 'mean', 'sum'],
        'AMT_CREDIT_SUM_DEBT': ['max', 'mean', 'sum'],
        'AMT_CREDIT_SUM_OVERDUE': ['mean'],
        'AMT_CREDIT_SUM_LIMIT': ['mean', 'sum'],
        'AMT_ANNUITY': ['max', 'mean'],
        'CNT_CREDIT_PROLONG': ['sum'],
        'MONTHS_BALANCE_MIN': ['min'],
        'MONTHS_BALANCE_MAX': ['max'],
        'MONTHS_BALANCE_SIZE': ['mean', 'sum']
    }
    # Indicator columns from both tables are averaged.
    categorical_spec = {col: ['mean'] for col in bureau_cat}
    categorical_spec.update({col + "_MEAN": ['mean'] for col in bb_cat})

    result = bureau.groupby('SK_ID_CURR').agg({**numeric_spec, **categorical_spec})
    result.columns = flat_names(result.columns, 'BURO_')
    result = result.reset_index()

    # Same numeric aggregations restricted to active, then closed, credits.
    for flag_column, prefix in (('CREDIT_ACTIVE_Active', 'ACTIVE_'),
                                ('CREDIT_ACTIVE_Closed', 'CLOSED_')):
        subset = bureau[bureau[flag_column] == 1]
        subset_agg = subset.groupby('SK_ID_CURR').agg(numeric_spec)
        subset_agg.columns = flat_names(subset_agg.columns, prefix)
        result = result.merge(subset_agg.reset_index(), how='left', on='SK_ID_CURR')
        del subset, subset_agg
        gc.collect()

    del bureau
    gc.collect()
    return result

# Left-join the bureau aggregates onto the application frame.
with timer("Process bureau and bureau_balance"):
    bureau_features = bureau_and_balance(num_rows, feather)
    print("Bureau df shape:", bureau_features.shape)
    df = pd.merge(df, bureau_features, how='left', on='SK_ID_CURR')
    del bureau_features
    gc.collect()

Bureau df shape: (305811, 117)
Process bureau and bureau_balance - done in 49s


### Previous applications

In [5]:
# Preprocess previous_applications.csv
def previous_applications(num_rows, feather, nan_as_category = True):
    """Aggregate previous_application records to one row per SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Row limit passed to ``pd.read_csv``; not applied when ``feather`` is true.
    feather : bool
        If true, read ``previous_application_feathered.csv`` with ``pd.read_feather``.
    nan_as_category : bool, default True
        Forwarded to ``one_hot_encoder``. (It used to be ignored and ``True``
        was always used; the default keeps that behaviour.)

    Returns
    -------
    pandas.DataFrame
        ``SK_ID_CURR`` plus ``PREV_*`` aggregates over all rows and
        ``APPROVED_*`` / ``REFUSED_*`` numeric aggregates over the rows flagged
        ``NAME_CONTRACT_STATUS_Approved`` / ``NAME_CONTRACT_STATUS_Refused``.
    """
    if feather:
        prev = pd.read_feather(DATA_PATH + 'previous_application_feathered.csv')
    else:
        prev = pd.read_csv(DATA_PATH + 'previous_application.csv', nrows = num_rows)

    prev, cat_cols = one_hot_encoder(prev, nan_as_category= nan_as_category)

    # 365243 is a placeholder in the DAYS_* columns: turn it into NaN.
    # Assign back rather than using a chained replace(..., inplace=True), which
    # leaves ``prev`` unchanged when pandas Copy-on-Write is active.
    for days_col in ['DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE', 'DAYS_LAST_DUE_1ST_VERSION',
                     'DAYS_LAST_DUE', 'DAYS_TERMINATION']:
        prev[days_col] = prev[days_col].replace(365243, np.nan)

    # Add feature: amount applied for relative to amount granted
    prev['APP_CREDIT_PERC'] = prev['AMT_APPLICATION'] / prev['AMT_CREDIT']

    # Previous applications numeric features
    num_aggregations = {
        'AMT_ANNUITY': ['min', 'max', 'mean'],
        'AMT_APPLICATION': ['min', 'max', 'mean'],
        'AMT_CREDIT': ['min', 'max', 'mean'],
        'APP_CREDIT_PERC': ['min', 'max', 'mean', 'var'],
        'AMT_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'AMT_GOODS_PRICE': ['min', 'max', 'mean'],
        'HOUR_APPR_PROCESS_START': ['min', 'max', 'mean'],
        'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'DAYS_DECISION': ['min', 'max', 'mean'],
        'CNT_PAYMENT': ['mean', 'sum'],
    }
    # Previous applications categorical features
    cat_aggregations = {cat: ['mean'] for cat in cat_cols}

    prev_agg = prev.groupby('SK_ID_CURR').agg({**num_aggregations, **cat_aggregations})
    prev_agg.columns = pd.Index(['PREV_' + e[0] + "_" + e[1].upper() for e in prev_agg.columns.tolist()])
    prev_agg = prev_agg.reset_index()

    # Approved, then refused applications - only numerical features
    for status_col, prefix in (('NAME_CONTRACT_STATUS_Approved', 'APPROVED_'),
                               ('NAME_CONTRACT_STATUS_Refused', 'REFUSED_')):
        subset = prev[prev[status_col] == 1]
        subset_agg = subset.groupby('SK_ID_CURR').agg(num_aggregations)
        subset_agg.columns = pd.Index([prefix + e[0] + "_" + e[1].upper() for e in subset_agg.columns.tolist()])
        prev_agg = prev_agg.merge(subset_agg.reset_index(), how='left', on='SK_ID_CURR')
        del subset, subset_agg

    del prev
    gc.collect()
    return prev_agg

# Left-join the previous-application aggregates onto the application frame.
with timer("Process previous_applications"):
    prev_features = previous_applications(num_rows, feather)
    print("Previous applications df shape:", prev_features.shape)
    df = pd.merge(df, prev_features, how='left', on='SK_ID_CURR')
    del prev_features
    gc.collect()

Previous applications df shape: (338857, 250)
Process previous_applications - done in 77s


### POS Cash

In [6]:
# Preprocess POS_CASH_balance.csv
def pos_cash(num_rows, feather, nan_as_category = True):
    """Aggregate POS_CASH_balance records to one row per SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Row limit passed to ``pd.read_csv``; not applied when ``feather`` is true.
    feather : bool
        If true, read ``POS_CASH_balance_feathered.csv`` with ``pd.read_feather``.
    nan_as_category : bool, default True
        Forwarded to ``one_hot_encoder``. (It used to be ignored and ``True``
        was always used; the default keeps that behaviour.)

    Returns
    -------
    pandas.DataFrame
        Indexed by ``SK_ID_CURR`` with ``POS_*`` aggregate columns and a
        ``POS_COUNT`` column holding the number of rows per ``SK_ID_CURR``.
    """
    if feather:
        pos = pd.read_feather(DATA_PATH + 'POS_CASH_balance_feathered.csv')
    else:
        pos = pd.read_csv(DATA_PATH + 'POS_CASH_balance.csv', nrows = num_rows)

    pos, cat_cols = one_hot_encoder(pos, nan_as_category= nan_as_category)
    # Features
    aggregations = {
        'MONTHS_BALANCE': ['max', 'mean', 'size'],
        'SK_DPD': ['max', 'mean'],
        'SK_DPD_DEF': ['max', 'mean']
    }
    # Indicator columns produced by the encoder are averaged.
    for cat in cat_cols:
        aggregations[cat] = ['mean']

    by_client = pos.groupby('SK_ID_CURR')
    pos_agg = by_client.agg(aggregations)
    pos_agg.columns = pd.Index(['POS_' + e[0] + "_" + e[1].upper() for e in pos_agg.columns.tolist()])
    # Count POS-cash rows per client
    pos_agg['POS_COUNT'] = by_client.size()
    del pos, by_client
    gc.collect()
    return pos_agg

# Left-join the POS-cash aggregates (SK_ID_CURR moved from index to column first).
with timer("Process POS-CASH balance"):
    pos_features = pos_cash(num_rows, feather)
    print("Pos-cash balance df shape:", pos_features.shape)
    df = pd.merge(df, pos_features.reset_index(), how='left', on='SK_ID_CURR')
    del pos_features
    gc.collect()

Pos-cash balance df shape: (337252, 18)
Process POS-CASH balance - done in 36s


### Installment payments

In [7]:
# Preprocess installments_payments.csv
def installments_payments(num_rows, feather, nan_as_category = True):
    """Aggregate installments_payments records to one row per SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Row limit passed to ``pd.read_csv``; not applied when ``feather`` is true.
    feather : bool
        If true, read ``installments_payments_feathered.csv`` with ``pd.read_feather``.
    nan_as_category : bool, default True
        Forwarded to ``one_hot_encoder``. (It used to be ignored and ``True``
        was always used; the default keeps that behaviour.)

    Returns
    -------
    pandas.DataFrame
        Indexed by ``SK_ID_CURR`` with ``INSTAL_*`` aggregate columns and an
        ``INSTAL_COUNT`` column holding the number of rows per ``SK_ID_CURR``.
    """
    if feather:
        ins = pd.read_feather(DATA_PATH + 'installments_payments_feathered.csv')
    else:
        ins = pd.read_csv(DATA_PATH + 'installments_payments.csv', nrows = num_rows)

    ins, cat_cols = one_hot_encoder(ins, nan_as_category= nan_as_category)
    # Percentage and difference paid in each installment (amount paid and installment value)
    ins['PAYMENT_PERC'] = ins['AMT_PAYMENT'] / ins['AMT_INSTALMENT']
    ins['PAYMENT_DIFF'] = ins['AMT_INSTALMENT'] - ins['AMT_PAYMENT']
    # Days past due and days before due (no negative values).
    # Series.where keeps values where the condition holds and writes 0 elsewhere,
    # so non-positive and missing differences both become 0 - the same result as
    # the former per-row lambda, without a Python-level call per row.
    days_late  = ins['DAYS_ENTRY_PAYMENT'] - ins['DAYS_INSTALMENT']
    days_early = ins['DAYS_INSTALMENT'] - ins['DAYS_ENTRY_PAYMENT']
    ins['DPD'] = days_late.where(days_late > 0, 0)
    ins['DBD'] = days_early.where(days_early > 0, 0)
    # Features: Perform aggregations
    aggregations = {
        'NUM_INSTALMENT_VERSION': ['nunique'],
        'DPD': ['max', 'mean', 'sum'],
        'DBD': ['max', 'mean', 'sum'],
        'PAYMENT_PERC': ['max', 'mean', 'sum', 'var'],
        'PAYMENT_DIFF': ['max', 'mean', 'sum', 'var'],
        'AMT_INSTALMENT': ['max', 'mean', 'sum'],
        'AMT_PAYMENT': ['min', 'max', 'mean', 'sum'],
        'DAYS_ENTRY_PAYMENT': ['max', 'mean', 'sum']
    }
    # Indicator columns produced by the encoder are averaged.
    for cat in cat_cols:
        aggregations[cat] = ['mean']

    by_client = ins.groupby('SK_ID_CURR')
    ins_agg = by_client.agg(aggregations)
    ins_agg.columns = pd.Index(['INSTAL_' + e[0] + "_" + e[1].upper() for e in ins_agg.columns.tolist()])
    # Count installment rows per client
    ins_agg['INSTAL_COUNT'] = by_client.size()
    del ins, by_client
    gc.collect()
    return ins_agg

# Left-join the installment aggregates (SK_ID_CURR moved from index to column first).
with timer("Process installments payments"):
    ins_features = installments_payments(num_rows, feather)
    print("Installments payments df shape:", ins_features.shape)
    df = pd.merge(df, ins_features.reset_index(), how='left', on='SK_ID_CURR')
    del ins_features
    gc.collect()

Installments payments df shape: (339587, 26)
Process installments payments - done in 70s


### Credit Card Balance

In [8]:
# Preprocess credit_card_balance.csv
def credit_card_balance(num_rows, feather, nan_as_category = True):
    """Aggregate credit_card_balance records to one row per SK_ID_CURR.

    Parameters
    ----------
    num_rows : int or None
        Row limit passed to ``pd.read_csv``; not applied when ``feather`` is true.
    feather : bool
        If true, read ``credit_card_balance_feathered.csv`` with ``pd.read_feather``.
    nan_as_category : bool, default True
        Forwarded to ``one_hot_encoder``. (It used to be ignored and ``True``
        was always used; the default keeps that behaviour.)

    Returns
    -------
    pandas.DataFrame
        Indexed by ``SK_ID_CURR``; for every remaining column the min, max,
        mean, sum and var as ``CC_<column>_<STAT>``, plus ``CC_COUNT`` with the
        number of rows per ``SK_ID_CURR``.
    """
    if feather:
        cc = pd.read_feather(DATA_PATH + 'credit_card_balance_feathered.csv')
    else:
        cc = pd.read_csv(DATA_PATH + 'credit_card_balance.csv', nrows = num_rows)

    cc, _ = one_hot_encoder(cc, nan_as_category= nan_as_category)
    # General aggregations: SK_ID_PREV is dropped so it is not aggregated.
    cc = cc.drop(columns=['SK_ID_PREV'])
    by_client = cc.groupby('SK_ID_CURR')
    cc_agg = by_client.agg(['min', 'max', 'mean', 'sum', 'var'])
    cc_agg.columns = pd.Index(['CC_' + e[0] + "_" + e[1].upper() for e in cc_agg.columns.tolist()])
    # Count credit card rows per client
    cc_agg['CC_COUNT'] = by_client.size()
    del cc, by_client
    gc.collect()
    return cc_agg

# Left-join the credit-card aggregates (SK_ID_CURR moved from index to column first).
with timer("Process credit card balance"):
    cc_features = credit_card_balance(num_rows, feather)
    print("Credit card balance df shape:", cc_features.shape)
    df = pd.merge(df, cc_features.reset_index(), how='left', on='SK_ID_CURR')
    del cc_features
    gc.collect()

Credit card balance df shape: (103558, 141)
Process credit card balance - done in 61s


# Modeling

In [9]:
import xgboost as xgb
import lightgbm as lgb

from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import train_test_split

### Narrow Features

In [10]:
# Split the merged frame by whether TARGET is present: rows with a TARGET value
# become the training set, rows without one become the test set.
train_df = df[df['TARGET'].notnull()]
test_df  = df[df['TARGET'].isnull()]

# Optional feature selection: when enabled, fit one LightGBM model on an 80/20
# split and keep the NUM_FEATS columns with the highest feature importance in `feats`.
# When disabled (the current setting) nothing below runs and `feats` is not defined here.
take_important_feats = False
if take_important_feats:
    # NOTE(review): only TARGET is dropped, so SK_ID_CURR stays in train_x and
    # can end up in `feats` - confirm that this is intended.
    train_x = train_df.drop(["TARGET"], axis=1)
    train_y = train_df["TARGET"]

    training_x, val_x, training_y, val_y = train_test_split(train_x, train_y, test_size=0.2, random_state=17)

    lgb_train = lgb.Dataset(data=training_x, label=training_y)
    lgb_eval  = lgb.Dataset(data=val_x, label=val_y)

    # try feature_fraction
    params = {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 
              'learning_rate': 0.3, 'num_leaves': 55, 'num_iteration': 2000, 'verbose': 0 ,
              'subsample':.9, 'max_depth':7, 'reg_alpha':20, 'reg_lambda':20, 
              'min_split_gain':.05, 'min_child_weight':1, "min_data_in_leaf": 40,
              "feature_fraction":0.5}

    start = time.time()
    # NOTE(review): early_stopping_rounds / verbose_eval are passed as keyword
    # arguments to lgb.train - check they are accepted by the installed LightGBM version.
    model = lgb.train(params, lgb_train, valid_sets=lgb_eval, early_stopping_rounds=150, verbose_eval=200)
    print("Training took {} seconds".format(round(time.time() - start)))

    # Number of top-importance columns to keep.
    NUM_FEATS = 350

    # Sort (importance, column) pairs ascending, take the last NUM_FEATS pairs
    # and keep only their column names.
    feats = sorted(list(zip(model.feature_importance(), train_x.columns)))
    feats = list(list(zip(*feats[-NUM_FEATS:]))[1])

### KFold

Best run so far (validation log line, followed by the parameters used):

[40]	train-auc:0.815651	valid-auc:0.783637

params = {"objective": "binary:logistic", "eval_metric": "auc",
              "alpha": 10, "lambda": 10, "gamma": 10,
              "colsample_bytree": 0.5, "colsample_bylevel": 0.5,
              "min_child_weight": 40,
              "max_depth": 8, "max_leaves": 200,
              "random_state": 17}

In [22]:
# XGBoost with KFold or Stratified KFold
def kfold_xgb(train_df, test_df, feats=None, num_folds = 5, stratified = False, debug= False):
    """Train XGBoost with K-fold cross-validation and write the averaged test predictions.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Labelled rows; must contain ``TARGET``.
    test_df : pandas.DataFrame
        Rows to predict; must contain ``SK_ID_CURR``. It is not modified.
    feats : list of str or None
        Optional subset of columns to train on. The list is not modified;
        ``TARGET`` and ``SK_ID_CURR`` are always kept alongside it so that
        training and the submission file still work.
    num_folds : int, default 5
        Number of cross-validation folds.
    stratified : bool, default False
        Use ``StratifiedKFold`` instead of ``KFold``.
    debug : bool, default False
        Accepted for call compatibility; not used.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Out-of-fold predictions for ``train_df`` and fold-averaged predictions
        for ``test_df``. The latter are also written, together with
        ``SK_ID_CURR``, to ``../submissions/xgb_oliver.csv``.
    """
    non_feature_cols = ['TARGET', 'SK_ID_CURR', 'SK_ID_BUREAU', 'SK_ID_PREV', 'index']

    if feats:
        # Order-preserving de-duplication on a fresh list: the caller's list is
        # left untouched and the column order is the same on every run.
        # SK_ID_CURR is kept because the submission file needs it.
        keep = list(dict.fromkeys(list(feats) + ['TARGET', 'SK_ID_CURR']))
        train_df = train_df[keep]
        test_df  = test_df[keep]
    # Divide in training/validation and test data
    print("Starting XGB model. Train shape: {}, test shape: {}".format(train_df.shape, test_df.shape))

    # Cross validation model
    splitter = StratifiedKFold if stratified else KFold
    folds = splitter(n_splits= num_folds, shuffle=True, random_state=1001)

    # Create arrays to store results
    oof_preds = np.zeros(train_df.shape[0])
    sub_preds = np.zeros(test_df.shape[0])
    feats = [f for f in train_df.columns if f not in non_feature_cols]

    # Booster parameters, identical for every fold. The number of boosting
    # rounds is set by the third argument of xgb.train below; the former
    # "n_estimators" entry was dropped because it belongs to the scikit-learn
    # wrapper and is not a native training parameter.
    params = {"learning_rate": 0.01,
              "max_depth": 4, "min_child_weight": 5,
              "subsample": 0.8, "colsample_bytree": 0.8,
              "objective": "binary:logistic",
              "nthread": 4, "scale_pos_weight": 2.5,
              "eval_metric": "auc",
              "seed": 27, "reg_lambda": 1.2}

    # The test matrix does not depend on the fold, so build it once.
    xgb_test = xgb.DMatrix(data=test_df[feats])

    for n_fold, (train_idx, valid_idx) in enumerate(folds.split(train_df[feats], train_df['TARGET'])):
        train_x, train_y = train_df[feats].iloc[train_idx], train_df['TARGET'].iloc[train_idx]
        valid_x, valid_y = train_df[feats].iloc[valid_idx], train_df['TARGET'].iloc[valid_idx]

        xgb_train = xgb.DMatrix(data=train_x, label=train_y)
        xgb_eval  = xgb.DMatrix(data=valid_x, label=valid_y)
        watchlist = [(xgb_train, 'train'), (xgb_eval, 'valid')]

        clf = xgb.train(dict(params), xgb_train, 1000, watchlist, maximize=True, early_stopping_rounds = 200, verbose_eval=10)

        # NOTE(review): with early stopping the returned booster may contain
        # rounds past the best one; whether predict() limits itself to the best
        # iteration depends on the xgboost version - confirm and pass the
        # appropriate limit if needed.
        oof_preds[valid_idx] = clf.predict(xgb_eval)
        sub_preds += clf.predict(xgb_test) / folds.n_splits

        print('Fold %2d AUC : %.6f' % (n_fold + 1, roc_auc_score(valid_y, oof_preds[valid_idx])))
        del clf, train_x, train_y, valid_x, valid_y, xgb_train, xgb_eval
        gc.collect()

    print('Full AUC score %.6f' % roc_auc_score(train_df['TARGET'], oof_preds))

    ### Write submission file
    # Build a separate frame instead of assigning into test_df, which is a
    # slice of the caller's data.
    submission = test_df[['SK_ID_CURR']].copy()
    submission['TARGET'] = sub_preds
    submission.to_csv("../submissions/xgb_oliver.csv", index= False)

    return oof_preds, sub_preds
    
# The timer title names XGBoost because kfold_xgb trains with xgb.train.
with timer("Run XGBoost with kfold"):
    kfold_xgb(train_df, test_df, feats=None, num_folds = 5, stratified= False, debug = debug)

Starting XGB model. Train shape: (307507, 808), test shape: (48744, 808)
[0]	train-auc:0.719624	valid-auc:0.719493
Multiple eval metrics have been passed: 'valid-auc' will be used for early stopping.

Will train until valid-auc hasn't improved in 200 rounds.
[10]	train-auc:0.73294	valid-auc:0.732914
[20]	train-auc:0.735772	valid-auc:0.735395
[30]	train-auc:0.736745	valid-auc:0.736168
[40]	train-auc:0.737574	valid-auc:0.736945
[50]	train-auc:0.738444	valid-auc:0.73774


KeyboardInterrupt: 