In [None]:
import pandas as pd
import numpy as np
import amp_pd_peptide_310
import lightgbm as lgbm

In [None]:
# Polynomial trend coefficients per UPDRS target, lowest order first:
# [intercept, linear] or [intercept, linear, quadratic].
# They are consumed by `replace_nan`, which evaluates them at a month value.
nan_to_trend = {
    'updrs_1': [5.394793062665313, 0.027091086167821344],
    'updrs_2': [4.991499435628459, 0.046525105983252386],
    'updrs_3': [
        21.450374496989554,
        0.035651592857152345,
        0.0010867699338717358,
    ],
    'updrs_4': [-6.527843696016633, 0.1097502927175179],
}


def replace_nan(pred_month, trend):
    """Evaluate a linear or quadratic trend at ``pred_month``.

    Parameters
    ----------
    pred_month : scalar, array or Series
        Month value(s) at which the trend is evaluated.
    trend : sequence of float
        Coefficients ``[intercept, slope]`` or ``[intercept, slope, quadratic]``.
        A length of exactly 2 selects the linear form; any other length reads
        ``trend[2]`` as the quadratic coefficient.

    Returns
    -------
    The trend value rounded to 0 decimals and clipped below at 0.0, with the
    same shape as ``pred_month``.
    """
    estimate = trend[0] + pred_month * trend[1]
    if len(trend) != 2:
        # Quadratic term is added last so the summation order stays
        # (intercept + linear) + quadratic.
        estimate = estimate + np.square(pred_month) * trend[2]
    rounded = np.round(estimate, 0)
    return np.clip(rounded, 0.0, None)

In [None]:
# Load the main and supplemental clinical tables and stack them.
train_cli = pd.read_csv("/kaggle/input/amp-parkinsons-disease-progression-prediction/train_clinical_data.csv")
sup_cli = pd.read_csv('/kaggle/input/amp-parkinsons-disease-progression-prediction/supplemental_clinical_data.csv')
train = pd.concat([train_cli, sup_cli])

# Remove every patient that has a visit recorded at month 5.
p1 = train.loc[train['visit_month'] == 5, 'patient_id'].to_list()
train = train.loc[~train['patient_id'].isin(p1)].reset_index(drop=True)

targets = ['updrs_1', 'updrs_2', 'updrs_3', 'updrs_4']
flags = [6, 12, 18, 36, 60, 84]

# Gap (in months) to the previous row of the same patient, and the smallest
# such gap per patient.
train['visit_month_diff'] = train.groupby(['patient_id'])['visit_month'].diff()
train['visit_month_diff_min'] = train.groupby('patient_id')['visit_month_diff'].transform('min')

# group: 1 by default, 0 when the patient's smallest gap is exactly 12 months,
# -1 for rows at month 0 (applied last, so it wins over the other two).
train['group'] = 1
smallest_gap_is_12 = train['visit_month_diff_min'].eq(12)
# NOTE(review): patient 2689 is forced into group 0; the reason is not visible here.
forced_to_zero = train['patient_id'].eq(2689)
train.loc[smallest_gap_is_12 | forced_to_zero, 'group'] = 0
train.loc[train['visit_month'].eq(0), 'group'] = -1

In [None]:
# Rebuild `train` with one row per visit plus one indicator column per month
# in `flags`:
#   v{f} =  1.0  the patient has a visit at month f
#   v{f} =  0.0  the patient has no visit at month f
#   v{f} = -1.0  month f is later than this row's visit_month
# Rows are emitted patient by patient, in order of first appearance, with the
# original row order kept inside each patient.
rows = []
patients = train.patient_id.unique()
flag_columns = [f'v{f}' for f in flags]
kept_columns = ['visit_month', 'group', *targets]

# One grouped pass instead of re-filtering the whole frame for every patient;
# sort=False keeps the first-appearance order that iterating `patients` gave.
for p, visits in train.groupby('patient_id', sort=False):
    attended = np.isin(flags, visits['visit_month'].to_numpy()).astype(float)
    flag_values = dict(zip(flag_columns, attended))
    for record in visits[kept_columns].to_dict('records'):
        rows.append({'patient_id': p, **record, **flag_values})

train = pd.DataFrame(rows)

# Mask flags that lie after the row's own visit month, one vectorised
# assignment per flag rather than one `.loc` write per cell.
for f in flags:
    train.loc[train['visit_month'] < f, f'v{f}'] = -1

In [None]:
# For every row, attach the same patient's scores recorded 6, 12 and 24 months
# later. This is a left self-merge against a copy whose visit_month has been
# moved back by the horizon; rows with no such later visit get NaN.
train['pred_month'] = train['visit_month']
for plus_month in [6, 12, 24]:
    horizon_names = {f'updrs_{i}': f'updrs_{i}_plus_{plus_month}_months' for i in range(1, 5)}
    horizon_names['pred_month'] = f'pred_month_plus_{plus_month}_months'

    later_visits = train[['patient_id', 'visit_month', 'pred_month', 'updrs_1', 'updrs_2', 'updrs_3', 'updrs_4']]
    train_shift = (
        later_visits
        .assign(visit_month=later_visits['visit_month'] - plus_month)
        .rename(columns=horizon_names)
    )
    train = train.merge(train_shift, how='left', on=['patient_id', 'visit_month'])

# The un-shifted columns become the "plus 0 months" horizon.
current_names = {f'updrs_{i}': f'updrs_{i}_plus_0_months' for i in range(1, 5)}
current_names['pred_month'] = f'pred_month_plus_0_months'
train = train.rename(columns=current_names)

In [None]:
# Add one trend feature per (target, horizon): the target's polynomial trend
# evaluated at the month being predicted.
for updrs in targets:
    coefficients = nan_to_trend[updrs]
    for plus_month in [0, 6, 12, 24]:
        month_column = f'pred_month_plus_{plus_month}_months'
        col = f'{updrs}_plus_{plus_month}_trend'
        train[col] = replace_nan(train[month_column], coefficients)

In [None]:
# Fit one LightGBM regressor (MAE objective) per target and horizon.
# models_trend[target][horizon] -> fitted LGBMRegressor
models_trend = {}
for updrs in targets:
    fitted_by_horizon = models_trend[updrs] = {}
    for plus_month in [0, 6, 12, 24]:
        columns_target = f'{updrs}_plus_{plus_month}_months'
        feature_columns = [
            f'pred_month_plus_{plus_month}_months', 'group',
            'v6', 'v12', 'v18', 'v36', 'v60', 'v84',
            f'{updrs}_plus_{plus_month}_trend',
        ]

        # Keep only rows where the target and every feature are present.
        tmp = train[[columns_target] + feature_columns].dropna()
        y = tmp[columns_target]
        X = tmp[feature_columns]

        trained = lgbm.LGBMRegressor(verbose=-1, objective='mae')
        trained.fit(X, y)
        fitted_by_horizon[plus_month] = trained

In [None]:
def model_prediction(test, month):
    """Predict every target at horizons 0/6/12/24 for the visits at ``month``.

    Parameters
    ----------
    test : pandas.DataFrame
        Clinical test records seen so far. Must contain ``visit_id``,
        ``visit_month``, ``patient_id``, ``updrs_test`` and ``row_id``.
        The frame is not modified.
    month : int
        Visit month whose rows are returned with predictions.

    Returns
    -------
    pandas.DataFrame
        One row per visit at ``month`` with ``visit_id``, ``visit_month``,
        ``group``, the ``v{flag}`` indicators and one
        ``{target}_plus_{h}_months`` column per target and horizon.
        ``updrs_4`` is filled from the trend alone; the other targets come
        from ``models_trend`` and are rounded and clipped at 0.
    """
    flag_features = ['v6', 'v12', 'v18', 'v36', 'v60', 'v84']

    # updrs_test / row_id repeat each visit once per target; dropping them
    # and de-duplicating leaves one row per visit.
    df_test = test.drop(columns=['updrs_test', 'row_id'])
    df_test = df_test.drop_duplicates().reset_index(drop=True)

    # Same group labelling as used for training (without the patient override).
    df_test['visit_month_diff'] = df_test.groupby(['patient_id'])['visit_month'].diff()
    df_test['visit_month_diff_min'] = df_test.groupby('patient_id')['visit_month_diff'].transform('min')
    df_test['group'] = 1
    df_test.loc[df_test['visit_month_diff_min'] == 12, 'group'] = 0
    df_test.loc[df_test.visit_month == 0, 'group'] = -1

    # Per-patient attendance flags. sort=False keeps patients in order of
    # first appearance, which is the row order the result is returned in.
    flag_columns = [f'v{f}' for f in flags]
    rows = []
    for _, visits in df_test.groupby('patient_id', sort=False):
        attended = np.isin(flags, visits['visit_month'].to_numpy()).astype(float)
        flag_values = dict(zip(flag_columns, attended))
        for record in visits[['visit_id', 'visit_month', 'group']].to_dict('records'):
            rows.append({**record, **flag_values})

    df_test = pd.DataFrame(rows)

    # Flags for months later than the row's own visit are marked -1
    # (one vectorised write per flag instead of one `.loc` call per cell).
    for f in flags:
        df_test.loc[df_test['visit_month'] < f, f'v{f}'] = -1

    # Explicit copy: prediction columns are added below, and writing into a
    # filtered slice is a SettingWithCopy hazard.
    df_test = df_test.loc[df_test.visit_month == month, :].copy()

    for updrs in targets:
        for i in [0, 6, 12, 24]:
            col = f'{updrs}_plus_{i}_months'
            # Feature names mirror the ones the models were fitted with.
            month_feature = f'pred_month_plus_{i}_months'
            trend_feature = f'{updrs}_plus_{i}_trend'

            features = df_test[['visit_month', 'group', *flag_features]].rename(
                columns={'visit_month': month_feature})
            features[month_feature] = features[month_feature] + i
            features[trend_feature] = replace_nan(features[month_feature], nan_to_trend[updrs])

            if updrs != 'updrs_4':
                y_pred = models_trend[updrs][i].predict(
                    features[[month_feature, 'group', *flag_features, trend_feature]])
                y_pred = np.clip(np.round(y_pred, 0), 0.0, None)
            else:
                # updrs_4 uses the trend value directly, no model.
                y_pred = features[trend_feature]

            df_test[col] = y_pred

    return df_test

In [None]:
def format_submission(sub):
    """Reshape wide per-visit predictions into the long submission layout.

    Parameters
    ----------
    sub : pandas.DataFrame
        Must contain ``visit_id``, ``visit_month``, ``group`` and the
        ``v6``..``v84`` columns; every remaining column is treated as a
        prediction column. The frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        Columns ``prediction_id`` (``"<visit_id>_<prediction column>"``) and
        ``rating``, one row per visit and prediction column, visits in input
        order.
    """
    helper_columns = ['visit_month', 'group', 'v6', 'v12', 'v18', 'v36', 'v60', 'v84']

    # Work on derived frames so the caller's DataFrame is not mutated.
    wide = sub.set_index('visit_id').drop(columns=helper_columns)

    # Name the stacked levels explicitly rather than relying on the implicit
    # 'level_1' / 0 labels produced by reset_index().
    long = (
        wide.stack()
        .rename_axis(['visit_id', 'target'])
        .reset_index(name='rating')
    )
    long['prediction_id'] = long['visit_id'] + '_' + long['target']
    return long[['prediction_id', 'rating']]

In [None]:
env = amp_pd_peptide_310.make_env()
iter_test = env.iter_test()

# Every clinical batch received so far is kept, because the per-patient
# features in `model_prediction` are derived from the full visit history.
cli_batches = []
cli_record = pd.DataFrame()
for test_clinical, test_peptides, test_proteins, sample_submission in iter_test:
    # Each batch is assumed to cover a single visit month; the first one is used.
    m = test_clinical.visit_month.unique()[0]

    # Concatenate from the list of real batches so an empty placeholder
    # frame never takes part in the result's dtype resolution.
    cli_batches.append(test_clinical)
    cli_record = pd.concat(cli_batches, ignore_index=True)

    test_ob = model_prediction(cli_record, m)
    result = format_submission(test_ob)
    env.predict(result)