In [99]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_pinball_loss

## Read Train and Test Data 

In [63]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

## Check for Missing values and replace with the most frequent value

In [64]:
train.isnull().sum()

id                 0
wi                 0
year               0
month              0
age                0
education          0
familysize         0
urban              0
race               0
region          6994
state          95742
marital            0
occupation    233483
income             0
expense            0
dtype: int64

In [65]:
train = train.fillna(train.mode().iloc[0].astype('int64'))
train.isnull().sum()
train.dtypes

id              int64
wi            float64
year            int64
month           int64
age             int64
education       int64
familysize      int64
urban           int64
race            int64
region        float64
state         float64
marital         int64
occupation    float64
income          int64
expense         int64
dtype: object

In [66]:
col_types={'region':'int64','state':'int64','occupation':'int64'}
train = train.astype(col_types)

In [67]:
test.isnull().sum()

id                0
year              0
month             0
age               0
education         0
familysize        0
urban             0
race              0
region         1826
state         24016
marital           0
occupation    58171
income            0
dtype: int64

In [68]:
test = test.fillna(test.mode().iloc[0].astype('int64'))
test.isnull().sum()
test.dtypes

id              int64
year            int64
month           int64
age             int64
education       int64
familysize      int64
urban           int64
race            int64
region        float64
state         float64
marital         int64
occupation    float64
income          int64
dtype: object

In [69]:
col_types={'region':'int64','state':'int64','occupation':'int64'}
test = test.astype(col_types)

## Identify categorical, ordinal and numeric columns

In [70]:
columns = train.columns
columns  = columns.to_list()
categorical_columns=['urban','race','region','state','marital','occupation']
ordinal_columns=['education']
numeric_columns=['age','familysize','income']
weight_column=['wi']
target=['expense']
id_column=['id']
columns

['id',
 'wi',
 'year',
 'month',
 'age',
 'education',
 'familysize',
 'urban',
 'race',
 'region',
 'state',
 'marital',
 'occupation',
 'income',
 'expense']

## Convert types of various columns in train and test data

In [71]:
#Convert cateogrical columns to 'category' type
cat_types={c:'category' for c in categorical_columns}
train = train.astype(cat_types)
#Convert ordinal column to category type
train['education'] = train['education'].astype('category')
#Get different categories for ordinal column
train_ord_categories = train.education.unique().tolist()
train_ord_categories.sort()
train['education'] = train['education'].cat.set_categories(train_ord_categories,ordered=True)

In [72]:
#Convert cateogrical columns to 'category' type
cat_types={c:'category' for c in categorical_columns}
test = test.astype(cat_types)
#Convert ordinal column to category type
test['education'] = test['education'].astype('category')
#Get different categories for ordinal column
test_ord_categories = test.education.unique().tolist()
test_ord_categories.sort()
test['education'] = test['education'].cat.set_categories(test_ord_categories,ordered=True)
test.dtypes

id               int64
year             int64
month            int64
age              int64
education     category
familysize       int64
urban         category
race          category
region        category
state         category
marital       category
occupation    category
income           int64
dtype: object

## Create dummy categorical columns for all categorical variables in train

In [73]:
train_cat_dummies = pd.get_dummies(train[categorical_columns])
new_cat_columns=train_cat_dummies.columns.to_list()
train = pd.concat([train,train_cat_dummies],axis=1)
train.drop(categorical_columns,inplace=True,axis=1)
train.drop(['year','month'],inplace=True,axis=1)
train['education']=train['education'].cat.codes
train

Unnamed: 0,id,wi,age,education,familysize,income,expense,urban_1,urban_2,race_1,...,occupation_9,occupation_10,occupation_11,occupation_12,occupation_13,occupation_14,occupation_15,occupation_16,occupation_17,occupation_18
0,1,2831.0,50,3,3,13141,2398,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,1941.0,23,4,1,0,575,1,0,0,...,0,0,1,0,0,0,0,0,0,0
2,3,1369.0,56,6,4,459,1592,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,4,816.0,59,8,2,17474,3443,1,0,1,...,0,0,0,0,0,0,0,0,0,0
4,5,3064.0,51,6,2,39395,1484,1,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741869,741870,1515.0,66,7,2,7780,9670,1,0,1,...,0,0,0,0,0,0,0,0,0,0
741870,741871,1544.0,25,5,1,12528,1146,1,0,1,...,0,0,0,0,0,0,0,0,0,0
741871,741872,2702.0,56,6,2,37435,3077,1,0,0,...,0,0,0,0,0,0,0,0,0,0
741872,741873,1310.0,48,6,5,54714,6566,1,0,1,...,0,0,0,0,0,0,0,0,0,0


## Create dummy categorical columns for all categorical variables in test

In [75]:
test_cat_dummies = pd.get_dummies(test[categorical_columns])
test_new_cat_columns=test_cat_dummies.columns.to_list()
test = pd.concat([test,test_cat_dummies],axis=1)
test.drop(categorical_columns,inplace=True,axis=1)
test.drop(['year','month'],inplace=True,axis=1)
test['education']=test['education'].cat.codes
test

Unnamed: 0,id,age,education,familysize,income,urban_1,urban_2,race_1,race_2,race_3,...,occupation_9,occupation_10,occupation_11,occupation_12,occupation_13,occupation_14,occupation_15,occupation_16,occupation_17,occupation_18
0,741875,49,4,1,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,741876,29,3,4,11628,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,741877,22,4,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,741878,78,3,2,13413,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,741879,25,6,2,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185464,927339,51,4,2,20957,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
185465,927340,70,2,1,3843,1,0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
185466,927341,58,6,1,15598,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
185467,927342,35,4,5,10138,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


## Standardize numerical columns in train

In [77]:
for c in numeric_columns:
    train[c] = (train[c]-train[c].mean())/train[c].std()
train

Unnamed: 0,id,wi,age,education,familysize,income,expense,urban_1,urban_2,race_1,...,occupation_9,occupation_10,occupation_11,occupation_12,occupation_13,occupation_14,occupation_15,occupation_16,occupation_17,occupation_18
0,1,2831.0,0.016812,3,0.321927,-0.287200,2398,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,1941.0,-1.538223,4,-1.010164,-0.995756,575,1,0,0,...,0,0,1,0,0,0,0,0,0,0
2,3,1369.0,0.362376,6,0.987973,-0.971007,1592,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,4,816.0,0.535157,8,-0.344119,-0.053567,3443,1,0,1,...,0,0,0,0,0,0,0,0,0,0
4,5,3064.0,0.074406,6,-0.344119,1.128403,1484,1,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741869,741870,1515.0,0.938314,7,-0.344119,-0.576263,9670,1,0,1,...,0,0,0,0,0,0,0,0,0,0
741870,741871,1544.0,-1.423035,5,-1.010164,-0.320253,1146,1,0,1,...,0,0,0,0,0,0,0,0,0,0
741871,741872,2702.0,0.362376,6,-0.344119,1.022721,3077,1,0,0,...,0,0,0,0,0,0,0,0,0,0
741872,741873,1310.0,-0.098375,6,1.654018,1.954396,6566,1,0,1,...,0,0,0,0,0,0,0,0,0,0


## Standardize numerical columns in test

In [78]:
for c in numeric_columns:
    test[c] = (test[c]-test[c].mean())/test[c].std()
test

Unnamed: 0,id,age,education,familysize,income,urban_1,urban_2,race_1,race_2,race_3,...,occupation_9,occupation_10,occupation_11,occupation_12,occupation_13,occupation_14,occupation_15,occupation_16,occupation_17,occupation_18
0,741875,-0.035942,4,-1.011089,-0.994219,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,741876,-1.183839,3,0.992162,-0.368123,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,741877,-1.585604,4,-1.011089,-0.994219,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,741878,1.628510,3,-0.343339,-0.272012,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,741879,-1.413419,6,-0.343339,-0.994219,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185464,927339,0.078848,4,-0.343339,0.134186,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
185465,927340,1.169351,2,-1.011089,-0.787298,1,0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
185466,927341,0.480612,6,-1.011089,-0.154363,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
185467,927342,-0.839470,4,1.659912,-0.448351,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


## Create Train and Validation

In [79]:
columns = train.columns.to_list()
from sklearn.model_selection import train_test_split
a_train,a_validation = train_test_split(train,test_size=0.2)
train = pd.DataFrame(a_train,columns=columns)
validation = pd.DataFrame(a_validation,columns=columns)

## Create X_train, y_train, X_val, y_val

In [80]:
X_train = train[new_cat_columns+ordinal_columns+numeric_columns]
y_train = train[target]
sample_weights_train=train[weight_column].to_numpy()
sample_weights_train=sample_weights_train.reshape(-1)
X_val = validation[new_cat_columns+ordinal_columns+numeric_columns]
y_val = validation[target]
sample_weights_validation=validation[weight_column].to_numpy()
sample_weights_validation = sample_weights_validation.reshape(-1)

## Create X_test for final predictions

In [83]:
X_test = test[test_new_cat_columns+ordinal_columns+numeric_columns]
input_dims_test=X_test.shape[1]
print(input_dims_test)

84


## Get input_dims for X_train used to setup tensorflow model

In [87]:
input_dims_train = X_train.shape[1]
input_dims_val = X_val.shape[1]
input_dims = input_dims_train

## Tensorflow Model Definition function

In [85]:
def defineModel(input_dims):
    model = Sequential()
    model.add(Dense(units=100, input_dim=input_dims,activation='relu'))
    model.add(Dense(units=100, input_dim=input_dims,activation='relu'))
    model.add(Dense(1))
    return model

## Create models for each quantile

In [88]:
quantiles = [0.005,0.025,0.165,0.25,0.5,0.75,0.835,0.975,0.995]
models = {}
for index,q in enumerate(quantiles):
    models["q"+str(index+1)] = defineModel(input_dims)

## Compile each model with tfa.losses.PinballLoss

In [94]:
for index,q in enumerate(models.keys()):
    models[q].compile(optimizer=Adam(learning_rate=0.01),loss=tfa.losses.PinballLoss(tau=quantiles[index]))

## fit each model using X_train,y_train,sample_weights_train

In [96]:
for q in models.keys():
    print(f"fitting model for {q}")
    models[q].fit(X_train,y_train,batch_size=64,epochs=50,sample_weight=sample_weights_train)

fitting model for q1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q2
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoc

Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q3
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q4
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoc

Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q5
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q6
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoc

Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q7
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q8
Epoch 1/50
Epoch 2/50
Epoch 3/5

Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
fitting model for q9
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoc

## Predict on entire training set and calculate mean_pinball_loss_train for all quantiles

In [100]:
mpl_train={}
for i,q in enumerate(models.keys()):
    y_pred=models[q].predict(X_train)
    mpl_train[q]=mean_pinball_loss(y_train.values.ravel(),y_pred,sample_weight=sample_weights_train,alpha=quantiles[i])
mpl_train



{'q1': 20.777254156829716,
 'q2': 76.86416249859539,
 'q3': 343.4860111116149,
 'q4': 460.39985821201685,
 'q5': 686.2712019408766,
 'q6': 717.1019271781644,
 'q7': 661.6013996039198,
 'q8': 314.5075203689776,
 'q9': 135.11801059339217}

## Predict on validation set and calculate mean_pinball_loss for all quantiles

In [101]:
mpl_val={}
for i,q in enumerate(models.keys()):
    y_pred = models[q].predict(X_val)
    mpl_val[q]=mean_pinball_loss(y_val.values.ravel(),y_pred,sample_weight=sample_weights_validation,alpha=quantiles[i])
mpl_val



{'q1': 21.284945230226118,
 'q2': 77.94043037252614,
 'q3': 345.73788456439894,
 'q4': 461.3396794173967,
 'q5': 686.7073912706567,
 'q6': 718.8144853147141,
 'q7': 663.191831901588,
 'q8': 328.96093722609993,
 'q9': 144.26872820317593}

## write mean pinball losses to a file

In [106]:
mpls={}
mpls['Data'] = ['mpl_train','mpl_val']
for q in mpl_train.keys():
    mpls[q]=[mpl_train[q],mpl_val[q]]
df = pd.DataFrame(mpls)
df.to_csv("DeepQuantReg_MeanPinballLoss.csv",index=False)

## Predict on test set and create output data frame

In [102]:
test_df = test[id_column]
for q in models.keys():
    test_df[q] = models[q].predict(X_test)

   1/5796 [..............................] - ETA: 2:07

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)


  48/5796 [..............................] - ETA: 6s  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)


  34/5796 [..............................] - ETA: 8s  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)


   1/5796 [..............................] - ETA: 2:13

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)


   1/5796 [..............................] - ETA: 3:40

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[q] = models[q].predict(X_test)


In [103]:
test_df

Unnamed: 0,id,q1,q2,q3,q4,q5,q6,q7,q8,q9
0,741875,0.002155,0.041018,415.862946,533.342468,1207.400024,1540.236694,1494.764771,3097.941162,8743.972656
1,741876,0.002155,408.631317,1548.614746,1870.294678,2324.487305,2801.555420,3328.641602,8567.908203,15888.204102
2,741877,0.002155,0.041018,200.441528,257.670654,589.048828,1074.236938,1386.907471,4227.487305,7980.968262
3,741878,0.002155,204.340500,1024.434082,1172.977051,1732.630493,2456.526611,3086.889160,5891.034668,15471.567383
4,741879,0.002155,0.041018,908.830078,768.379883,1756.229736,2193.549072,4970.726074,7932.776855,8123.765137
...,...,...,...,...,...,...,...,...,...,...
185464,927339,0.002155,0.041018,1239.587769,1639.713989,2831.386475,3852.295166,3655.614502,15184.964844,17265.908203
185465,927340,0.002155,142.268250,473.997955,733.323425,1056.846313,1780.866089,1534.364502,3513.184570,8583.276367
185466,927341,0.002155,0.041018,1462.513306,1624.515991,2442.233154,3715.857910,4111.535156,10691.758789,19599.353516
185467,927342,0.002155,0.041018,1279.245605,1676.730957,2014.810425,2853.300537,3769.618408,7202.806152,9304.554688


## write output to test_quantiles_DeepQuantileReg.csv

In [105]:
test_df.to_csv('test_quantiles_DeepQuantileReg.csv',index=False)