# Load in the dataset and clean

In [1]:
import tensorflow as tf
import sklearn
import pandas as pd
import numpy as np

In [2]:
# We will use the binary columns for this test
X_cols =['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68']

In [3]:
chunksize = 1000000

train_df_iter = pd.read_csv("train_data.csv", chunksize=chunksize, usecols=["customer_ID"] + X_cols)


train_df = pd.DataFrame()
for i_chunk, chunk in enumerate(train_df_iter):
    train_df = pd.concat([train_df, chunk])
    print(train_df.shape)

(1000000, 12)
(2000000, 12)
(3000000, 12)
(4000000, 12)
(5000000, 12)
(5531451, 12)


In [4]:
train_labels_df = pd.read_csv("train_labels.csv")

## We will Label encode every multi-class variable in the dataset

In [6]:
train_df.loc[500000:]

Unnamed: 0,customer_ID,D_63,D_64,D_66,D_68,B_30,B_38,D_114,D_116,D_117,D_120,D_126
500000,174c5845a69b53beac167eed7b6cad7dc4856a71b4e727...,CR,O,1.0,6.0,0.0,5.0,1.0,0.0,-1.0,0.0,1.0
500001,174c5845a69b53beac167eed7b6cad7dc4856a71b4e727...,CR,O,1.0,6.0,0.0,2.0,1.0,0.0,-1.0,0.0,1.0
500002,174c5845a69b53beac167eed7b6cad7dc4856a71b4e727...,CR,O,1.0,6.0,0.0,3.0,1.0,0.0,-1.0,0.0,1.0
500003,174c5845a69b53beac167eed7b6cad7dc4856a71b4e727...,CR,O,1.0,6.0,0.0,3.0,1.0,0.0,-1.0,0.0,1.0
500004,174c5845a69b53beac167eed7b6cad7dc4856a71b4e727...,CR,U,1.0,4.0,0.0,3.0,1.0,0.0,6.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5531446,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,CL,O,,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0
5531447,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,CL,O,,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0
5531448,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,CL,O,,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0
5531449,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,CL,O,,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0


In [10]:
def replace_all_nan_in_train_df(train_df):
    train_df["D_66"].fillna(1, inplace = True)
    train_df["D_116"].fillna(0, inplace = True)
    train_df["D_64"].fillna("U", inplace = True)
    train_df["D_68"].fillna(4, inplace = True)
    train_df["B_30"].fillna(1, inplace = True)
    train_df["B_38"].fillna(4, inplace = True)
    train_df["D_114"].fillna(0, inplace = True)
    train_df["D_117"].fillna(6, inplace = True)
    train_df["D_120"].fillna(1, inplace = True)
    train_df["D_126"].fillna(0, inplace = True)

In [12]:
train_df["D_116"].isna().sum()

0

In [11]:
replace_all_nan_in_train_df(train_df)

In [13]:
train_df.isna().sum()

customer_ID    0
D_63           0
D_64           0
D_66           0
D_68           0
B_30           0
B_38           0
D_114          0
D_116          0
D_117          0
D_120          0
D_126          0
dtype: int64

In [14]:
train_df = pd.merge(train_df, train_labels_df, on="customer_ID", how="left")

In [15]:
# Label encoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
D_63 = le.fit_transform(train_df["D_63"])

In [16]:
train_df["D_63"] = D_63

In [17]:
train_df.head()

Unnamed: 0,customer_ID,D_63,D_64,D_66,D_68,B_30,B_38,D_114,D_116,D_117,D_120,D_126,target
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,O,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
1,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,O,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
2,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,O,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
3,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,O,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
4,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,O,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0


In [18]:
D_64 = le.fit_transform(train_df["D_64"])
train_df["D_64"] = D_64

In [19]:
train_df.head()

Unnamed: 0,customer_ID,D_63,D_64,D_66,D_68,B_30,B_38,D_114,D_116,D_117,D_120,D_126,target
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
1,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
2,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
3,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
4,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0


In [20]:
train_df.columns

Index(['customer_ID', 'D_63', 'D_64', 'D_66', 'D_68', 'B_30', 'B_38', 'D_114',
       'D_116', 'D_117', 'D_120', 'D_126', 'target'],
      dtype='object')

In [21]:
# re-organizing the columns
train_df = train_df[['customer_ID', 'D_63', 'D_64', 'D_66', 'D_68', 'B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120',
       'D_126', 'target']]

In [22]:
train_df

Unnamed: 0,customer_ID,D_63,D_64,D_66,D_68,B_30,B_38,D_114,D_116,D_117,D_120,D_126,target
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
1,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
2,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
3,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
4,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2,1,1.0,6.0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5531446,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,0,1,1.0,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0,0
5531447,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,0,1,1.0,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0,0
5531448,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,0,1,1.0,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0,0
5531449,fffff1d38b785cef84adeace64f8f83db3a0c31e8d92ea...,0,1,1.0,5.0,0.0,3.0,1.0,0.0,3.0,0.0,1.0,0


In [44]:
_X_cols_ = train_df.columns[1:-1]

In [33]:
_X_cols = train_df.columns[1:-1]

## Now we will train_test_split and create a model

In [47]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(train_df[_X_cols_], train_df["target"], 
                                                    random_state=42, stratify=train_df["target"])

In [48]:
X_train.shape

(4148588, 11)

In [49]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(164, activation = "relu"),
    tf.keras.layers.Dense(128, activation = "relu"),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(64, activation = "relu"),
    tf.keras.layers.Dense(32, activation = "relu"),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(8, activation = "relu"),
    tf.keras.layers.Dense(1, activation = "sigmoid")
])

In [50]:
model.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = "accuracy")

In [51]:
model.fit(X_train, y_train, epochs = 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fc49bd38a60>

In [27]:
model.evaluate(X_test, y_test)



[0.43211281299591064, 0.8047152757644653]

In [52]:
chunksize = 1000000

test_df_iter = pd.read_csv("test_data.csv", chunksize=chunksize, usecols=["customer_ID"] + X_cols)

In [53]:
_index = []
_vals = []

for chunk in test_df_iter:
    # Reseting index to have 924621
    _chunk_last = chunk.groupby("customer_ID")[X_cols].last().reset_index() 
    _chunk = _chunk_last


    X_test = _chunk[_X_cols_]
    
    # Getting rid of NaN
    X_test["D_66"].fillna(1, inplace = True)
    X_test["D_116"].fillna(0, inplace = True)
    X_test["D_64"].fillna("U", inplace = True)
    X_test["D_68"].fillna(4, inplace = True)
    X_test["B_30"].fillna(1, inplace = True)
    X_test["B_38"].fillna(4, inplace = True)
    X_test["D_114"].fillna(0, inplace = True)
    X_test["D_117"].fillna(6, inplace = True)
    X_test["D_120"].fillna(1, inplace = True)
    X_test["D_126"].fillna(0, inplace = True)
    
    # Fitting to LabelEncoder
    D_64 = le.fit_transform(X_test["D_63"])
    D_63 = le.fit_transform(X_test["D_64"])
    
    X_test["D_64"] = D_64
    X_test["D_63"] = D_63
    

    # Changing Types 
    X_test = np.asarray(X_test)
    X_test = tf.constant(X_test, dtype = tf.float32)
    y_test_pred = model.predict(X_test)#[:, 1]
    _index.extend(_chunk["customer_ID"])
    _vals.extend(y_test_pred)
    
    print(len(_index))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

81358
   1/2540 [..............................] - ETA: 30s

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

162618


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

 140/2546 [>.............................] - ETA: 1s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_63"] = D_63


244059


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

325450


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

406815


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

488260


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

 126/2541 [>.............................] - ETA: 1s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"] = D_64
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_63"] = D_63


569555
   1/2543 [..............................] - ETA: 29s

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

650904


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

 141/2542 [>.............................] - ETA: 1s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"] = D_64
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_63"] = D_63


732217
   1/2542 [..............................] - ETA: 31s

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

813543


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

 144/2547 [>.............................] - ETA: 1s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"] = D_64
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_63"] = D_63


895040
  1/925 [..............................] - ETA: 10s

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_66"].fillna(1, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_116"].fillna(0, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_64"].fillna("U", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["D_68"].fillna(4, inplace = True)
A

924631


In [54]:
res_df = pd.DataFrame(
    {"customer_ID": _index, "prediction": np.squeeze(_vals)}
).groupby("customer_ID")["prediction"].mean().reset_index()

In [55]:
res_df.isna().sum()

customer_ID    0
prediction     0
dtype: int64

In [56]:
res_df.head()

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.3955
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.092948
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.243021
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.226076
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.66089


In [57]:
res_df.to_csv("tensorflow_predictions_submission_with_categorical_data.csv", index = False)