In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from statistics import mode 
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Read the five competition tables from the Kaggle input directory,
# in the same order as before.
(
    train_df,
    test_df,
    merchants_df,
    new_merchant_transactions_df,
    historical_transactions_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/train.csv",
        "../input/test.csv",
        "../input/merchants.csv",
        "../input/new_merchant_transactions.csv",
        "../input/historical_transactions.csv",
    )
)

In [None]:
def reduce_mem_usage(df, use_float16=True):
    """Downcast the columns of ``df`` in place to reduce its memory usage.

    Signed-integer and float columns are cast to the narrowest NumPy type
    whose range contains the column's minimum and maximum.  Object/string
    columns are converted to ``category``.  Every other dtype (bool,
    unsigned integers, datetimes, existing categoricals, pandas extension
    dtypes) is left untouched instead of being pushed through the numeric
    range checks.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink; it is modified in place.
    use_float16 : bool, default True
        Whether float columns may be narrowed to ``float16``.  Half precision
        keeps an 11-bit significand, so values are rounded to roughly three
        decimal digits; pass ``False`` to stop at ``float32``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for convenient re-assignment.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object or pd.api.types.is_string_dtype(col_type):
            df[col] = df[col].astype('category')
            continue

        # Extension dtypes (category, nullable ints, ...) have no NumPy range
        # to compare against; leave them as they are.
        if not isinstance(col_type, np.dtype):
            continue

        if np.issubdtype(col_type, np.signedinteger):
            candidates, limits = int_candidates, np.iinfo
        elif np.issubdtype(col_type, np.floating):
            candidates, limits = float_candidates, np.finfo
        else:
            # bool, unsigned ints, datetimes, ...: not downcast.
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        for candidate in candidates:
            bounds = limits(candidate)
            # Inclusive bounds: a value equal to the type's limit still fits.
            if bounds.min <= c_min and c_max <= bounds.max:
                df[col] = df[col].astype(candidate)
                break
        else:
            # No narrow type fits (e.g. inf or all-NaN floats): floats fall
            # back to float64, integers keep their current dtype.
            if limits is np.finfo:
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return df

In [None]:
historical_transactions_df = reduce_mem_usage(historical_transactions_df)
new_merchant_transactions_df = reduce_mem_usage(new_merchant_transactions_df)
merchants_df = reduce_mem_usage(merchants_df)

In [None]:
train_df.head()

In [None]:
train_df.shape

In [None]:
test_df.head()

In [None]:
test_df.shape

In [None]:
merchants_df.head()

In [None]:
merchants_df.shape

In [None]:
new_merchant_transactions_df.head()

In [None]:
historical_transactions_df.head()

In [None]:
historical_transactions_df.isna().sum()

In [None]:
historical_transactions_df['category_3'].value_counts()

In [None]:
new_merchant_transactions_df.isna().sum()

In [None]:
new_merchant_transactions_df['category_3'].value_counts()

In [None]:
new_merchant_transactions_df.head(2)

In [None]:
#historical_transactions_df.isna().sum()

In [None]:
# Fill missing category_2 / category_3 values with 1.0 and 'A'.
# The result is assigned back to the frame: calling fillna(inplace=True) on a
# column selection is chained assignment, which pandas warns about and which
# does not modify the frame once Copy-on-Write is active.
new_merchant_transactions_df['category_2'] = new_merchant_transactions_df['category_2'].fillna(1.0)
new_merchant_transactions_df['category_3'] = new_merchant_transactions_df['category_3'].fillna('A')

historical_transactions_df['category_2'] = historical_transactions_df['category_2'].fillna(1.0)
historical_transactions_df['category_3'] = historical_transactions_df['category_3'].fillna('A')

In [None]:
#del historical_transactions_df['category_2']
#del historical_transactions_df['category_3']

#del new_merchant_transactions_df['category_2']
#del new_merchant_transactions_df['category_3']

In [None]:
historical_transactions_df = historical_transactions_df.dropna()
new_merchant_transactions_df = new_merchant_transactions_df.dropna()

In [None]:
train_df.head(2)

In [None]:
test_df.head(2)

In [None]:
train_df['first_active_month'] = pd.to_datetime(train_df['first_active_month'])
test_df['first_active_month'] = pd.to_datetime(test_df['first_active_month'])

In [None]:
train_df.dtypes

In [None]:
#hist_trans_df_group = historical_transactions_df.groupby('card_id')

In [None]:
historical_transactions_df.head(2)

In [None]:
new_merchant_transactions_df.head(2)

In [None]:
def binarize(df):
    """Encode the 'Y'/'N' flag columns of ``df`` as 1/0, in place.

    The columns ``authorized_flag`` and ``category_1`` are replaced by their
    numeric encoding ('Y' -> 1, 'N' -> 0); any other value becomes NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both flag columns; it is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object.
    """
    flag_values = {'Y': 1, 'N': 0}
    for col in ['authorized_flag', 'category_1']:
        # Go through object dtype first: mapping a categorical column gives
        # back a categorical, and numeric reductions such as sum/mean are not
        # defined on categoricals.
        df[col] = df[col].astype(object).map(flag_values)
    return df

historical_transactions_df = binarize(historical_transactions_df)
new_merchant_transactions_df = binarize(new_merchant_transactions_df)

In [None]:
historical_transactions_df['category_3'] = historical_transactions_df['category_3'].astype('category').cat.codes
new_merchant_transactions_df['category_3'] = new_merchant_transactions_df['category_3'].astype('category').cat.codes

In [None]:
historical_transactions_df.head(2)

In [None]:
new_merchant_transactions_df.head(2)

In [None]:
# Stack historical and new-merchant transactions into one frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# performs the same row-wise concatenation and keeps the original indexes.
transactions = pd.concat([historical_transactions_df, new_merchant_transactions_df])

In [None]:
historical_transactions_df.shape

In [None]:
new_merchant_transactions_df.shape

In [None]:
transactions.shape

In [None]:
del historical_transactions_df
del new_merchant_transactions_df

In [None]:
#transactions = reduce_mem_usage(transactions)

In [None]:
from scipy import stats

In [None]:
def aggregate_transactions(history):
    history.loc[:, 'purchase_date'] = pd.DatetimeIndex(history['purchase_date']).\
                                      astype(np.int64) * 1e-9
    agg_func = {
    
    'authorized_flag': ['sum', 'mean', lambda x: tuple(stats.mode(x)[0])],
    'category_1': ['sum', 'mean', lambda x: tuple(stats.mode(x)[0])],
    'category_2': ['sum', 'mean', lambda x: tuple(stats.mode(x)[0])],
    'category_3': ['sum', 'mean', lambda x: tuple(stats.mode(x)[0])],
    'merchant_id': ['nunique', lambda x: tuple(stats.mode(x)[0])],
    'merchant_category_id': ['nunique', lambda x: tuple(stats.mode(x)[0])],
    'state_id': ['nunique', lambda x: tuple(stats.mode(x)[0])],
    'city_id': ['nunique', lambda x: tuple(stats.mode(x)[0])],
    'subsector_id': ['nunique', lambda x: tuple(stats.mode(x)[0])],
    'purchase_amount': ['sum', 'mean', 'max', 'min', 'std'],
    'installments': ['sum', 'mean', 'max', 'min', 'std'],
    'month_lag': [ lambda x: tuple(stats.mode(x)[0])]
    }
    
    agg_history = history.groupby(['card_id']).agg(agg_func)
    agg_history.columns = ['_'.join(col).strip() for col in agg_history.columns.values]
    agg_history.reset_index(inplace=True)
    
    df = (history.groupby('card_id')
          .size()
          .reset_index(name='transactions_count'))
    
    agg_history = pd.merge(df, agg_history, on='card_id', how='left')
    
    return agg_history

In [None]:
history = aggregate_transactions(transactions)

In [None]:
history.columns = ['hist_' + c if c != 'card_id' else c for c in history.columns]
history[:5]

In [None]:
history.shape

In [None]:
train = pd.merge(train_df, history, on='card_id', how='left')
test = pd.merge(test_df, history, on='card_id', how='left')

In [None]:
# The mode aggregations store one-element tuples; render each cell as text,
# trim the leading '(' and the trailing ',' / ')' characters, then cast the
# remainder to int.
for _mode_col in ('hist_authorized_flag_<lambda>', 'hist_category_1_<lambda>'):
    _as_text = train[_mode_col].map(lambda cell: str(cell).lstrip('(').rstrip(',)'))
    train[_mode_col] = _as_text.astype(int)

In [None]:
# Same tuple-unwrapping as for the flag columns: cells are rendered as text
# and the surrounding '(' ... ',)' characters are trimmed.

# These three stay as strings.
for _mode_col in ('hist_category_2_<lambda>',
                  'hist_category_3_<lambda>',
                  'hist_merchant_id_<lambda>'):
    train[_mode_col] = train[_mode_col].map(lambda cell: str(cell).lstrip('(').rstrip(',)'))

# These are additionally cast to int.
for _mode_col in ('hist_merchant_category_id_<lambda>',
                  'hist_state_id_<lambda>',
                  'hist_city_id_<lambda>',
                  'hist_subsector_id_<lambda>',
                  'hist_month_lag_<lambda>'):
    _as_text = train[_mode_col].map(lambda cell: str(cell).lstrip('(').rstrip(',)'))
    train[_mode_col] = _as_text.astype(int)

In [None]:
# Unwrap the one-element tuples stored in the mode columns of the test frame:
# render each cell as text and trim the surrounding '(' ... ',)' characters.

# Columns that are then cast to int.
for _mode_col in ('hist_authorized_flag_<lambda>',
                  'hist_category_1_<lambda>',
                  'hist_merchant_category_id_<lambda>',
                  'hist_state_id_<lambda>',
                  'hist_city_id_<lambda>',
                  'hist_subsector_id_<lambda>',
                  'hist_month_lag_<lambda>'):
    _as_text = test[_mode_col].map(lambda cell: str(cell).lstrip('(').rstrip(',)'))
    test[_mode_col] = _as_text.astype(int)

# Columns that stay as strings.
for _mode_col in ('hist_category_2_<lambda>',
                  'hist_category_3_<lambda>',
                  'hist_merchant_id_<lambda>'):
    test[_mode_col] = test[_mode_col].map(lambda cell: str(cell).lstrip('(').rstrip(',)'))

In [None]:
train.head()

In [None]:
test.head()

In [None]:
test.columns

In [None]:
 # Rename the '<column>_<lambda>' mode features to '<column>_mode'.  The names
 # are derived from the existing columns instead of a hard-coded positional
 # list, so a change in column order or count cannot silently mislabel them.
 test.columns = [name.replace('_<lambda>', '_mode') for name in test.columns]

In [None]:
train.columns

In [None]:
# Rename the '<column>_<lambda>' mode features to '<column>_mode'.  Deriving
# the names from the existing columns also renames
# 'hist_merchant_category_id_<lambda>', which the previous hand-written list
# left unchanged (unlike the matching list for the test frame).
train.columns = [name.replace('_<lambda>', '_mode') for name in train.columns]

In [None]:
train.head(2)

In [None]:
test.head(2)

In [None]:
del transactions

In [None]:
merchants_df.isna().sum()

In [None]:
merchants_df['category_2'].value_counts()

In [None]:
#merchants_df['avg_sales_lag6'].value_counts()

In [None]:
# Replace missing avg_sales_lag* values with 1.0.  The filled column is
# assigned back: fillna(inplace=True) on a column selection is chained
# assignment and does not update the frame under pandas Copy-on-Write.
merchants_df['avg_sales_lag3'] = merchants_df['avg_sales_lag3'].fillna(1.0)
merchants_df['avg_sales_lag6'] = merchants_df['avg_sales_lag6'].fillna(1.0)
merchants_df['avg_sales_lag12'] = merchants_df['avg_sales_lag12'].fillna(1.0)

In [None]:
# Replace missing category_2 values with 1.0 (assigned back instead of a
# chained inplace fillna, which does not update the frame under Copy-on-Write).
merchants_df['category_2'] = merchants_df['category_2'].fillna(1.0)

In [None]:
merchants_df.head()

In [None]:
merchants_df['category_4'].value_counts()

In [None]:
# Replace each of these columns with the integer codes of its categories.
for _cat_col in ('category_1',
                 'most_recent_sales_range',
                 'most_recent_purchases_range',
                 'category_4'):
    merchants_df[_cat_col] = merchants_df[_cat_col].astype('category').cat.codes

In [None]:
merchants_df.apply(lambda x:len(x.unique()))

In [None]:
merchants_df.columns

In [None]:
merchants_df.apply(lambda x:len(x.unique()))

In [None]:
def aggregate_merchants(history):
    
    agg_func = {
    
    'merchant_group_id': [lambda x: tuple(stats.mode(x)[0])],
    'merchant_category_id': [lambda x: tuple(stats.mode(x)[0])],
    'subsector_id': [lambda x: tuple(stats.mode(x)[0])],
    'numerical_1': ['sum', 'mean'],
    'numerical_2': ['sum', 'mean'],
    'category_1': [lambda x: tuple(stats.mode(x)[0])],
    'most_recent_sales_range': [lambda x: tuple(stats.mode(x)[0])],
    'most_recent_purchases_range': [lambda x: tuple(stats.mode(x)[0])],
    'avg_sales_lag3': ['sum', 'mean'],
    'avg_purchases_lag3': ['sum', 'mean'],
    'active_months_lag3': [lambda x: tuple(stats.mode(x)[0])],
    'avg_sales_lag6': ['mean',lambda x: tuple(stats.mode(x)[0])],
    'avg_purchases_lag6': ['sum', 'mean'],
    'active_months_lag6': [lambda x: tuple(stats.mode(x)[0])],
    'avg_sales_lag12': ['sum', 'mean'],
    'avg_purchases_lag12': ['sum', 'mean'],
    'active_months_lag12': [lambda x: tuple(stats.mode(x)[0])],
    'category_4': [lambda x: tuple(stats.mode(x)[0])],
    'city_id': [lambda x: tuple(stats.mode(x)[0])],
    'state_id': [lambda x: tuple(stats.mode(x)[0])],
    'category_2': [lambda x: tuple(stats.mode(x)[0])]
        
    }
    
    agg_history = history.groupby(['merchant_id']).agg(agg_func)
    agg_history.columns = ['_'.join(col).strip() for col in agg_history.columns.values]
    agg_history.reset_index(inplace=True)
    
    df = (history.groupby('merchant_id')
          .size()
          .reset_index(name='merchant_count'))
    
    agg_history = pd.merge(df, agg_history, on='merchant_id', how='left')
    
    return agg_history

In [None]:
merchants_data = aggregate_merchants(merchants_df)

In [None]:
merchants_data.columns = ['merch_' + c if c != 'merchant_id' else c for c in merchants_data.columns]
merchants_data[:5]

In [None]:
merchants_data.shape

In [None]:
merchants_data.columns

In [None]:
# Rename the '<column>_<lambda>' mode features to '<column>_mode'.  The names
# are derived from the existing columns instead of a hard-coded positional
# list, so a change in column order or count cannot silently mislabel them.
merchants_data.columns = [name.replace('_<lambda>', '_mode') for name in merchants_data.columns]

In [None]:
merchants_data.head()

In [None]:
del merchants_df

In [None]:
merchants_data.isna().sum()

In [None]:
train.head()

In [None]:
# Strip every non-word character (quotes left over from the tuple rendering)
# to recover the bare merchant id used as the join key.
# regex=True is explicit because Series.str.replace treats the pattern as a
# literal string by default in pandas 2+; the raw string avoids the invalid
# '\W' escape sequence.
train['merchant_id'] = train['hist_merchant_id_mode'].str.replace(r'\W', '', regex=True)
test['merchant_id'] = test['hist_merchant_id_mode'].str.replace(r'\W', '', regex=True)

In [None]:
del train['hist_merchant_id_mode']
del test['hist_merchant_id_mode']

In [None]:
test['old_index'] = range(0, len(test))

In [None]:
# Attach the per-merchant features to each card via its merchant_id.
# NOTE(review): with no `how` argument this is an inner join, so any row whose
# merchant_id is absent from merchants_data is dropped -- confirm that is
# intended, in particular for the test frame that feeds the submission.
train_final = train.merge(merchants_data, on=['merchant_id'])
test_final = test.merge(merchants_data, on=['merchant_id'])

In [None]:
train_final.head()

In [None]:
train.head()

In [None]:
train_final.shape

In [None]:
train.shape

In [None]:
test.shape

In [None]:
test_final.shape

In [None]:
test.head()

In [None]:
test_final.head()

In [None]:
test_final = test_final.sort_values(by=['old_index'])

In [None]:
test_final.head()

In [None]:
test_final.index = test_final['old_index'].values

In [None]:
test_final.head()

In [None]:
# The merchant mode features hold one-element tuples; render each cell as text
# and trim the leading '(' and the trailing ',' / ')' characters.  The values
# stay as strings.
for _mode_col in (
    'merch_merchant_group_id_mode',
    'merch_merchant_category_id_mode',
    'merch_subsector_id_mode',
    'merch_category_1_mode',
    'merch_most_recent_sales_range_mode',
    'merch_most_recent_purchases_range_mode',
    'merch_active_months_lag3_mode',
    'merch_avg_sales_lag6_mode',
    'merch_active_months_lag6_mode',
    'merch_active_months_lag12_mode',
    'merch_category_4_mode',
    'merch_city_id_mode',
    'merch_state_id_mode',
    'merch_category_2_mode',
):
    train_final[_mode_col] = train_final[_mode_col].map(
        lambda cell: str(cell).lstrip('(').rstrip(',)')
    )

In [None]:
# The merchant mode features hold one-element tuples; render each cell as text
# and trim the leading '(' and the trailing ',' / ')' characters.  The values
# stay as strings.
for _mode_col in (
    'merch_merchant_group_id_mode',
    'merch_merchant_category_id_mode',
    'merch_subsector_id_mode',
    'merch_category_1_mode',
    'merch_most_recent_sales_range_mode',
    'merch_most_recent_purchases_range_mode',
    'merch_active_months_lag3_mode',
    'merch_avg_sales_lag6_mode',
    'merch_active_months_lag6_mode',
    'merch_active_months_lag12_mode',
    'merch_category_4_mode',
    'merch_city_id_mode',
    'merch_state_id_mode',
    'merch_category_2_mode',
):
    test_final[_mode_col] = test_final[_mode_col].map(
        lambda cell: str(cell).lstrip('(').rstrip(',)')
    )

In [None]:
train_final.head()

In [None]:
test_final.head()

In [None]:
y_train = train_final['target']
del train_final['target']

In [None]:
train_final['month'] =  pd.DatetimeIndex(train_final['first_active_month']).month  
test_final['month'] = pd.DatetimeIndex(test_final['first_active_month']).month

del train_final['first_active_month']
del test_final['first_active_month']

In [None]:
test_card_ids = test_final['card_id']
del test_final['card_id']
del test_final['merchant_id']
del train_final['card_id']
del train_final['merchant_id']

In [None]:
del test_final['old_index']

In [None]:
train_final.shape

In [None]:
test_final.shape

In [None]:
y_train.shape

In [None]:
del train
del test
#del merchants_data
#del history

In [None]:
test_final.head()

In [None]:
test_final.isna().sum()

In [None]:
# Fill missing month values with 12.0.  Assigned back instead of a chained
# inplace fillna, which does not update the frame under pandas Copy-on-Write.
test_final['month'] = test_final['month'].fillna(12.0)

In [None]:
# Convert the feature frames and the target to NumPy arrays for Keras.
x_train = np.array(train_final)
x_test = np.array(test_final)
# reshape(-1, 1) lets NumPy infer the row count; the previous hard-coded
# 201917 fails whenever the merge upstream yields a different number of rows.
y_train = np.array(y_train).reshape(-1, 1)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score

In [None]:
# One hidden ReLU layer followed by a linear output unit, trained with MSE.
# The input width is read from the training matrix instead of the previously
# hard-coded 67, so adding or removing a feature does not break the model.
n_features = x_train.shape[1]
model = Sequential()
model.add(Dense(134, input_dim=n_features, activation='relu', kernel_initializer='random_uniform'))
#model.add(Dense(134,activation='relu', kernel_initializer='random_uniform'))
#model.add(Dense(67,activation='relu', kernel_initializer='random_uniform'))
model.add(Dense(1, activation='linear', kernel_initializer='random_uniform'))
model.compile(loss='mse', optimizer='adam')
model.fit(x_train, y_train, epochs=30)

In [None]:
#from sklearn.ensemble import RandomForestRegressor
#RF = RandomForestRegressor()
#RF.fit(train_final,y_train)

In [None]:
#train_final.shape

In [None]:
#test_final.shape

In [None]:
#train_final.head()

In [None]:
#test_final.head()

In [None]:
#from sklearn.linear_model import LinearRegression, Lasso
#lm = Lasso(alpha=0.1, normalize=False)
#lm.fit(train_final, y_train)

In [None]:
#y_pred = lm.predict(test_final)

In [None]:
y_pred = model.predict(x_test)

In [None]:
submission_df = pd.DataFrame()
submission_df['card_id'] = test_card_ids
submission_df['target'] = y_pred

In [None]:
submission_df.to_csv("submission.csv", index=None)

In [None]:
submission_df.head()

In [None]:
sample_train = train_final.copy()
sample_train['target'] = y_train

In [None]:
sample_test = test_final.copy()
sample_test['card_id'] = test_card_ids
sample_test.to_csv("test_out.csv", index=None)
sample_train.to_csv("train_out.csv", index=None)