In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input mount,
# so the dataset paths used later can be read off the cell output.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Any results you write to the current directory are saved as output.

In [None]:
# Load the competition's training CSV into a DataFrame.
train_csv_path = '/kaggle/input/santander-customer-transaction-prediction/train.csv'
dataset = pd.read_csv(train_csv_path)

In [None]:
# Bar chart of how many rows fall into each `target` class.
# The column is named by keyword: recent seaborn releases interpret the first
# positional argument as `data`, so a positionally passed Series is no longer
# read as the `x` variable.
sns.countplot(x='target', data=dataset);

In [None]:
# Preview the first five rows (five is the default for head()).
dataset.head()

In [None]:
# Summary statistics of the training frame, displayed as the cell output.
dataset.describe()

In [None]:
# Correlation heatmap restricted to numeric columns.
# `ID_code` has not been dropped yet at this point; if it holds string
# identifiers, a bare `dataset.corr()` raises on pandas >= 2.0 (older versions
# silently skipped such columns), so the numeric subset is selected explicitly.
numeric_columns = dataset.select_dtypes(include='number')
plt.figure(figsize=(10, 7))
sns.heatmap(numeric_columns.corr());

In [None]:
# Remove the `ID_code` column so it is not picked up as a feature below.
# errors='ignore' keeps this cell re-runnable: because `dataset` is reassigned,
# a second execution would otherwise raise KeyError on the missing column.
dataset = dataset.drop(columns=['ID_code'], errors='ignore')

In [None]:
# Show the first column on its own (kept as a one-column DataFrame).
dataset.iloc[:, :1]

In [None]:
# Features: every column after the first.
x = dataset.iloc[:, 1:]
# Target: the first column, taken as a 1-D Series rather than a one-column
# DataFrame. scikit-learn estimators expect a 1-D y and emit a
# DataConversionWarning when handed a column vector.
y = dataset.iloc[:, 0]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Scaler instance used to standardise the feature matrices.
sc_X = StandardScaler()

In [None]:
# Learn the scaling statistics (mean / std) from the training split only.
x_train = sc_X.fit_transform(x_train)
# Apply those SAME statistics to the hold-out split. Calling fit_transform here
# would re-fit the scaler on test data, which leaks hold-out information and
# puts train and test features on different scales.
x_test = sc_X.transform(x_test)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression classifier with library-default hyperparameters.
log_reg_cls = LogisticRegression()

In [None]:
# Fit the classifier on the training split.
log_reg_cls.fit(x_train, y_train)

In [None]:
# Predicted class labels for the hold-out split.
y_preds_log_reg = log_reg_cls.predict(x_test)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
# Per-class precision / recall / F1 on the hold-out split.
report_log_reg = classification_report(y_test, y_preds_log_reg)
print(report_log_reg)

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
# ROC AUC measures how well the model RANKS positives above negatives, so it
# must be computed from continuous scores. Feeding it hard 0/1 predictions
# collapses the curve to a single operating point and understates the AUC.
# Column 1 of predict_proba is the probability of the larger class label,
# which is the class roc_auc_score treats as positive.
y_scores_log_reg = log_reg_cls.predict_proba(x_test)[:, 1]
roc_auc_score(y_test, y_scores_log_reg)

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
# SMOTE synthesises minority-class samples using random interpolation; fix the
# seed so the resampled data (and every metric computed from it) is repeatable.
sm = SMOTE(random_state=0)

In [None]:
# Split BEFORE oversampling so the hold-out rows are real, untouched observations.
# Resampling the full data set first lets synthetic points interpolated from
# hold-out rows leak into training, and scores the model partly on synthetic
# samples, which inflates the reported metrics.
x_res_train, x_res_test, y_res_train, y_res_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Oversample the minority class in the training portion only.
# X_res / y_res are kept as names for the resampled training data.
X_res, y_res = sm.fit_resample(x_res_train, y_res_train)
x_res_train, y_res_train = X_res, y_res

In [None]:
# Refit the SAME estimator object on the resampled training data; this replaces
# the parameters learned by the earlier fit on x_train.
# NOTE(review): x_res_train is derived from `x`, which is never passed through
# sc_X, so this fit sees unscaled features — confirm that is intended.
log_reg_cls.fit(x_res_train, y_res_train)

In [None]:
# Predicted class labels for the hold-out part of the resampled experiment.
y_res_preds_log_reg = log_reg_cls.predict(x_res_test)

In [None]:
# Per-class precision / recall / F1 for the model trained on resampled data.
report_res_log_reg = classification_report(y_res_test, y_res_preds_log_reg)
print(report_res_log_reg)

In [None]:
# ROC AUC is a ranking metric: score it on the predicted probability of the
# positive class instead of hard 0/1 labels, which reduce the curve to a
# single operating point and understate the AUC.
# Column 1 of predict_proba is the probability of the larger class label,
# which is the class roc_auc_score treats as positive.
y_res_scores_log_reg = log_reg_cls.predict_proba(x_res_test)[:, 1]
roc_auc_score(y_res_test, y_res_scores_log_reg)

In [None]:
import pickle 

In [None]:
# Persist the fitted classifier to the working directory.
filename = 'LogicReg_res.sav'
# The context manager guarantees the file is flushed and closed even if
# pickling fails; a bare open() passed inline leaves the handle dangling.
with open(filename, 'wb') as model_file:
    pickle.dump(log_reg_cls, model_file)

In [None]:
# Load the competition's test CSV into a DataFrame.
test_csv_path = '/kaggle/input/santander-customer-transaction-prediction/test.csv'
test = pd.read_csv(test_csv_path)

In [None]:
# Preview the first rows of the test frame.
test.head()

In [None]:
# Feature matrix for the test set: every column except the first one.
test_x = test.iloc[:, 1:]

In [None]:
# Show the first column of the test frame on its own (one-column DataFrame).
test.iloc[:, :1]

In [None]:
# Predict class labels for the test features with the estimator as last fitted.
# NOTE(review): test_x is passed without scaling; that matches the most recent
# fit only if that fit also used unscaled features — confirm.
y_f = log_reg_cls.predict(test_x)

In [None]:
# Shape of the prediction array, displayed as the cell output.
y_f.shape

In [None]:
# Shape of the test ID column, displayed as the cell output.
test['ID_code'].shape

In [None]:
# Pair each test ID with its predicted label and write the submission file
# (two columns, ID_code then target, without the index).
my_submission = test[['ID_code']].assign(target=y_f)
my_submission.to_csv('submission.csv', index=False)