This notebook tries to predict the target with a LogisticRegression model trained on features scaled by StandardScaler.

# Import libraries and load data

In [None]:
# Import libraries
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, f1_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Display options: render floats with three decimal places in pandas output.
pd.options.display.float_format = lambda value: '%.3f' % value

Use the function from [function memory usage optimization dataframe](https://www.kaggle.com/ellavs/function-memory-usage-optimization-dataframe/) to reduce the memory footprint of a dataframe:

In [None]:
def optimize_memory_usage(df, print_size = True):
    """Downcast numeric columns of ``df`` to smaller dtypes to save memory.

    Signed-integer columns are cast to the narrowest of int8/int16/int32
    whose range contains every value of the column (limits inclusive).
    float64 columns are cast to float32 when every value lies inside the
    float32 range; this keeps the magnitude but reduces precision to
    float32.  Columns of any other dtype are left untouched and no column
    is ever widened.

    Args:
        df: DataFrame to optimize.  It is modified in place.
        print_size: When true, print the memory usage before and after.

    Returns:
        The same ``df`` object, with the downcast columns.
    """
    # Types for optimization.
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Integer targets, narrowest first.
    int_candidates = (np.int8, np.int16, np.int32)
    # Memory usage size before optimize (Mb).
    before_size = df.memory_usage().sum() / 1024**2
    for column in df.columns:
        column_type = df[column].dtypes
        if column_type not in numerics:
            continue
        column_min = df[column].min()
        column_max = df[column].max()
        if str(column_type).startswith('int'):
            for candidate in int_candidates:
                # Stop as soon as a candidate would not be narrower.
                if np.dtype(candidate).itemsize >= column_type.itemsize:
                    break
                limits = np.iinfo(candidate)
                # Inclusive comparison: the limits themselves are representable.
                if limits.min <= column_min and column_max <= limits.max:
                    df[column] = df[column].astype(candidate)
                    break
        elif column_type.itemsize > np.dtype(np.float32).itemsize:
            # Only float64 reaches this branch; narrower floats stay as they are.
            limits = np.finfo(np.float32)
            # NaN or infinite min/max fail the comparison, so such columns keep float64.
            if limits.min <= column_min and column_max <= limits.max:
                df[column] = df[column].astype(np.float32)
    # Memory usage size after optimize (Mb).
    after_size = df.memory_usage().sum() / 1024**2
    if print_size:
        # Guard against a frame that reports zero bytes.
        saved_percent = 100 * (before_size - after_size) / before_size if before_size else 0.0
        print('Memory usage size: before {:5.4f} Mb - after {:5.4f} Mb ({:.1f}%).'.format(before_size, after_size, saved_percent))
    return df

In [None]:
def import_data_from_csv(file_path):
    """Load a dataframe from a csv-file and optimize its memory usage.

    Args:
        file_path: Path of the csv-file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame after ``optimize_memory_usage`` has downcast
        its numeric columns.
    """
    # ``keep_date_col`` was removed from the call: it only has an effect when
    # ``parse_dates`` combines several columns, which is not the case here,
    # and the keyword is deprecated in recent pandas releases.
    df = pd.read_csv(file_path, parse_dates = True)
    return optimize_memory_usage(df)

In [None]:
# Load train data (and downcast its numeric columns) from the competition input folder.
train_df = import_data_from_csv('../input/tabular-playground-series-nov-2021/train.csv')
# Alternative when the csv-file sits next to the notebook:
#train_df = import_data_from_csv('train.csv')

In [None]:
# Load test data (and downcast its numeric columns) from the competition input folder.
test_df = import_data_from_csv('../input/tabular-playground-series-nov-2021/test.csv')
# Alternative when the csv-file sits next to the notebook:
#test_df = import_data_from_csv('test.csv')

In [None]:
# Show (rows, columns) of the train and test dataframes.
train_df.shape, test_df.shape

In [None]:
# Preview the first rows of the train data.
train_df.head()

In [None]:
# Summary statistics of the train columns.
train_df.describe()

In [None]:
# Histograms of the columns at positions 1-9 (position 0 is left out).
_ = train_df.hist(
    column=train_df.columns[1:10],
    figsize=(10, 10),
    color='skyblue',
    ec='blue',
    alpha=0.5,
)

In [None]:
# Histograms of the columns at positions 10-18.
_ = train_df.hist(
    column=train_df.columns[10:19],
    figsize=(10, 10),
    color='skyblue',
    ec='blue',
    alpha=0.5,
)

In [None]:
# Histograms of the columns at positions 19-28.  The slice starts at 19
# (not 20) because the preceding slice stops at position 18, which would
# otherwise leave column 19 without a histogram.
_ = train_df.hist(train_df.columns[19:29], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Histograms of the columns at positions 29-38.  The slice starts at 29
# (not 30) because the preceding slice stops at position 28, which would
# otherwise leave column 29 without a histogram.
_ = train_df.hist(train_df.columns[29:39], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Histograms of the columns at positions 39-48.  The slice starts at 39
# (not 40) because the preceding slice stops at position 38, which would
# otherwise leave column 39 without a histogram.
_ = train_df.hist(train_df.columns[39:49], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Histograms of the columns at positions 49-58.  The slice starts at 49
# (not 50) because the preceding slice stops at position 48, which would
# otherwise leave column 49 without a histogram.
_ = train_df.hist(train_df.columns[49:59], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Histograms of the columns at positions 59-68.  The slice starts at 59
# (not 60) because the preceding slice stops at position 58, which would
# otherwise leave column 59 without a histogram.
_ = train_df.hist(train_df.columns[59:69], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Histograms of the columns at positions 69-78.  The slice starts at 69
# (not 70) because the preceding slice stops at position 68, which would
# otherwise leave column 69 without a histogram.
_ = train_df.hist(train_df.columns[69:79], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Histograms of the columns at positions 79-88.  The slice starts at 79
# (not 80) because the preceding slice stops at position 78, which would
# otherwise leave column 79 without a histogram.
_ = train_df.hist(train_df.columns[79:89], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Histograms of the columns at positions 89-100.  The slice starts at 89
# (not 90) because the preceding slice stops at position 88, which would
# otherwise leave column 89 without a histogram.
_ = train_df.hist(train_df.columns[89:101], figsize=(10, 10), color='skyblue', ec='blue', alpha = 0.5)

In [None]:
# Feature columns: every train column except the ones on the black list.
features_black_list = ['id', 'target']
features_list = [
    column_name
    for column_name in train_df.columns
    if column_name not in features_black_list
]

In [None]:
# Feature matrix X and label vector y as NumPy arrays.
y = train_df['target'].to_numpy()
X = train_df[features_list].to_numpy()

# Try Logistic Regression

In [None]:
# Keep 70% of the rows for fitting and hold out the rest for validation.
# A fixed random_state makes the split, and every metric computed from it,
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

In [None]:
# Show the sizes of the fitting and hold-out label arrays.
y_train.shape, y_test.shape

In [None]:
# Learn the scaling parameters from the fitting rows only, then apply the
# same transformation to both the fitting and the hold-out rows.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit the classifier on the scaled fitting rows.
model_logistic = LogisticRegression(max_iter=10000, solver='lbfgs').fit(X_train, y_train)

# Second column of predict_proba, taken for every hold-out row.
hold_out_proba = model_logistic.predict_proba(X_test)
y_predicted = hold_out_proba[:, 1]

In [None]:
# Plot the hold-out F1 score for 50 evenly spaced decision limits in [0, 1].
grid = np.linspace(0, 1, num=50)
plt.figure(figsize=(10, 5))
plt.plot(
    grid,
    [f1_score(y_test, y_predicted > limit) for limit in grid],
)
plt.title('f1_score by limit', fontsize=14)
plt.xlabel('limit', fontsize=12)
plt.ylabel('f1_score', fontsize=12)
plt.grid();

In [None]:
# Hold-out metrics: accuracy and F1 use a 0.5 cut-off on the predicted
# values, ROC-AUC uses the predicted values directly.
print('Test accuracy: ', accuracy_score(y_true=y_test, y_pred=y_predicted > 0.5))
print('Test F measure: ', f1_score(y_true=y_test, y_pred=y_predicted > 0.5))
print('Test ROC-AUC: ', roc_auc_score(y_true=y_test, y_score=y_predicted))

# Save result

In [None]:
# Scale the test features with the scaler that was fitted earlier.
X_pred = scaler.transform(test_df[features_list].values)

In [None]:
# Store the predicted probability of the second class instead of hard 0/1
# labels, matching how this notebook computes ROC-AUC on the hold-out rows.
# NOTE(review): the competition is understood to be scored on ROC-AUC,
# which needs a ranking score rather than labels - confirm.
test_df['target'] = model_logistic.predict_proba(X_pred)[:, 1]

In [None]:
# Write the id and target columns to the submission file, without the index.
test_df.loc[:, ['id', 'target']].to_csv(
    path_or_buf='Tabular_Playground_Series_Nov_LogisticRegression.csv',
    index=False,
)