I never used the Stochastic Gradient Descent classifier before. 
It is capable of handling very large datasets efficiently.

So let's pick this one and train it.

In [None]:
# Notebook-wide switch: when True, the cells below print memory statistics,
# per-fold scores and search results.
VERBOSE = True

## Imports

In [None]:
import numpy as np 
import pandas as pd 
import gc

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import roc_auc_score
from sklearn.metrics import plot_confusion_matrix

from sklearn.linear_model import SGDClassifier
from sklearn.calibration import CalibratedClassifierCV


## Load datasets and memory reduction

In [None]:
## from: https://www.kaggle.com/bextuychiev/how-to-work-w-million-row-datasets-like-a-pro
def reduce_memory_usage(df, verbose=True):
    """Downcast the numeric columns of ``df`` to the narrowest dtype that fits.

    Every column whose dtype is one of the signed integer or float dtypes
    listed in ``numerics`` is inspected: its minimum and maximum are compared
    against the representable range of each candidate dtype (narrowest first)
    and the column is cast to the first candidate that can hold both.
    Columns of any other dtype are left untouched.

    The range check is inclusive, so a column spanning exactly the limits of
    a dtype (e.g. -128..127) is stored in that dtype.

    Note: the check looks only at the value *range*, not at precision, so
    float columns that are cast to ``float16``/``float32`` may be rounded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    verbose : bool, default True
        When true, print the final memory footprint and the relative saving.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    numerics = ["int8", "int16", "int32", "int64", "float16", "float32", "float64"]
    # Candidate dtypes, ordered from narrowest to widest.
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024 ** 2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == "int":
            # If min/max are NaN (e.g. a column with no rows) no candidate
            # matches and the column keeps its current dtype.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out-of-range, infinite or all-NaN columns stay at full width.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        # Guard the percentage against a zero-sized starting footprint.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print(
            "Mem. usage decreased to {:.2f} Mb ({:.1f}% reduction)".format(
                end_mem, reduction
            )
        )
    return df


In [None]:
# Name of the label column in the training data.
TARGET = 'target'

# Read each competition file; the train and test frames are downcast right
# after loading to keep the memory footprint low.
train = reduce_memory_usage(
    pd.read_csv("../input/tabular-playground-series-oct-2021/train.csv"),
    VERBOSE,
)
test = reduce_memory_usage(
    pd.read_csv("../input/tabular-playground-series-oct-2021/test.csv"),
    VERBOSE,
)
submission = pd.read_csv(
    "../input/tabular-playground-series-oct-2021/sample_submission.csv"
)

## Get features

In [None]:
# f0..f241 are treated as continuous, except f22 and f43, which are grouped
# with the categorical columns f242..f284.
continous_cols = [f'f{i}' for i in range(242) if i not in (22, 43)]
categorical_cols = [f'f{i}' for i in range(242, 285)] + ['f22', 'f43']

In [None]:
# Add row-wise summary statistics of the continuous columns as new features
# to both frames. The base columns are selected once per frame, before any
# new column is added, so each statistic is computed from the same inputs.
row_stats = ["mean", "std", "min", "max"]
for frame in (train, test):
    base = frame[continous_cols]
    for stat in row_stats:
        frame[stat] = getattr(base, stat)(axis=1)

# The new columns are continuous too, so register them for later steps.
continous_cols.extend(row_stats)


In [None]:
# Full model input: continuous columns first, then the categorical ones.
feature_cols = [*continous_cols, *categorical_cols]

## Scale data

Stochastic Gradient Descent is sensitive to feature scaling, so it is highly recommended to scale your data. 

In [None]:
# Learn the scaling statistics from the training data only, then apply the
# same transformation to the continuous columns of both frames.
scaler = RobustScaler().fit(train[continous_cols])
train[continous_cols] = scaler.transform(train[continous_cols])
test[continous_cols] = scaler.transform(test[continous_cols])

## Automatic hyper-parameter search

To find the best parameters I'm using GridSearchCV, with 10% of data. 

In [None]:
# Set to False to re-run the (slow) grid search instead of showing the
# result recorded from an earlier run.
ResultOfPreviousRun = True

# Search space handed to GridSearchCV; single-element lists pin a value.
params = {
    "loss" : ["hinge", "log", "squared_hinge", "modified_huber", "perceptron"], #["hinge"]
    "alpha" : [0.0001, 0.001, 0.01, 0.1],
    "penalty" : ["l2", "l1", "elasticnet"],    
    "random_state":[2021],
    "class_weight":["balanced"]
}

if ResultOfPreviousRun:
    # Outcome recorded from an earlier run of the search below.
    if VERBOSE:
        print("best score: 0.7562200000000001")
        print("best estimator: SGDClassifier(alpha=0.1, class_weight='balanced', penalty='l1',random_state=2021)") 
else:
    # Search on a 10% sample to keep the run time manageable. `sample`
    # already returns a new frame, so no prior copy of `train` is needed.
    # NOTE(review): replace=True can place duplicates of one row in
    # different CV folds - confirm that sampling with replacement is intended.
    tmptrain = train.sample(frac=0.10, replace=True, random_state=999)

    model = SGDClassifier(max_iter=1000)
    clf = GridSearchCV(model, cv=5, param_grid=params, verbose=10)

    clf.fit(tmptrain[feature_cols], tmptrain[TARGET])

    if VERBOSE:
        print("best score: ",clf.best_score_)
        print("best estimator: ",clf.best_estimator_)


## Modeling with SGDClassifier

`SGDClassifier(loss='hinge')` does not provide probability estimates: it has no `predict_proba` method.

To get probabilities, wrap `SGDClassifier(loss='hinge')` in `CalibratedClassifierCV()`, which calibrates the classifier's decision scores into probability values.
  

In [None]:
# Hyper-parameters of the final SGD model.
# NOTE(review): the recorded grid-search result also lists penalty='l1' and
# class_weight='balanced'; confirm whether leaving them out here is intended.
params = {
    "loss" : "hinge",
    "alpha" : 0.1, 
    "random_state":2021   
}

preds = []   # one vector of test-set probabilities per outer fold
scores = []  # validation ROC AUC per outer fold

kf = StratifiedKFold(n_splits=50, shuffle=True, random_state=13)

# Slice the feature matrices once instead of re-selecting the columns
# several times in every one of the 50 folds.
X_all = train[feature_cols]
y_all = train[TARGET]
X_test = test[feature_cols]

for fold, (idx_train, idx_valid) in enumerate(kf.split(X_all, y_all)):

    X_train, y_train = X_all.iloc[idx_train], y_all.iloc[idx_train]
    X_valid, y_valid = X_all.iloc[idx_valid], y_all.iloc[idx_valid]

    # A hinge-loss SGDClassifier has no predict_proba, so it is wrapped in a
    # calibrator. With an integer `cv` the wrapper fits the classifier and
    # the calibrator on disjoint internal splits of the training fold;
    # calibrating a prefit model on the rows it was trained on would bias
    # the calibration.
    model = CalibratedClassifierCV(SGDClassifier(**params), cv=5)
    model.fit(X_train, y_train)

    pred_valid = model.predict_proba(X_valid)[:,1]
    score = roc_auc_score(y_valid, pred_valid)
    scores.append(score)

    if VERBOSE:
        print(f"Fold: {fold + 1} score auc: {score}")

    y_hat = model.predict_proba(X_test)[:,1]
    preds.append(y_hat)

if VERBOSE:
    print(f"Overall Validation Score : {np.mean(scores)}")

# Release the last fitted model and the hoisted feature matrices.
del model, X_all, y_all, X_test
gc.collect()

## Create submission

In [None]:
# Blend the folds: put each fold's test predictions in its own column and
# average across columns to get one probability per test row.
fold_matrix = np.column_stack(preds)
submission[TARGET] = fold_matrix.mean(axis=1)
submission.to_csv('submission.csv', index=False)

## Conclusion

I had better results with LGBM classifier but not bad for the first time.

This is the first time I have used Stochastic Gradient Descent. Your comments and suggestions are welcome.