In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Shared seed handed to the scikit-learn calls in the cells below.
seed = 47

In [None]:
def evaluate_model(model, x, y):
    """Compute the ROC AUC of a fitted classifier on the given data.

    Args:
        model: Fitted estimator exposing ``predict_proba``.
        x: Feature matrix forwarded to ``model.predict_proba``.
        y: True labels, forwarded to ``roc_auc_score`` as the ground truth.

    Returns:
        dict: A single-entry mapping ``{'auc_roc_curve': <score>}``.
    """
    # Only the second column of predict_proba's output is scored; for
    # scikit-learn estimators that is the probability of ``model.classes_[1]``.
    positive_scores = model.predict_proba(x)[:, 1]
    return {'auc_roc_curve': roc_auc_score(y, positive_scores)}

# Tabular Playground Series - Apr 2022 - Logistic Regression

In [None]:
# Raw training rows; later cells drop 'subject'/'step' and group by 'sequence'.
train_df = pd.read_csv(
    '/kaggle/input/tabular-playground-series-apr-2022/train.csv'
)
# Label table; later cells key it by 'sequence' and read its 'state' column.
labels_df = pd.read_csv(
    '/kaggle/input/tabular-playground-series-apr-2022/train_labels.csv'
)

In [None]:
# Reduce each sequence to a single row of column means, then attach the
# label columns from labels_df by matching on the sequence id.
train_df = (
    train_df
    .drop(columns=['subject', 'step'])
    .groupby(['sequence'])
    .mean()
    .join(labels_df.set_index('sequence'), on='sequence')
)

# pop() removes 'state' from train_df, so the remaining columns are the features.
y_train = train_df.pop('state').values
x_train = train_df.values

In [None]:
# Hold out 20% of the rows for validation. shuffle=False keeps the existing
# row order, so the split is a contiguous cut rather than a random sample.
# NOTE(review): scikit-learn documents random_state as controlling the shuffle,
# so it presumably has no effect while shuffle=False — confirm this is intended.
# NOTE: the inputs are overwritten by the outputs, so re-running this cell
# splits the already-reduced training set again.
x_train, x_test, y_train, y_test = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    shuffle=False,
    random_state=seed,
)

In [None]:
# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits so the hold-out rows do not influence the scaling.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [None]:
print("Fitting a simple Logistic Regression model")
model = LogisticRegression(random_state=seed, solver='liblinear')
model.fit(x_train, y_train)

# score() returns mean accuracy on the hold-out split. It was previously
# called as a bare mid-cell expression, so the value was computed and then
# discarded without being displayed; capture and print it instead.
accuracy = model.score(x_test, y_test)
print({'accuracy': accuracy})

# ROC AUC on the same hold-out split.
results = evaluate_model(model, x_test, y_test)
print(results)

<h3>Submission</h3>


In [None]:
# Apply the same preparation as the training data: drop the 'subject' and
# 'step' columns and average every remaining column per sequence.
test_df = (
    pd.read_csv('/kaggle/input/tabular-playground-series-apr-2022/test.csv')
    .drop(columns=['subject', 'step'])
    .groupby(['sequence'])
    .mean()
)

# Reuse the already-fitted scaler (transform only, no refit).
# NOTE: this rebinds x_test, which until now held the validation features.
x_test = sc.transform(test_df.values)

In [None]:
submission_df = pd.read_csv('/kaggle/input/tabular-playground-series-apr-2022/sample_submission.csv')
sequence = submission_df['sequence'].values

# Predictions come back in test_df row order, and test_df is indexed by
# sequence id (it is a groupby result). Key the predictions by that index so
# they can be matched to the submission's ids by value rather than by position;
# pairing by position would silently mislabel rows if the two orders differed.
state_by_sequence = pd.Series(model.predict_proba(x_test)[:, 1], index=test_df.index)

# Fail loudly instead of writing NaN for ids that have no prediction.
unmatched = ~submission_df['sequence'].isin(state_by_sequence.index)
if unmatched.any():
    raise ValueError(
        f"{int(unmatched.sum())} sequence id(s) in sample_submission.csv "
        "have no matching prediction"
    )

# Reorder the predictions to follow the sample submission's sequence order.
state = state_by_sequence.reindex(sequence).values
submission_df = pd.DataFrame(
    {'sequence': sequence, 'state': state},
    index=pd.Index(sequence, name=test_df.index.name),
)
submission_df.head()

In [None]:
# Write the submission file; index=False keeps the DataFrame index out of
# the CSV so only the 'sequence' and 'state' columns are written.
submission_df.to_csv(path_or_buf='submission.csv', index=False)