In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Read the three competition files: training data, test data and the
# submission template.
train = pd.read_csv('../input/tabular-playground-series-jun-2021/train.csv')
test = pd.read_csv('../input/tabular-playground-series-jun-2021/test.csv')
sample_submission = pd.read_csv('../input/tabular-playground-series-jun-2021/sample_submission.csv')

# Remove the 'id' column from both frames in place; the removed values are
# not kept.
del train['id']
del test['id']

# Report the remaining (rows, columns) of each frame, then preview the
# training data.
print(train.shape, test.shape, sep='\n')
train.head()

In [None]:
# Separate the features from the target and encode the class labels as
# integers.
X = train.copy()  # work on a copy so `train` keeps its 'target' column
y = X.pop('target')

# Number the sorted unique labels starting at 1. enumerate() covers however
# many classes are present; zipping against a fixed range(1, 10) would leave
# any label beyond the ninth unmapped, and Series.map would then turn it
# into NaN without raising.
target_labels = {label: val for val, label in enumerate(np.sort(y.unique()), start=1)}
y = y.map(target_labels)

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for validation, with a fixed seed for
# reproducibility. Stratifying on y keeps the class proportions the same in
# the training and validation splits, so the validation log loss is computed
# on a class mix that matches the data it was trained on.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y
)

## Define the preprocessing steps

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Preprocessing: every column of X_train is routed through a MinMaxScaler,
# so the ColumnTransformer needs only a single 'num' entry.
numerical_transformer = MinMaxScaler()
preprocessor = ColumnTransformer(
    transformers=[('num', numerical_transformer, X_train.columns)],
)

## Create the model

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
# Logistic-regression classifier. max_iter=1000 sets the solver's iteration
# cap explicitly (scikit-learn documents a default of 100).
model = LogisticRegression(max_iter=1000)

## Evaluate the model

In [None]:
# Chain the preprocessing step and the classifier into a single estimator.
# Nothing is fitted or scored here; this only assembles the pipeline.
my_pipeline = Pipeline(
    steps=[
        ('preprcessor', preprocessor),
        ('model', model),
    ],
)


#### Predict on validation

In [None]:
# Fit the pipeline on the training split only, then score the held-out
# validation split.
my_pipeline.fit(X_train, y_train)
preds = my_pipeline.predict_proba(X_val)

# Pass the fitted class order explicitly. The columns of `preds` follow
# my_pipeline.classes_; without `labels`, log_loss infers the label set from
# y_val alone and raises if a class happens to be absent from the validation
# split.
score = log_loss(y_val, preds, labels=my_pipeline.classes_)
print('Logloss:', score)

#### Retrain on the whole training data and predict on the test set

In [None]:
# Refit the same pipeline object on the full training data (X, y); this
# replaces the state that was fitted on X_train earlier in the notebook.
my_pipeline.fit(X, y)

In [None]:
# Class-probability predictions for the test frame. Note that this rebinds
# `preds`, which previously held the validation-split probabilities.
preds = my_pipeline.predict_proba(test)
preds

In [None]:
# Name each probability column after the class it belongs to. predict_proba
# orders its columns like my_pipeline.classes_, so the names are derived from
# that attribute rather than from a hard-coded range(1, 10) zipped against
# y.unique(); the names then match the columns for any number of classes.
target_columns = {cls: 'Class_' + str(cls) for cls in my_pipeline.classes_}
sub = pd.DataFrame(preds, columns=list(target_columns.values()))
sub

In [None]:
# Attach row ids taken from the sample submission (the 'id' column of `test`
# was removed when the data was loaded). The assignment aligns on the index,
# so this assumes sample_submission has the same index and row order as the
# test predictions — TODO confirm.
sub['id'] = sample_submission['id']
sub

In [None]:
# Move 'id' to the front and keep the remaining columns in their current
# order. Selecting by name (instead of slicing off the last column) makes the
# cell safe to re-run: once 'id' is already first, columns[:-1] would
# duplicate 'id' and drop the last class column.
sub = sub[['id'] + [col for col in sub.columns if col != 'id']]
sub

In [None]:
# Write the submission file without the DataFrame index, leaving only the
# 'id' and class-probability columns.
sub.to_csv(
    path_or_buf='first_logistic_submission.csv',
    index=False,
)