# Otsu thresholding of the first LDA component

In [1]:
# import required libraries
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from skimage.filters import threshold_otsu

In [2]:
# Load the training and test CSV files (train first, then test).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("../pdata/data/train.csv", "../pdata/data/test.csv")
)

In [3]:
# Separate the label column "y" from the training features, and drop the
# "id" column from the test frame so it is not fed to the model as a feature.
train_df = train.drop(columns="y")
target = train["y"]
test_df = test.drop(columns="id")

In [4]:
# Rebalance the training classes with SMOTE; the seed is fixed so the
# synthetic samples are reproducible between runs.
# NOTE(review): resampling runs on the unscaled features because scaling
# happens afterwards — confirm this ordering is intended.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X=train_df, y=target)

In [5]:
# Standardise the features. The scaler is fitted on the resampled training
# data only, and the same fitted statistics are reused for the test set.
scaler = StandardScaler().fit(X_train_smote)
scaled_data = scaler.transform(X_train_smote)
scaled_test_df = scaler.transform(test_df)

In [6]:
# Fit LDA on the scaled, resampled training data, then project both the
# training data and the test data onto the discriminant axes.
lda = LinearDiscriminantAnalysis().fit(scaled_data, y_train_smote)
lda_data = lda.transform(scaled_data)
test_lda_data = lda.transform(scaled_test_df)

In [7]:
# Wrap the projected test data in a DataFrame with one column per
# discriminant axis, labelled LD1, LD2, ...
reduced_test_data = pd.DataFrame(
    test_lda_data,
    columns=[f"LD{k}" for k in range(1, test_lda_data.shape[1] + 1)],
)

In [8]:
# First discriminant component of the test set as a 1-D NumPy array.
# Series.to_numpy() converts directly, avoiding the intermediate Python list
# that np.array(list(...)) would build.
LD = reduced_test_data["LD1"].to_numpy()

In [9]:
# Let Otsu's method choose the cut point on the LD1 values of the test set.
# NOTE(review): Otsu's method is designed for roughly bimodal data —
# confirm the LD1 distribution actually has two modes.
threshold = threshold_otsu(image=LD)

In [10]:
# Label each test sample: 0 when its LD1 value is below the Otsu threshold,
# 1 otherwise. The comparison is vectorised instead of looping in Python;
# .tolist() keeps `binarized` a plain list of ints as before.
# NOTE(review): this assumes larger LD1 values correspond to class 1 —
# verify the orientation against the training projection (`lda_data`).
binarized = np.where(LD < threshold, 0, 1).tolist()

In [11]:
# Assemble the submission table: one row per test sample, pairing each
# original "id" with its predicted label "y".
result = pd.DataFrame(
    data={"id": test["id"].tolist(), "y": binarized},
    columns=["id", "y"],
)

In [12]:
# Write the predictions to the submission file, without the DataFrame index.
result.to_csv(path_or_buf="../submissions/submission11.csv", index=False)