In [2]:
from torch.utils.data import random_split, DataLoader
from sklearn.metrics import roc_auc_score, roc_curve
from data_loader import SimpleDatasetLoader
import matplotlib.pyplot as plt
from torch import nn, optim
from sklearn import svm
from models.FC import *
import pandas as pd
import torch

In [3]:
# Load the raw scoring table.
df = pd.read_csv("../data/scoring_case.csv")

# Replace every object-typed column with the integer codes from pd.factorize
# (factorize returns (codes, uniques); only the codes are kept).
# NOTE(review): factorize encodes missing values with a sentinel code (-1 by
# default, per the pandas docs) instead of NaN — confirm that is intended, since
# those columns will no longer count as missing in the later NaN handling.
object_columns = [name for name in df.columns if str(df[name].dtype) == 'object']
for name in object_columns:
    df[name] = pd.factorize(df[name])[0]

In [4]:
# Keep only the columns that have a non-null value in at least 80% of the rows,
# then remove the FLAG_MOBIL and SK_ID_CURR columns.
min_non_null = int(0.8 * df.shape[0])
df = df.dropna(axis=1, thresh=min_non_null)
df = df.drop(columns=["FLAG_MOBIL", "SK_ID_CURR"])

# Fraction of non-missing values per remaining column (displayed as the cell output).
1 - df.isna().sum() / len(df)

TARGET                        0.852876
NAME_CONTRACT_TYPE            1.000000
CODE_GENDER                   1.000000
FLAG_OWN_CAR                  1.000000
FLAG_OWN_REALTY               1.000000
                                ...   
AMT_REQ_CREDIT_BUREAU_DAY     0.868177
AMT_REQ_CREDIT_BUREAU_WEEK    0.868177
AMT_REQ_CREDIT_BUREAU_MON     0.868177
AMT_REQ_CREDIT_BUREAU_QRT     0.868177
AMT_REQ_CREDIT_BUREAU_YEAR    0.868177
Length: 75, dtype: float64

In [None]:
# for column in df.columns:
#     if column == 'TARGET':
#         continue
#     df[column] = df[column].fillna(df[column].median())

In [None]:
# Drop every row that has a missing value in any column. TARGET is one of the
# columns, so rows without a label are removed here as well.
# Alternative kept for reference — filter on the label only:
# df = df[df['TARGET'].notna()]
df = df.dropna(axis=0, how="any")

In [None]:
# Wrap the cleaned frame in the project dataset and split it 2/3 train, 1/3 test.
dataset = SimpleDatasetLoader(df)
train_amount_data = len(dataset) * 2 // 3
test_amount_data = len(dataset) - train_amount_data

# Seed the split explicitly: without a generator, random_split draws from the
# global RNG, so train/test membership (and every metric reported below) would
# change on each Restart & Run All.
split_generator = torch.Generator().manual_seed(42)
train, test = random_split(
    dataset,
    [train_amount_data, test_amount_data],
    generator=split_generator,
)

In [None]:
# Both loaders use the same batching: mini-batches of 32, reshuffled on each pass.
loader_kwargs = {"batch_size": 32, "shuffle": True}
train_loader = DataLoader(train, **loader_kwargs)
test_loader = DataLoader(test, **loader_kwargs)

In [None]:
# Training configuration consumed by the train_model call in the next cell.
num_epochs = 10

# Default-constructed network.
# NOTE(review): FC is presumably provided by the `models.FC` star import — confirm.
model = FC()

# Adam over all model parameters, with the optimizer's default hyperparameters.
optimizer = optim.Adam(params=model.parameters())

# Binary cross-entropy on probabilities.
# NOTE(review): BCELoss expects the model output to already be in [0, 1]
# (e.g. a final sigmoid) — verify against the FC definition.
criterion = nn.BCELoss()

In [None]:
# Run training on the train loader with the loss, optimizer and epoch count set up above.
# NOTE(review): train_model is not defined in this notebook — presumably it comes
# from the `models.FC` star import; confirm its signature and what it returns.
train_model(model, train_loader, criterion, optimizer, num_epochs)

In [None]:
# Plot the ROC curve of the model on the train and test loaders side by side,
# with the ROC AUC shown in each panel's title and legend.
fig, axs = plt.subplots(1, 2, figsize=(15, 5))
title = ["train", "test"]
for i, loader in enumerate([train_loader, test_loader]):
    # get_prediction returns (predictions, targets) for the given loader.
    prediction, target_list = get_prediction(model, loader)
    logit_roc_aut = roc_auc_score(target_list, prediction)
    # The thresholds array is not needed for the plot.
    fpr, tpr, _thresholds = roc_curve(target_list, prediction)

    # The AUC is a scalar, not a data series: passing it as a third positional
    # argument to plot() makes matplotlib draw it as an extra y-only line, which
    # consumes a colour and distorts autoscaling. Put it in the legend instead.
    axs[i].plot(fpr, tpr, label=f"ROC (AUC = {logit_roc_aut:.4f})")
    axs[i].plot([0, 1], [0, 1], '--', label="chance")
    axs[i].set_title(f"{title[i]} - ({round(logit_roc_aut, 4)})")
    axs[i].set_xlabel("False positive rate")
    axs[i].set_ylabel("True positive rate")
    axs[i].legend(loc="lower right")
plt.show()

In [None]:
# Restore previously saved weights into the model.
# map_location="cpu" lets a checkpoint that was saved from a CUDA device be read
# on a machine without a GPU; load_state_dict then copies the values into the
# model's existing parameters.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source (newer PyTorch versions
# offer weights_only=True for plain state dicts).
model.load_state_dict(torch.load("../weigh/2.pt", map_location="cpu"))

In [None]:
# torch.save(model.state_dict(), "../weigh/10.pt")

In [None]:
# Score the unlabeled rows (TARGET is missing) and write the predictions to
# prediction.csv, one row per unlabeled input row.

# Reload the raw data and repeat the preprocessing used for training:
# integer-encode object columns, keep columns that are at least 80% non-null,
# and remove the FLAG_MOBIL and SK_ID_CURR columns.
df = pd.read_csv("../data/scoring_case.csv")
for column in df.columns:
    if str(df[column].dtype) == 'object':
        df[column] = pd.factorize(df[column])[0]

df = df.dropna(axis=1, thresh=int(0.8 * df.shape[0]))
df = df.drop(["FLAG_MOBIL", "SK_ID_CURR"], axis=1)

# Keep only rows without a label. The explicit .copy() makes the imputation
# below write to an independent frame instead of a slice of the full table
# (avoids pandas' SettingWithCopyWarning).
df = df[df["TARGET"].isna()].copy()

# Fill remaining gaps in each feature with that feature's most frequent value.
for column in df.columns:
    if column == 'TARGET':
        continue
    modes = df[column].mode()
    if modes.empty:
        # Column is entirely missing in this subset: there is no mode to fill
        # with, so leave it untouched rather than crash on modes[0].
        continue
    df[column] = df[column].fillna(modes.iloc[0])

dataset = SimpleDatasetLoader(df)
# shuffle=False: predictions are written without an identifier, so they must
# stay in the same order as the rows of df to be attributable to a row.
# NOTE(review): this relies on get_prediction preserving batch order — confirm.
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)
prediction = get_prediction(model, dataloader)[0]
prediction_df = pd.DataFrame(prediction, columns=["TARGET"])
prediction_df.to_csv("prediction.csv", index=False)