In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam



In [2]:
class Predictor:
    """Train and apply a small feed-forward binary classifier on tabular data.

    Workflow (methods must be called in this order):

    1. ``load_and_preprocess_data()`` - read the CSV at ``data_path``, split it
       into train/test and fit the preprocessing on the training split only.
    2. ``build_and_train_model()`` - build and fit the Keras network.
    3. ``evaluate_model()`` - print metrics on the held-out test split.
    4. ``prediction_test(path)`` - predict labels for another CSV file.

    The target column is ``Attrition_Flag``.
    NOTE(review): the loss/activation used here assume the target is already
    encoded as 0/1 - confirm against the input file.
    """

    # Name of the label column in the training CSV.
    TARGET = 'Attrition_Flag'
    # Columns that are one-hot encoded.
    CAT_FEATURES = ['Gender', 'Education_Level', 'Marital_Status', 'Income_Category', 'Card_Category']
    # Columns that are standardized (zero mean, unit variance).
    NUM_FEATURES = ['Customer_Age', 'Dependent_count', 'Months_on_book', 'Total_Relationship_Count',
                    'Months_Inactive_12_mon', 'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
                    'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt', 'Total_Trans_Ct',
                    'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio']

    def __init__(self, data_path):
        """Store the training CSV path; nothing is loaded until requested.

        Args:
            data_path: Path of the labelled CSV file used for training.
        """
        self.data_path = data_path
        self.model = None          # Keras model, set by build_and_train_model()
        self.preprocessor = None   # fitted ColumnTransformer, set by load_and_preprocess_data()
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    def load_and_preprocess_data(self):
        """Read the training CSV, split it 80/20 and fit the preprocessing.

        The split is stratified on the target with ``random_state=42``.
        The preprocessor is fitted on the training split only and then applied
        to the test split, so no statistics of the held-out rows (means,
        variances, category sets) leak into training.

        Raises:
            KeyError: If the target or any feature column is missing.
        """
        data = pd.read_csv(self.data_path)

        y = data[self.TARGET]
        # Select exactly the columns the transformer consumes; identifier and
        # bookkeeping columns (e.g. CLIENTNUM, train_idx) are simply not used.
        X = data[self.NUM_FEATURES + self.CAT_FEATURES]

        # Split BEFORE fitting any transformer to avoid train/test leakage.
        X_train_raw, X_test_raw, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y)

        self.preprocessor = ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), self.NUM_FEATURES),
                # Categories unseen during fit are encoded as all-zeros instead
                # of raising, so held-out / new files can always be transformed.
                ('cat', OneHotEncoder(handle_unknown='ignore'), self.CAT_FEATURES)
            ],
            # Always return a dense array: the network below is fed dense input.
            sparse_threshold=0)

        self.X_train = self.preprocessor.fit_transform(X_train_raw)
        self.X_test = self.preprocessor.transform(X_test_raw)

    def build_and_train_model(self, epochs=50, batch_size=32, learning_rate=0.001):
        """Build a 64-32-1 dense network and fit it on the training split.

        Args:
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
            learning_rate: Learning rate of the Adam optimizer.

        Raises:
            RuntimeError: If ``load_and_preprocess_data()`` was not called first.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("Call load_and_preprocess_data() before build_and_train_model().")

        self.model = Sequential()
        self.model.add(Dense(64, input_dim=self.X_train.shape[1], activation='relu'))
        self.model.add(Dense(32, activation='relu'))
        # Single sigmoid unit: output is the probability of class 1.
        self.model.add(Dense(1, activation='sigmoid'))

        self.model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=learning_rate),
                           metrics=['accuracy'])

        self.model.fit(self.X_train, self.y_train, epochs=epochs, batch_size=batch_size, verbose=1)

    def evaluate_model(self, threshold=0.5):
        """Print accuracy, F1, confusion matrix and report for the test split.

        Args:
            threshold: Probability above which a sample is labelled 1.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.model is None or self.X_test is None:
            raise RuntimeError("Call load_and_preprocess_data() and build_and_train_model() "
                               "before evaluate_model().")

        # predict() yields one probability per row as a column; flatten it so
        # the metrics receive 1-D label vectors.
        y_pred = (self.model.predict(self.X_test) > threshold).astype("int32").ravel()

        print("Accuracy: ", accuracy_score(self.y_test, y_pred))
        print("F1-score: ", f1_score(self.y_test, y_pred))
        print("\nConfusion Matrix:\n", confusion_matrix(self.y_test, y_pred))
        print("\nClassification Report:\n", classification_report(self.y_test, y_pred))

    def prediction_test(self, new_data_path, threshold=0.5):
        """Predict labels for the rows of another CSV file.

        The file must contain the feature columns used for training; any
        additional columns are ignored by the fitted preprocessor.

        Args:
            new_data_path: Path of the CSV file to score.
            threshold: Probability above which a sample is labelled 1.

        Returns:
            An ``int32`` array of 0/1 labels with one row per input row
            (column-vector shape, as produced by the model output).

        Raises:
            RuntimeError: If the preprocessor or the model is not ready yet.
        """
        if self.model is None or self.preprocessor is None:
            raise RuntimeError("Call load_and_preprocess_data() and build_and_train_model() "
                               "before prediction_test().")

        new_data = pd.read_csv(new_data_path)
        X_new = self.preprocessor.transform(new_data)
        y_pred = (self.model.predict(X_new) > threshold).astype("int32")
        return y_pred



In [3]:
# Example usage of the Predictor class: load the data, train, then report hold-out metrics
train_data_path = "\\Users\\Asus\\Desktop\\Hack_2023\\supply_chain_train.csv"
predictor = Predictor(train_data_path)
predictor.load_and_preprocess_data()
predictor.build_and_train_model()
predictor.evaluate_model()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy:  0.9376927822331894
F1-score:  0.9632593670425609

Confusion Matrix:
 [[ 196   64]
 [  37 1324]]

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.75      0.80       260
           1       0.95      0.97      0.96      1361

    accuracy                           0.94      1621
   macro avg       0.90      0.86      0.88      1621
weigh

In [9]:
# Prediction on new data: score the separate test CSV with the trained predictor
new_data_path = "\\Users\\Asus\\Desktop\\Hack_2023\\supply_chain_test.csv"
predictions = predictor.prediction_test(new_data_path=new_data_path)
print("Predictions for test set:\n", predictions)

Predictions for new data:
 [[1]
 [1]
 [1]
 ...
 [1]
 [1]
 [1]]


In [13]:
# Count how many rows were assigned to each predicted class
np.unique(
    predictions,
    return_counts=True,
)

(array([0, 1]), array([ 297, 1729], dtype=int64))

In [16]:
# Persist the predictions as a single-column CSV named "target".
# DataFrame.to_csv returns None when it is given a file path, so keep the
# DataFrame itself in `pred` instead of the return value of to_csv.
pred = pd.DataFrame(predictions, columns=["target"])
pred.to_csv('prediction.csv', header=True, index=False)