In [9]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# import engine to load database data
from sqlalchemy import create_engine

In [10]:
import os
import dotenv

# loading environment vars
dotenv.load_dotenv()
MYSQL_USER = os.getenv("MYSQL_USER", default="root")
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD", default="<PASSWORD>")

In [11]:
class Connection:
    engine = None

    @staticmethod
    def connect(host: str = 'localhost', port: int = 3306):
        connection_string = f"mysql+pymysql://{MYSQL_USER}:{MYSQL_PASSWORD}@{host}/statlog?charset=utf8"

        Connection.engine = create_engine(connection_string)

    @staticmethod
    def disconnect():
        if Connection.engine is not None:
            Connection.engine.dispose()

# connecting to database
Connection.connect()


In [12]:
model = None
label_encoders = {}

In [13]:
# reading database
df = pd.read_sql("SELECT * FROM germancredit", Connection.engine)

categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns.tolist()

if 'kredit' in numerical_columns:
    numerical_columns.remove('kredit')

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

X = df.drop('kredit', axis=1)
y = df['kredit']

feature_names = X.columns.tolist()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Dados pré-processados. Conjunto de treino: {X_train.shape}, Conjunto de teste: {X_test.shape}")

model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"\n{'='*50}")
print("AVALIAÇÃO DO MODELO")
print(f"{'='*50}")
print(f"Acurácia: {accuracy:.4f}")
print(f"\nMatriz de Confusão:")
print(conf_matrix)
print(f"\nRelatório de Classificação:")
print(class_report)
print(f"{'='*50}")

Dados pré-processados. Conjunto de treino: (800, 21), Conjunto de teste: (200, 21)

AVALIAÇÃO DO MODELO
Acurácia: 0.9050

Matriz de Confusão:
[[ 53   7]
 [ 12 128]]

Relatório de Classificação:
              precision    recall  f1-score   support

           0       0.82      0.88      0.85        60
           1       0.95      0.91      0.93       140

    accuracy                           0.91       200
   macro avg       0.88      0.90      0.89       200
weighted avg       0.91      0.91      0.91       200



In [14]:
if model is None:
    print("Model is not trained")
    Connection.disconnect()
    exit(0)

In [15]:
print(f"\n{'='*50}")
print("CLASSIFICAÇÃO DE NOVO CLIENTE")
print(f"{'='*50}")
print("Por favor, insira os dados do cliente:")


CLASSIFICAÇÃO DE NOVO CLIENTE
Por favor, insira os dados do cliente:


In [16]:
def predict_new_data():
    new_data = {}

    for feature in feature_names:
        if feature in categorical_columns:
            les = label_encoders.get(feature)
            if les:
                options = list(les.classes_)
                print(f"\n{feature} (opções: {', '.join(options)}):")
                value = input("> ")

                try:
                    new_data[feature] = les.transform([value])[0]
                except Exception as e:
                    print(f"Valor inválido para {feature}. Usando o primeiro valor disponível.", e)
                    new_data[feature] = 0
            else:
                value = input(f"{feature}: ")
                new_data[feature] = value
        else:
            while True:
                try:
                    value = float(input(f"{feature}: "))
                    new_data[feature] = value
                    break
                except ValueError:
                    print("Por favor, insira um valor numérico válido.")

    new_df = pd.DataFrame([new_data])
    prediction = model.predict(new_df)[0]

    result = "BOM" if prediction == 1 else "RUIM"

    print(f"\nResultado da classificação: Risco de crédito {result}")
    print(f"{'='*50}")


In [17]:
while True:
    predict_new_data()

    continue_option = input("\nDeseja classificar outro cliente? (s/n): ")
    if continue_option.lower() != 's':
        break

KeyboardInterrupt: Interrupted by user

In [18]:
Connection.disconnect()