In [1]:
url = "https://raw.githubusercontent.com/chiarorosa/ia_aprendizado_maquina_basico/main/ml-dataset/kaggle-basico/credit_Card.csv"

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score # Avaliação de Acurácia
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import RandomizedSearchCV
import numpy as np


In [2]:
df = pd.read_csv(url)
df.head()
X = df.drop('Class', axis=1)
y = df['Class']
scaler = StandardScaler()
x_scaled = scaler.fit_transform(X)


# Dividir em dois conjuntos, treinamento e teste, usando pareto 80/20
X_train, X_test, y_train, y_test = train_test_split(
    x_scaled,
    y,
    test_size=0.2, # pareto
    random_state=42 # reproduzivel
)

In [3]:
grade = {
    'loss': [
        'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', 
        'squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'
    ],
    'alpha': np.linspace(0.0001, 0.001, 100),
    'fit_intercept': [True, False],
    'max_iter': np.arange(1000, 10000, 100),
    'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
    'eta0': np.logspace(-4, 0, 5),  # Taxa de aprendizado inicial
    'power_t': np.linspace(0.1, 1, 10),  # Exponente para taxa de aprendizado inverso
    'epsilon': np.linspace(0.1, 1, 10),  # Relevante para algumas funções de perda
}

SGD = RandomizedSearchCV(
    SGDClassifier(),
    grade,
    cv=5,
    scoring='accuracy',
    n_iter=20,  # Número de iterações de busca aleatória
    n_jobs=-1,  # Use todos os núcleos disponíveis
    random_state=42  # Para reprodutibilidade
)

SGD.fit(X_train, y_train)

melhor = SGD.best_estimator_

y_pred = melhor.predict(X_test)

acuracia = accuracy_score(y_test, y_pred)

print(f'Acurácia: {acuracia * 100:.2f}')
print(f'Melhor modelo: {melhor}')
print(f'Parâmetros do melhor modelo: {SGD.best_params_}')

guardaAcc = 0

with open('Melhor Acurracia SGD.txt', 'r+', encoding='utf-8') as f:
        try:
            linha = f.readline().strip()
            if linha.startswith('Acurácia:'):
                guardaAcc = float(linha.split(': ')[1]) / 100
                print(f'Acurácia anterior: {guardaAcc * 100:.2f}')
                if acuracia > guardaAcc:
                    f.seek(0) # vai pro inicio
                    f.write(f'Acurácia: {acuracia * 100:.2f} \n')
                    
                    f.truncate() #  remove residuos antigos
                    print(f'Acurácia atualizada: {acuracia * 100:.2f}')
                else:
                    print('Acurácia não atualizada')
        except Exception as e:
            print(f"Erro ao ler o arquivo: {e}")
       


30 fits failed out of a total of 100.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Pedro\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Pedro\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\Pedro\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\Pedro\AppData\Local\Programs\Python\Python311\Lib\

Acurácia: 99.93
Melhor modelo: SGDClassifier(alpha=0.0007818181818181819, epsilon=0.4, eta0=0.001,
              learning_rate='adaptive', max_iter=3700, power_t=0.6)
Parâmetros do melhor modelo: {'power_t': 0.6, 'max_iter': 3700, 'loss': 'hinge', 'learning_rate': 'adaptive', 'fit_intercept': True, 'eta0': 0.001, 'epsilon': 0.4, 'alpha': 0.0007818181818181819}
Acurácia anterior: 0.00
Acurácia atualizada: 99.93
