<a href="https://colab.research.google.com/github/profugohenrique/spoincd/blob/master/INCD29102025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Preparação do modelo
import pickle
from sklearn.ensemble import RandomForestClassifier

# Modelo exemplo
model = RandomForestClassifier()
# ... treinamento ...
pickle.dump(model, open('model.pkl', 'wb'))

In [None]:
#API com FLASK
from flask import Flask, request, jsonify
import pickle

app = Flask(__name__)
model = pickle.load(open('model.pkl', 'rb'))

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    prediction = model.predict([data['features']])
    return jsonify({'prediction': prediction.tolist()})

In [6]:
#Dashboard com Streamlit
!pip install streamlit
import streamlit as st
import pickle

model = pickle.load(open('model.pkl', 'rb'))

st.title('Classifier Demo')
features = st.text_input('Enter features (comma-separated)')
if features:
    prediction = model.predict([list(map(float, features.split(',')))])
    st.write(f'Prediction: {prediction[0]}')

Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m87.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m113.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.50.0


FileNotFoundError: [Errno 2] No such file or directory: 'model.pkl'

In [2]:
!pip install evidently==0.4.3



In [15]:
#Data drift
!pip install evidently
import pandas as pd
from sklearn.datasets import load_iris
from evidently import Report
from evidently.presets import DataDriftPreset, DataSummaryPreset

# Dados antigos (treino)
iris = load_iris(as_frame=True)
train = iris.frame.sample(100, random_state=42)

# Dados novos (simulando drift)
test = train.copy()
test['sepal width (cm)'] *= 1.3  # altera uma feature artificialmente

# Relatório Evidently
report = Report(metrics=[DataDriftPreset()])
report.run(reference_data=train, current_data=test)
report.show()




AttributeError: 'Report' object has no attribute 'save_html'

In [16]:
#Alternativa sem evidently
import numpy as np
from scipy.stats import ks_2samp
from sklearn.datasets import load_iris
import pandas as pd

iris = load_iris(as_frame=True)
train = iris.frame.sample(100, random_state=42)
test = train.copy()
test['sepal width (cm)'] *= 1.3  # simula drift

# Teste KS (Kolmogorov-Smirnov)
for col in train.columns[:-1]:
    stat, p = ks_2samp(train[col], test[col])
    print(f"{col}: KS={stat:.3f}, p={p:.4f}")
###
###Interpretação:
## Se p < 0.05, há evidência de drift (a distribuição mudou).
## Se p ≥ 0.05, a distribuição é estável.

sepal length (cm): KS=0.000, p=1.0000
sepal width (cm): KS=0.700, p=0.0000
petal length (cm): KS=0.000, p=1.0000
petal width (cm): KS=0.000, p=1.0000


In [17]:
###KS
from scipy.stats import ks_2samp
import numpy as np

# Exemplo: feature mudou de média 5 → 6
x_train = np.random.normal(5, 1, 200)
x_prod  = np.random.normal(6, 1, 200)

stat, p_value = ks_2samp(x_train, x_prod)
print(f"KS={stat:.3f}, p-value={p_value:.4f}")

if p_value < 0.05:
    print(" Drift detectado!")
else:
    print(" Distribuições semelhantes.")


KS=0.455, p-value=0.0000
 Drift detectado!


In [18]:
#Chi-Quadrado
import pandas as pd
from scipy.stats import chi2_contingency

# Frequências de categorias (exemplo: tipos de clientes)
treino = pd.Series(['A', 'A', 'B', 'B', 'C'] * 20)
producao = pd.Series(['A', 'B', 'B', 'C', 'C'] * 20)

# Tabela de contingência
contingencia = pd.crosstab(index=treino, columns=producao)
chi2, p, dof, expected = chi2_contingency(contingencia)

print(f"χ²={chi2:.3f}, p-value={p:.4f}")

if p < 0.05:
    print("Mudança significativa na distribuição categórica.")
else:
    print(" Distribuições semelhantes.")


χ²=75.000, p-value=0.0000
Mudança significativa na distribuição categórica.


In [19]:
#PSI
import numpy as np
import pandas as pd

def calculate_psi(expected, actual, bins=10):
    expected_perc, _ = np.histogram(expected, bins=bins)
    actual_perc, _ = np.histogram(actual, bins=bins)

    expected_perc = expected_perc / len(expected)
    actual_perc = actual_perc / len(actual)

    psi = np.sum((expected_perc - actual_perc) * np.log((expected_perc + 1e-8) / (actual_perc + 1e-8)))
    return psi

# Exemplo de uso
train = np.random.normal(50, 5, 1000)
prod  = np.random.normal(55, 5, 1000)

psi_value = calculate_psi(train, prod)
print(f"PSI = {psi_value:.3f}")

if psi_value < 0.1:
    print("Estável")
elif psi_value < 0.25:
    print(" Mudança moderada")
else:
    print(" Drift significativo")


PSI = 0.119
 Mudança moderada


In [None]:
name: ML Pipeline
on: [push]
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Run tests
        run: |
          python -m pytest tests/
          python validate_data.py

In [21]:
!pip install fairlearn

Collecting fairlearn
  Downloading fairlearn-0.13.0-py3-none-any.whl.metadata (7.3 kB)
Collecting scipy<1.16.0,>=1.9.3 (from fairlearn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Downloading fairlearn-0.13.0-py3-none-any.whl (251 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scipy, fairlearn
  Attempting uninstall: scipy
    Found existing installation: scipy 1.16.2
    Uninstalling scipy-1.16.2:
      Successfully uninstalled scipy-1.16.2
[31mERROR: pip's dependency resolver doe

In [1]:
#Detecção de Vieses :

from fairlearn.metrics import demographic_parity_difference
import numpy as np

# Exemplo de dados (substitua com seus dados reais)
y_true = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1]) # True labels
y_pred = np.array([0, 0, 0, 1, 0, 1, 1, 1, 0, 0]) # Predicted labels
gender = np.array(['Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female']) # Sensitive feature

# Exemplo de fairness check
dp_diff = demographic_parity_difference(y_true, y_pred, sensitive_features=gender)
print(f"Demographic Parity Difference: {dp_diff}")

Demographic Parity Difference: 0.39999999999999997


In [3]:
import numpy as np

# Previsões do modelo
y_pred = np.array([1,0,1,1,0,1,0,1,0,0])  # 1=positivo
# Grupo sensível: 0=homem, 1=mulher
A = np.array([0,0,0,0,0,1,1,1,1,1])

# Taxas de aprovação por grupo
p0 = y_pred[A==0].mean()
p1 = y_pred[A==1].mean()

dpd = p0 - p1
print("Demographic Parity Difference:", dpd)


Demographic Parity Difference: 0.19999999999999996
