In [1]:
import numpy as np
import sys
import os
from typing import Sequence
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

sys.path.append(os.path.abspath("../libs"))
sys.path.append(os.path.abspath("../utils"))

from gradient_descendent import gradient_descendent
from normalize import min_max_normalize, padronize
from loss_fn_tarefa2 import make_mse_loss_function, make_mae_loss_function, make_rmse_loss_function

# Ajuste de curva por otimização

## Carregar os dados

In [2]:
# Read the assignment spreadsheet into a DataFrame and preview its first rows.
df = pd.read_excel(io='../data/Trabalho2dados.xlsx')

df.head(n=5)

Unnamed: 0,x,y,z
0,-5.0,-5.0,-458.963629
1,-5.0,-3.8,-520.361381
2,-5.0,-2.6,-593.039231
3,-5.0,-1.4,-606.776605
4,-5.0,-0.2,-657.401892


## Análise exploratória dos dados (EDA)

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,x,y,z
count,81.0,81.0,81.0
mean,-0.2,-0.2,36.990573
std,3.117691,3.117691,294.680404
min,-5.0,-5.0,-657.401892
25%,-2.6,-2.6,-42.780939
50%,-0.2,-0.2,59.403645
75%,2.2,2.2,170.599518
max,4.6,4.6,614.7001


In [4]:
# Number of missing values in each column.
df.isna().sum()

x    0
y    0
z    0
dtype: int64

In [5]:
# Reshape the long (x, y, z) table into a grid: one row per y value, one column per x value.
df_pivot = df.pivot(index='y', columns='x', values='z')

# Grid coordinates and heights consumed by the surface trace.
x_axis = df_pivot.columns.values
y_axis = df_pivot.index.values
z_grid = df_pivot.values

fig = go.Figure(data=[
    # Surface through the gridded data, with z-contours projected onto the z plane.
    go.Surface(
        x=x_axis,
        y=y_axis,
        z=z_grid,
        colorscale='Viridis',
        opacity=0.9,
        contours_z={
            'show': True,
            'usecolormap': True,
            'project_z': True,
            'highlightcolor': "white",
        },
        name='Superfície dos Dados',
    ),
    # Raw samples drawn as small red markers on top of the surface.
    go.Scatter3d(
        x=df['x'],
        y=df['y'],
        z=df['z'],
        mode='markers',
        marker={'size': 3, 'color': 'red', 'symbol': 'circle'},
        name='Pontos de Dados Originais',
    ),
])

# Centered title, labelled axes, and tight margins.
fig.update_layout(
    title={'text': 'z = f(x, y)', 'x': 0.5},
    scene={
        'xaxis_title': 'Eixo X',
        'yaxis_title': 'Eixo Y',
        'zaxis_title': 'Eixo Z (Valor)',
    },
    margin={'l': 0, 'r': 0, 'b': 0, 't': 50},
)

fig.show()

In [6]:
# Design matrix for the model z ~ w0 * x^3 + w1 * y^2 + w2 (one row per sample,
# last column of ones for the intercept).
X = np.column_stack([df['x'] ** 3, df['y'] ** 2, np.ones(len(df))])

# Reference weights: the ordinary least-squares solution of X @ w = z.
# This is the same minimizer the previous expression computed via
# inv(X.T @ X + (X.T @ X).T) @ (2 * X.T @ z), but lstsq avoids forming and
# explicitly inverting the Gram matrix, which is numerically less reliable.
true_weights = np.linalg.lstsq(X, df['z'].to_numpy(), rcond=None)[0]
true_weights

array([ 5.0081046 ,  5.73419945, 10.59963756])

## Calcular as funções de perda

In [7]:
# Model inputs (x, y) and regression target z.
features = df[['x', 'y']]
y = df['z']

# Scaled variant 1 -- presumably maps each input to the [-1, 1] range
# (TODO confirm against utils/normalize.min_max_normalize).
features_normalized = features.copy()
features_normalized['x'] = min_max_normalize(features['x'], -1, 1)
features_normalized['y'] = min_max_normalize(features['y'], -1, 1)

# Scaled variant 2 -- presumably z-score standardization
# (TODO confirm against utils/normalize.padronize).
features_pradonized = features.copy()
features_pradonized['x'] = padronize(features['x'])
features_pradonized['y'] = padronize(features['y'])

# Experiment grid: feature set x loss function x learning rate x starting point.
features_list = [features, features_normalized, features_pradonized]
features_names = ['Original', 'Min-Max Normalized', 'Padronized']

loss_fn_names = ['MSE', 'MAE', 'RMSE']
loss_fn_makers = [make_mse_loss_function, make_mae_loss_function, make_rmse_loss_function]

learning_rates = [0.0001, 0.00001, 0.000001]
initial_weights = [np.zeros(3), np.array([5.0, 5.0, 8.0]), np.array([0.005, 0.005, 0.008])]
n_iterations = 10000
tolerance = 1e-6

# Maps (feature set, loss name, learning rate, initial weights) -> outputs of one run.
dict_results = {}

for feature_set, feature_name in zip(features_list, features_names):
    x_data = feature_set['x'].values
    y_data = feature_set['y'].values
    z_data = y.values

    for loss_fn_name, loss_fn_maker in zip(loss_fn_names, loss_fn_makers):
        # Each maker returns the loss and its gradient bound to this data.
        loss_function, grad_loss_function = loss_fn_maker(x_data, y_data, z_data)

        for lr in learning_rates:
            for initial_w in initial_weights:
                # Full run description, so a failure message identifies exactly one
                # configuration (LR and starting point alone repeat across
                # feature sets and loss functions).
                run_label = (
                    f"Feature_Set={feature_name}, Loss={loss_fn_name}, "
                    f"LR={lr}, Initial_W={initial_w}"
                )
                try:
                    # Pass a copy: the same starting arrays are reused by every run,
                    # so an optimizer that updates its argument in place would
                    # otherwise contaminate later runs and the result key below.
                    weights, losses, n_iters = gradient_descendent(
                        initial_w.copy(), loss_function, grad_loss_function,
                        learning_rate=lr, max_iter=n_iterations, tolerance=tolerance
                    )

                    # Discard runs whose final weights diverged to NaN or +/-inf.
                    if not np.all(np.isfinite(weights[-1])):
                        print(f"Overflow detectado para {run_label}")
                        continue

                except Exception as e:
                    # Best-effort sweep: report the failing configuration and move on.
                    print(f"Erro para {run_label}: {e}")
                    continue

                key = (feature_name, loss_fn_name, lr, tuple(initial_w))
                dict_results[key] = {
                    'weights': weights,
                    'losses': losses,
                    'n_iters': n_iters
                }

# One row per successful run; the last entries of `weights` and `losses` are
# taken as the final weights and final loss of that run.
df_result = pd.DataFrame([
    {
        'Feature_Set': key[0],
        'Loss_Function': key[1],
        'Learning_Rate': key[2],
        'Initial_Weights': key[3],
        'Final_Weights': value['weights'][-1],
        'Final_Loss': value['losses'][-1],
        'Iterations': value['n_iters']
    }
    for key, value in dict_results.items()
])


# Three lowest-loss runs for each loss function.
df_result.sort_values(by='Final_Loss').groupby('Loss_Function').head(3).reset_index(drop=True)

Unnamed: 0,Feature_Set,Loss_Function,Learning_Rate,Initial_Weights,Final_Weights,Final_Loss,Iterations
0,Original,MAE,0.0001,"(5.0, 5.0, 8.0)","[5.005351980246683, 5.882894024690188, 8.06891...",15.241212,10000
1,Original,MAE,0.0001,"(0.005, 0.005, 0.008)","[5.008365293827008, 6.256254716049859, 0.56480...",15.92885,10000
2,Original,MAE,0.0001,"(0.0, 0.0, 0.0)","[5.008438716049226, 6.256332641975785, 0.55729...",15.929955,10000
3,Original,RMSE,0.0001,"(5.0, 5.0, 8.0)","[5.006212180144901, 5.878986370448017, 8.10737...",19.359807,10000
4,Original,RMSE,1e-05,"(5.0, 5.0, 8.0)","[4.999109151144993, 5.480606903815144, 8.03325...",20.040335,10000
5,Original,RMSE,0.0001,"(0.005, 0.005, 0.008)","[5.0000125767427726, 6.294893726075339, 0.5182...",20.409164,10000
6,Original,MSE,0.0001,"(5.0, 5.0, 8.0)","[5.0072951692722105, 5.796318859412162, 9.5317...",372.589507,10000
7,Original,MSE,1e-05,"(5.0, 5.0, 8.0)","[5.006333929788469, 5.870089221505922, 8.26359...",374.473034,10000
8,Original,MSE,1e-06,"(5.0, 5.0, 8.0)","[5.005617385797586, 5.84863701878164, 8.071503...",375.051672,10000


## Resultados

In [8]:
def make_function(w):
    """Return a callable evaluating ``w[0] * x**3 + w[1] * y**2 + w[2]``.

    ``w`` is indexed each time the returned callable runs, so later changes to
    its first three entries are reflected in subsequent evaluations.
    """
    def model(x, y):
        cubic_term = w[0] * (x ** 3)
        quadratic_term = w[1] * (y ** 2)
        return cubic_term + quadratic_term + w[2]

    return model

def plot_best_by_loss(
    df_result,
    loss_name: str,
    df,
    features_normalized,
    features_pradonized,
    title_prefix: str | None = None,
    show_original_points: bool = False
):
    """
    Plot the lowest-loss experiment of one loss function against the data surface.

    Filters ``df_result`` to the rows whose ``Loss_Function`` matches ``loss_name``,
    selects the row with the smallest ``Final_Loss``, evaluates the fitted model
    ``w[0] * x**3 + w[1] * y**2 + w[2]`` on the feature set that row was trained
    on, prints a short summary, and shows a 3D figure with the pivoted data
    surface and the predicted points.

    Parameters
    ----------
    df_result : DataFrame
        Experiment table with the columns ``Loss_Function``, ``Final_Loss``,
        ``Final_Weights``, ``Feature_Set`` and ``Learning_Rate``.
    loss_name : str
        ``"MSE"``, ``"MAE"`` or ``"RMSE"`` (case and surrounding whitespace ignored).
    df : DataFrame
        Data with columns ``x``, ``y`` and ``z``; must pivot into a y-by-x grid.
    features_normalized, features_pradonized : DataFrame
        Frames with ``x`` and ``y`` columns, used when the best row's
        ``Feature_Set`` is ``"Min-Max Normalized"`` or ``"Padronized"``.
    title_prefix : str or None
        Leading text of the figure title; defaults to ``"Ajuste de Curva"``.
    show_original_points : bool
        When true, also draw the raw samples as red markers.

    Returns
    -------
    dict
        Keys ``loss``, ``best_row``, ``weights``, ``feature_set_used``,
        ``predictions`` (ndarray), ``metrics`` (MSE/MAE/RMSE of the predictions
        against ``df["z"]``) and ``fig``.

    Raises
    ------
    ValueError
        If ``loss_name`` is not a supported loss, or no row matches it.
    """
    # Normalize the requested loss name before validating it.
    loss_key = loss_name.strip().upper()
    valid = {"MSE", "MAE", "RMSE"}
    if loss_key not in valid:
        raise ValueError(f"Loss inválido: {loss_name}. Use um de {sorted(valid)}")

    # Keep only this loss function's runs and pick the one with the smallest final loss.
    best_df = df_result[df_result["Loss_Function"].str.upper() == loss_key]
    if best_df.empty:
        raise ValueError(f"Nenhum resultado encontrado para {loss_key}")
    best_row = best_df.loc[best_df["Final_Loss"].idxmin()]

    best_w = best_row["Final_Weights"]
    feature_set_used = best_row["Feature_Set"]

    print(f"Melhor resultado {loss_key}:")
    print(f"Feature Set: {feature_set_used}")
    print(f"Learning Rate: {best_row['Learning_Rate']}")
    print(f"Final Loss: {best_row['Final_Loss']}")
    print(f"Pesos: {best_w}")

    # The weights only make sense on the inputs they were trained on, so evaluate
    # the model on that same feature set; any other label falls back to the raw data.
    if feature_set_used == "Min-Max Normalized":
        model_inputs = features_normalized
    elif feature_set_used == "Padronized":
        model_inputs = features_pradonized
    else:
        model_inputs = df
    x_data = model_inputs["x"].values
    y_data = model_inputs["y"].values

    # The model uses only elementwise arithmetic, so evaluate every sample in a
    # single vectorized call instead of a per-point Python loop.
    predictions = np.asarray(make_function(best_w)(x_data, y_data))

    # Grid form of the data for the surface trace (rows: y values, columns: x values).
    df_pivot = df.pivot(index="y", columns="x", values="z")
    x_axis = df_pivot.columns.values
    y_axis = df_pivot.index.values
    z_grid = df_pivot.values

    fig = go.Figure()

    # Surface of the observed data.
    fig.add_trace(go.Surface(
        x=x_axis,
        y=y_axis,
        z=z_grid,
        colorscale="Viridis",
        opacity=0.7,
        name="Dados Originais"
    ))

    # Optional raw samples.
    if show_original_points:
        fig.add_trace(go.Scatter3d(
            x=df["x"],
            y=df["y"],
            z=df["z"],
            mode="markers",
            marker=dict(size=4, color="red", symbol="circle"),
            name="Pontos Originais"
        ))

    # Predictions are always drawn at the ORIGINAL (x, y) coordinates so they line
    # up with the surface, even when they were computed from scaled inputs.
    fig.add_trace(go.Scatter3d(
        x=df["x"],
        y=df["y"],
        z=predictions,
        mode="markers",
        marker=dict(size=5, color="blue", symbol="circle"),
        name="Predições"
    ))

    # Error metrics of the selected weights against the observed z values
    # (recomputed here; not read from the Final_Loss column).
    z_true = df["z"].values
    residuals = z_true - predictions
    mse = float(np.mean(residuals ** 2))
    mae = float(np.mean(np.abs(residuals)))
    rmse = float(np.sqrt(mse))

    title_main = title_prefix or "Ajuste de Curva"
    fig.update_layout(
        title=f"{title_main} - {loss_key} | Feature: {feature_set_used} | LR: {best_row['Learning_Rate']} | Loss: {best_row['Final_Loss']:.6f}",
        scene=dict(xaxis_title="X", yaxis_title="Y", zaxis_title="Z"),
        margin=dict(l=0, r=0, b=0, t=60),
        legend=dict(x=0.02, y=0.98)
    )
    fig.show()

    return {
        "loss": loss_key,
        "best_row": best_row,
        "weights": best_w,
        "feature_set_used": feature_set_used,
        "predictions": predictions,
        "metrics": {"MSE": mse, "MAE": mae, "RMSE": rmse},
        "fig": fig
    }


### MSE

In [9]:
# Plot the lowest-loss MSE run; the returned summary dict is discarded.
_ = plot_best_by_loss(
    df_result=df_result,
    loss_name="MSE",
    df=df,
    features_normalized=features_normalized,
    features_pradonized=features_pradonized,
)

Melhor resultado MSE:
Feature Set: Original
Learning Rate: 0.0001
Final Loss: 372.5895074408588
Pesos: [5.00729517 5.79631886 9.53176066]


In [10]:
# All MSE runs, ranked from lowest to highest final loss.
df_result_mse = (
    df_result.loc[df_result['Loss_Function'].eq('MSE')]
    .sort_values(by='Final_Loss')
    .reset_index(drop=True)
)
df_result_mse

Unnamed: 0,Feature_Set,Loss_Function,Learning_Rate,Initial_Weights,Final_Weights,Final_Loss,Iterations
0,Original,MSE,0.0001,"(5.0, 5.0, 8.0)","[5.0072951692722105, 5.796318859412162, 9.5317...",372.589507,10000
1,Original,MSE,1e-05,"(5.0, 5.0, 8.0)","[5.006333929788469, 5.870089221505922, 8.26359...",374.473034,10000
2,Original,MSE,1e-06,"(5.0, 5.0, 8.0)","[5.005617385797586, 5.84863701878164, 8.071503...",375.051672,10000
3,Original,MSE,0.0001,"(0.005, 0.005, 0.008)","[5.0048559965110835, 5.983513269172831, 6.3137...",380.106817,10000
4,Original,MSE,0.0001,"(0.0, 0.0, 0.0)","[5.004853554896708, 5.983700650964379, 6.31053...",380.118869,10000
5,Original,MSE,1e-05,"(0.005, 0.005, 0.008)","[5.000998099542611, 6.279587724184635, 1.22402...",410.44642,10000
6,Original,MSE,1e-05,"(0.0, 0.0, 0.0)","[5.0009927583711935, 6.279997632595718, 1.2169...",410.504096,10000
7,Original,MSE,1e-06,"(0.005, 0.005, 0.008)","[4.996261258787341, 6.0877666293966195, 0.4469...",426.188441,10000
8,Original,MSE,1e-06,"(0.0, 0.0, 0.0)","[4.996251893294839, 6.0880059983761905, 0.4392...",426.271853,10000
9,Padronized,MSE,0.0001,"(0.0, 0.0, 0.0)","[149.63601344316348, 40.80270244009391, 2.1493...",1409.236569,10000


### RMSE

In [11]:
# Plot the lowest-loss RMSE run together with the raw samples; the returned
# summary dict is discarded.
_ = plot_best_by_loss(
    df_result=df_result,
    loss_name="RMSE",
    df=df,
    features_normalized=features_normalized,
    features_pradonized=features_pradonized,
    show_original_points=True,
)

Melhor resultado RMSE:
Feature Set: Original
Learning Rate: 0.0001
Final Loss: 19.359807376259504
Pesos: [5.00621218 5.87898637 8.10737105]


In [12]:
# All RMSE runs, ranked from lowest to highest final loss.
df_result_rmse = (
    df_result.loc[df_result['Loss_Function'].eq('RMSE')]
    .sort_values(by='Final_Loss')
    .reset_index(drop=True)
)
df_result_rmse

Unnamed: 0,Feature_Set,Loss_Function,Learning_Rate,Initial_Weights,Final_Weights,Final_Loss,Iterations
0,Original,RMSE,0.0001,"(5.0, 5.0, 8.0)","[5.006212180144901, 5.878986370448017, 8.10737...",19.359807,10000
1,Original,RMSE,1e-05,"(5.0, 5.0, 8.0)","[4.999109151144993, 5.480606903815144, 8.03325...",20.040335,10000
2,Original,RMSE,0.0001,"(0.005, 0.005, 0.008)","[5.0000125767427726, 6.294893726075339, 0.5182...",20.409164,10000
3,Original,RMSE,0.0001,"(0.0, 0.0, 0.0)","[5.000004796867698, 6.295228044651212, 0.51059...",20.41084,10000
4,Original,RMSE,1e-06,"(5.0, 5.0, 8.0)","[4.993834323711544, 5.063177526159203, 8.00417...",22.047287,10000
5,Original,RMSE,1e-05,"(0.005, 0.005, 0.008)","[4.453691810372759, 0.6407891840927746, 0.0448...",80.359537,10000
6,Original,RMSE,1e-05,"(0.0, 0.0, 0.0)","[4.4513994783966515, 0.6352689964662742, 0.036...",80.471046,10000
7,Original,RMSE,1e-06,"(0.005, 0.005, 0.008)","[0.5565544927957635, 0.03310481793451595, 0.00...",264.390603,10000
8,Original,RMSE,1e-06,"(0.0, 0.0, 0.0)","[0.5515667647476142, 0.028097995586983233, 0.0...",264.680514,10000
9,Padronized,RMSE,0.0001,"(5.0, 5.0, 8.0)","[6.870049735627312, 5.220358926389719, 8.08452...",280.237787,10000


### MAE

In [13]:
# Plot the lowest-loss MAE run; the returned summary dict is discarded.
_ = plot_best_by_loss(
    df_result=df_result,
    loss_name="MAE",
    df=df,
    features_normalized=features_normalized,
    features_pradonized=features_pradonized,
)

Melhor resultado MAE:
Feature Set: Original
Learning Rate: 0.0001
Final Loss: 15.24121237146608
Pesos: [5.00535198 5.88289402 8.06891358]


In [14]:
# All MAE runs, ranked from lowest to highest final loss.
df_result_mae = (
    df_result.loc[df_result['Loss_Function'].eq('MAE')]
    .sort_values(by='Final_Loss')
    .reset_index(drop=True)
)
df_result_mae

Unnamed: 0,Feature_Set,Loss_Function,Learning_Rate,Initial_Weights,Final_Weights,Final_Loss,Iterations
0,Original,MAE,0.0001,"(5.0, 5.0, 8.0)","[5.005351980246683, 5.882894024690188, 8.06891...",15.241212,10000
1,Original,MAE,0.0001,"(0.005, 0.005, 0.008)","[5.008365293827008, 6.256254716049859, 0.56480...",15.92885,10000
2,Original,MAE,0.0001,"(0.0, 0.0, 0.0)","[5.008438716049226, 6.256332641975785, 0.55729...",15.929955,10000
3,Original,MAE,1e-05,"(5.0, 5.0, 8.0)","[5.002788657778689, 5.380203911111138, 8.02666...",16.515223,10000
4,Original,MAE,1e-06,"(5.0, 5.0, 8.0)","[4.997318024691879, 5.043334949136437, 8.00300...",17.789303,10000
5,Original,MAE,1e-05,"(0.005, 0.005, 0.008)","[3.532040535308378, 0.5421620641975892, 0.0526...",88.820891,10000
6,Original,MAE,1e-05,"(0.0, 0.0, 0.0)","[3.5275408809873916, 0.5369946172840103, 0.044...",89.000437,10000
7,Original,MAE,1e-06,"(0.005, 0.005, 0.008)","[0.3737516049383133, 0.052086419753072835, 0.0...",202.678204,10000
8,Original,MAE,1e-06,"(0.0, 0.0, 0.0)","[0.3687516049383126, 0.04708641975307644, 0.00...",202.889185,10000
9,Padronized,MAE,0.0001,"(5.0, 5.0, 8.0)","[6.247961300441247, 5.462962962961555, 8.30864...",203.582751,10000
