In [1]:
import numpy as np
import sys
import os
import pandas as pd
import plotly.graph_objects as go

sys.path.append(os.path.abspath("../libs"))
sys.path.append(os.path.abspath("../utils"))

from levenberg_marquadt import levenberg_marquadt
from normalize import MinMaxNormalizer, StandardScaler
from loss_fn_tarefa2 import make_mse_loss_function
from plots.plots_tarefa3 import plot_best_by_loss

pd.set_option('display.float_format', '{:.10f}'.format)

# Ajuste de curva por otimização

## Carregar os dados

In [2]:
# Carregamento dos dados
df = pd.read_excel('../data/Trabalho2dados.xlsx')

df.head()

Unnamed: 0,x,y,z
0,-5.0,-5.0,-458.9636288948
1,-5.0,-3.8,-520.3613811409
2,-5.0,-2.6,-593.0392311788
3,-5.0,-1.4,-606.7766053186
4,-5.0,-0.2,-657.4018919832


## EDA dados

In [3]:
df.describe()

Unnamed: 0,x,y,z
count,81.0,81.0,81.0
mean,-0.2,-0.2,36.9905729348
std,3.1176914536,3.1176914536,294.680404341
min,-5.0,-5.0,-657.4018919832
25%,-2.6,-2.6,-42.7809389881
50%,-0.2,-0.2,59.4036448974
75%,2.2,2.2,170.5995181649
max,4.6,4.6,614.7000997851


In [4]:
df.isna().sum()

x    0
y    0
z    0
dtype: int64

In [5]:
df_pivot = df.pivot(index='y', columns='x', values='z')

# Gerar a malha
x_axis = df_pivot.columns.values
y_axis = df_pivot.index.values
z_grid = df_pivot.values

fig = go.Figure()

# Adicionar a Superfície
fig.add_trace(go.Surface(
    x=x_axis,
    y=y_axis,
    z=z_grid,
    colorscale='Viridis',
    opacity=0.9,
    contours_z=dict(show=True, usecolormap=True, project_z=True, highlightcolor="white"),
    name='Superfície dos Dados'
))

# Adicionar os Pontos de Dados Originais
fig.add_trace(go.Scatter3d(
    x=df['x'],
    y=df['y'],
    z=df['z'],
    mode='markers',
    marker=dict(size=3, color='red', symbol='circle'),
    name='Pontos de Dados Originais'
))

# Melhorar o Layout
fig.update_layout(
    title=dict(text='z = f(x, y)', x=0.5),
    scene=dict(
        xaxis_title='Eixo X',
        yaxis_title='Eixo Y',
        zaxis_title='Eixo Z (Valor)'
    ),
    margin=dict(l=0, r=0, b=0, t=50)
)

fig.show()

In [6]:
X = np.column_stack([df['x'] ** 3, df['y'] ** 2, np.ones(len(df['x']))])

true_weights = np.linalg.inv((X.T @ X) + (X.T @ X).T) @ (2 * X.T @ df['z'])
true_weights

array([ 5.0081046 ,  5.73419945, 10.59963756])

## Calcular as funções de perda

### Configurações

In [7]:
def make_function(w):
    def f(x, y):
        if np.isscalar(x) and np.isscalar(y):
            return w @ np.array([x, y, 1])
        else:
            return np.array([w @ np.array([xi, yi, 1]) for xi, yi in zip(x, y)])
    return f

features = df[['x', 'y']]
y = df['z']

# Criar os objetos para Normalização e Padronização
min_max_scaler = MinMaxNormalizer(-1.5, 1.5)
standard_scaler = StandardScaler()

# Cria as cópias dos dados para normalização e padronização
cubed_squared_features = features.copy()
cubed_squared_features.loc[:, 'x'] = cubed_squared_features['x'] ** 3
cubed_squared_features.loc[:, 'y'] = cubed_squared_features['y'] ** 2
features_normalized = cubed_squared_features.copy()
features_standardized = cubed_squared_features.copy()

# Ajusta os normalizadores e padronizadores aos dados
min_max_scaler.fit(features_normalized)
standard_scaler.fit(features_standardized)

# Normaliza e Padroniza os dados
features_normalized = min_max_scaler.normalize(features_normalized)
features_standardized = standard_scaler.normalize(features_standardized)

# Listas para iteração
features_list = [cubed_squared_features, features_normalized, features_standardized]
features_names = ['Original', 'Normalized', 'Standardized']
loss_fn_names = ['MSE']
loss_fn_makers = [make_mse_loss_function]

initial_weights = [
    np.zeros(3),
    np.random.rand(3) * 0.1,
]
n_iterations = 10000
tolerance = 1e-6
alpha = 1e-3

### Rodar os experimentos

In [8]:
def make_residuals_fn(X: np.ndarray, Y: np.ndarray, Z: np.ndarray):
    def residuals_fn(w: np.ndarray) -> np.ndarray:
        predictions = w[0]*X + w[1]*Y + w[2]
        return Z - predictions
    return residuals_fn

def make_jacobian_fn(X: np.ndarray, Y: np.ndarray):
    def jacobian_fn(w: np.ndarray) -> np.ndarray:
            # Derivadas parciais em relação a cada peso
            J_a = -X
            J_b = -Y
            J_c = -np.ones_like(X)
            return np.vstack([J_a, J_b, J_c]).T
    return jacobian_fn

In [9]:
dict_results = {}

for feature_set, feature_name in zip(features_list, features_names):
    x_data = feature_set['x'].values
    y_data = feature_set['y'].values
    z_data = y

    x_data_orig = cubed_squared_features['x'].values
    y_data_orig = cubed_squared_features['y'].values

    for loss_fn_name, loss_fn_maker in zip(loss_fn_names, loss_fn_makers):
        loss_function, grad_loss_function = None, None

        loss_function, grad_loss_function = loss_fn_maker(x_data, y_data, z_data)

        for initial_w in initial_weights:
            try:

                residuals_fn = make_residuals_fn(x_data, y_data, z_data)
                jacobian_fn = make_jacobian_fn(x_data, y_data)

                weights, losses, n_iters = levenberg_marquadt(
                    initial_w, residuals_fn, loss_function, jacobian_fn,
                    alpha=alpha, alpha_variability=10, max_iter=n_iterations,
                    tolerance=tolerance, stopping_criteria=[1, 3]
                )

                weights_raw = weights[-1].copy()
                if feature_name == 'Standardized':
                    weights[-1] = standard_scaler.desnormalize_weights(weights[-1])
                elif feature_name == 'Normalized':
                    weights[-1] = min_max_scaler.desnormalize_weights(weights[-1])

                # Verificar se há overflow
                if np.any(np.isnan(weights[-1])) or np.any(np.isinf(weights[-1])):
                    print(f"Overflow detectado para Initial_W={initial_w}")
                    continue

            except Exception as e:
                print(f"Erro para Initial_W={initial_w}: {e}")
                continue

            function_aprox = make_function(weights[-1])

            mse_final = np.mean((z_data - function_aprox(x_data_orig, y_data_orig)) ** 2)
            rmse_final = np.sqrt(mse_final)
            mae_final = np.mean(np.abs(z_data - function_aprox(x_data_orig, y_data_orig)))

            key = (feature_name, loss_fn_name, tuple(initial_w))
            dict_results[key] = {
                'weights': weights,
                'weights_raw': weights_raw,
                'losses': losses,
                'n_iters': n_iters,
                'mse_final': mse_final,
                'rmse_final': rmse_final,
                'mae_final': mae_final
            }

df_result = pd.DataFrame([
    {
        'Feature_Set': key[0],
        'Loss_Function': key[1],
        'Initial_Weights': key[2],
        'Weights_raw': value['weights_raw'],
        'Final_Weights': value['weights'][-1],
        'Final_Loss': value['losses'][-1],
        'MSE_Final': value['mse_final'],
        'RMSE_Final': value['rmse_final'],
        'MAE_Final': value['mae_final'],
        'Iterations': value['n_iters']
    }
    for key, value in dict_results.items()
])

df_result_to_save = df_result.copy()
df_result_to_save['Initial_Weights'] = df_result_to_save['Initial_Weights'].apply(lambda x: np.array(x).tolist())
df_result_to_save['Final_Weights'] = df_result_to_save['Final_Weights'].apply(lambda x: np.array(x).tolist())

df_result_to_save['Initial_Weights'] = df_result_to_save['Initial_Weights'].apply(lambda x: str(x))
df_result_to_save['Final_Weights'] = df_result_to_save['Final_Weights'].apply(lambda x: str(x))

df_result_to_save.to_excel('../output/tarefa3_results.xlsx', index=False, float_format="%.6g")

df_result.sort_values(by='MSE_Final').groupby('Loss_Function').head(10).reset_index(drop=True).drop(columns=['Final_Loss'])

Unnamed: 0,Feature_Set,Loss_Function,Initial_Weights,Weights_raw,Final_Weights,MSE_Final,RMSE_Final,MAE_Final,Iterations
0,Standardized,MSE,"(0.0, 0.0, 0.0)","[288.11203268441704, 48.82467957045251, 36.990...","[5.008104595020174, 5.734199448068984, 10.5996...",372.0919316255,19.2896845911,15.2952363095,3
1,Original,MSE,"(0.016612939351596613, 0.07334226991833549, 0....","[5.008104595025083, 5.734199448081883, 10.5996...","[5.008104595025083, 5.734199448081883, 10.5996...",372.0919316255,19.2896845911,15.2952363095,3
2,Standardized,MSE,"(0.016612939351596613, 0.07334226991833549, 0....","[288.11203268441704, 48.82467957045258, 36.990...","[5.008104595020174, 5.734199448068993, 10.5996...",372.0919316255,19.2896845911,15.2952363095,3
3,Original,MSE,"(0.0, 0.0, 0.0)","[5.008104595025083, 5.734199448081934, 10.5996...","[5.008104595025083, 5.734199448081934, 10.5996...",372.0919316255,19.2896845911,15.2952363095,3
4,Normalized,MSE,"(0.0, 0.0, 0.0)","[371.16064774478366, 47.70853940799228, 13.119...","[5.008104595001939, 5.734199448075995, 638.903...",395137.7328103567,628.5998192892,628.3037807293,3
5,Normalized,MSE,"(0.016612939351596613, 0.07334226991833549, 0....","[371.1606477447837, 47.708539407992454, 13.119...","[5.00810459500194, 5.734199448076016, 638.9034...",395137.7328103574,628.5998192892,628.3037807293,3


In [10]:
standard_scaler.std

x   57.5291564339
y    8.5146462052
dtype: float64

## Resultados

### MSE

In [11]:
_ = plot_best_by_loss(df_result, "MSE", df, features_normalized, features_standardized, show_original_points=True)

Melhor resultado MSE:
Feature Set: Standardized
Initial Weights: (np.float64(0.0), np.float64(0.0), np.float64(0.0))
Final Loss: 372.0919316255451
Pesos: [ 5.0081046   5.73419945 10.59963756]
