In [1]:
import numpy as np
import sys
import os
from typing import Sequence
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

sys.path.append(os.path.abspath("../libs"))
sys.path.append(os.path.abspath("../utils"))

from gradient_descendent import gradient_descendent
from normalize import min_max_normalize, padronize
from loss_fn_tarefa2 import make_mse_loss_function, make_mae_loss_function, make_rmse_loss_function

# Ajuste de curva por otimização

## Carregar os dados

In [2]:
# Read the dataset from the Excel workbook into a DataFrame
df = pd.read_excel(io='../data/Trabalho2dados.xlsx')

# Preview the first five rows
df.head(n=5)

Unnamed: 0,x,y,z
0,-5.0,-5.0,-458.963629
1,-5.0,-3.8,-520.361381
2,-5.0,-2.6,-593.039231
3,-5.0,-1.4,-606.776605
4,-5.0,-0.2,-657.401892


## Análise exploratória dos dados (EDA)

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column
df.describe()

Unnamed: 0,x,y,z
count,81.0,81.0,81.0
mean,-0.2,-0.2,36.990573
std,3.117691,3.117691,294.680404
min,-5.0,-5.0,-657.401892
25%,-2.6,-2.6,-42.780939
50%,-0.2,-0.2,59.403645
75%,2.2,2.2,170.599518
max,4.6,4.6,614.7001


In [4]:
# Count the missing values in each column
df.isna().sum()

x    0
y    0
z    0
dtype: int64

In [5]:
# Reshape the long (x, y, z) table into a grid: one row per y value, one column per x value.
df_pivot = df.pivot(index='y', columns='x', values='z')

# Pull the grid components out of the pivot table for Plotly
x_axis = df_pivot.columns.values  # X axis values
y_axis = df_pivot.index.values    # Y axis values
z_grid = df_pivot.values          # Z values laid out on the (y, x) grid

# Surface built from the pivoted grid
surface_trace = go.Surface(
    x=x_axis,
    y=y_axis,
    z=z_grid,
    colorscale='Viridis',
    opacity=0.9,
    contours_z={
        'show': True,
        'usecolormap': True,
        'project_z': True,
        'highlightcolor': "white",
    },
    name='Superfície dos Dados',
)

# Original data points drawn as red markers
points_trace = go.Scatter3d(
    x=df['x'],
    y=df['y'],
    z=df['z'],
    mode='markers',
    marker={'size': 3, 'color': 'red', 'symbol': 'circle'},
    name='Pontos de Dados Originais',
)

# Assemble the figure with both traces (surface first, then points)
fig = go.Figure(data=[surface_trace, points_trace])

# Centered title, axis labels and tight margins
fig.update_layout(
    title={'text': 'z = f(x, y)', 'x': 0.5},
    scene={
        'xaxis_title': 'Eixo X',
        'yaxis_title': 'Eixo Y',
        'zaxis_title': 'Eixo Z (Valor)',
    },
    margin={'l': 0, 'r': 0, 'b': 0, 't': 50},
)

fig.show()

In [6]:
# Closed-form least-squares reference for the model  z ≈ w0 * x**3 + w1 * y**2 + w2.
# Design matrix: one column per term (x cubed, y squared, constant intercept).
X = np.column_stack([df['x'] ** 3, df['y'] ** 2, np.ones(len(df))])

# Minimising ||X w - z||^2 leads to the normal equations (X^T X) w = X^T z.
# The previous expression, inv(X^T X + (X^T X)^T) @ (2 X^T z), is that same system with
# both sides doubled and solved through an explicit matrix inverse. lstsq returns the same
# least-squares solution without forming the inverse, which is numerically more stable.
true_weights, *_ = np.linalg.lstsq(X, df['z'].to_numpy(), rcond=None)
true_weights

array([ 5.0081046 ,  5.73419945, 10.59963756])

## Ajuste por gradiente descendente com cada função de perda

In [7]:
# Inputs: the two raw features (x, y) and the regression target (column z).
features = df[['x', 'y']]
y = df['z']

# Two rescaled variants of the features; the raw `features` frame is left untouched.
features_normalized = features.assign(
    x=min_max_normalize(features['x'], 0, 1),
    y=min_max_normalize(features['y'], 0, 1),
)
features_pradonized = features.assign(
    x=padronize(features['x']),
    y=padronize(features['y']),
)

# Experiment grid: feature scaling x loss function x learning rate x starting point.
features_list = [features_normalized, features_pradonized]
features_names = ['Min-Max Normalized', 'Padronized']

loss_fn_names = ['MSE', 'MAE', 'RMSE']
loss_fn_makers = [make_mse_loss_function, make_mae_loss_function, make_rmse_loss_function]

learning_rates = [0.01, 0.001, 0.0001]
initial_weights = [np.zeros(3), np.array([5.0, 5.0, 8.0]), np.full(3, -1.0)]
n_iterations = 5_000
tolerance = 1e-6

# Results keyed by (feature set name, loss name, learning rate, initial weights as a tuple).
dict_results = {}

# The target does not depend on the feature scaling, so extract it once.
z_data = y.values

for feature_name, feature_set in zip(features_names, features_list):
    x_data = feature_set['x'].values
    y_data = feature_set['y'].values

    for loss_fn_name, loss_fn_maker in zip(loss_fn_names, loss_fn_makers):
        # Each maker returns the loss and its gradient bound to this data set.
        loss_function, grad_loss_function = loss_fn_maker(x_data, y_data, z_data)

        for lr in learning_rates:
            for initial_w in initial_weights:
                outcome = gradient_descendent(
                    initial_w,
                    loss_function,
                    grad_loss_function,
                    learning_rate=lr,
                    max_iter=n_iterations,
                    tolerance=tolerance,
                )
                weights, losses, n_iters = outcome

                key = (feature_name, loss_fn_name, lr, tuple(initial_w))
                dict_results[key] = dict(weights=weights, losses=losses, n_iters=n_iters)

# Flatten the results into one row per run. The last entries of 'weights' and 'losses'
# are taken as the final state (presumably these are per-iteration histories — confirm
# against gradient_descendent).
df_result = pd.DataFrame([
    {
        'Feature_Set': feat_label,
        'Loss_Function': loss_label,
        'Learning_Rate': rate,
        'Initial_Weights': start_point,
        'Final_Weights': run['weights'][-1],
        'Final_Loss': run['losses'][-1],
        'Iterations': run['n_iters'],
    }
    for (feat_label, loss_label, rate, start_point), run in dict_results.items()
])

# Show the three lowest-loss runs of each loss function, ordered by final loss.
(
    df_result
    .sort_values(by='Final_Loss')
    .groupby('Loss_Function')
    .head(3)
    .reset_index(drop=True)
)

Unnamed: 0,Feature_Set,Loss_Function,Learning_Rate,Initial_Weights,Final_Weights,Final_Loss,Iterations
0,Padronized,RMSE,0.01,"(5.0, 5.0, 8.0)","[96.40771990606045, 17.932215962751602, 11.963...",112.919446,5000
1,Padronized,MAE,0.01,"(5.0, 5.0, 8.0)","[67.37270415350716, 27.079370370370857, 20.064...",114.863634,5000
2,Padronized,RMSE,0.01,"(0.0, 0.0, 0.0)","[91.09873212820592, 15.493380378739916, 6.3615...",123.502085,5000
3,Padronized,MAE,0.01,"(0.0, 0.0, 0.0)","[62.30012157672005, 22.98651851852012, 14.6691...",124.111235,5000
4,Padronized,RMSE,0.01,"(-1.0, -1.0, -1.0)","[90.03556304949578, 14.87148253872703, 5.69453...",125.669565,5000
5,Padronized,MAE,0.01,"(-1.0, -1.0, -1.0)","[61.30182855826839, 22.084888888890365, 14.030...",125.903328,5000
6,Padronized,MSE,0.01,"(5.0, 5.0, 8.0)","[149.7334226158865, 54.857842523741425, -17.86...",1221.550872,2714
7,Padronized,MSE,0.01,"(0.0, 0.0, 0.0)","[149.7334226158865, 54.85784252079392, -17.867...",1221.550872,2701
8,Padronized,MSE,0.01,"(-1.0, -1.0, -1.0)","[149.7334226158865, 54.857842526936, -17.86726...",1221.550872,2701


## Resultados

### MSE