In [2]:
import numpy as np
import sys
import os
from typing import Sequence
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

sys.path.append(os.path.abspath("../libs"))
sys.path.append(os.path.abspath("../utils"))

from gradient_descendent import gradient_descendent
from normalize import min_max_normalize, padronize
from loss_fn_tarefa2 import make_mse_loss_function, make_mae_loss_function, make_rmse_loss_function

# Ajuste de curva por otimização

## Carregar os dados

In [3]:
# Load the data set from the Excel workbook stored next to the notebook tree.
df = pd.read_excel('../data/Trabalho2dados.xlsx')

# Preview the first rows to check which columns were read.
df.head()

Unnamed: 0,x,y,z
0,-5.0,-5.0,-458.963629
1,-5.0,-3.8,-520.361381
2,-5.0,-2.6,-593.039231
3,-5.0,-1.4,-606.776605
4,-5.0,-0.2,-657.401892


## Análise exploratória dos dados (EDA)

In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) for each numeric column.
df.describe()

Unnamed: 0,x,y,z
count,81.0,81.0,81.0
mean,-0.2,-0.2,36.990573
std,3.117691,3.117691,294.680404
min,-5.0,-5.0,-657.401892
25%,-2.6,-2.6,-42.780939
50%,-0.2,-0.2,59.403645
75%,2.2,2.2,170.599518
max,4.6,4.6,614.7001


In [5]:
# Number of missing values in each column.
df.isna().sum()

x    0
y    0
z    0
dtype: int64

In [6]:
# Reshape the long (x, y, z) table into a grid: one row per y value, one column per x value.
df_pivot = df.pivot(index='y', columns='x', values='z')

# Pieces handed to the surface trace: column labels, row labels and the z matrix.
x_axis = df_pivot.columns.values
y_axis = df_pivot.index.values
z_grid = df_pivot.values

# Surface through the gridded data, with z contours projected onto the z plane.
surface_trace = go.Surface(
    x=x_axis,
    y=y_axis,
    z=z_grid,
    colorscale='Viridis',
    opacity=0.9,
    contours_z={
        'show': True,
        'usecolormap': True,
        'project_z': True,
        'highlightcolor': "white",
    },
    name='Superfície dos Dados',
)

# The original samples drawn as small red markers on top of the surface.
points_trace = go.Scatter3d(
    x=df['x'],
    y=df['y'],
    z=df['z'],
    mode='markers',
    marker={'size': 3, 'color': 'red', 'symbol': 'circle'},
    name='Pontos de Dados Originais',
)

# Traces are given in the same order they were previously added: surface first, points second.
fig = go.Figure(data=[surface_trace, points_trace])

# Centered title, axis captions and tight margins.
fig.update_layout(
    title={'text': 'z = f(x, y)', 'x': 0.5},
    scene={
        'xaxis_title': 'Eixo X',
        'yaxis_title': 'Eixo Y',
        'zaxis_title': 'Eixo Z (Valor)',
    },
    margin={'l': 0, 'r': 0, 'b': 0, 't': 50},
)

fig.show()

In [7]:
# Design matrix for the model z ≈ w0·x³ + w1·y² + w2: one column per term,
# with a column of ones for the constant term.
X = np.column_stack([df['x'] ** 3, df['y'] ** 2, np.ones(len(df['x']))])

# Closed-form least-squares reference for the gradient-descent runs.
# The previous expression, inv(XᵀX + (XᵀX)ᵀ) @ (2·Xᵀz), is the normal-equation
# solution written out by hand; lstsq finds the same minimiser of ||Xw - z||²
# without forming or inverting XᵀX, which is numerically more stable.
true_weights = np.linalg.lstsq(X, df['z'].values, rcond=None)[0]
true_weights

array([ 5.0081046 ,  5.73419945, 10.59963756])

## Calcular as funções de perda

In [8]:
# Model inputs (x, y) and the target values z.
features = df[['x', 'y']]
y = df['z']

# Copy of the inputs rescaled column by column with min_max_normalize
# (called with bounds 0 and 1 — presumably maps each column to [0, 1]; confirm in utils/normalize).
features_normalized = features.copy()
features_normalized['x'] = min_max_normalize(features['x'], 0, 1)
features_normalized['y'] = min_max_normalize(features['y'], 0, 1)

# Copy of the inputs transformed column by column with padronize
# (presumably z-score standardization; confirm in utils/normalize).
features_pradonized = features.copy()
features_pradonized['x'] = padronize(features['x'])
features_pradonized['y'] = padronize(features['y'])

# Experiment grid: every feature scaling x loss function x learning rate x starting point.
features_list = [features, features_normalized, features_pradonized]
features_names = ['Original', 'Min-Max Normalized', 'Padronized']

loss_fn_names = ['MSE', 'MAE', 'RMSE']
loss_fn_makers = [make_mse_loss_function, make_mae_loss_function, make_rmse_loss_function]

learning_rates = [0.01, 0.001, 0.0001]
initial_weights = [np.zeros(3), np.array([5.0, 5.0, 8.0]), np.array([0.5, 0.5, 0.5])]
n_iterations = 5000  # forwarded to the optimizer as max_iter
tolerance = 1e-6     # forwarded to the optimizer as tolerance

# Maps (feature set, loss name, learning rate, initial weights) -> optimizer outputs.
dict_results = {}

for feature_set, feature_name in zip(features_list, features_names):
    x_data = feature_set['x'].values
    y_data = feature_set['y'].values
    z_data = y.values

    for loss_fn_name, loss_fn_maker in zip(loss_fn_names, loss_fn_makers):
        # Each maker returns the loss and its gradient bound to this data set.
        loss_function, grad_loss_function = loss_fn_maker(x_data, y_data, z_data)

        for lr in learning_rates:
            for initial_w in initial_weights:
                # Build the key before the optimizer runs so it always records the
                # starting point that was actually requested.
                key = (feature_name, loss_fn_name, lr, tuple(initial_w))

                # The same initial_w arrays are reused by every run in the grid; hand the
                # optimizer a private copy so an in-place update inside it cannot change
                # the starting point of later runs.
                weights, losses, n_iters = gradient_descendent(
                    initial_w.copy(),
                    loss_function,
                    grad_loss_function,
                    learning_rate=lr,
                    max_iter=n_iterations,
                    tolerance=tolerance
                )

                dict_results[key] = {
                    'weights': weights,
                    'losses': losses,
                    'n_iters': n_iters
                }

# One row per run. The last element of 'weights' and 'losses' is reported as the final
# value (presumably these are per-iteration histories; verify against gradient_descendent).
df_result = pd.DataFrame([
    {
        'Feature_Set': key[0],
        'Loss_Function': key[1],
        'Learning_Rate': key[2],
        'Initial_Weights': key[3],
        'Final_Weights': value['weights'][-1],
        'Final_Loss': value['losses'][-1],
        'Iterations': value['n_iters']
    }
    for key, value in dict_results.items()
])


# Three runs with the lowest final loss for each loss function.
df_result.sort_values(by='Final_Loss').groupby('Loss_Function').head(3).reset_index(drop=True)


overflow encountered in reduce


overflow encountered in square


overflow encountered in matmul


invalid value encountered in matmul


invalid value encountered in matmul


invalid value encountered in matmul


invalid value encountered in subtract



Unnamed: 0,Feature_Set,Loss_Function,Learning_Rate,Initial_Weights,Final_Weights,Final_Loss,Iterations
0,Original,MAE,0.0001,"(5.0, 5.0, 8.0)","[5.005431229629682, 5.883213629629388, 8.06220...",15.241431,5000
1,Original,MAE,0.001,"(5.0, 5.0, 8.0)","[5.003422617283623, 5.8810261728389355, 8.1157...",15.24433,5000
2,Original,MAE,0.01,"(5.0, 5.0, 8.0)","[5.01551407407448, 5.8891851851840675, 8.29185...",15.272837,5000
3,Original,RMSE,0.01,"(5.0, 5.0, 8.0)","[5.007477748293044, 5.78230682525681, 9.772637...",19.297418,5000
4,Original,RMSE,0.001,"(5.0, 5.0, 8.0)","[5.006377899720806, 5.866714747314398, 8.32160...",19.348288,5000
5,Original,RMSE,0.0001,"(5.0, 5.0, 8.0)","[5.005956655896799, 5.867319533310181, 8.07845...",19.362176,5000
6,Original,MSE,0.0001,"(5.0, 5.0, 8.0)","[5.006854213483814, 5.830160028785147, 8.95000...",373.279313,5000
7,Original,MSE,0.0001,"(0.5, 0.5, 0.5)","[5.00331293599638, 6.101935509236395, 4.277992...",389.529143,5000
8,Original,MSE,0.0001,"(0.0, 0.0, 0.0)","[5.003082466001028, 6.119622938120117, 3.97393...",391.246879,5000


## Resultados

In [9]:
def make_function(w):
    """Build the fitted surface z = w[0] * x**3 + w[1] * y**2 + w[2].

    Args:
        w: Indexable holding the three coefficients in the order
            (cubic x term, quadratic y term, constant term).

    Returns:
        A function of (x, y) that evaluates the surface. The coefficients are
        read from ``w`` each time the returned function is called.
    """
    def surface(x, y):
        cubic_term = w[0] * (x ** 3)
        quadratic_term = w[1] * (y ** 2)
        return cubic_term + quadratic_term + w[2]

    return surface

### MSE

In [None]:
# Keep only the MSE runs and pick the one with the smallest final loss.
best_mse_results = df_result[df_result['Loss_Function'] == 'MSE']
best_row_label = best_mse_results['Final_Loss'].idxmin()
best_mse = best_mse_results.loc[best_row_label]
best_w = best_mse['Final_Weights']
feature_set_used = best_mse['Feature_Set']

print(
    "Melhor resultado MSE:",
    f"Feature Set: {feature_set_used}",
    f"Learning Rate: {best_mse['Learning_Rate']}",
    f"Final Loss: {best_mse['Final_Loss']}",
    f"Pesos: {best_w}",
    sep="\n",
)

# The weights were fitted on one particular scaling of the inputs, so predictions must be
# evaluated on that same scaling; any other feature-set name falls back to the raw data.
scaled_inputs = {
    'Min-Max Normalized': features_normalized,
    'Padronized': features_pradonized,
}
model_inputs = scaled_inputs.get(feature_set_used, df)
x_data = model_inputs['x'].values
y_data = model_inputs['y'].values

# Prediction function built from the best weights.
f_pred = make_function(best_w)

# Predicted z for every existing sample, one point at a time.
predictions = list(map(f_pred, x_data, y_data))

# Grid of the original data for the reference surface.
df_pivot = df.pivot(index='y', columns='x', values='z')
x_axis = df_pivot.columns.values
y_axis = df_pivot.index.values
z_grid = df_pivot.values

# Original data drawn as a semi-transparent surface.
original_surface = go.Surface(
    x=x_axis,
    y=y_axis,
    z=z_grid,
    colorscale='Viridis',
    opacity=0.7,
    name='Dados Originais',
)

# Predictions are always drawn at the original (unscaled) coordinates.
predicted_points = go.Scatter3d(
    x=df['x'],
    y=df['y'],
    z=predictions,
    mode='markers',
    marker={'size': 5, 'color': 'blue', 'symbol': 'circle'},
    name='Predições',
)

fig = go.Figure(data=[original_surface, predicted_points])

fig.update_layout(
    title=f'MSE: {feature_set_used} | LR: {best_mse["Learning_Rate"]} | Loss: {best_mse["Final_Loss"]:.6f}',
    scene={
        'xaxis_title': 'X',
        'yaxis_title': 'Y',
        'zaxis_title': 'Z',
    },
    margin={'l': 0, 'r': 0, 'b': 0, 't': 50},
)

fig.show()

Melhor resultado MSE:
Feature Set: Original
Learning Rate: 0.0001
Final Loss: 373.2793132812906
Pesos: [5.00685421 5.83016003 8.95000686]


In [15]:
# All MSE runs, renumbered from 0 within the subset and then ordered by final loss
# (the displayed index therefore keeps the pre-sort numbering).
mse_mask = df_result['Loss_Function'] == 'MSE'
df_result_mse = (
    df_result.loc[mse_mask]
    .reset_index(drop=True)
    .sort_values(by='Final_Loss')
)
df_result_mse

Unnamed: 0,Feature_Set,Loss_Function,Learning_Rate,Initial_Weights,Final_Weights,Final_Loss,Iterations
7,Original,MSE,0.0001,"(5.0, 5.0, 8.0)","[5.006854213483814, 5.830160028785147, 8.95000...",373.279313,5000
8,Original,MSE,0.0001,"(0.5, 0.5, 0.5)","[5.00331293599638, 6.101935509236395, 4.277992...",389.529143,5000
6,Original,MSE,0.0001,"(0.0, 0.0, 0.0)","[5.003082466001028, 6.119622938120117, 3.97393...",391.246879,5000
19,Padronized,MSE,0.01,"(5.0, 5.0, 8.0)","[149.7334226158866, 54.8578425237414, -17.8672...",1221.550872,2714
20,Padronized,MSE,0.01,"(0.5, 0.5, 0.5)","[149.7334226158866, 54.8578425245635, -17.8672...",1221.550872,2702
18,Padronized,MSE,0.01,"(0.0, 0.0, 0.0)","[149.7334226158866, 54.85784252079389, -17.867...",1221.550872,2701
21,Padronized,MSE,0.001,"(0.0, 0.0, 0.0)","[149.73342261588488, 53.73123486154125, -16.22...",1222.792754,5000
23,Padronized,MSE,0.001,"(0.5, 0.5, 0.5)","[149.73342261588488, 53.72805520959593, -16.22...",1222.799774,5000
22,Padronized,MSE,0.001,"(5.0, 5.0, 8.0)","[149.73342261588488, 53.638573544822044, -16.0...",1223.00544,5000
24,Padronized,MSE,0.0001,"(0.0, 0.0, 0.0)","[145.91433780064378, 33.4389530648235, 8.01532...",1648.219588,5000
