### Import dataset

In [208]:
pip install requests pandas

Note: you may need to restart the kernel to use updated packages.


In [209]:
import pandas as pd
import requests

# Download the Abalone dataset and store a local copy as "abalone.data".
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as the data file.
response.raise_for_status()
with open("abalone.data", "wb") as file:
    file.write(response.content)

# Read the local copy. Because `names=` is passed, pandas treats the first line
# of the file as data and uses these labels as the column names.
column_names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']
data = pd.read_csv("abalone.data", names=column_names)

In [210]:
pip install numpy scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [211]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

In [212]:
# Convert the categorical "Sex" column into numeric codes.
sex_codes = {"M": 0, "F": 1, "I": 2}

# Series.map turns every value that is missing from the mapping into NaN, so
# applying it a second time to the already-encoded column would wipe it out.
# Only map while the original letter labels are still present, which makes
# this cell safe to re-run.
if data["Sex"].isin(list(sex_codes)).any():
    data["Sex"] = data["Sex"].map(sex_codes)

data

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,0,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,1,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,0,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,2,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,1,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,0,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,0,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,1,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [213]:
# Notation trick: add a "bias" column holding the value 1 in every row.
data.loc[:, "bias"] = 1

In [214]:
# Split the dataset into training and test sets.
from sklearn.model_selection import train_test_split

# Target: the "Rings" column. Features: every remaining column.
y = data["Rings"]
X = data.drop(columns="Rings")

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [215]:
# Cost function for linear regression.

def cost_function(X, y, theta):
    """Return the halved mean squared error of the linear model ``X.dot(theta)``.

    Parameters
    ----------
    X : object exposing ``.dot`` (e.g. a NumPy array or pandas DataFrame)
        Feature matrix, one row per sample.
    y : sequence of numbers
        Target values; its length is used as the sample count.
    theta : array-like
        Weight vector passed to ``X.dot``.

    Returns
    -------
    The sum of squared residuals divided by ``2 * len(y)``.
    """
    residuals = X.dot(theta) - y
    squared_error_total = np.sum(residuals ** 2)
    return squared_error_total / (2 * len(y))

In [216]:
# Create the initial weight vector: one random value in [0, 1) per column of
# X_train. A seeded generator makes the starting point, and therefore every
# cost value and metric derived from it, reproducible across kernel restarts.
rng = np.random.default_rng(42)
theta = rng.random(X_train.shape[1])

In [217]:
# Gradient-descent settings: number of update steps and learning rate.
num_iterations = 10000
learning_rate = 0.001

# The number of training samples never changes, so look it up once.
n_train = len(y_train)

# Training loop
for i in range(num_iterations):
    # Model predictions and their residuals on the training set.
    predictions = X_train @ theta
    error = predictions - y_train
    # Gradient averaged over the training samples, then one descent step.
    gradient = (X_train.T @ error) / n_train
    theta = theta - learning_rate * gradient

    # Only every 100th iteration reports the cost for monitoring.
    if i % 100:
        continue
    J = cost_function(X_train, y_train, theta)
    print(f"Iteração {i}: Custo = {J}")


Iteração 0: Custo = 36.49734405986444
Iteração 100: Custo = 21.772637169643517
Iteração 200: Custo = 14.234769845591629
Iteração 300: Custo = 10.307846700146793
Iteração 400: Custo = 8.20452871256033
Iteração 500: Custo = 7.030136821086038
Iteração 600: Custo = 6.335699366540698
Iteração 700: Custo = 5.894946482247893
Iteração 800: Custo = 5.593019746624255
Iteração 900: Custo = 5.370913808963871
Iteração 1000: Custo = 5.197724020253989
Iteração 1100: Custo = 5.056769465408403
Iteração 1200: Custo = 4.938641920620223
Iteração 1300: Custo = 4.837713611327019
Iteração 1400: Custo = 4.750373765458538
Iteração 1500: Custo = 4.674130526581775
Iteração 1600: Custo = 4.607147113193203
Iteração 1700: Custo = 4.547996815722851
Iteração 1800: Custo = 4.495529069761797
Iteração 1900: Custo = 4.448792583361664
Iteração 2000: Custo = 4.406988339829967
Iteração 2100: Custo = 4.369438719858538
Iteração 2200: Custo = 4.3355657100634595
Iteração 2300: Custo = 4.3048745427114685
Iteração 2400: Custo = 4

In [218]:
# Compute the evaluation metrics of the trained model on the test set.
predictions = X_test @ theta

# Each metric is called as metric(y_true, y_pred) and printed after its label.
for label, metric in (
    ("Mean Squared Error:", mean_squared_error),
    ("R2 Score:", r2_score),
    ("Mean Absolute Error:", mean_absolute_error),
):
    print(label, metric(y_test, predictions))



Mean Squared Error: 7.327393692300634
R2 Score: 0.32311764284817357
Mean Absolute Error: 1.9524967031206255


In [219]:
# Compare against scikit-learn's linear regression, fit on the same training
# split and scored on the same test split.
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)
predictions = model.predict(X_test)

mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

print("Mean Squared Error:", mse)
print("R2 Score:", r2)
print("Mean Absolute Error:", mae)

Mean Squared Error: 4.950310502936191
R2 Score: 0.5427053625654411
Mean Absolute Error: 1.6067608598250254
