In [1]:
from random import random, seed

import numpy as np
import pandas as pd

from sklearn.datasets import make_blobs
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

In [120]:
# Read ../data/medidas.csv into a DataFrame; the trailing expression displays
# its (rows, columns) shape as the cell output.
df = pd.read_csv('../data/medidas.csv')
df.shape

(100, 2)

In [121]:
# Preview the first three rows of the table.
df.iloc[:3]

Unnamed: 0,Altura,Peso
0,187,109.72
1,177,91.09
2,180,88.93


In [122]:
# Pull the feature column (Altura) and the target column (Peso) out of the
# DataFrame as NumPy arrays.
x = df['Altura'].values
y = df['Peso'].values

In [123]:
# Show the dimensions of the feature and target arrays.
print(x.shape, y.shape)

(100,) (100,)


In [124]:
# Arrange the features as a single-column 2-D array, one row per sample,
# then confirm the resulting shapes.
x = np.reshape(x, (-1, 1))

print(x.shape, y.shape)

(100, 1) (100,)


#### Python Puro

*Para transformar o perceptron em um regressor, basta remover a função de ativação.*

In [15]:
# Step 1: initialise the weights and the bias uniformly in [-1, 1).
# The generator is seeded first so that the starting point -- and therefore
# the cost trace printed below -- is the same every time this cell runs.
seed(42)

D = x.shape[1]
w = [2*random() - 1 for _ in range(D)]
b = 2*random() - 1

# Step 2: for every sample, visited in dataset order:
#   a) compute the output;
#   b) compute the error;
#   c) update the weights;
#   d) update the bias.
# The weight and the bias use separate learning rates. The raw inputs are in
# the hundreds here, which is presumably why the weight rate is so much
# smaller than the bias rate.
wl_rate = 1e-7
bl_rate = 1e-2

for step in range(10_001):
    # Sum of squared errors over this pass; each error is measured with the
    # parameters as they were when that sample was visited.
    cost = 0
    for xn, yn in zip(x, y):
        y_pred = sum(xi*wi for xi, wi in zip(xn, w)) + b
        erro = yn - y_pred
        w = [wi + wl_rate * erro * xi for xi, wi in zip(xn, w)]
        b = b + bl_rate * erro
        cost += erro**2
    if step % 1000 == 0:
        print('step {0}: {1}'.format(step, cost))

print('w:', w)
print('b:', b)
# Predictions for the whole dataset with the final parameters.
print('y_pred: {0}'.format(np.dot(x, w) + b))

step 0: 1024641.7518430601
step 1000: 8173.51508088843
step 2000: 3590.671800476251
step 3000: 2790.716648439308
step 4000: 2652.819219900905
step 5000: 2629.7841589568757
step 6000: 2626.2517504352213
step 7000: 2625.849626271206
step 8000: 2625.8711079824543
step 9000: 2625.9136227218874
step 10000: 2625.937363237822
w: [np.float64(1.3697035107991162)]
b: -157.8653723865976
y_pred: [ 98.26918413  84.57214902  88.68125956  84.57214902  84.57214902
  92.79037009  85.94185254  84.57214902  92.79037009  99.63888764
  72.24481743  85.94185254  66.76600338  79.09333498  83.20244551
  77.72363147  74.98422445  91.42066658  87.31155605  79.09333498
  81.832742    73.61452094  92.79037009  62.65689285  84.57214902
  54.43867179  76.35392796  95.52977711  96.89948062  64.02659636
  77.72363147  83.20244551  84.57214902  72.24481743  88.68125956
  94.1600736   62.65689285  69.50541041  99.63888764  87.31155605
  80.46303849  74.98422445  88.68125956 103.74799818 107.85710871
  83.20244551  80.4

In [125]:
# Step 1: initialise the weights and the bias uniformly in [-1, 1).
# A locally seeded generator makes the starting point -- and therefore the
# cost trace printed below -- the same every time this cell runs.
rng = np.random.default_rng(42)
D = x.shape[1]
w = rng.uniform(-1, 1, size=D)
b = rng.uniform(-1, 1)

# Step 2: for every sample, visited in dataset order:
#   a) compute the output;
#   b) compute the error;
#   c) update the weights;
#   d) update the bias.
# The weight and the bias use separate learning rates.
wl_rate = 1e-7
bl_rate = 1e-2

for step in range(10_001):
    # Sum of squared errors over this pass; each error is measured with the
    # parameters as they were when that sample was visited.
    cost = 0
    for xn, yn in zip(x, y):
        y_pred = np.dot(xn, w) + b
        erro = yn - y_pred
        # erro is a scalar, so a plain product scales the sample vector.
        w = w + wl_rate * (erro * xn)
        b = b + bl_rate * erro
        cost += erro**2

    if step % 1000 == 0:
        print('step {0}: {1}'.format(step, cost))

print('w:', w)
print('b:', b)
# Predictions for the whole dataset with the final parameters.
print('y_pred: {0}'.format(np.dot(x, w) + b))

step 0: 248964.2976349774
step 1000: 3213.5094682473728
step 2000: 2725.411570399813
step 3000: 2641.786321687843
step 4000: 2628.037407162114
step 5000: 2626.0264626494964
step 6000: 2625.8444971991808
step 7000: 2625.8847011792955
step 8000: 2625.9220998651044
step 9000: 2625.9414089280567
step 10000: 2625.9501484679945
w: [1.36989521]
b: -157.89739408616484
y_pred: [ 98.27300984  84.57405776  88.68374339  84.57405776  84.57405776
  92.79342901  85.94395297  84.57405776  92.79342901  99.64290505
  72.24500089  85.94395297  66.76542005  79.09447693  83.20416255
  77.72458172  74.9847913   91.4235338   87.31384818  79.09447693
  81.83426734  73.6148961   92.79342901  62.65573443  84.57405776
  54.43636318  76.35468651  95.53321943  96.90311463  64.02562964
  77.72458172  83.20416255  84.57405776  72.24500089  88.68374339
  94.16332422  62.65573443  69.50521047  99.64290505  87.31384818
  80.46437214  74.9847913   88.68374339 103.75259067 107.8622763
  83.20416255  80.46437214  76.35468

#### Numpy com pré-processamento dos dados

In [135]:
# Rescale every feature column to the [-1, 1] range, then print the overall
# minimum and maximum as a sanity check.
minmax = MinMaxScaler(feature_range=(-1, 1))
x = x.astype(np.float64)
x = minmax.fit(x).transform(x)

print(np.min(x), np.max(x))

-1.0 1.0


In [129]:
# Fit scikit-learn's ordinary least squares model on the same data and show
# its coefficients and intercept (a reference to compare against the
# hand-written training loops in this notebook).
lr = LinearRegression().fit(x, y)

print('w:', lr.coef_)
print('b:', lr.intercept_)

w: [33.60164767]
b: 74.99636286981101


In [136]:
# Step 1: initialise the weights and the bias uniformly in [-1, 1).
# A locally seeded generator makes the starting point -- and therefore the
# cost trace printed below -- the same every time this cell runs.
rng = np.random.default_rng(42)
D = x.shape[1]
w = rng.uniform(-1, 1, size=D)
b = rng.uniform(-1, 1)

# Step 2: for every sample, visited in dataset order:
#   a) compute the output;
#   b) compute the error;
#   c) update the weights;
#   d) update the bias.
# A single learning rate is shared by the weights and the bias.
learning_rate_np = 1e-3

for step in range(1001):
    # Sum of squared errors over this pass; each error is measured with the
    # parameters as they were when that sample was visited.
    cost = 0
    for xn, yn in zip(x, y):
        y_pred = np.dot(xn, w) + b
        erro = yn - y_pred
        # erro is a scalar, so a plain product scales the sample vector.
        w = w + learning_rate_np * (erro * xn)
        b = b + learning_rate_np * erro
        cost += erro**2

    if step % 100 == 0:
        print('step {0}: {1}'.format(step, cost))

print('w:', w)
print('b:', b)
# Predictions for the whole dataset with the final parameters.
print('y_pred: {0}'.format(np.dot(x, w) + b))

step 0: 529388.0315617786
step 100: 3142.5059791454514
step 200: 2623.8670783539824
step 300: 2611.5458827247435
step 400: 2611.2517999114534
step 500: 2611.244575307423
step 600: 2611.2443663425192
step 700: 2611.2443556110484
step 800: 2611.2443544671482
step 900: 2611.24435430301
step 1000: 2611.2443542779947
w: [33.60180845]
b: 74.9389642686916
y_pred: [ 98.94025602  85.22523216  89.33973932  85.22523216  85.22523216
  93.45424647  86.59673455  85.22523216  93.45424647 100.3117584
  72.88171069  86.59673455  67.39570115  79.73922262  83.85372978
  78.36772023  75.62471546  92.08274409  87.96823693  79.73922262
  82.48222739  74.25321308  93.45424647  63.28119399  85.22523216
  55.05217968  76.99621785  96.19725125  97.56875363  64.65269638
  78.36772023  83.85372978  85.22523216  72.88171069  89.33973932
  94.82574886  63.28119399  70.13870592 100.3117584   87.96823693
  81.110725    75.62471546  89.33973932 104.42626556 108.54077272
  83.85372978  81.110725    76.99621785  81.1107

In [140]:
# Read ../data/notas.csv into a DataFrame and preview its first three rows.
provas = pd.read_csv('../data/notas.csv')
provas.iloc[:3]

Unnamed: 0,prova1,prova2,prova3,final
0,73,80,75,152
1,93,88,93,185
2,89,91,90,180


In [145]:
# Target: the 'final' column. Features: every remaining column.
y = provas.loc[:, 'final'].values
x = provas.drop(columns='final').values

In [146]:
# Show the dimensions of the feature matrix and the target vector.
print(x.shape, y.shape)

(25, 3) (25,)


In [151]:
# Rescale every feature column to the [-1, 1] range, then print the overall
# minimum and maximum as a sanity check.
minmax = MinMaxScaler(feature_range=(-1, 1))
x = x.astype(np.float64)
x = minmax.fit(x).transform(x)

print(np.min(x), np.max(x))

-1.0 1.0000000000000002


In [152]:
# Fit scikit-learn's ordinary least squares model on the same data and show
# its coefficients and intercept (a reference to compare against the
# hand-written training loop in the next cell).
lr = LinearRegression().fit(x, y)

print('w:', lr.coef_)
print('b:', lr.intercept_)

w: [ 8.72048636 14.1054877  26.26749487]
b: 150.65175754349872


In [262]:
# Step 1: initialise the weights and the bias uniformly in [-1, 1).
# A locally seeded generator makes the starting point -- and therefore the
# cost trace printed below -- the same every time this cell runs.
rng = np.random.default_rng(42)
D = x.shape[1]
w = rng.uniform(-1, 1, size=D)
b = rng.uniform(-1, 1)

# Step 2: for every sample, visited in dataset order:
#   a) compute the output;
#   b) compute the error;
#   c) update the weights;
#   d) update the bias.
# A single learning rate is shared by the weights and the bias.
learning_rate_np = 1e-2

for step in range(2001):
    # Sum of squared errors over this pass; each error is measured with the
    # parameters as they were when that sample was visited.
    cost = 0
    for xn, yn in zip(x, y):
        y_pred = np.dot(xn, w) + b
        erro = yn - y_pred
        # erro is a scalar, so a plain product scales the sample vector.
        w = w + learning_rate_np * (erro * xn)
        b = b + learning_rate_np * erro
        cost += erro**2

    if step % 500 == 0:
        print('step {0}: {1}'.format(step, cost))

print('w:', w)
print('b:', b)
# Predictions for the whole dataset with the final parameters.
print('y_pred: {0}'.format(np.dot(x, w) + b))

step 0: 498630.08959754824
step 500: 146.86000948261793
step 1000: 146.15414080249343
step 1500: 146.15066112265575
step 2000: 146.15066878557883
w: [ 8.72518951 14.1394923  26.32051346]
b: 150.71119383290605
y_pred: [152.67150063 185.2011281  181.89868545 199.89638911 139.20892486
 103.66277966 150.32362133 112.81326781 174.66050556 164.57742324
 143.46911243 142.27591007 186.6683133  152.46868054 151.30572548
 189.25435652 143.54441423 182.01521915 177.4071795  158.42323263
 176.68664103 174.76605466 167.78163742 150.69216867 191.32825299]
