# Напишем класс логистической регрессии

In [1]:
import numpy as np
import torch as T
import pandas as pd
import time
pu = 'cpu'

In [2]:
class LogRec:
    
    def __init__(self, num):
        self.n = num
        self.b = np.array(1)
        
        low = -0.01
        high = 0.01
        self.w = T.rand(n, dtype=T.float32, requires_grad=True).to(pu)
        self.w = (high - low) * self.w + low
        self.w.grad = T.zeros(n, device = pu)
        self.w.retain_grad()
        
        self.b = T.zeros(1, dtype=T.float32, requires_grad=True).to(pu)
        self.b.grad = T.zeros(1, device = pu)
        self.b.retain_grad()
    
    def forward(x_t, w, b):
        z = T.dot(x_t, w).reshape(1)
        z += b
        p = 1 / (1 + T.exp(-z))
        return p
    
    def train(self, train_x, train_y, num, lrn_rate, epochs, reg = 0, verbose = 0):
        indexes = np.arange(num)
        
        for epoch in range(0, epochs): #сколько у нас шаго эволюции
            #создание tot_loss, тензор с нулевыми начениями
            tot_loss = T.zeros(1, dtype=T.float32, requires_grad=True).to(pu)
            tot_loss.grad = T.zeros(1, device = pu)
            tot_loss.retain_grad()

            np.random.shuffle(indexes) #перетасовываем индексы
            for ii in range(len(indexes)):
                i = indexes[ii]
                x_t = train_x[i]
                target = train_y[i]
                #out = self.forward(x_t,self.w,self.b)
                out = 1/ (1 + T.exp(self.predict(x_t))) #вычисляем вероятность
                loss = (out - target).pow(2).sum()  # считаем нсколько мы ошиблись
                tot_loss = loss + tot_loss

            if reg == 1:
                tot_loss = tot_loss + T.norm(self.w, p=1) #регуляризация L1
            elif reg == 2:
                tot_loss = tot_loss + T.norm(self.w, p=2) #регуляризация L2

            tot_loss.backward(retain_graph=True)  # Вычисляем градиенты

            # обновляем градиент
            self.w.data += -1 * lrn_rate * self.w.grad.data
            self.b.data += -1 * lrn_rate * self.b.grad.data

            # обнуляем
            self.w.grad = T.zeros(n, device = pu) 
            self.b.grad = T.zeros(1, device = pu)

            if epoch % verbose == 0:
                print("epoch = %4d " % epoch, end="")
                print("   loss = %6.4f" % (tot_loss / num))
    
    def predict(self, x):
        return T.matmul(x, self.w) + self.b

    def res(self, name=''):
        print("Coef:", name)
        print(self.w.detach().numpy(), self.b.detach().numpy())

# Погода в шанхае

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
data = pd.read_csv('Shanghai_HMT_2010.csv')
data = data.dropna()

In [6]:
for i in ['day', 'hour', 'PM_Jingan', 'PM_US Post' , 'PM_Xuhui', 'Iws', 'precipitation', 'Iprec', 'No', 'year', 'cbwd']:
    data = data.drop(i, axis=1)

In [7]:
data -= data.mean()
data /= data.std()
data

Unnamed: 0,month,season,DEWP,HUMI,PRES,TEMP
26304,-1.557791,1.347979,-1.704071,0.018112,0.774887,-1.961308
26305,-1.557791,1.347979,-1.704071,0.018112,0.774887,-1.961308
26307,-1.557791,1.347979,-1.598843,0.317001,0.774887,-1.961308
26308,-1.557791,1.347979,-1.598843,0.628044,0.774887,-2.072632
26309,-1.557791,1.347979,-1.598843,0.628044,0.774887,-2.072632
...,...,...,...,...,...,...
52578,1.607977,1.347979,-1.388386,-0.450938,2.114730,-1.404688
52579,1.607977,1.347979,-1.283157,-0.195695,2.114730,-1.404688
52580,1.607977,1.347979,-1.072701,0.088277,2.114730,-1.293365
52581,1.607977,1.347979,-1.072701,0.088277,2.226384,-1.293365


In [8]:
data['PRES'] = data['PRES'].apply(lambda x: 1 if x > 0 else 0)

In [9]:
train_x = T.tensor(data.drop(['PRES'], axis=1).to_numpy(), dtype=T.float32).to(pu)
train_y = T.tensor(data['PRES'].to_numpy(), dtype=T.long).to(pu)

In [10]:
num = int(train_y.size()[0]) # число данных
n = 5 # число фич
res = LogRec(n)
res.train(train_x, train_y, num, 0.0005, 100, reg=0, verbose=10)
res.res('without reg')

epoch =    0    loss = 0.2511
epoch =   10    loss = 0.0640
epoch =   20    loss = 0.0631
epoch =   30    loss = 0.0624
epoch =   40    loss = 0.0618
epoch =   50    loss = 0.0615
epoch =   60    loss = 0.0614
epoch =   70    loss = 0.0614
epoch =   80    loss = 0.0614
epoch =   90    loss = 0.0613
Coef: without reg
[-1.6048833  -0.80953485  2.1210449   0.26092535  2.5522377 ] [-0.41436666]


# Что говорит sklearn

In [11]:
from sklearn.linear_model import LogisticRegression
res3 = LogisticRegression()
res3.fit(data.drop(['PRES'], axis=1).to_numpy(), data['PRES'].to_numpy())

LogisticRegression()

In [12]:
print(res3.coef_)

[[ 1.65479326  0.75872407 -0.62277762 -1.12085302 -4.15814051]]
