In [1]:
import pandas as pd
import numpy as np
from lreg import LogisticRegression as lr
from sklearn.preprocessing import LabelEncoder
from sklearn import datasets
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import time

In [2]:
iris = datasets.load_iris()
df = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                     columns= iris['feature_names'] + ['target'])
idf = df[df.target.isin([1,2])]

In [3]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2.0
146,6.3,2.5,5.0,1.9,2.0
147,6.5,3.0,5.2,2.0,2.0
148,6.2,3.4,5.4,2.3,2.0


In [4]:

scaler = StandardScaler()

cols_to_norm = [
'sepal length (cm)',
'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)'
 ]
# X = idf.copy()

# idf.hist(column='sepal length (cm)',bins=100) # before scaling

X = pd.DataFrame(scaler.fit_transform(idf[cols_to_norm]),columns=cols_to_norm)
# dff.hist(column='sepal length (cm)',bins=100) # after scaling

X = X.apply(pd.to_numeric)

In [5]:
le = LabelEncoder()
le.fit( idf['target'] )
y = pd.Series( data = le.transform( idf['target'] ) )

test_s = 50
train_s = 50

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_s, train_size=train_s, random_state=42)

In [6]:
N, M = X.shape
rand_ind = np.random.randint(0,M)
Xn = X.to_numpy()
Xn[:,rand_ind]

array([-0.25077906, -0.49425387, -0.00730424, -1.10294091, -0.37251647,
       -0.49425387, -0.25077906, -1.95510276, -0.37251647, -1.22467832,
       -1.71162795, -0.8594661 , -1.10294091, -0.25077906, -1.58989054,
       -0.61599128, -0.49425387, -0.9812035 , -0.49425387, -1.22467832,
       -0.12904165, -1.10294091, -0.00730424, -0.25077906, -0.73772869,
       -0.61599128, -0.12904165,  0.11443316, -0.49425387, -1.71162795,
       -1.34641572, -1.46815313, -1.22467832,  0.23617057, -0.49425387,
       -0.49425387, -0.25077906, -0.61599128, -0.9812035 , -1.10294091,
       -0.61599128, -0.37251647, -1.10294091, -1.95510276, -0.8594661 ,
       -0.8594661 , -0.8594661 , -0.73772869, -2.32031498, -0.9812035 ,
        1.33180724,  0.23617057,  1.21006983,  0.84485761,  1.08833242,
        2.06223168, -0.49425387,  1.69701946,  1.08833242,  1.45354464,
        0.23617057,  0.47964538,  0.7231202 ,  0.11443316,  0.23617057,
        0.47964538,  0.7231202 ,  2.18396909,  2.4274439 ,  0.11

In [7]:
model = lr()
model.fit( X_train, y_train, lr=0.01, max_iter=1e4, method='gd' )
accuracy_score(y_test,  model.predict(X_test).round())

0.92

In [8]:
model = lr()
model.fit( X_train, y_train, lr=0.01, max_iter=1e4, method='sgd' )
accuracy_score(y_test,  model.predict(X_test).round())

0.94

In [9]:
model = lr()
start = time.time()
model.fit( X_train, y_train, lr=0.01, max_iter=1e4, method='momentum', hist_w=0. )
print(time.time() - start)

print('\nscore:')
accuracy_score(y_test,  np.around(model.predict(X_test)))

5.206702470779419

score:


0.92

In [10]:
model = lr()
start = time.time()
model.fit( X_train, y_train, lr=0.01, max_iter=1e4, method='rmsprop', hist_w=0. )
print(time.time() - start)

print('\nscore:')
accuracy_score(y_test,  np.around(model.predict(X_test)))

5.121544361114502

score:


0.9

In [11]:
model = lr()
start = time.time()
model.fit( X_train, y_train, lr=0.01, max_iter=1e4, method='nag', hist_w=0. )
print(time.time() - start)

print('\nscore:')
accuracy_score(y_test,  np.around(model.predict(X_test)))

7.0739898681640625

score:


0.92

In [12]:
# для оценки адекватности оценки - метод, который возвращает рандомный результат
model = lr()
start = time.time()
model.fit( X_train, y_train, lr=0.01, max_iter=1e4, method='not-a-real-method' )
print(time.time() - start)

print('\nscore:')
accuracy_score(y_test,  np.around(model.predict(X_test)))

0.002000093460083008

score:


0.7

In [13]:
# Контрольный расчет: проверить, что ничего основного не сломано - numply_ml без изменений, sgd
from lreg_numpyml_pure import LogisticRegression as lr_p
model = lr_p()
start = time.time()
model.fit( X_train, y_train, lr=0.01, max_iter=1e4)
print(time.time() - start)

print('\nscore:')
accuracy_score(y_test,  np.around(model.predict(X_test)))

5.429272413253784

score:


0.92