In [1]:
# define a regression dataset
from sklearn.datasets import make_regression

# build a synthetic regression problem: 1000 rows, 10 inputs (2 informative)
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    noise=0.2,
    random_state=1,
)

# report the dimensions of the inputs and of the targets
print('%s %s' % (X.shape, y.shape))

(1000, 10) (1000,)


In [2]:
# linear regression model
from numpy.random import rand
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

# linear regression
def predict_row(row, coefficients):
    """Return the linear-regression prediction for a single row.

    The last entry of ``coefficients`` is the bias (intercept); the entry at
    index ``i`` is the weight applied to ``row[i]``.
    """
    # start from the bias term
    total = coefficients[-1]

    # accumulate each input scaled by its matching weight, in input order
    for position, value in enumerate(row):
        total += coefficients[position] * value
    return total

# use model coefficients to generate predictions for a dataset of rows
def predict_dataset(X, coefficients):
    """Predict every row of ``X`` with ``predict_row``.

    Returns a list holding one prediction per row, in the order the rows
    are iterated.
    """
    return [predict_row(sample, coefficients) for sample in X]

# seed NumPy's global generator so the random coefficients (and therefore the
# reported MSE) are identical on every run of this cell
from numpy.random import seed
seed(1)

# define dataset
X, y = make_regression(n_samples=1000, n_features=10, n_informative=2, noise=0.2, random_state=1)

# determine the number of coefficients: one weight per input column plus the bias
n_coeff = X.shape[1] + 1

# generate random coefficients
coefficients = rand(n_coeff)

# generate predictions for dataset
yhat = predict_dataset(X, coefficients)

# calculate model prediction error
score = mean_squared_error(y, yhat)
print('MSE: %f' % score)

MSE: 7296.390153


In [3]:
# optimize linear regression coefficients for regression dataset
from numpy.random import randn
from numpy.random import rand
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# linear regression
def predict_row(row, coefficients):
    """Compute the linear model output for one row of inputs.

    ``coefficients`` holds one weight per element of ``row`` followed by the
    bias as its final entry.
    """
    # the bias is stored last; use it as the starting value
    prediction = coefficients[-1]

    # add weight * input for every input, in order
    for index, feature in enumerate(row):
        prediction += coefficients[index] * feature
    return prediction

# use model coefficients to generate predictions for a dataset of rows
def predict_dataset(X, coefficients):
    """Apply ``predict_row`` to each row of ``X``.

    Returns the predictions as a list, one entry per row and in row order.
    """
    return [predict_row(sample, coefficients) for sample in X]

# objective function
def objective(X, y, coefficients):
    """Score ``coefficients`` by the mean squared error of their predictions.

    Predictions for every row of ``X`` are compared against ``y``; the
    resulting mean squared error is returned.
    """
    predictions = predict_dataset(X, coefficients)
    return mean_squared_error(y, predictions)

# hill climbing local search algorithm
def hillclimbing(X, y, objective, solution, n_iter, step_size):
    """Stochastic hill climbing that searches for a lower ``objective`` value.

    Starting from ``solution``, each of the ``n_iter`` iterations perturbs the
    current point with Gaussian noise scaled by ``step_size`` and keeps the
    perturbed point when its score is less than or equal to the current score.
    Every accepted move is printed as ``>iteration score``.

    Returns ``[best_point, best_score]``.
    """
    # score the starting point
    best, best_score = solution, objective(X, y, solution)

    for step in range(n_iter):
        # propose a neighbour of the current point
        noise = randn(len(best)) * step_size
        trial = best + noise
        trial_score = objective(X, y, trial)

        # reject anything that is not at least as good as the current point
        if not trial_score <= best_score:
            continue

        # accept the move and report progress
        best, best_score = trial, trial_score
        print('>%d %.5f' % (step, best_score))
    return [best, best_score]

# seed NumPy's global generator so the initial coefficients and the random
# steps taken by the hill climber are identical on every run of this cell
from numpy.random import seed
seed(1)

# define dataset
X, y = make_regression(n_samples=1000, n_features=10, n_informative=2, noise=0.2, random_state=1)

# split into train test sets (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

# define the total iterations
n_iter = 2000

# define the step size: the scale applied to the Gaussian noise of each step
step_size = 0.15

# determine the number of coefficients: one weight per input column plus the bias
n_coef = X.shape[1] + 1

# define the initial solution
solution = rand(n_coef)

# perform the hill climbing search
coefficients, score = hillclimbing(X_train, y_train, objective, solution, n_iter, step_size)
print('Done!')
print('Coefficients: %s' % coefficients)
print('Train MSE: %f' % (score))

# generate predictions for the test dataset
yhat = predict_dataset(X_test, coefficients)

# calculate the mean squared error on the held-out data
score = mean_squared_error(y_test, yhat)
print('Test MSE: %f' % (score))

>5 7378.57413
>6 7356.86041
>8 7356.65280
>9 7316.11649
>12 7294.33960
>13 7277.51223
>20 7276.45759
>21 7255.72844
>22 7254.81587
>23 7254.14810
>24 7227.67081
>26 7225.23028
>31 7223.66274
>32 7212.84607
>35 7207.47937
>40 7165.15904
>43 7101.18352
>45 7078.20664
>46 7059.61796
>47 7009.60745
>48 6981.69014
>50 6973.32747
>52 6929.38794
>56 6871.33980
>60 6860.89554
>63 6846.13446
>68 6835.47392
>70 6832.55287
>71 6805.17999
>75 6774.50037
>78 6759.46282
>79 6751.73044
>80 6728.92342
>83 6710.09460
>85 6709.39747
>87 6704.73461
>88 6663.02829
>90 6636.69759
>93 6634.82940
>95 6626.18580
>99 6604.15483
>100 6598.27106
>102 6592.43709
>104 6572.62780
>105 6521.18622
>112 6500.06122
>114 6485.00919
>116 6481.79072
>118 6467.51063
>119 6449.33841
>120 6443.88038
>121 6436.00870
>123 6411.57259
>124 6392.30145
>126 6351.02070
>127 6344.20463
>129 6287.83975
>130 6265.31320
>131 6264.14320
>134 6261.76430
>137 6216.06039
>140 6203.19404
>141 6201.59920
>143 6147.73211
>146 6142.60907
>148 

>1099 720.05102
>1100 719.28255
>1102 715.67672
>1104 706.36367
>1105 701.38071
>1106 694.88570
>1109 686.67586
>1110 678.48441
>1111 674.71372
>1112 664.30305
>1113 660.17013
>1116 654.75363
>1117 654.09347
>1119 651.80540
>1120 651.14868
>1121 645.10050
>1122 627.72966
>1127 625.47245
>1128 622.71429
>1130 621.74795
>1131 607.89596
>1133 604.41709
>1134 594.76361
>1135 594.19162
>1138 590.24272
>1140 588.38063
>1141 577.51150
>1142 575.30178
>1144 567.89622
>1147 563.50499
>1152 560.38727
>1153 555.16551
>1154 553.46982
>1155 544.16742
>1156 532.86996
>1157 532.08834
>1160 526.62482
>1162 523.91207
>1163 520.77670
>1164 519.01740
>1165 516.20067
>1169 511.78358
>1171 503.13948
>1172 501.72215
>1174 491.60056
>1176 479.56166
>1177 468.42913
>1178 459.56647
>1179 456.27247
>1182 445.14537
>1183 443.64814
>1184 438.09571
>1188 436.25422
>1189 431.26007
>1192 426.44020
>1195 422.63821
>1196 417.07897
>1197 406.23665
>1199 395.03621
>1201 394.61072
>1205 393.20172
>1209 393.03139
>1210 39

In [4]:
# define a binary classification dataset
from sklearn.datasets import make_classification

# build a synthetic classification problem: 1000 rows, 5 inputs
# (2 informative, 1 redundant)
X, y = make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=2,
    n_redundant=1,
    random_state=1,
)

# report the dimensions of the inputs and of the labels
print('%s %s' % (X.shape, y.shape))

(1000, 5) (1000,)


In [5]:
# logistic regression function for binary classification
from math import exp
from numpy.random import rand
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# logistic regression
def predict_row(row, coefficients):
    """Return the logistic-regression output for a single row.

    The last entry of ``coefficients`` is the bias; the entry at index ``i``
    is the weight applied to ``row[i]``. The weighted sum is passed through
    the logistic function.

    The logistic function is evaluated so that ``exp`` is only ever called
    with a non-positive argument. A strongly negative weighted sum therefore
    yields 0.0 instead of raising ``OverflowError``.
    """
    # add the bias, the last coefficient
    result = coefficients[-1]

    # add the weighted input
    for i in range(len(row)):
        result += coefficients[i] * row[i]

    # logistic function, standard form: safe because -result <= 0 here
    if result >= 0:
        return 1.0 / (1.0 + exp(-result))

    # for negative sums use the equivalent form exp(z) / (1 + exp(z)), which
    # underflows to 0.0 rather than overflowing
    scaled = exp(result)
    return scaled / (1.0 + scaled)

# use model coefficients to generate predictions for a dataset of rows
def predict_dataset(X, coefficients):
    """Run ``predict_row`` over every row of ``X``.

    Returns a list with one model output per row, in row order.
    """
    return [predict_row(sample, coefficients) for sample in X]

# seed NumPy's global generator so the random coefficients (and therefore the
# reported accuracy) are identical on every run of this cell
from numpy.random import seed
seed(1)

# define dataset
X, y = make_classification(n_samples=1000, n_features=5, n_informative=2, n_redundant=1, random_state=1)

# determine the number of coefficients: one weight per input column plus the bias
n_coeff = X.shape[1] + 1

# generate random coefficients
coefficients = rand(n_coeff)

# generate predictions for dataset
yhat = predict_dataset(X, coefficients)

# round predictions to labels (loop variable named so it cannot be confused
# with the label array y)
yhat = [round(probability) for probability in yhat]

# calculate accuracy
score = accuracy_score(y, yhat)
print('Accuracy: %f' % score)

Accuracy: 0.696000


In [6]:
# optimize logistic regression model with a stochastic hill climber
from math import exp
from numpy.random import randn
from numpy.random import rand
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# logistic regression
def predict_row(row, coefficients):
    """Return the logistic-regression output for a single row.

    ``coefficients`` holds one weight per element of ``row`` followed by the
    bias as its final entry. The weighted sum is passed through the logistic
    function.

    ``exp`` is only ever called with a non-positive argument, so a strongly
    negative weighted sum yields 0.0 instead of raising ``OverflowError``.
    """
    # add the bias, the last coefficient
    result = coefficients[-1]
    # add the weighted input
    for i in range(len(row)):
        result += coefficients[i] * row[i]
    # logistic function, standard form: safe because -result <= 0 here
    if result >= 0:
        return 1.0 / (1.0 + exp(-result))
    # equivalent form for negative sums; underflows to 0.0 rather than overflowing
    scaled = exp(result)
    return scaled / (1.0 + scaled)

# use model coefficients to generate predictions for a dataset of rows
def predict_dataset(X, coefficients):
    """Collect the ``predict_row`` output for each row of ``X``.

    Returns the outputs as a list, one entry per row and in row order.
    """
    return [predict_row(sample, coefficients) for sample in X]

# objective function
def objective(X, y, coefficients):
    """Score ``coefficients`` by the classification accuracy they achieve.

    Model outputs for every row of ``X`` are rounded to labels and compared
    against ``y`` with ``accuracy_score``; that score is returned.
    """
    probabilities = predict_dataset(X, coefficients)
    # round each model output to its label
    labels = [round(probability) for probability in probabilities]
    return accuracy_score(y, labels)

# hill climbing local search algorithm
def hillclimbing(X, y, objective, solution, n_iter, step_size):
    """Stochastic hill climbing that searches for a higher ``objective`` value.

    Starting from ``solution``, each of the ``n_iter`` iterations perturbs the
    current point with Gaussian noise scaled by ``step_size`` and keeps the
    perturbed point when its score is greater than or equal to the current
    score. Every accepted move is printed as ``>iteration score``.

    Returns ``[best_point, best_score]``.
    """
    # score the starting point
    best, best_score = solution, objective(X, y, solution)
    for step in range(n_iter):
        # propose a neighbour of the current point
        noise = randn(len(best)) * step_size
        trial = best + noise
        trial_score = objective(X, y, trial)
        # reject anything that is not at least as good as the current point
        if not trial_score >= best_score:
            continue
        # accept the move and report progress
        best, best_score = trial, trial_score
        print('>%d %.5f' % (step, best_score))
    return [best, best_score]

# seed NumPy's global generator so the initial coefficients and the random
# steps taken by the hill climber are identical on every run of this cell
from numpy.random import seed
seed(1)

# define dataset
X, y = make_classification(n_samples=1000, n_features=5, n_informative=2, n_redundant=1, random_state=1)

# split into train test sets (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

# define the total iterations
n_iter = 2000

# define the step size: the scale applied to the Gaussian noise of each step
step_size = 0.1

# determine the number of coefficients: one weight per input column plus the bias
n_coef = X.shape[1] + 1

# define the initial solution
solution = rand(n_coef)

# perform the hill climbing search
coefficients, score = hillclimbing(X_train, y_train, objective, solution, n_iter, step_size)
print('Done!')
print('Coefficients: %s' % coefficients)
print('Train Accuracy: %f' % (score))

# generate predictions for the test dataset
yhat = predict_dataset(X_test, coefficients)

# round predictions to labels (loop variable named so it cannot be confused
# with the label array y)
yhat = [round(probability) for probability in yhat]

# calculate accuracy
score = accuracy_score(y_test, yhat)
print('Test Accuracy: %f' % (score))

>0 0.73582
>2 0.75224
>3 0.78657
>5 0.79104
>12 0.80149
>14 0.81791
>15 0.82090
>16 0.82388
>21 0.82388
>31 0.82985
>54 0.82985
>57 0.82985
>58 0.83433
>74 0.84328
>78 0.84925
>79 0.85075
>80 0.85224
>90 0.85522
>92 0.85672
>184 0.85672
>665 0.85821
>770 0.85821
>792 0.85821
>972 0.85821
>1014 0.85821
>1024 0.85821
>1025 0.85821
>1041 0.85970
>1237 0.85970
>1553 0.85970
>1983 0.85970
Done!
Coefficients: [ 0.32353165  0.22831124  2.39979865 -0.80711649 -0.70372284 -0.00628508]
Train Accuracy: 0.859701
Test Accuracy: 0.839394
