In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.cluster import DBSCAN
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import KMeans
from sklearn import metrics
import math
import time
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [2]:
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin


class MaxScaler(BaseEstimator, TransformerMixin):
    """Scale every feature by the column-wise maximum learned in ``fit``.

    The class is a transformer, so it derives from ``TransformerMixin``
    (which supplies ``fit_transform``) rather than ``ClassifierMixin``.

    Parameters
    ----------
    name : str, default "MaxScaler"
        Free-form label stored on the instance.

    Attributes
    ----------
    max_elements : array-like
        Per-column maxima computed by ``fit``. Columns whose maximum is
        exactly zero are given a divisor of 1 so that ``transform`` does not
        divide by zero.
    """

    def __init__(self, name="MaxScaler"):
        self.name = name

    def fit(self, X, y=None):
        """Learn the per-column maxima of ``X``.

        Parameters
        ----------
        X : array-like
            Training data; maxima are taken along axis 0.
        y : ignored
            Accepted only so the scaler can be used where the caller passes
            targets to ``fit`` (for example inside a scikit-learn Pipeline).

        Returns
        -------
        self
        """
        max_elements = np.amax(X, axis=0)
        # Replace zero maxima by 1 so such columns pass through unchanged
        # instead of turning into NaN/inf in transform().
        if isinstance(max_elements, pd.Series):
            max_elements = max_elements.where(max_elements != 0, 1)
        else:
            max_elements = np.where(max_elements == 0, 1, max_elements)
        self.max_elements = max_elements
        return self

    def transform(self, X):
        """Return ``X`` divided element-wise by the maxima learned in ``fit``."""
        return X / self.max_elements

In [3]:
# Read the train and test splits; neither CSV file has a header row.
df_train = pd.read_csv('procom_train.csv', header=None)
df_test = pd.read_csv('procom_test.csv', header=None)

# Number of training rows.
train_size = df_train.shape[0]

# All columns except the last are inputs; the last column is the target.
train_X, train_y = df_train.iloc[:, :-1], df_train.iloc[:, -1]
test_X, test_y = df_test.iloc[:, :-1], df_test.iloc[:, -1]


In [4]:
# Fit the min-max scaler on the training inputs only, then apply that same
# fitted scaler to both splits and wrap the results back into DataFrames.
scaler = MinMaxScaler().fit(train_X)
train_X, test_X = (
    pd.DataFrame(scaler.transform(split)) for split in (train_X, test_X)
)

In [6]:
# Polynomial expansion of the inputs (degree 2).
# The transformer is fitted on the training split only; the test split is
# expanded with transform() so that test data never refits the transformer.
poly = PolynomialFeatures(2)
train_X = pd.DataFrame(poly.fit_transform(train_X))
test_X = pd.DataFrame(poly.transform(test_X))

In [7]:
# Preview the first rows of the training feature frame.
train_X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,68,69,70,71,72,73,74,75,76,77
0,1.0,0.967454,0.147698,1.0,0.0,0.0,0.0,0.0,0.0,0.651757,...,0.0,0.0,0.0,0.0,0.424787,0.420021,0.405021,0.415309,0.400476,0.386173
1,1.0,0.970619,0.142217,0.0,0.0,0.4,0.0,0.0,0.0,0.760383,...,0.0,0.0,0.0,0.0,0.578183,0.557614,0.418211,0.537778,0.403333,0.3025
2,1.0,0.973639,0.136826,0.0,0.0,0.2,0.0,0.0,0.0,0.72524,...,0.0,0.0,0.0,0.0,0.525973,0.628541,0.625519,0.751111,0.7475,0.743906
3,1.0,0.976511,0.131527,0.0,0.0,0.0,0.0,0.0,0.0,0.805112,...,0.0,0.0,0.0,0.0,0.648205,0.769329,0.148083,0.913086,0.175754,0.03383
4,1.0,0.979232,0.12632,0.0,0.0,0.8,0.0,1.0,0.0,0.747604,...,0.0,0.0,0.0,0.0,0.558911,0.481789,0.168211,0.415309,0.145,0.050625


In [10]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Both inputs are converted to NumPy arrays. The absolute error of each
    element is taken relative to the matching ``y_true`` value, the results
    are averaged, and the average is multiplied by 100. Zero entries in
    ``y_true`` are not handled specially.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = np.abs((actual - predicted) / actual)
    return np.mean(relative_error) * 100

def sum_of_squared_errors(y_true, y_pred, root=True):
    """Return the (root of the) sum of squared errors between two sequences.

    Despite its name, this function has always returned the *square root* of
    the sum of squared errors. That behaviour is kept as the default so
    existing callers see the same numbers; pass ``root=False`` to obtain the
    plain sum of squared errors.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values; both are converted to NumPy arrays.
    root : bool, default True
        If True, return ``sqrt(sum((y_true - y_pred) ** 2))``.
        If False, return ``sum((y_true - y_pred) ** 2)``.

    Returns
    -------
    float
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    squared_error_sum = np.sum((y_true - y_pred) ** 2)
    if root:
        return math.sqrt(squared_error_sum)
    return float(squared_error_sum)
  
def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return the summed absolute error divided by the summed ``y_pred + y_true``.

    Both inputs are converted to NumPy arrays. The result is a plain ratio;
    it is not multiplied by 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    total_abs_error = np.sum(np.abs(actual - predicted))
    total_magnitude = np.sum(predicted + actual)
    return total_abs_error / total_magnitude

def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``.

    Both inputs are converted to NumPy arrays first, like the other metric
    helpers in this notebook. This lets plain lists be passed and makes the
    comparison positional, so two pandas Series with different indexes are
    not aligned by label (which would yield NaN).

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values of equal length.

    Returns
    -------
    numpy.float64
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.sqrt(((y_pred - y_true) ** 2).mean())

def mean_absolute_error(y_true, y_pred):
    """Return the average absolute difference between ``y_true`` and ``y_pred``.

    Both inputs are converted to NumPy arrays before the element-wise
    difference is taken.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    abs_residuals = np.abs(actual - predicted)
    return np.mean(abs_residuals)
  

In [11]:
from sklearn.linear_model import SGDRegressor

# Squared-error loss is SGDRegressor's default, so `loss` is left unset: the
# old spelling 'squared_loss' was renamed to 'squared_error' in newer
# scikit-learn releases and is no longer accepted there.
# A fixed random_state makes the stochastic fit, and therefore the metrics
# printed below, repeatable from run to run.
sgd = SGDRegressor(alpha=0.0001, random_state=42)

# Time only the fit itself.
start_time = time.time()
sgd.fit(train_X, train_y)
print("--- %s seconds ---" % (time.time() - start_time))

# Predictions on both splits, used for the error reports below.
train_pred_y = sgd.predict(train_X)
pred_y = sgd.predict(test_X)

print('Training erros:')
print("MAPE: " + str(mean_absolute_percentage_error(train_y, train_pred_y)))
print("MSE: " + str(mean_squared_error(train_y, train_pred_y)))
print("RMSE: " + str(root_mean_squared_error(train_y, train_pred_y)))

print("R2: " + str(r2_score(train_y, train_pred_y)))
print('Testing errors:')
print("MAPE: " + str(mean_absolute_percentage_error(test_y, pred_y)))
print("RMSE: " + str(root_mean_squared_error(test_y, pred_y)))
print("R2: " + str(r2_score(test_y, pred_y)))

--- 0.05828094482421875 seconds ---
Training erros:
MAPE: 3.5733690244927536
RMSE: 18618.606081478603
R2: 0.9645152922684502
Testing errors:
MAPE: 3.523815638291092
RMSE: 20345.565047352
R2: 0.956305443125463




In [12]:
# Report the mean squared error between the training targets and the
# training predictions.
print("MSE: " + str(mean_squared_error(train_y, train_pred_y)))

MSE: 346652492.4172721


In [8]:
#np.savetxt('C:/Users/User/Desktop/gtm/MPL.csv', pred_y, delimiter=',', fmt='%1.5f')

In [None]:
# Draw two distinct samples without replacement via np.random.choice.
# NOTE(review): number_of_rows is not assigned in any cell above this one —
# confirm it is defined before this cell runs, otherwise this raises NameError.
# Presumably it is an integer row count (e.g. of the training data); verify.
random_indices = np.random.choice(number_of_rows, size=2, replace=False)