In [None]:
import pandas
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
def regression_coef(model, X, y):
    """Build a coefficient table with two-sided p-values for a fitted linear model.

    The p-values come from the classical OLS t-test: each estimate is divided
    by its standard error, and the statistic is compared with a Student t
    distribution with ``n - k`` degrees of freedom, where ``n`` is the number
    of rows in ``X`` and ``k`` is the number of estimated parameters
    (predictors plus the intercept).

    Parameters
    ----------
    model : object
        Fitted single-target linear model exposing ``intercept_``, ``coef_``
        and ``predict(X)``.
    X : pandas.DataFrame
        Numeric predictor matrix; its column names label the output rows.
        For the standard errors to be meaningful this should be the data the
        model was fitted on.
    y : array-like of shape (n_samples,)
        Observed target values aligned with the rows of ``X``.

    Returns
    -------
    pandas.DataFrame
        Columns ``predictor``, ``coef`` and ``pvalue``; the first row is the
        intercept, followed by one row per column of ``X``. The p-values are
        NaN when there are no residual degrees of freedom (``n <= k``).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the design matrix is exactly singular (perfectly collinear columns).
    """
    coef = pandas.DataFrame({
        'predictor': ['intercept'] + X.columns.tolist(),
        'coef': [model.intercept_] + model.coef_.tolist(),
    })

    n_obs = len(X)
    # Design matrix with a leading column of ones for the intercept.
    design = np.column_stack([np.ones(n_obs), np.asarray(X, dtype=float)])
    b = np.append(model.intercept_, model.coef_)

    # Residual degrees of freedom: the intercept is an estimated parameter
    # too, so subtract the full width of the design matrix, not X.shape[1].
    dof = n_obs - design.shape[1]

    residuals = np.asarray(y, dtype=float) - model.predict(X)
    mse = np.sum(residuals ** 2, axis=0) / float(dof)

    # Var(b) = sigma^2 * diag((X'X)^-1)
    var_b = mse * np.linalg.inv(design.T @ design).diagonal()
    t_stats = b / np.sqrt(var_b)

    # The survival function avoids the cancellation in 1 - cdf for small p.
    coef['pvalue'] = 2 * stats.t.sf(np.abs(t_stats), dof)
    return coef

In [None]:
# Read the semicolon-delimited dataset and preview its first rows.
data = pandas.read_csv(
    'auto_work.csv',
    sep=';',
)
data.head(n=5)

In [None]:
# Target is the 'price' column; the predictors are every remaining column
# except 'name'.
y = data['price']
X = data.drop(columns=['price', 'name'])

In [None]:
# Pairwise Pearson correlation between predictors (a quick collinearity check).
X.corr(method='pearson')

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Fit ordinary least squares on the training split; fit() returns the
# estimator itself, so the bare name below still displays it as cell output.
model = LinearRegression().fit(X_train, y_train)
model

In [None]:
# Coefficient of determination (R^2) on the training split.
model.score(X=X_train, y=y_train)

In [None]:
# Predictions for both splits, reused by the residual plot and the metrics.
y_train_pred, y_test_pred = (
    model.predict(features) for features in (X_train, X_test)
)

In [None]:
# Coefficient table with p-values. The standard errors are computed from the
# training split the model was fitted on, so they describe the same sample
# as the estimates and are not affected by the held-out test rows.
regression_coef(model, X_train, y_train)

In [None]:
# Residuals (prediction minus actual) against predicted value for both splits.
fig, ax = plt.subplots()
ax.set_xlabel('Предсказание')
ax.set_ylabel('Остатки')
ax.scatter(y_train_pred, y_train_pred - y_train,
           c='blue', marker='o', label='Обучение')
ax.scatter(y_test_pred, y_test_pred - y_test,
           c='lightgreen', marker='s', label='Тест')
ax.legend(loc='upper right')
ax.set_xlim([0, 50000])

In [None]:
# Mean squared and mean absolute error on each split.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

print('MSE train: {:.3f}, test: {:.3f}'.format(mse_train, mse_test))
print('MAE train: {:.3f}, test: {:.3f}'.format(mae_train, mae_test))