In [None]:
# Run the `lscpu` shell command through IPython's `!` escape to show the CPU
# details of the machine executing this notebook.
!lscpu

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

In [None]:
# Load the UCI housing data. The file is whitespace-delimited and has no header
# row, so the 14 column names are supplied explicitly via `names`.
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data",
    header=None,
    names=[
        "CRIM",
        "ZN",
        "INDUS",
        "CHAS",
        "NOX",
        "RM",
        "AGE",
        "DIS",
        "RAD",
        "TAX",
        "PTRATIO",
        "B",
        "LSTAT",
        "MEDV",
    ],
    # Raw string: "\s" is not a valid Python escape sequence, so a plain
    # literal makes newer interpreters emit an invalid-escape warning.
    sep=r"\s+",
)
df.head()

In [None]:
# Simple regression setup: keep the rows labelled 0..99 (a .loc slice includes
# its end label), with RM as the single feature and MEDV as the target.
train_rows = df.loc[:99]
X_train = train_rows[["RM"]]
y_train = train_rows["MEDV"]
print("X_train:", X_train.head(3))
print("y_train:", y_train.head(3))

In [None]:
# Linear regression
# Fit the model on the training data, then show its hyper-parameters.
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)
model.get_params()

In [None]:
# Predicted values for the training inputs
model.predict(X_train)

In [None]:
# Visualize the training data together with the fitted regression line.
plt.figure(figsize=(8, 4))
X = X_train.values.flatten()
y = y_train.values

# Evaluate the model on an evenly spaced grid over the observed RM range.
# np.linspace includes the right endpoint, which np.arange with a float step
# does not guarantee.
X_plt = np.linspace(X.min(), X.max(), 200)[:, np.newaxis]
# The model was fitted on a DataFrame, so hand predict() a DataFrame with the
# same column names; a bare ndarray makes scikit-learn warn about feature names.
y_plt = model.predict(pd.DataFrame(X_plt, columns=X_train.columns))

plt.scatter(X, y, color="blue", label="data")
plt.plot(X_plt, y_plt, color="red", label="LinearRegression")
# The target column is named MEDV (the original label misspelled it as MDEV).
plt.ylabel("Price in $1000s [MEDV]")
plt.xlabel("average number of rooms [RM]")
plt.title("Boston house-prices")
plt.legend(loc="upper right")
plt.show()

In [None]:
# Fitted parameters of the simple regression: slope (w1) and intercept (w0).
slope, intercept = model.coef_[0], model.intercept_
print("傾き w1:", slope)
print("切片 w0:", intercept)

---

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, root_mean_squared_error

In [None]:
# Load the UCI housing data. The file is whitespace-delimited and has no header
# row, so the 14 column names are supplied explicitly via `names`.
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data",
    header=None,
    names=[
        "CRIM",
        "ZN",
        "INDUS",
        "CHAS",
        "NOX",
        "RM",
        "AGE",
        "DIS",
        "RAD",
        "TAX",
        "PTRATIO",
        "B",
        "LSTAT",
        "MEDV",
    ],
    # Raw string: "\s" is not a valid Python escape sequence, so a plain
    # literal makes newer interpreters emit an invalid-escape warning.
    sep=r"\s+",
)
df.head()

In [None]:
# Multiple regression: training, prediction and evaluation.
# Every column except MEDV is a feature; MEDV is the target.
y = df["MEDV"]
X = df.drop(columns=["MEDV"])
X.head()

In [None]:
# Split into training and test data: 20% held out, shuffled with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)
# Report the shape of each of the four splits.
for label, part in (
    ("X_trainの形状:", X_train),
    ("y_trainの形状:", y_train),
    ("X_testの形状:", X_test),
    ("y_testの形状:", y_test),
):
    print(label, part.shape)

In [None]:
# Standardize the features.
from sklearn.preprocessing import StandardScaler

# The first 13 columns of df are the features; the 14th (MEDV) is the target.
num_cols = df.columns[:13]
# The scaler is fitted on the training split only and then applied to both
# splits, so test-set statistics never enter the transformation.
scaler = StandardScaler().fit(X_train[num_cols])
for split in (X_train, X_test):
    split[num_cols] = scaler.transform(split[num_cols])
display(X_train.head(2))

In [None]:
# Fit the multiple regression model, then show its hyper-parameters.
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)
model.get_params()

In [None]:
# Predict on the held-out test set and report the root mean squared error.
y_test_pred = model.predict(X_test)
rmse_test = root_mean_squared_error(y_test, y_test_pred)
print("RMSE test: %0.2f" % rmse_test)

In [None]:
# Summary statistics of the test-set target, for putting the RMSE in context
y_test.describe()

In [None]:
# Fitted parameters, used to interpret the predicted values.
weights, bias = model.coef_, model.intercept_
print("回帰係数 w = [w1, w2, ..., w13]:", weights)
print("定数項 w0:", bias)

In [None]:
# Show the feature names as text (same order as the regression coefficients)
X.columns

In [None]:
# Bar chart of the regression coefficients, ordered from largest to smallest.
importances = model.coef_
indices = np.argsort(importances)[::-1]  # argsort is ascending; reverse it
positions = range(X.shape[1])

fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title("Regression coefficient")
ax.bar(positions, importances[indices])
ax.set_xticks(positions)
ax.set_xticklabels(X.columns[indices], rotation=90)
plt.show()

In [None]:
# All predicted values for the test set
y_test_pred

In [None]:
# Predicted value for the 15th test sample (zero-based index 14)
y_test_pred[14]

In [None]:
# Feature vector of the 15th test sample (zero-based index 14).
x_15 = X_test.values[14]
print("15件目の特徴量 X= [x1, x2, ..., x13]:", x_15)

In [None]:
# Verify the 15th prediction by hand: sum of coefficient * feature products
# plus the intercept.
x_15 = X_test.values[14]
(model.coef_ * x_15).sum() + model.intercept_