<a href="https://colab.research.google.com/github/ks-yahagi/EU_M_Math-Repository/blob/main/Chap08_text_Cm_All_Ex_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#データ加工・処理・分析ライブラリ
import numpy as np
import numpy.random as random
import scipy as sp
from pandas import Series, DataFrame
import pandas as pd

#可視化ライブラリ
import matplotlib.pyplot as plt
import matplotlib as mp1
import seaborn as sns
%matplotlib inline

#機械学習ライブラリ
import sklearn

#小数点第3位まで表示
%precision 3

'%.3f'

In [2]:
#インポート
import requests, zipfile
import io

#自動車価格データを取得
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data'
res = requests.get(url).content

#取得したデータをDataFrameオブジェクトとして読み込み
auto = pd.read_csv(io.StringIO(res.decode('utf-8')),header=None)

#データの列にラベルを設定
auto.columns = ['symboling','normalized-losses','make','fuel-type','aspiration','num-of-doors',
                'body-style','drive-weels','engine-location','wheel-base','length','width','height',
                'curb-weight','engine-type','num-of-cylinders','engine-size','fuel-system','bore',
                'stroke','compression-ratio','horsepower','peak-rpm','city-mpg','highway-mpg','price']

In [3]:
print('自動車データの形成:{}'.format(auto.shape))

自動車データの形成:(205, 26)


In [4]:
auto.head()

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-weels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
1,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
3,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
4,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450


In [6]:
#それぞれのカラムに?が何個あるかカウント
auto = auto[['price','horsepower','width','height']]
auto.isin(['?']).sum()

price         4
horsepower    2
width         0
height        0
dtype: int64

In [7]:
#?をnanに置換して、nanがある行を削除
auto = auto.replace('?',np.nan).dropna()
print('自動車データの形式:{}'.format(auto.shape))

自動車データの形式:(199, 4)


In [8]:
auto.head()

Unnamed: 0,price,horsepower,width,height
0,13495,111,64.1,48.8
1,16500,111,64.1,48.8
2,16500,154,65.5,52.4
3,13950,102,66.2,54.3
4,17450,115,66.4,54.3


In [10]:
print('データ型の確認(型変換前)\n{}\n'.format(auto.dtypes))

データ型の確認(型変換前)
price          object
horsepower     object
width         float64
height        float64
dtype: object



In [11]:
auto = auto.assign(price=pd.to_numeric(auto.price))
auto=auto.assign(horsepower=pd.to_numeric(auto.horsepower))
print('データ型の確認(型変換前) \n{}'.format(auto.dtypes))

データ型の確認(型変換前) 
price           int64
horsepower      int64
width         float64
height        float64
dtype: object


In [12]:
auto.corr()

Unnamed: 0,price,horsepower,width,height
price,1.0,0.810533,0.753871,0.13499
horsepower,0.810533,1.0,0.615315,-0.087407
width,0.753871,0.615315,1.0,0.309223
height,0.13499,-0.087407,0.309223,1.0


In [14]:
#データ分割(訓練データとテストデータ)のためのインポート
from sklearn.model_selection import train_test_split

#重回帰のモデル構築のためのインポート
from sklearn.linear_model import LinearRegression

#目的変数にpriceを指定、説明変数にそれ以外を指定
X = auto.drop('price',axis=1)
Y = auto['price']

#訓練データとテストデータに分ける
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.5, random_state=0)

#重回帰クラスの初期化と学習
model = LinearRegression()
model.fit(X_train,Y_train)

#決定係数を表示
print('決定係数(train):{:.3f}'.format(model.score(X_train,Y_train)))
print('決定係数(test):{:.3f}'.format(model.score(X_train,Y_train)))

#回帰係数と切片を表示
print('\n回帰係数\n{}'.format(pd.Series(model.coef_,index=X.columns)))
print('切片:{:.3f}'.format(model.intercept_))

決定係数(train):0.733
決定係数(test):0.733

回帰係数
horsepower      81.651078
width         1829.174506
height         229.510077
dtype: float64
切片:-128409.046
