## Imports

In [4]:
import seaborn as sns
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

## Daten einlesen

In [2]:
# Load seaborn's bundled "mpg" example dataset into a DataFrame.
df = sns.load_dataset(name="mpg")

In [5]:
# Preview the loaded frame; the rendered output is truncated to the first and
# last rows.
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa,ford mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,vw pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger


In [6]:
# Target: the car's origin label.
y = df["origin"]
# Features: every column except the target and the car name (the last two
# columns of the frame), selected by name rather than by position.
X = df.drop(columns=["origin", "name"])

In [7]:
# Fixed seed so the split -- and every score reported further down -- is
# reproducible on Restart & Run All.
RANDOM_STATE = 42

# Hold out 20 % of the rows for the final evaluation. Stratifying on the target
# keeps the share of each origin class the same in the train and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

In [8]:
# Chain preprocessing and the classifier into a single estimator so that the
# imputer and scaler are always fitted on the same data as the SVC.
pipeline = make_pipeline(
    SimpleImputer(strategy="mean"),  # the default strategy, spelled out
    StandardScaler(),                # zero mean / unit variance per feature
    SVC(),                           # C and kernel are tuned by the grid search
)

In [9]:
# make_pipeline names each step automatically; these names are the prefixes
# used for the "<step>__<param>" keys of the parameter grid.
pipeline.named_steps

{'simpleimputer': SimpleImputer(),
 'standardscaler': StandardScaler(),
 'svc': SVC()}

In [11]:
# Hyper-parameter grid for the SVC step, addressed as "<step name>__<parameter>":
# five regularisation strengths crossed with two kernels (10 combinations).
params = dict(
    svc__C=[1.0, 2.0, 5.0, 10.0, 20.0],
    svc__kernel=["linear", "rbf"],
)

In [12]:
# Exhaustive search over the grid, scoring each combination with 5-fold
# cross-validation.
search = GridSearchCV(estimator=pipeline, param_grid=params, cv=5)

In [13]:
# Run the grid search on the training split only; the test split stays unseen
# until the final evaluation.
search.fit(X=X_train, y=y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('simpleimputer', SimpleImputer()),
                                       ('standardscaler', StandardScaler()),
                                       ('svc', SVC())]),
             param_grid={'svc__C': [1.0, 2.0, 5.0, 10.0, 20.0],
                         'svc__kernel': ['linear', 'rbf']})

In [15]:
# The pipeline configured with the winning parameter combination from the search.
search.best_estimator_

Pipeline(steps=[('simpleimputer', SimpleImputer()),
                ('standardscaler', StandardScaler()), ('svc', SVC(C=10.0))])

In [16]:
# The grid entry that achieved the best cross-validated score.
search.best_params_

{'svc__C': 10.0, 'svc__kernel': 'rbf'}

In [14]:
# Score of the best pipeline on the held-out test split (no `scoring` was passed
# to the search, so this is the classifier's default score, i.e. accuracy).
search.score(X=X_test, y=y_test)

0.7625

In [20]:
# First row of the test split, shown as a sample input for a single prediction.
X_test.iloc[0]

mpg               26.0
cylinders          4.0
displacement      91.0
horsepower        70.0
weight          1955.0
acceleration      20.5
model_year        71.0
Name: 56, dtype: float64

In [21]:
# Predict the origin for the first test row. The row is taken straight from
# X_test as a one-row DataFrame (note the double brackets) instead of retyping
# its values: this keeps the column names the pipeline was fitted with and
# stays correct when the train/test split changes.
search.predict(X_test.iloc[[0]])

array(['europe'], dtype=object)