In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn import set_config
set_config(display = 'diagram')

In [2]:
df = pd.read_csv('svm_data.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   x1      100 non-null    float64
 1   x2      100 non-null    float64
 2   y       100 non-null    int64  
dtypes: float64(2), int64(1)
memory usage: 2.5 KB


In [4]:
df.head()

Unnamed: 0,x1,x2,y
0,0.486861,0.163756,0
1,0.590718,0.429319,0
2,0.537981,0.082374,0
3,0.184411,0.717404,1
4,0.825697,0.41467,1


In [5]:
X = df.drop(columns= ['y'])
y = df['y']

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42 )

In [7]:
numeric_feature = X_train.select_dtypes('number').columns

In [8]:
numeric_transformer = make_pipeline(SimpleImputer(strategy = 'median'), StandardScaler())

In [9]:
col_transformer = make_column_transformer((numeric_transformer, numeric_feature), remainder = 'passthrough')

In [10]:
col_transformer.fit(X_train)

In [11]:
pipe = make_pipeline(col_transformer, SVC(kernel = 'linear', random_state = 0))

In [12]:
pipe.fit(X_train, y_train)

In [13]:
from sklearn.model_selection import cross_validate
with_categorical_score = cross_validate(pipe, X_train, y_train, return_train_score = True)

In [14]:
categorical_score = pd.DataFrame(with_categorical_score)
categorical_score

Unnamed: 0,fit_time,score_time,test_score,train_score
0,0.011037,0.005722,0.625,0.609375
1,0.013017,0.008219,0.5625,0.578125
2,0.012805,0.011157,0.5625,0.578125
3,0.010504,0.004594,0.4375,0.65625
4,0.007456,0.00352,0.5625,0.578125


In [15]:
pipe.predict(X_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int64)

In [16]:
pipe.score(X_train, y_train)

0.575

In [17]:
pipe.score(X_test, y_test)

0.45