In [3]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from jcopml.pipeline import num_pipe, cat_pipe
from jcopml.utils import save_model, load_model
from jcopml.plot import plot_missing_value
from jcopml.feature_importance import mean_score_decrease

In [4]:
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,file,energy_0,homogenity_0,entrophy_0,contras_0,energy_45,homogenity_45,entrophy_45,contras_45,energy_90,homogenity_90,entrophy_90,contras_90,energy_135,homogenity_135,entrophy_135,contras_135,Target
0,data\Blight\Corn_Blight (111).JPG,0.000407,178.497293,8.184861,177.497293,0.000373,193.356811,8.244528,192.356811,0.000487,72.988743,7.936127,71.988743,0.000382,178.073284,8.218762,177.073284,Blight
1,data\Blight\Corn_Blight (112).JPG,0.000543,97.519377,8.127788,96.519377,0.000507,111.753736,8.166177,110.753736,0.000637,48.054626,7.897788,47.054626,0.000428,130.480005,8.303244,129.480005,Blight
2,data\Blight\Corn_Blight (113).JPG,0.000405,191.488989,8.228577,190.488989,0.000379,206.613987,8.286121,205.613987,0.000573,89.864665,7.916103,88.864665,0.000365,240.954554,8.324917,239.954554,Blight
3,data\Blight\Corn_Blight (114).JPG,0.000241,213.173905,8.640188,212.173905,0.000244,206.704507,8.632925,205.704507,0.000409,52.754614,8.147924,51.754614,0.000234,226.944448,8.661606,225.944448,Blight
4,data\Blight\Corn_Blight (115).JPG,0.000272,283.584092,8.594806,282.584092,0.000254,308.931056,8.636834,307.931056,0.000313,186.3328,8.430809,185.3328,0.000257,322.376155,8.638022,321.376155,Blight


In [6]:
X = df.drop(columns=["file", "Target"])
y = df.Target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((960, 16), (240, 16), (960,), (240,))

In [7]:
from sklearn.svm import SVC


In [8]:
preprocessor = ColumnTransformer([
    ('numeric', num_pipe(), X_train.columns)
])
pipeline = Pipeline([
    ('prep', preprocessor),
    ('algo', SVC(max_iter=500))
])

In [9]:
from sklearn.model_selection import GridSearchCV
from jcopml.tuning import grid_search_params as gsp

model = GridSearchCV(pipeline, gsp.svm_params, cv=4, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)

print(model.best_params_)
print(model.score(X_train, y_train), model.best_score_, model.score(X_test, y_test))

Fitting 4 folds for each of 49 candidates, totalling 196 fits




{'algo__C': 1.0, 'algo__gamma': 0.001}
0.9270833333333334 0.56875 0.6041666666666666
