### IT2Tsk Gaussian

In [6]:

from notebook_resolver import *
from src.utils.pandas_extension import *
from src.dataset import Dataset, WorkSheet

# Workbook that holds the e-nose measurements, and the sheet to read from it.
dataset_path = "data/e-nose_dataset_12_beef_cuts.xlsx"
worksheet = WorkSheet.DS12.value

dataset = Dataset(path=dataset_path, sheet_name=worksheet)

# The dataset's `validate_df` split is what the rest of the notebook calls
# the test set.
train_df, test_df = dataset.train_df, dataset.validate_df

In [7]:
from sklearn.metrics import r2_score
from src.pipeline import Pipeline
from src.pipelines.transformers import FeatureScaler
from src.pipelines.predictors import IT2TskPredictor
from src.fis.fuzzy_logic.mfs import MFType2
from src.clusters import ClusteringMethod
from src.fis.fuzzy_logic.consequents import LinearModel
from src.utils.hyperparameter import get_tuned_params

# Column the predictor is fitted against and that is scored below.
target_column = 'TVC'

# Hyperparameters are written out by hand here; the alternative lookup is
# get_tuned_params()[worksheet].
tuned_params = {
    "batch_size": 256,
    "tol": 0.001,
    "max_no_improvement": 5,
    "uncertainty_factor": 0.07,
    "min_std_ratio": 0.13,
}

# IMPORTANT: Use the same random_state to ensure reproducible clustering
# NOTE(review): RANDOM_STATE is not referenced anywhere else in this cell --
# confirm where the seed is supposed to be handed to the clustering step.
RANDOM_STATE = 42

pipeline = Pipeline(steps=[
    ('feature_scaler', FeatureScaler(decimal_places=4)),
    ('predictor', IT2TskPredictor(target=target_column)),
])

# The same three clustering settings are forwarded under both the "mfs" and
# the "rules" prefixes, in that order.
cluster_fit_params = {
    f"predictor__{stage}__cluster__{option}": tuned_params.get(option)
    for stage in ("mfs", "rules")
    for option in ("batch_size", "tol", "max_no_improvement")
}

# NOTE(review): the builder settings use the prefix "mf" while the cluster
# settings above use "mfs" -- kept exactly as written; verify this is intended.
builder_fit_params = {
    f"predictor__mf__builder__{option}": tuned_params.get(option)
    for option in ("uncertainty_factor", "min_std_ratio")
}

# The pipeline is fitted on the training dataframe only.
pipeline.fit(
    train_df,
    predictor__clustering_method=ClusteringMethod.MBKMEANS,
    **cluster_fit_params,
    predictor__mf_type=MFType2.GAUSSIAN,
    predictor__linear_model=LinearModel.LSE,
    **builder_fit_params,
)

# Run the test split through the fitted pipeline's transform step, then
# separate the target column from the remaining columns before predicting.
transformed_test_df = pipeline.transform(test_df)
y_test_ = transformed_test_df[target_column].values
X_test_df = transformed_test_df.drop(columns=[target_column])
y_pred_ = pipeline.predict(X_test_df)

r2 = r2_score(y_test_, y_pred_)
print(f"R2 Score on Test Data: {r2}")

R2 Score on Test Data: 0.9812443708655182
