In [2]:
%pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [13]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
infrared_thermography_temperature = fetch_ucirepo(id=925)

# data (as pandas dataframes)
X = infrared_thermography_temperature.data.features
y = infrared_thermography_temperature.data.targets


{'uci_id': 925, 'name': 'Infrared Thermography Temperature', 'repository_url': 'https://archive.ics.uci.edu/dataset/925/infrared+thermography+temperature+dataset', 'data_url': 'https://archive.ics.uci.edu/static/public/925/data.csv', 'abstract': 'The Infrared Thermography Temperature Dataset contains temperatures read from various locations of inferred images about patients, with the addition of oral temperatures measured for each individual. The 33 features consist of gender, age, ethnicity, ambiant temperature, humidity, distance, and other temperature readings from the thermal images. The dataset is intended to be used in a regression task to predict the oral temperature using the environment information as well as the thermal image readings. ', 'area': 'Health and Medicine', 'tasks': ['Regression'], 'characteristics': ['Tabular'], 'num_instances': 1020, 'num_features': 33, 'feature_types': ['Real', 'Categorical'], 'demographics': ['Gender', 'Age', 'Ethnicity'], 'target_col': ['aveO

In [None]:
# metadata
print(infrared_thermography_temperature.metadata)

# variable information
print(infrared_thermography_temperature.variables)

In [14]:
X.describe().T


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
T_atm,1020.0,24.115392,1.336338,20.2,23.4,24.0,24.7,29.1
Humidity,1020.0,28.723039,13.071627,9.9,17.6,26.3,36.2,61.2
Distance,1018.0,0.729784,2.456486,0.54,0.6,0.62,0.7,79.0
T_offset1,1020.0,0.968648,0.362587,-0.59,0.7725,0.94,1.14,2.875
Max1R13_1,1020.0,35.596533,0.574888,33.8975,35.2475,35.54875,35.8725,38.405
Max1L13_1,1020.0,35.611474,0.54976,34.1225,35.271875,35.575,35.883125,38.0425
aveAllR13_1,1020.0,34.888475,0.718613,31.77,34.45625,34.915,35.3,37.575
aveAllL13_1,1020.0,35.011345,0.633836,32.9025,34.65125,34.9975,35.363125,37.68
T_RC1,1020.0,35.659921,0.553897,33.985,35.3325,35.6025,35.910625,38.385
T_RC_Dry1,1020.0,35.587143,0.569278,33.825,35.249375,35.53375,35.855625,38.38


In [15]:
y.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
aveOralF,1020.0,36.979216,0.386403,35.75,36.8,36.9,37.1,39.6
aveOralM,1020.0,37.028382,0.509502,35.54,36.7775,36.94,37.14,40.34


In [24]:
#Use sklearn preprocessing pipeline with one-hot encoding
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import GradientBoostingRegressor

categorical_columns = ['Gender', 'Age', 'Ethnicity']

preprocessor = ColumnTransformer(
    transformers=[
        ('categorical', OneHotEncoder(), categorical_columns)
])


model_parameters = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'random_state': 42
}

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', GradientBoostingRegressor(**model_parameters))
])

#Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Fit the pipeline
pipeline.fit(X_train, y_train['aveOralM'])

In [27]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error

#Predict and evaluate error
y_pred = pipeline.predict(X_test)





In [28]:
mse = mean_squared_error(y_test['aveOralM'], y_pred)
print(mse)

0.21173000019777735


In [30]:
mae = mean_absolute_error(y_test['aveOralM'], y_pred)
print(mae)

0.31752781515365625
