# **Import Libraries**

In [None]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [None]:
import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

### **Read Data**

In [None]:
with open("./data/task1_multiomics_data.pickle", "rb") as file:
    data_multiomics = pickle.load(file)

# show dataset
data_multiomics.head()

Unnamed: 0_level_0,Training/Validation,Gates ID,MRN,Study Subject ID Number,Sex,sex_bin,timepoint,gestational_age,cellfree_rna,cellfree_rna,...,plasma_somalogic,plasma_somalogic,plasma_somalogic,plasma_somalogic,plasma_somalogic,plasma_somalogic,plasma_somalogic,plasma_somalogic,plasma_somalogic,plasma_somalogic
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,0_C2orf76,1_ACTL10,...,1290_UBE2G2,1291_TAGLN2,1292_ATP5O,1293_POMC,1294_CRYZL1,1295_SERPINF1,1296_CTSF,1297_FTCD,1298_USP25,1299_PLXNB2
0,T,PTLG002,16661779,10565,Male,1,1,11,0.312437,-1.89293e-16,...,4804.4,2233.0,3610.9,715.8,151.4,37885.8,1479.1,3261.8,561.3,3227.0
1,T,PTLG002,16661779,10565,Male,1,2,18,0.312437,-1.89293e-16,...,4086.0,2160.5,2260.4,825.2,161.0,41821.5,1465.1,1839.8,597.8,3366.0
2,T,PTLG002,16661779,10565,Male,1,3,32,0.312437,-1.89293e-16,...,4328.0,1818.4,2445.2,1241.8,194.6,45526.1,1428.3,3057.2,625.7,8703.7
3,T,PTLG002,16661779,10565,Male,1,4,45,0.312437,-1.89293e-16,...,3442.4,2661.4,3879.2,703.6,153.7,36862.5,1063.6,7339.7,593.2,2918.9
4,T,PTLG004,23587868,10603,Female,0,1,11,5.204209,1.734736,...,4261.9,1804.6,1470.6,526.8,163.0,38938.3,1170.1,1036.8,552.8,3457.1


In [None]:
# Select immune system features
immune_system_df = data_multiomics["immune_system"]

# Show dataset
immune_system_df.head()

Unnamed: 0,0_Bcells,1_CD16+CD56-NKcells,2_CD4+Tcells_mem,3_CD4+Tcells_naive,4_CD4+Tcells,5_CD45RA+Tregs,6_CD45RA-Tregs,7_CD56+CD16-NKcells,8_CD7+NKcells,9_CD8+Tcells_mem,...,524_M-MDSC_STAT5_Unstim,525_mDCs_STAT5_Unstim,526_ncMCs_STAT5_Unstim,527_pDCs_STAT5_Unstim,528_Tbet+CD4+Tcells_mem_STAT5_Unstim,529_Tbet+CD4+Tcells_naive_STAT5_Unstim,530_Tbet+CD8+Tcells_mem_STAT5_Unstim,531_Tbet+CD8+Tcells_naive_STAT5_Unstim,532_TCRgd+Tcells_STAT5_Unstim,533_Tregs_STAT5_Unstim
0,0.053164,0.054978,0.297875,0.136289,0.445832,0.00257,0.013848,0.007052,0.070836,0.118884,...,0.998954,0.953637,1.082629,0.80861,0.504269,0.757424,0.462045,0.454665,0.443859,0.529431
1,0.052857,0.069794,0.279917,0.14035,0.430839,0.00247,0.010923,0.004759,0.080245,0.127831,...,0.930847,0.822618,0.931126,0.728738,0.613059,0.852393,0.506981,0.474408,0.491691,0.574133
2,0.053202,0.050829,0.277997,0.187659,0.479078,0.003473,0.013359,0.005302,0.063781,0.104513,...,1.077824,0.970954,1.011011,0.749277,0.752882,0.813249,0.560379,0.481862,0.505706,0.640245
3,0.049906,0.090496,0.266336,0.156263,0.432904,0.003071,0.014459,0.004318,0.101386,0.115243,...,0.976888,0.918164,1.028114,0.790166,0.505349,0.648406,0.464522,0.445444,0.438285,0.573058
4,0.103067,0.004128,0.162746,0.10395,0.27084,0.003198,0.007988,0.007153,0.090763,0.057064,...,0.890405,0.800468,1.067789,0.563615,0.464563,1.004497,0.378557,0.42353,0.332368,0.447904


# **EDA**

In [None]:
# display datatype of columns
immune_system_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 68 entries, 0 to 67
Columns: 534 entries, 0_Bcells to 533_Tregs_STAT5_Unstim
dtypes: float64(534)
memory usage: 284.2 KB


In [None]:
# display basic stats of feature set
immune_system_df.describe()

Unnamed: 0,0_Bcells,1_CD16+CD56-NKcells,2_CD4+Tcells_mem,3_CD4+Tcells_naive,4_CD4+Tcells,5_CD45RA+Tregs,6_CD45RA-Tregs,7_CD56+CD16-NKcells,8_CD7+NKcells,9_CD8+Tcells_mem,...,524_M-MDSC_STAT5_Unstim,525_mDCs_STAT5_Unstim,526_ncMCs_STAT5_Unstim,527_pDCs_STAT5_Unstim,528_Tbet+CD4+Tcells_mem_STAT5_Unstim,529_Tbet+CD4+Tcells_naive_STAT5_Unstim,530_Tbet+CD8+Tcells_mem_STAT5_Unstim,531_Tbet+CD8+Tcells_naive_STAT5_Unstim,532_TCRgd+Tcells_STAT5_Unstim,533_Tregs_STAT5_Unstim
count,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,...,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0
mean,0.088664,0.04056,0.219791,0.166037,0.395832,0.006035,0.010955,0.004799,0.073156,0.096926,...,0.998286,0.816198,0.839417,0.618643,0.529808,0.768246,0.419237,0.424145,0.381095,0.506808
std,0.032683,0.032928,0.042841,0.069334,0.076555,0.0038,0.003607,0.002945,0.032396,0.03317,...,0.104334,0.106087,0.151541,0.098709,0.108285,0.240088,0.051584,0.064382,0.053005,0.121552
min,0.028,0.004128,0.127551,0.048464,0.195,0.000472,0.002967,0.001549,0.026858,0.042526,...,0.802085,0.56292,0.466587,0.44299,0.324728,0.361527,0.313406,0.300372,0.265301,0.332834
25%,0.06067,0.013874,0.191423,0.108341,0.352024,0.003499,0.008687,0.002536,0.047581,0.069418,...,0.911435,0.735956,0.753809,0.55175,0.456984,0.543226,0.381348,0.374881,0.34466,0.437602
50%,0.088891,0.033687,0.219631,0.162796,0.411729,0.005416,0.011389,0.003802,0.068568,0.100948,...,0.999512,0.803717,0.852729,0.610673,0.505229,0.761101,0.423601,0.420748,0.378587,0.494499
75%,0.108091,0.058694,0.252165,0.205072,0.441749,0.007428,0.013568,0.006478,0.090845,0.119696,...,1.07141,0.901521,0.915458,0.660776,0.604768,0.933505,0.454074,0.468308,0.413528,0.549361
max,0.171407,0.127325,0.310133,0.352913,0.514109,0.019802,0.018088,0.01461,0.169851,0.191042,...,1.202504,1.081295,1.270881,1.128234,0.798976,1.291782,0.560379,0.592947,0.505706,1.172768


**Feature Engineering**

In [None]:
# Set an appropriate correlation threshold
# to remove highly correlated columns for feature engineering
threshold = 0.95

# Create correlation matrix
corr_matrix = immune_system_df.corr().abs()

# Select upper triangle of correlation matrix
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

# Find features with correlation greater than threshold
to_drop = [column for column in upper.columns if any(upper[column] > threshold)]

# Drop features
immune_system_df = immune_system_df.drop(to_drop, axis=1)

In [None]:
# Add target variable
immune_system_df['gestational_age'] = data_multiomics['gestational_age']

# Check unique values in dataset
immune_system_df['gestational_age'].unique()

array([11, 18, 32, 45, 27, 48, 15, 25, 42, 24, 43, 17, 28, 26, 44, 46,  8,
       16, 12, 10, 19, 31, 47])

### **Data Preprocessing**

In [None]:
# Perform a train-test split for regression
X = immune_system_df.drop('gestational_age', axis=1)  # Features
y = immune_system_df['gestational_age']  # Target variable

# Adjust the test_size and random_state based on your requirements
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# **Model Training**

### **Linear Regression Model**

In [None]:
# Linear Regression
linear_regression_model = LinearRegression()

# Train Linear Regression on the training set
linear_regression_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_linear_regression = linear_regression_model.predict(X_test)

# Evaluate Linear Regression
mse_linear_regression = mean_squared_error(y_test, y_pred_linear_regression)
mae_linear_regression = mean_absolute_error(y_test, y_pred_linear_regression)
r2_linear_regression = r2_score(y_test, y_pred_linear_regression)

print(f'Linear Regression Mean Squared Error: {mse_linear_regression}')
print(f'Linear Regression Mean Absolute Error: {mae_linear_regression}')
print(f'Linear Regression R-squared: {r2_linear_regression}')

Linear Regression Mean Squared Error: 247.5304889628446
Linear Regression Mean Absolute Error: 10.808611545107965
Linear Regression R-squared: -1.4355409556585115


### **SVR (Support Vector Regressor) Model**

In [None]:
# Define the SVR model
svr_model = SVR()

# Define the hyperparameters grid for grid search
param_grid_svr = {'C': [0.1, 1, 10],
                  'kernel': ['linear', 'rbf', 'poly'],
                  'gamma': ['scale', 'auto']}

# Create a grid search object
grid_search_svr = GridSearchCV(svr_model, param_grid_svr, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Fit the grid search to the data
grid_search_svr.fit(X_train, y_train)

# Get the best hyperparameters from the grid search
best_params_svr = grid_search_svr.best_params_
print("Best hyperparameters for SVR:", best_params_svr)

# Instantiate the SVR model with the best hyperparameters
best_svr_model = SVR(**best_params_svr)

# Fit the SVR model to the training data
best_svr_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_svr = best_svr_model.predict(X_test)

# Evaluate the SVR model
mse_svr = mean_squared_error(y_test, y_pred_svr)
mae_svr = mean_absolute_error(y_test, y_pred_svr)
r2_svr = r2_score(y_test, y_pred_svr)

print(f'SVR Mean Squared Error: {mse_svr}')
print(f'SVR Absolute Error: {mae_svr}')
print(f'SVR R-squared: {r2_svr}')


Best hyperparameters for SVR: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
SVR Mean Squared Error: 193.91983315894134
SVR Absolute Error: 9.237086565129088
SVR R-squared: -0.9080465511622744


### **Decision Tree Model**

In [None]:
# Decision Tree
param_grid_decision_tree = {'max_depth': range(1, 50, 5),
                            'min_samples_split': range(1, 20, 2),
                            'min_samples_leaf': range(1, 8)}

grid_search_decision_tree = GridSearchCV(DecisionTreeRegressor(random_state=42),
                                         param_grid_decision_tree,
                                         cv=5,
                                         scoring='neg_mean_squared_error',
                                         n_jobs=-1)
grid_search_decision_tree.fit(X_train, y_train)

# Best hyperparameters for Decision Tree
best_params_decision_tree = grid_search_decision_tree.best_params_
print("Best hyperparameters for Decision Tree:", best_params_decision_tree)

# Evaluate Decision Tree on the test set with the best hyperparameters
best_decision_tree_model = DecisionTreeRegressor(**best_params_decision_tree, random_state=42)
best_decision_tree_model.fit(X_train, y_train)
y_pred_decision_tree = best_decision_tree_model.predict(X_test)

# Evaluate Decision Tree
mse_decision_tree = mean_squared_error(y_test, y_pred_decision_tree)
mae_decision_tree = mean_absolute_error(y_test, y_pred_decision_tree)
r2_decision_tree = r2_score(y_test, y_pred_decision_tree)

print(f'Decision Tree Mean Squared Error: {mse_decision_tree}')
print(f'Decision Tree Mean Absolute Error: {mae_decision_tree}')
print(f'Decision Tree R-squared: {r2_decision_tree}')

Best hyperparameters for Decision Tree: {'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 13}
Decision Tree Mean Squared Error: 221.98539462081126
Decision Tree Mean Absolute Error: 12.1765873015873
Decision Tree R-squared: -1.1841936418513561


### **Random Forest Model**

In [None]:
# Random Forest
param_grid_random_forest = {'n_estimators': range(1, 400, 50),
                             'max_depth': range(1, 30,10),
                             'min_samples_split': range(2, 20, 5),
                             'min_samples_leaf': [1, 2, 4]}

grid_search_random_forest = GridSearchCV(RandomForestRegressor(random_state=42),
                                          param_grid_random_forest,
                                          cv=5,
                                          scoring='neg_mean_squared_error',
                                          n_jobs=-1)
grid_search_random_forest.fit(X_train, y_train)

# Best hyperparameters for Random Forest
best_params_random_forest = grid_search_random_forest.best_params_
print("Best hyperparameters for Random Forest:", best_params_random_forest)

# Evaluate Random Forest on the test set with the best hyperparameters
best_random_forest_model = RandomForestRegressor(**best_params_random_forest, random_state=42)
best_random_forest_model.fit(X_train, y_train)
y_pred_random_forest = best_random_forest_model.predict(X_test)

# Evaluate Random Forest
mse_random_forest = mean_squared_error(y_test, y_pred_random_forest)
mae_random_forest = mean_absolute_error(y_test, y_pred_random_forest)
r2_random_forest = r2_score(y_test, y_pred_random_forest)

print(f'Random Forest Mean Squared Error: {mse_random_forest}')
print(f'Random Forest Mean Absolute Error: {mae_random_forest}')
print(f'Random Forest R-squared: {r2_random_forest}')

Best hyperparameters for Random Forest: {'max_depth': 11, 'min_samples_leaf': 2, 'min_samples_split': 7, 'n_estimators': 101}
Random Forest Mean Squared Error: 124.4876562012378
Random Forest Mean Absolute Error: 9.341164204725024
Random Forest R-squared: -0.22487854495193838


With Normalization

In [None]:
# Initialize the scaler
scaler = MinMaxScaler()

# Fit and transform the scaler on the training data and transform both training and test data
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

### **Linear Regression Model**

In [None]:
# Linear Regression
linear_regression_model_normalized = LinearRegression()

# Train Linear Regression on the normalized training set
linear_regression_model_normalized.fit(X_train_normalized, y_train)

# Make predictions on the normalized test set
y_pred_linear_regression_normalized = linear_regression_model_normalized.predict(X_test_normalized)

# Evaluate Linear Regression on the normalized test set
mse_linear_regression_normalized = mean_squared_error(y_test, y_pred_linear_regression_normalized)
mae_linear_regression_normalized = mean_absolute_error(y_test, y_pred_linear_regression_normalized)
r2_linear_regression_normalized = r2_score(y_test, y_pred_linear_regression_normalized)

print(f'Linear Regression (Normalized) Mean Squared Error: {mse_linear_regression_normalized}')
print(f'Linear Regression (Normalized) Mean Absolute Error: {mae_linear_regression_normalized}')
print(f'Linear Regression (Normalized) R-squared: {r2_linear_regression_normalized}')

Linear Regression (Normalized) Mean Squared Error: 113.81549904765076
Linear Regression (Normalized) Mean Absolute Error: 7.9854666787103366
Linear Regression (Normalized) R-squared: -0.11987137617166432


In [None]:
# Define the SVR model
svr_model = SVR()

# Define the hyperparameters grid for grid search
param_grid_svr = {'C': [0.1, 1, 10],
                  'kernel': ['linear', 'rbf', 'poly'],
                  'gamma': ['scale', 'auto']}

# Create a grid search object
grid_search_svr = GridSearchCV(svr_model, param_grid_svr, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Fit the grid search to the data
grid_search_svr.fit(X_train_normalized, y_train)

# Get the best hyperparameters from the grid search
best_params_svr = grid_search_svr.best_params_
print("Best hyperparameters for SVR:", best_params_svr)

# Instantiate the SVR model with the best hyperparameters
best_svr_model = SVR(**best_params_svr)

# Fit the SVR model to the training data
best_svr_model.fit(X_train_normalized, y_train)

# Make predictions on the test set
y_pred_svr_normalized = best_svr_model.predict(X_test_normalized)

# Evaluate the SVR model
mse_svr = mean_squared_error(y_test, y_pred_svr_normalized)
mae_svr = mean_absolute_error(y_test, y_pred_svr_normalized)
r2_svr = r2_score(y_test, y_pred_svr_normalized)

print(f'SVR Mean Squared Error: {mse_svr}')
print(f'SVR Absolute Error: {mae_svr}')
print(f'SVR R-squared: {r2_svr}')


Best hyperparameters for SVR: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
SVR Mean Squared Error: 145.7078485102072
SVR Absolute Error: 8.853024395243766
SVR R-squared: -0.43367160180725994


### **Decision Tree Model**

In [None]:
# Decision Tree
param_grid_decision_tree = {'max_depth': range(1, 50, 5),
                            'min_samples_split': range(1, 20, 2),
                            'min_samples_leaf': range(1, 8)}

grid_search_decision_tree_normalized = GridSearchCV(DecisionTreeRegressor(random_state=42),
                                                    param_grid_decision_tree,
                                                    cv=5,
                                                    scoring='neg_mean_squared_error',
                                                    n_jobs=-1)
grid_search_decision_tree_normalized.fit(X_train_normalized, y_train)

# Best hyperparameters for Decision Tree
best_params_decision_tree_normalized = grid_search_decision_tree_normalized.best_params_
print("Best hyperparameters for Decision Tree (Normalized):", best_params_decision_tree_normalized)

# Evaluate Decision Tree on the normalized test set with the best hyperparameters
best_decision_tree_model_normalized = DecisionTreeRegressor(**best_params_decision_tree_normalized, random_state=42)
best_decision_tree_model_normalized.fit(X_train_normalized, y_train)
y_pred_decision_tree_normalized = best_decision_tree_model_normalized.predict(X_test_normalized)

# Evaluate Decision Tree (Normalized)
mse_decision_tree_normalized = mean_squared_error(y_test, y_pred_decision_tree_normalized)
mae_decision_tree_normalized = mean_absolute_error(y_test, y_pred_decision_tree_normalized)
r2_decision_tree_normalized = r2_score(y_test, y_pred_decision_tree_normalized)

print(f'Decision Tree (Normalized) Mean Squared Error: {mse_decision_tree_normalized}')
print(f'Decision Tree (Normalized) Mean Absolute Error: {mae_decision_tree_normalized}')
print(f'Decision Tree (Normalized) R-squared: {r2_decision_tree_normalized}')

Best hyperparameters for Decision Tree (Normalized): {'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 13}
Decision Tree (Normalized) Mean Squared Error: 221.98539462081126
Decision Tree (Normalized) Mean Absolute Error: 12.1765873015873
Decision Tree (Normalized) R-squared: -1.1841936418513561


### **Random Forest Model**

In [None]:
# Random Forest
param_grid_random_forest = {'n_estimators': range(1, 400, 50),
                             'max_depth': range(1, 30,5),
                             'min_samples_split': range(2, 20, 4),
                             'min_samples_leaf': [1, 2, 4]}

grid_search_random_forest_normalized = GridSearchCV(RandomForestRegressor(random_state=42),
                                                     param_grid_random_forest,
                                                     cv=5,
                                                     scoring='neg_mean_squared_error',
                                                     n_jobs=-1)
grid_search_random_forest_normalized.fit(X_train_normalized, y_train)

# Best hyperparameters for Random Forest
best_params_random_forest_normalized = grid_search_random_forest_normalized.best_params_
print("Best hyperparameters for Random Forest (Normalized):", best_params_random_forest_normalized)

# Evaluate Random Forest on the normalized test set with the best hyperparameters
best_random_forest_model_normalized = RandomForestRegressor(**best_params_random_forest_normalized, random_state=42)
best_random_forest_model_normalized.fit(X_train_normalized, y_train)
y_pred_random_forest_normalized = best_random_forest_model_normalized.predict(X_test_normalized)

# Evaluate Random Forest (Normalized)
mse_random_forest_normalized = mean_squared_error(y_test, y_pred_random_forest_normalized)
mae_random_forest_normalized = mean_absolute_error(y_test, y_pred_random_forest_normalized)
r2_random_forest_normalized = r2_score(y_test, y_pred_random_forest_normalized)

print(f'Random Forest (Normalized) Mean Squared Error: {mse_random_forest_normalized}')
print(f'Random Forest (Normalized) Mean Absolute Error: {mae_random_forest_normalized}')
print(f'Random Forest (Normalized) R-squared: {r2_random_forest_normalized}')


Random Forest (Normalized) Mean Squared Error: 127.97192299694215
Random Forest (Normalized) Mean Absolute Error: 9.556662400933972
Random Forest (Normalized) R-squared: -0.2591614913353748


# Report

## Regression Model Performance Summary

### Linear Regression

**Without Data Normalization:**
- Mean Squared Error: 247.53
- Mean Absolute Error: 10.81
- R-squared: -1.44

**With Data Normalization:**
- Mean Squared Error: 113.82
- Mean Absolute Error: 7.99
- R-squared: -0.12

### Support Vector Regression (SVR)

**Best Hyperparameters:** {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}

**Without Data Normalization:**
- Mean Squared Error: 193.92
- Mean Absolute Error: 9.24
- R-squared: -0.91

**With Data Normalization:**
- Mean Squared Error: 145.71
- Mean Absolute Error: 8.85
- R-squared: -0.43

### Decision Tree

**Best Hyperparameters:** {'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 13}

**Without Data Normalization:**
- Mean Squared Error: 221.99
- Mean Absolute Error: 12.18
- R-squared: -1.18

**With Data Normalization:**
- Mean Squared Error: 221.99
- Mean Absolute Error: 12.18
- R-squared: -1.18

### Random Forest

**Best Hyperparameters:** {'max_depth': 11, 'min_samples_leaf': 2, 'min_samples_split': 7, 'n_estimators': 101}

**Without Data Normalization:**
- Mean Squared Error: 124.49
- Mean Absolute Error: 9.34
- R-squared: -0.22

**With Data Normalization:**
- Mean Squared Error: 127.97
- Mean Absolute Error: 9.56
- R-squared: -0.26

---

**Observations:**
- The Linear Regression model with data normalization stands out as the best performer.
- Linear Regression and SVR show improved performance with data normalization, as indicated by lower mean squared errors and absolute errors.
- Decision Tree performance remains consistent with or without normalization, suggesting less sensitivity to feature scaling.
- Random Forest exhibits a slight increase in mean squared error with normalization, but the impact is relatively small.

**Note:**
- Data normalization has varying effects on different regression models. Consideration should be given to the specific characteristics of each model and the dataset when deciding whether to normalize features.