# Model Selection

In [None]:
from sklearn.model_selection import train_test_split  # Split dataset into training and testing
from sklearn.model_selection import cross_val_score  # Evaluate model with cross-validation
from sklearn.model_selection import GridSearchCV  # Hyperparameter tuning using grid search
from sklearn.model_selection import RandomizedSearchCV  # Randomized hyperparameter search
from sklearn.model_selection import StratifiedKFold  # Stratified K-Folds cross-validator


# Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler  # Standardization of features
from sklearn.preprocessing import MinMaxScaler  # Scaling features to a given range
from sklearn.preprocessing import OneHotEncoder  # One-hot encoding for categorical variables
from sklearn.preprocessing import LabelEncoder  # Encode labels with value between 0 and n_classes-1
from sklearn.preprocessing import PolynomialFeatures  # Feature Engineering: Generate polynomial features
from sklearn.preprocessing import FunctionTransformer  # Custom transformations
from sklearn.impute import SimpleImputer  # Impute missing values
from sklearn.impute import KNNImputer  # K-Nearest Neighbors imputation
from sklearn.preprocessing import RobustScaler  # Scales features using statistics that are robust to outliers


# Feature Selection

In [None]:
from sklearn.feature_selection import SelectKBest  # Select features based on univariate statistical tests
from sklearn.feature_selection import RFE  # Recursive feature elimination
from sklearn.feature_selection import SelectFromModel  # Select features based on importance from a model


# Ensemble Methods (Improving accuracy)

In [None]:
from sklearn.ensemble import RandomForestClassifier  # Random Forest classifier
from sklearn.ensemble import RandomForestRegressor  # Random Forest regressor
from sklearn.ensemble import GradientBoostingClassifier  # Gradient boosting classifier
from sklearn.ensemble import GradientBoostingRegressor  # Gradient boosting regressor
from sklearn.ensemble import AdaBoostClassifier  # AdaBoost classifier
from sklearn.ensemble import AdaBoostRegressor  # AdaBoost regressor
from sklearn.ensemble import VotingClassifier  # Voting ensemble classifier
from sklearn.ensemble import VotingRegressor  # Voting ensemble regressor
from sklearn.ensemble import BaggingClassifier  # Bagging meta-estimator: fits base estimators (decision trees by default) on random subsets



# Tree-Based Models

In [None]:
from sklearn.tree import DecisionTreeClassifier  # Decision tree classifier; control overfitting via max_depth, min_samples_split, min_samples_leaf
from sklearn.tree import DecisionTreeRegressor  # Decision tree regressor
from sklearn.tree import export_graphviz  # Export a decision tree in DOT format


### Addressing overfitting and underfitting:
##### Decision Trees: Set parameters such as max_depth, min_samples_split, and min_samples_leaf to control tree complexity.

##### Early Stopping (in tree-based models): Many libraries (such as XGBoost and LightGBM) offer built-in options for early stopping during training, based on validation performance.

# Linear Models

In [None]:
from sklearn.linear_model import LinearRegression  # Linear regression
from sklearn.linear_model import LogisticRegression  # Logistic regression
from sklearn.linear_model import Ridge  # Ridge regression (L2 regularization)
from sklearn.linear_model import Lasso  # Lasso regression (L1 regularization)
from sklearn.linear_model import ElasticNet  # Combination of L1 and L2 regularization


# Clustering

In [None]:
from sklearn.cluster import KMeans  # K-means clustering
from sklearn.cluster import DBSCAN  # Density-Based Spatial Clustering
from sklearn.cluster import AgglomerativeClustering  # Hierarchical clustering

# Metrics

In [None]:
from sklearn.metrics import accuracy_score  # Accuracy classification score
from sklearn.metrics import confusion_matrix  # Confusion matrix
from sklearn.metrics import classification_report  # Precision, recall, f1-score
from sklearn.metrics import mean_squared_error  # Mean squared error
from sklearn.metrics import mean_absolute_error  # Mean absolute error
from sklearn.metrics import r2_score  # R-squared score
from sklearn.metrics import roc_auc_score  # Area under the ROC curve
from sklearn.metrics import precision_score  # Precision score
from sklearn.metrics import recall_score  # Recall score
from sklearn.metrics import f1_score  # F1 score

# Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA  # Principal Component Analysis
from sklearn.decomposition import NMF  # Non-negative Matrix Factorization
from sklearn.manifold import TSNE  # t-distributed Stochastic Neighbor Embedding
from sklearn.manifold import MDS  # Multi-dimensional scaling

# Pipeline and Feature Union

In [None]:
from sklearn.pipeline import Pipeline  # Pipeline for chaining processing steps
from sklearn.pipeline import FeatureUnion  # Combine multiple feature extraction methods

# Other Useful Modules

In [None]:
from sklearn.utils import shuffle  # Shuffle datasets
from sklearn.utils import resample  # Resample datasets
from sklearn.compose import ColumnTransformer  # Apply different preprocessing to different columns
from sklearn.compose import TransformedTargetRegressor  # Apply transformation to target