# In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PowerTransformer
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import QuantileTransformer, Normalizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm


from scipy import stats
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neural_network import MLPRegressor
from sklearn.decomposition import PCA
from scipy.signal import hilbert
from skimage import exposure
from sklearn.metrics import mutual_info_score
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.base import BaseEstimator, TransformerMixin
from statsmodels.api import GLM




# Sample data: 100 rows x 3 columns of uniform random values scaled by 100
data = 100 * np.random.rand(100, 3)
df = pd.DataFrame(data=data, columns=list('ABC'))



# In [None]:


# 1. Standard Scaler
scaler_standard = StandardScaler()
df_standard = scaler_standard.fit_transform(df)

# 2. Min-Max Scaling
scaler_minmax = MinMaxScaler()
df_minmax = scaler_minmax.fit_transform(df)

# 3. Robust Scaling
scaler_robust = RobustScaler()
df_robust = scaler_robust.fit_transform(df)

# 4. Power Transformation (Box-Cox)
# NOTE: Box-Cox is only defined for strictly positive input.
power_transformer = PowerTransformer(method='box-cox')
df_boxcox = power_transformer.fit_transform(df)

# 5. Power Transformation (Yeo-Johnson)
power_transformer_yj = PowerTransformer(method='yeo-johnson')
df_yeojohnson = power_transformer_yj.fit_transform(df)

# 6. Z-Score Normalization
# pandas' std() uses the sample standard deviation (ddof=1).
df_zscore = (df - df.mean()) / df.std()

# 7. Quantile Transformation
# The default n_quantiles (1000) exceeds the number of sample rows, which makes
# scikit-learn warn and fall back to the sample count; request it directly.
quantile_transformer = QuantileTransformer(
    n_quantiles=min(1000, len(df)), output_distribution='normal'
)
df_quantile = quantile_transformer.fit_transform(df)

# 8. Log Transformation
df_log = np.log1p(df)  # log1p handles log(0)

# 9. Square Root Transformation
df_sqrt = np.sqrt(df)

# 10. Reciprocal Transformation
df_reciprocal = 1 / (df + 1e-9)  # Adding small value to avoid division by zero





# 11. Exponential Transformation
df_exponential = np.exp(df)

# 12. Gaussian Transformation
from scipy.stats import norm
# Min-max scaling puts each column's minimum at 0 and its maximum at 1, where
# the normal quantile function is -inf / +inf; clip into the open interval so
# every transformed value stays finite.
df_gaussian = norm.ppf(
    ((df - df.min()) / (df.max() - df.min())).clip(1e-6, 1 - 1e-6)
)

# 13. Normalization
normalizer = Normalizer()
df_normalized = normalizer.fit_transform(df)

# 14. PCA
pca = PCA(n_components=2)
df_pca = pca.fit_transform(df)

# 15. t-SNE (2D)
tsne = TSNE(n_components=2)
df_tsne_2d = tsne.fit_transform(df)

# 16. t-SNE (3D)
tsne_3d = TSNE(n_components=3)
df_tsne_3d = tsne_3d.fit_transform(df)

# 17. Feature Binarization
df_binarized = (df > df.mean()).astype(int)

# 18. Rank-Based Normalization
df_rank = df.rank(axis=0)

# 19. Min-Max Normalization with Robust Scaling
# Fresh scaler instances are used so the fitted state of `scaler_minmax` and
# `scaler_robust` from techniques 2 and 3 is not silently overwritten.
df_minmax_robust = MinMaxScaler().fit_transform(RobustScaler().fit_transform(df))

# 20. Truncated SVD
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=2)
df_svd = svd.fit_transform(df)






# 21. Min-Max Normalization with Outlier Removal
q_low = df.quantile(0.01)
q_hi  = df.quantile(0.99)
# Cells outside the open (1%, 99%) quantile band of their column become NaN;
# the rows themselves are kept.
df_outlier_removed = df[(df < q_hi) & (df > q_low)]
# A dedicated scaler keeps the fitted state of `scaler_minmax` intact.
# MinMaxScaler ignores NaN while fitting and passes it through on transform.
df_minmax_outlier = MinMaxScaler().fit_transform(df_outlier_removed)

# 22. Frequency Count Normalization
# Map every value to its relative frequency within its own column. Mapping
# keeps the original row index; re-indexing the counts by the data values
# would make the columns misalign and fill the result with NaN.
df_freq_norm = df.apply(lambda x: x.map(x.value_counts(normalize=True)).fillna(0))

# 23. Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
# Random binary labels stand in for a real target.
df_lda = lda.fit_transform(df, np.random.randint(0, 2, size=100))

# 24. Sine Transform
df_sine = np.sin(df)

# 25. Cosine Transform
df_cosine = np.cos(df)

# 26. Polynomial Transformation
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
df_poly = poly.fit_transform(df)

# 27. Root Transformation
df_root = df ** (1/3)

# 28. Exponential Scaling
# Divide each column by its own largest absolute value. The axis is spelled
# out because np.max(DataFrame) reduces differently across pandas versions.
df_exp_scaled = df / df.abs().max(axis=0)

# 29. Hinge Loss Normalization
def hinge_loss(y_true, y_pred):
    """Return the element-wise hinge loss ``max(0, 1 - y_true * y_pred)``.

    Both arguments are multiplied with ``*``, so they must support
    element-wise multiplication (scalars or NumPy-compatible arrays).
    """
    margin = y_true * y_pred
    shortfall = 1 - margin
    return np.maximum(0, shortfall)

# 30. Logistic Transformation
# Sigmoid of every cell: 1 / (1 + e^(-x))
df_logistic = np.reciprocal(1 + np.exp(-df))


# 31. Wavelet Transform
import pywt
# One Haar decomposition per row of df; each cell holds a coefficient array.
# NOTE(review): rows are decomposed (across the feature columns), not columns - confirm intent.
df_wavelet = pd.DataFrame(
    data=[pywt.wavedec(sample, 'haar') for sample in df.to_numpy()]
)

# 32. Bessel Transformation
def bessel_transform(x):
    """Apply ``np.i0`` (modified Bessel function of the first kind, order 0) to ``x``."""
    transformed = np.i0(x)
    return transformed
df_bessel = bessel_transform(df)

# 33. Hilbert Transform
from scipy.signal import hilbert
# Each column of df is one 100-sample series, so transform down the rows
# (axis=0). The default axis=-1 would treat every 3-value row as its own signal.
df_hilbert = hilbert(df.values, axis=0)

# 34. Fourier Transform
# Same reasoning as above: one FFT per column rather than per row.
df_fourier = np.fft.fft(df.values, axis=0)

# 35. Fuzzy Logic Transformation
def fuzzy_logic_transform(x):
    """Clamp every value of ``x`` into the closed interval [0, 1]."""
    lower, upper = 0, 1
    return np.clip(x, lower, upper)
df_fuzzy = fuzzy_logic_transform(x=df)

# 36. Gaussian Mixture Model (GMM) Normalization
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
# fit() returns the estimator, so the component labels can be read off directly.
df_gmm = gmm.fit(df).predict(df)

# 37. Sensitivity Analysis-Based Normalization
# Typically involves statistical tests and is case-specific.

# 38. Statistical Scaling
# Centre on the median and divide by the inter-quartile range, column by column.
df_statistical_scaled = df.sub(df.median()).div(df.quantile(0.75) - df.quantile(0.25))

# 39. Dynamic Time Warping (DTW)
from dtaidistance import dtw
# Example distance calculation between the first two rows.
dtw_distance = dtw.distance(*df.to_numpy()[:2])


# 40. Canonical Correlation Analysis (CCA)
from sklearn.cross_decomposition import CCA
cca = CCA(n_components=2)
# df is passed as both views.
df_cca = cca.fit_transform(df, df)




# 41. Generalized Additive Models (GAM)
# Custom implementation for GAM, typically requires specialized libraries.
def fit_gam(X, y):
    """Fit a statsmodels ``GLM`` of ``y`` on ``X`` and return the fitted results.

    Despite the name, this fits a generalized *linear* model with GLM's
    default settings; no intercept column is added to ``X`` here.
    """
    fitted = GLM(endog=y, exog=X).fit()
    return fitted
gam_model = fit_gam(X=df[['A', 'B']], y=df['C'])

# 42. Autoencoder Normalization
from keras.models import Model
from keras.layers import Input, Dense

def autoencoder(X):
    """Train a small dense autoencoder on ``X`` and return its reconstruction.

    The network squeezes the ``X.shape[1]`` input features through a 2-unit
    ReLU bottleneck and decodes back through a sigmoid layer. It is trained
    for 50 epochs (batch size 10, shuffled) to reproduce ``X`` under MSE loss.

    NOTE(review): the sigmoid output layer can only emit values in (0, 1), so
    inputs outside that range cannot be reconstructed - consider scaling X.
    """
    n_features = X.shape[1]

    inputs = Input(shape=(n_features,))
    bottleneck = Dense(2, activation='relu')(inputs)
    outputs = Dense(n_features, activation='sigmoid')(bottleneck)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, X, epochs=50, batch_size=10, shuffle=True)

    reconstruction = model.predict(X)
    return reconstruction

df_autoencoded = autoencoder(X=df.values)


# 43. Factor Analysis
# Project df onto 2 latent factors.
from sklearn.decomposition import FactorAnalysis
fa = FactorAnalysis(n_components=2)
df_fa = fa.fit_transform(df)

# 44. Sparse PCA
# Project df onto 2 sparse principal components.
from sklearn.decomposition import SparsePCA
spca = SparsePCA(n_components=2)
df_spca = spca.fit_transform(df)

# 45. Rank Attack Normalization
# Typically requires custom implementation for specific use cases.

# 46. Normalization Based on Entropy
def entropy_normalization(data):
    """Divide every column of ``data`` by that column's sum.

    The sum is taken along axis 0, so each column of the result adds up to 1
    (for a non-zero column sum). No entropy is actually computed.
    """
    column_totals = np.sum(data, axis=0)
    return data / column_totals

# Column-sum normalisation of the whole sample frame.
df_entropy = entropy_normalization(data=df)

# 47. Orthogonal Transformation
# This requires implementing QR decomposition
def orthogonal_transform(data):
    """Return the Q factor of the QR decomposition of ``data``.

    The triangular R factor is computed as well but discarded.
    """
    return np.linalg.qr(data)[0]

df_orthogonal = orthogonal_transform(df)

# 48. Cubic Splines
from scipy.interpolate import CubicSpline
x = np.arange(len(df))
cs = CubicSpline(x, df['A'])
# Evaluated at its own knots, so the spline reproduces column A.
df_cubic_spline = cs(x)

# 49. Logit Transformation
# The logit log(p / (1 - p)) is only defined for 0 < p < 1, whereas df holds
# values far above 1 (which would give NaN). Rescale every column to [0, 1]
# and clip away the exact end points before taking the logit.
df_logit = (
    ((df - df.min()) / (df.max() - df.min()))
    .clip(1e-6, 1 - 1e-6)
    .pipe(lambda p: np.log(p / (1 - p)))
)

# 50. Tukey’s Transformation
def tukey_transformation(data, lambda_=0.5):
    """Apply Tukey's ladder-of-powers transformation to ``data``.

    The previous body called the ``scipy.stats.tukeylambda`` *distribution*
    with an unsupported ``lambda_`` keyword, which raises ``TypeError``
    instead of transforming anything.

    Parameters
    ----------
    data : array-like
        Values to transform. They should be positive: fractional powers and
        the logarithm are undefined for negative numbers.
    lambda_ : float, default 0.5
        Power on Tukey's ladder. ``lambda_ > 0`` gives ``data ** lambda_``,
        ``lambda_ == 0`` gives ``log(data)`` and ``lambda_ < 0`` gives
        ``-(data ** lambda_)`` so that the ordering of the values is kept.

    Returns
    -------
    Transformed values with the same shape as ``data``.
    """
    if lambda_ > 0:
        return np.power(data, lambda_)
    if lambda_ == 0:
        return np.log(data)
    # Negative powers reverse the order of the data; negate to restore it.
    return -np.power(data, lambda_)

# Tukey transformation of column A only.
df_tukey = tukey_transformation(data=df['A'])








# 51. Normalization with Statistical Tests
# A custom implementation that typically requires statistical tests.
def statistical_test_normalization(data, threshold=3):
    """Drop observations whose absolute z-score reaches ``threshold``.

    Z-scores are computed along axis 0 (per column for 2-D input). For 1-D
    input the offending elements are removed. For 2-D input (ndarray or
    DataFrame) a row is kept only if *all* of its columns are below the
    threshold. Indexing with the raw 2-D boolean mask, as done previously,
    does not filter rows: an ndarray is flattened and a DataFrame is
    mis-indexed.

    Parameters
    ----------
    data : numpy.ndarray, pandas.Series or pandas.DataFrame
        Numeric observations; must support boolean-mask indexing.
    threshold : float, default 3
        Exclusive upper bound on ``|z|`` for an observation to be kept.

    Returns
    -------
    Same type as ``data`` with the outlying observations removed. A constant
    column has undefined (NaN) z-scores, which never pass the test.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        # Nothing to test; avoid z-scoring an empty array.
        return data
    z_scores = np.abs(stats.zscore(values, axis=0))
    within_limit = z_scores < threshold
    if within_limit.ndim > 1:
        # Collapse to one flag per row so indexing selects whole rows.
        within_limit = within_limit.reshape(len(within_limit), -1).all(axis=1)
    return data[within_limit]  # Removing outliers

df_statistical_test = statistical_test_normalization(df)



# 52. Box-Cox Transformation
from scipy.stats import boxcox
# scipy's boxcox only accepts 1-D input, so transform one column at a time.
# boxcox returns (transformed, fitted_lambda); only the transformed data is kept.
# Adding 1 keeps the input strictly positive (avoids log(0)).
df_boxcox_custom = pd.DataFrame(
    {column: boxcox(df[column] + 1)[0] for column in df.columns},
    index=df.index,
)

# 53. Quantile Binning
# Quartile bin number (0-3) of every value in column A.
df_binned = pd.qcut(df['A'], q=4, labels=False)

# 54. Categorical Encoding
df_encoded = pd.get_dummies(df)

# 55. Harmonic Mean Transformation
# Row-wise harmonic mean across the columns.
df_harmonic = 1 / (np.mean(1 / df, axis=1))



# 56. Adaptive Histogram Equalization
def adaptive_histogram_equalization(image):
    """Return ``image`` after ``skimage.exposure.equalize_adapthist`` with default settings."""
    equalized = exposure.equalize_adapthist(image)
    return equalized

# For this example, let's assume `data` is a 2D image array.
# df_image = adaptive_histogram_equalization(df.values.reshape((10, 10)))

# 57. Variance Stabilizing Transformation
def variance_stabilizing_transformation(data):
    """Return ``sqrt(data + 3/8)`` element-wise.

    NOTE(review): this resembles the Anscombe transform but without its
    conventional leading factor of 2 - confirm which form is wanted.
    """
    shifted = data + 0.375  # 3/8
    return np.sqrt(shifted)

# Square-root variance stabilisation of the whole sample frame.
df_variance_stabilizing = variance_stabilizing_transformation(data=df)

# 58. Gini Coefficient Normalization
def gini_coefficient(data):
    """Calculate the Gini coefficient of a 1-D collection of values.

    Uses the sorted-rank formula
    ``(2 * sum(i * x_(i)) / sum(x) - n - 1) / n`` with 1-based ranks ``i``.

    Parameters
    ----------
    data : array-like
        One-dimensional, non-negative values.

    Returns
    -------
    The Gini coefficient. Returns 0 for empty input and for input whose
    values sum to zero (previously the latter divided by zero).
    """
    values = np.sort(np.asarray(data, dtype=float))
    n = len(values)
    if n == 0:
        return 0
    total = np.sum(values)
    if total == 0:
        # No mass to distribute: treat as perfectly equal instead of 0 / 0.
        return 0
    index = np.arange(1, n + 1)
    return (2 * np.sum(index * values) / total - n - 1) / n

gini_score = gini_coefficient(data=df['A'].to_numpy())

# 59. Polynomial Feature Normalization
# Rebinds `poly` and `df_poly`, which were first assigned under technique 26.
poly = PolynomialFeatures(degree=2)
df_poly = poly.fit(df).transform(df)

# 60. Sensitivity Scaling
# Custom implementation can vary by application.
def sensitivity_scaling(data):
    """Scale ``data`` by the largest absolute value along axis 0.

    The maximum is computed on a plain NumPy view with an explicit axis.
    ``np.max`` applied to a DataFrame reduces per column or over the whole
    frame depending on the installed pandas version, which made the result
    ambiguous. With ``axis=0`` every column of 2-D input is divided by its
    own peak, and 1-D input by its single peak.

    Parameters
    ----------
    data : array-like, pandas.Series or pandas.DataFrame
        Numeric values to scale.

    Returns
    -------
    ``data`` divided by the per-column maximum absolute value, so results
    lie in [-1, 1]. A column of zeros divides by zero.
    """
    peak = np.abs(np.asarray(data)).max(axis=0)
    return data / peak

df_sensitivity_scaled = sensitivity_scaling(data=df)


# 61. Feature Engineering
# Usually case-specific and involves domain knowledge.
# Example: Creating interaction terms
# NOTE: this adds a fourth column to `df` in place, so every statement after
# this point operates on the widened frame.
df['A_B_interaction'] = df['A'].mul(df['B'])

# 62. Homogeneity Transformation
def homogeneity_transformation(data):
    """Standardise ``data`` to zero mean and unit variance along axis 0.

    Mean and population standard deviation (ddof=0) are computed on a plain
    NumPy view with an explicit axis. ``np.mean`` and ``np.std`` applied
    directly to a DataFrame reduce along different axes depending on the
    installed pandas version, so the previous expression could mix a global
    mean with per-column deviations. With ``axis=0`` each column of 2-D
    input is standardised on its own; 1-D input is standardised as a whole.

    Parameters
    ----------
    data : array-like, pandas.Series or pandas.DataFrame
        Numeric values to standardise.

    Returns
    -------
    ``(data - mean) / std``; the pandas type and labels of ``data`` are
    preserved. A constant column divides by zero.
    """
    values = np.asarray(data, dtype=float)
    center = values.mean(axis=0)
    spread = values.std(axis=0)
    return (data - center) / spread

df_homogeneous = homogeneity_transformation(data=df)

# 63. KNN-Based Normalization
from sklearn.impute import KNNImputer
knn_imputer = KNNImputer(n_neighbors=5)
# Imputation only alters missing cells.
df_knn_imputed = knn_imputer.fit(df).transform(df)



# 64. Geometric Mean Normalization
# Row-wise geometric mean: exp of the mean log; 1e-9 guards against log(0).
df_geometric_mean = np.exp(np.log(df + 1e-9).mean(axis=1))

# 65. Gradient Scaling
# Custom implementation generally required.
def gradient_scaling(data):
    """Divide ``data`` by its overall norm as returned by ``np.linalg.norm``.

    With default arguments that is the Euclidean norm for 1-D input and the
    Frobenius norm for 2-D input, i.e. a single scalar for the whole array.
    """
    overall_norm = np.linalg.norm(data)
    return data / overall_norm

df_gradient_scaled = gradient_scaling(df)

# 66. Sparse Feature Extraction
from sklearn.decomposition import SparsePCA
sparse_pca = SparsePCA(n_components=2)
df_sparse_pca = sparse_pca.fit_transform(df)


# 67. Cumulative Distribution Function Normalization
# Empirical CDF: rank of each value divided by the number of rows.
df_cdf = df.rank() / df.shape[0]

# 68. Frequency-Based Normalization
# Map every value to its relative frequency within its own column. Mapping
# keeps the original row index; re-indexing the counts by the data values
# would make the columns misalign and fill the result with NaN.
df_frequency = df.apply(lambda x: x.map(x.value_counts(normalize=True)).fillna(0))

# 69. Transformation via Learning Algorithms
# Custom implementation based on model learning.
from sklearn.ensemble import RandomForestRegressor
# Random targets stand in for a real response variable.
X_train, X_test, y_train, y_test = train_test_split(df, np.random.rand(100), test_size=0.2)
model = RandomForestRegressor()
model.fit(X_train, y_train)
df_transformed = model.predict(X_test)

# 70. Interaction Terms
from sklearn.preprocessing import PolynomialFeatures
interaction = PolynomialFeatures(interaction_only=True)
df_interaction = interaction.fit_transform(df)




# 71. Preprocessing via Neural Networks
def nn_preprocessing(X):
    """Fit a one-hidden-layer MLP regressor on ``X`` and return its predictions for ``X``.

    The regression targets are freshly drawn uniform random numbers, one per
    row of ``X``, so the output carries no supervised signal.
    """
    random_targets = np.random.rand(len(X))
    regressor = MLPRegressor(hidden_layer_sizes=(5,), max_iter=1000)
    return regressor.fit(X, random_targets).predict(X)

df_nn_preprocessed = nn_preprocessing(X=df.to_numpy())

# 72. Dynamic Normalization
# Requires real-time updating methods for normalization.
class DynamicNormalization(BaseEstimator, TransformerMixin):
    """Stateless standardiser that normalises each batch with its own statistics.

    Nothing is learned in ``fit``; the mean and standard deviation are
    recomputed from whatever data is handed to ``transform``.
    """

    def fit(self, X, y=None):
        """Do nothing and return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` centred and scaled by its own axis-0 mean and std."""
        column_means = X.mean(axis=0)
        column_stds = X.std(axis=0)
        return (X - column_means) / column_stds

# Standardise df using the statistics of df itself (see DynamicNormalization).
dynamic_normalizer = DynamicNormalization()
df_dynamic_normalized = dynamic_normalizer.fit_transform(df)

# 73. Local Outlier Factor
# Only the per-row labels from fit_predict are stored; no rows are removed from df.
from sklearn.neighbors import LocalOutlierFactor
lof = LocalOutlierFactor()
df_lof = lof.fit_predict(df)

# 74. Outlier Removal via Clustering
# Only the per-row cluster labels are stored; no rows are removed from df.
# NOTE(review): DBSCAN runs with default parameters on unscaled data - confirm they suit this value range.
from sklearn.cluster import DBSCAN
dbscan = DBSCAN()
df_dbscan = dbscan.fit_predict(df)

# 75. Variational Autoencoder
# Requires building a VAE using a library like TensorFlow.

# 76. Expectation-Maximization Normalization
# Typically requires a custom implementation.

# 77. Monotonic Transformation
# Custom implementations usually needed.

# 78. Probabilistic Normalization
# Generally case-specific and requires careful consideration.

# 79. Temporal Normalization
# Custom implementation is generally required for time series.

# 80. Neighborhood Components Analysis (NCA)
from sklearn.neighbors import NeighborhoodComponentsAnalysis
nca = NeighborhoodComponentsAnalysis()
# NCA is supervised: fitting without labels raises a TypeError. Random binary
# labels stand in for a real target, as in the other supervised examples.
df_nca = nca.fit_transform(df, np.random.randint(0, 2, size=len(df)))

# 81. Supervised Learning-Based Normalization
# This usually requires specific data and context.

# 82. Coefficient of Variation Scaling
# One value per column: standard deviation divided by mean.
df_cv = df.std() / df.mean()

# 83. Feature Scaling with Elastic Net
from sklearn.linear_model import ElasticNet
en = ElasticNet()
# ElasticNet is a regressor with no fit_transform; fit it first, then weight
# every feature column by its learned coefficient.
en.fit(df, np.random.randint(0, 2, size=len(df)))
df_en = df * en.coef_

# 84. Multivariate Normalization
# Generally involves multivariate analysis techniques.

# 85. Causal Inference Normalization
# Typically involves domain-specific knowledge and methodology.

# 86. Causal Effect Transformation
# Custom implementations are generally required.

# 87. Counterfactual Normalization
# Usually case-specific and requires specialized methodologies.

# 88. Gumbel Transformation
# Requires specialized libraries for specific cases.

# 89. Bootstrap Sampling Normalization
# This usually requires resampling techniques.

# 90. Feature Selection and Normalization
# Can use Recursive Feature Elimination as an example.
# Keep the 2 features ranked best by recursive elimination with a logistic
# regression; random binary labels stand in for a real target.
rfe = RFE(estimator=LogisticRegression(), n_features_to_select=2)
df_selected = rfe.fit(df, np.random.randint(0, 2, size=100)).transform(df)

# 91. Multi-dimensional Scaling (MDS)
from sklearn.manifold import MDS
mds = MDS(n_components=2)
# 2-D embedding of the rows of df.
df_mds = mds.fit_transform(df)

# 92. t-SNE with Kernel Density Estimation
# Typically involves custom implementations.

# 93. Temporal Difference Scaling
# Requires time series analysis techniques.

# 94. Kernel Normalization
# Usually involves custom implementations.

# 95. Logistic Regression Coefficient Normalization
# Requires custom implementations based on the regression model.

# 96. Boosted Trees Normalization
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier()
# Classifiers have no fit_transform. Fit on random binary labels, then use the
# leaf index each sample reaches in every tree as the transformed features.
gbc.fit(df, np.random.randint(0, 2, size=len(df)))
# apply() returns (n_samples, n_estimators, n_classes); binary problems have a
# single class axis, which is dropped here.
df_gbc = gbc.apply(df)[:, :, 0]

# 97. Ensemble Learning Normalization
# Combining multiple models can be done as follows:
# Reuses the train/test split created for technique 69.
ensemble_model = GradientBoostingRegressor()
ensemble_model.fit(X_train, y_train)
ensemble_predictions = ensemble_model.predict(X_test)

# 98. Cross-Validation Normalization
from sklearn.model_selection import cross_val_score
# 5-fold scores of the technique-69 random forest against fresh random targets.
cv_scores = cross_val_score(model, df, np.random.rand(100), cv=5)

# 99. Recursive Feature Elimination (RFE)
# Typically involves custom implementations.

# 100. Generalization Performance Normalization
# Usually requires performance metrics evaluation.
