# Normalization

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Sample prices that every scaling technique in this notebook is applied to
data = {'Price': [110, 105, 115, 120, 110, 130, 150, 100, 105]}
df = pd.DataFrame(data)

# Fit a min-max scaler on the prices, then rescale them with the fitted scaler
min_max_scaler = MinMaxScaler()
normalized_data = min_max_scaler.fit(df).transform(df)

# The scaler returns a bare array; wrap it in a labelled DataFrame so the
# printed result carries the column name
normalized_df = pd.DataFrame(data=normalized_data, columns=['Price'])

# Show the normalized prices
print(normalized_df)

   Price
0    0.2
1    0.1
2    0.3
3    0.4
4    0.2
5    0.6
6    1.0
7    0.0
8    0.1


# Standardization

In [2]:
from sklearn.preprocessing import StandardScaler

# Fit a StandardScaler on the prices, then rescale them with the fitted scaler
standard_scaler = StandardScaler()
standardized_data = standard_scaler.fit(df).transform(df)

# The scaler returns a bare array; wrap it in a labelled DataFrame so the
# printed result carries the column name
standardized_df = pd.DataFrame(data=standardized_data, columns=['Price'])

# Show the standardized prices
print(standardized_df)

      Price
0 -0.416356
1 -0.757011
2 -0.075701
3  0.264954
4 -0.416356
5  0.946264
6  2.308884
7 -1.097666
8 -0.757011


# Log Transformation

In [3]:
import numpy as np

# Take the natural (base-e) logarithm of every price and store the result
# as a one-column DataFrame named 'log_price'
log_transformed_data = np.log(df['Price']).to_frame(name='log_price')

log_transformed_data

Unnamed: 0,log_price
0,4.70048
1,4.65396
2,4.744932
3,4.787492
4,4.70048
5,4.867534
6,5.010635
7,4.60517
8,4.65396


# Max Absolute Scaler

In [4]:
from sklearn.preprocessing import MaxAbsScaler

# Fit a MaxAbsScaler on the prices, then rescale them with the fitted scaler
scaler = MaxAbsScaler()
max_abs_scaled_data = scaler.fit(df).transform(df)

# The scaler returns a bare array; wrap it in a labelled DataFrame so the
# printed result carries the column name
max_abs_scaled_df = pd.DataFrame(data=max_abs_scaled_data, columns=['Price'])

# Show the max-abs scaled prices
print(max_abs_scaled_df)

      Price
0  0.733333
1  0.700000
2  0.766667
3  0.800000
4  0.733333
5  0.866667
6  1.000000
7  0.666667
8  0.700000


# Robust Scaler

In [5]:
from sklearn.preprocessing import RobustScaler

# Fit a RobustScaler on the prices, then rescale them with the fitted scaler
RoSc = RobustScaler()
robust_scaled_data = RoSc.fit(df).transform(df)

# The scaler returns a bare array; wrap it in a labelled DataFrame so the
# printed result carries the column name
robust_scaled_df = pd.DataFrame(data=robust_scaled_data, columns=['Price'])

# Show the robust-scaled prices
print(robust_scaled_df)

      Price
0  0.000000
1 -0.333333
2  0.333333
3  0.666667
4  0.000000
5  1.333333
6  2.666667
7 -0.666667
8 -0.333333


# Making an Excel File

In [6]:
import openpyxl
# Gather the original prices and every transformed variant side by side,
# one column per technique. The transformed columns are passed as plain
# 1-D arrays, so they are matched to the original prices by position.
transformed_df = pd.DataFrame({
    'Original Price': df['Price'],
    'Normalized Price': normalized_df['Price'].values,
    'Standardized Price': standardized_data.flatten(),
    'Log Transformed Price': log_transformed_data['log_price'].values,
    'Robust Scaled Price': robust_scaled_data.flatten(),
    'Max Abs Scaled Price': max_abs_scaled_data.flatten()
})

# Save the results to an Excel file; index=False leaves the row index out
# of the sheet.
result_file = 'transformed_data.xlsx'
transformed_df.to_excel(result_file, index=False)

# Build the message from result_file so the reported name always matches
# the file that was actually written.
print(f"Excel file '{result_file}' generated successfully.")

Excel file 'transformed_data.xlsx' generated successfully.
