Demo of Normalization: Min-Max and Z-Score Normalization

1. Apply Min-Max Normalization to the 'Age' column of the Covid dataset,
so that the ages are rescaled to the interval [0, 1].

In [5]:
import pandas as pd

In [6]:
# Load the Covid dataset from a CSV file (path is relative to the working directory).
# NOTE(review): the preview output shows an 'Unnamed: 0' column, which suggests the
# CSV was written with its index; consider index_col=0 if that column is not needed.
df = pd.read_csv("Covid_data.csv")

In [7]:
# Preview the first rows to check the available columns and the raw 'Age' values.
df.head()

Unnamed: 0,Gender,Age,Co_Morbid,Admit_date,Discharge_date,Remdesivir_Given,DaysOfStay,DischargeType,Covid_Severity,Covid_SeverityDescription,DischargeTypeCategorical
0,M,46,1,01-09-2021,15-09-2021,0,14,1,3,Severe,Recovered
1,M,51,1,03-09-2021,15-09-2021,1,12,2,3,Severe,Expired
2,F,82,1,06-09-2021,15-09-2021,1,9,2,3,Severe,Expired
3,M,51,1,08-09-2021,15-09-2021,0,7,1,2,Moderate,Recovered
4,M,66,1,09-09-2021,15-09-2021,0,6,1,1,Mild,Recovered


In [8]:
# Work on a copy so that the original `df` keeps the raw 'Age' values,
# which are needed again later for the z-score normalization.
df_min_max_scaled = df.copy()

In [9]:
# Apply min-max normalization to the 'Age' column:
#   x_scaled = (x - min(x)) / (max(x) - min(x))
# The smallest age maps to 0 and the largest to 1.
age_col = df_min_max_scaled['Age']
# Compute the bounds once instead of re-evaluating min() inside the expression.
age_min, age_max = age_col.min(), age_col.max()
# NOTE(review): if every age were identical (age_max == age_min) the denominator
# would be zero and the result would not be meaningful — confirm the data varies.
df_min_max_scaled['Age'] = (age_col - age_min) / (age_max - age_min)

In [10]:
# view normalized data
# Series.tolist() gives the scaled ages as a plain Python list,
# replacing the manual element-by-element append loop.
lst = df_min_max_scaled['Age'].tolist()

In [11]:
# Render every value as a string with two decimal places for compact display.
formatted_lst = ['%.2f' % value for value in lst]

In [12]:
# Print the min-max scaled ages (two-decimal strings); every value should lie in [0, 1].
print(formatted_lst)

['0.50', '0.56', '0.90', '0.56', '0.72', '0.42', '0.74', '0.72', '0.58', '0.39', '0.58', '0.78', '0.51', '0.49', '0.66', '0.40', '0.74', '0.66', '0.06', '0.53', '0.73', '0.80', '0.73', '0.69', '0.80', '0.76', '0.28', '0.47', '0.90', '0.78', '0.56', '0.60', '0.09', '0.78', '1.00', '0.37', '0.50', '0.24', '0.26', '0.33', '0.89', '0.58', '0.73', '0.58', '0.33', '0.49', '0.96', '0.83', '0.98', '0.47', '0.56', '0.47', '0.59', '0.91', '0.79', '0.61', '0.78', '0.73', '0.56', '0.77', '0.74', '0.61', '0.34', '0.94', '0.33', '0.67', '0.42', '0.82', '0.52', '0.66', '0.87', '0.78', '0.62', '0.88', '0.63', '0.61', '0.68', '0.67', '0.40', '0.13', '0.69', '0.04', '0.09', '0.10', '0.78', '0.30', '0.36', '0.29', '0.73', '0.80', '0.76', '0.30', '0.31', '0.28', '0.57', '0.67', '0.71', '0.87', '0.29', '0.67', '0.66', '0.68', '0.39', '0.02', '0.66', '0.61', '0.66', '0.38', '0.57', '0.57', '0.60', '0.76', '0.78', '0.26', '0.71', '0.71', '0.02', '0.59', '0.52', '0.74', '0.51', '0.51', '0.92', '0.30', '0.67',

In [13]:
# 2. Z-Score Normalization
from scipy.stats import zscore

In [14]:
# Calculate the z-scores of 'Age' and store them in a new 'Age_zscore' column.
# Z-scores are centred on 0 with unit standard deviation; they are NOT confined to
# [-1, +1] (the printed output further down contains values such as -2.58 and 2.08).
# NOTE(review): scipy.stats.zscore is documented to default to ddof=0 (population
# standard deviation), unlike pandas' Series.std (ddof=1) — confirm this is intended.
df['Age_zscore'] = zscore(df['Age'])

In [15]:
# view normalized data
# Series.tolist() gives the z-scores as a plain Python list,
# replacing the manual element-by-element append loop.
lst = df['Age_zscore'].tolist()

In [16]:
# Render every z-score as a string with two decimal places for compact display.
formatted_lst = ['%.2f' % value for value in lst]

In [17]:
# Print the z-scored ages (two-decimal strings).
print(formatted_lst)

['-0.30', '-0.04', '1.61', '-0.04', '0.76', '-0.67', '0.86', '0.76', '0.07', '-0.83', '0.07', '1.02', '-0.25', '-0.35', '0.44', '-0.78', '0.86', '0.44', '-2.42', '-0.14', '0.81', '1.13', '0.81', '0.60', '1.13', '0.92', '-1.36', '-0.46', '1.61', '1.02', '-0.04', '0.18', '-2.26', '1.02', '2.08', '-0.94', '-0.30', '-1.52', '-1.46', '-1.09', '1.55', '0.07', '0.81', '0.07', '-1.09', '-0.35', '1.87', '1.29', '1.98', '-0.46', '-0.04', '-0.46', '0.12', '1.66', '1.08', '0.23', '1.02', '0.81', '-0.04', '0.97', '0.86', '0.23', '-1.04', '1.82', '-1.09', '0.49', '-0.67', '1.24', '-0.19', '0.44', '1.45', '1.02', '0.28', '1.50', '0.34', '0.23', '0.55', '0.49', '-0.78', '-2.05', '0.60', '-2.47', '-2.26', '-2.21', '1.02', '-1.25', '-0.99', '-1.31', '0.81', '1.13', '0.92', '-1.25', '-1.20', '-1.36', '0.02', '0.49', '0.71', '1.45', '-1.31', '0.49', '0.44', '0.55', '-0.83', '-2.58', '0.44', '0.23', '0.44', '-0.88', '0.02', '0.02', '0.18', '0.92', '1.02', '-1.46', '0.71', '0.71', '-2.58', '0.12', '-0.19', 