### Data Cleaning steps (Structured / Tabular Data)

---



##### 1. Put NA where there are missing values
##### 2. Remove unnecessary columns (as per the description)
##### 3. Format or Binarize the target or unknown variable
##### 4. Check datatypes, handle columns with Object / string type (either convert to number / remove them)
##### 5. Remove columns which have all missing / NA values
##### 6. Remove rows with missing / NA values greater than 50%
##### 7. Fill missing / NA values using central tendencies
##### 8. Remove those columns which have same value in all rows
##### 9. Remove columns which have different values in all rows
##### 10. Remove repeated rows (In case of large datasets check for repeated columns)
##### 11. Normalize the data 

In [None]:
"""This is a Data Cleaning Execise"""
import pandas as pd

# Codes the raw file uses for "missing"; they are converted to NaN while reading.
# 81 is deliberately NOT listed here because it is a valid value for V33.
NA_MARKERS = ["-9", -9, -18]

# Load the CSV table into a DataFrame
df = pd.read_csv("76_attributes_heartdiseases.csv", na_values=NA_MARKERS)

# Show the column names that were read
print(df.columns)

Index(['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',
       'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',
       'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V31',
       'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V40', 'V41',
       'V42', 'V43', 'V44', 'V45', 'V46', 'V47', 'V48', 'V49', 'V50', 'V51',
       'V52', 'V53', 'V54', 'V55', 'V56', 'V57', 'V58', 'V59', 'V60', 'V61',
       'V62', 'V63', 'V64', 'V65', 'V66', 'V67', 'V68', 'V69', 'V70', 'V71',
       'V72', 'V73', 'V74', 'V75', 'V76'],
      dtype='object')


In [None]:
# Preview the first rows of the freshly loaded table
df.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54,V55,V56,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70,V71,V72,V73,V74,V75,V76
0,1,0,63,1,,,,,1,145,1,233.0,,50.0,20.0,1.0,,1,2,2,3,81,0.0,0.0,0.0,0.0,0.0,1.0,10.5,6.0,13.0,150,60,190,90,145,85,0,0.0,2.3,3.0,,172,0.0,,,,,,,6.0,,,,2,16,81,0,1,1,1.0,,1,,1.0,,1,1,1,1,1,1,1.0,,,name
1,2,0,67,1,,,,,4,160,1,286.0,,40.0,40.0,0.0,,1,2,3,5,81,0.0,1.0,0.0,0.0,0.0,1.0,9.5,6.0,13.0,108,64,160,90,160,90,1,0.0,1.5,2.0,,185,3.0,,,,,,,3.0,,,,2,5,81,2,1,2,2.0,,2,,1.0,,1,1,1,1,1,1,1.0,,,name
2,3,0,67,1,,,,,4,120,1,229.0,,20.0,35.0,0.0,,1,2,2,19,81,0.0,1.0,0.0,0.0,0.0,1.0,8.5,6.0,10.0,129,78,140,80,120,80,1,0.0,2.6,2.0,,150,2.0,,,,,,,7.0,,,,2,20,81,1,1,1,1.0,,1,,1.0,,2,2,1,1,1,7,3.0,,,name
3,4,0,37,1,,,,,3,130,0,250.0,,0.0,0.0,0.0,,1,0,2,13,81,0.0,1.0,0.0,0.0,0.0,1.0,13.0,13.0,17.0,187,84,195,68,130,78,0,0.0,3.5,3.0,,167,0.0,,,,,,,3.0,,,,2,4,81,0,1,1,1.0,,1,,1.0,,1,1,1,1,1,1,1.0,,,name
4,6,0,41,0,,,,,2,130,1,204.0,,0.0,0.0,0.0,,1,2,2,7,81,0.0,0.0,0.0,0.0,0.0,1.0,7.0,,9.0,172,71,160,74,130,86,0,0.0,1.4,1.0,,40,0.0,,,,,,,3.0,,,,2,18,81,0,1,1,1.0,,1,,1.0,,1,1,1,1,1,1,1.0,,,name


In [None]:
# Per the data description, V58 is the decision variable; the columns that
# follow it are either part of the decision or unused, so only the first
# 58 columns are kept.
n_keep = 58
df = df.iloc[:, :n_keep]
df.columns

Index(['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',
       'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',
       'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V31',
       'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V40', 'V41',
       'V42', 'V43', 'V44', 'V45', 'V46', 'V47', 'V48', 'V49', 'V50', 'V51',
       'V52', 'V53', 'V54', 'V55', 'V56', 'V57', 'V58'],
      dtype='object')

In [None]:
# Discard four more columns by name; rebinding df avoids in-place mutation
df = df.drop(columns=['V2', 'V36', 'V45', 'V46'])

In [None]:
# List the distinct values present in the decision column V58
df['V58'].unique()

array([0, 2, 1, 3, 4])

In [None]:
# Collapse the decision variable into two classes for binary classification:
# any value above 0 becomes 1 (heart disease); 0 stays 0 (no heart disease).
has_disease = df['V58'].gt(0)
df.loc[has_disease, 'V58'] = 1

In [None]:
# Re-check the distinct values of the decision column V58
df['V58'].unique()

array([0, 1])

In [None]:
# Show the first few records for a visual check
df.head()  # NaN (Not a Number) marks values that are missing in the original data

Unnamed: 0,V1,V3,V4,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V37,V38,V39,V40,V41,V42,V43,V44,V50,V51,V54,V55,V56,V57,V58,V59,V60,V61,V62,V63,V64,V65,V66,V67,V68,V69,V70
0,1,63,1,,1,145,1,233.0,,50.0,20.0,1.0,,1,2,2,3,81,0.0,0.0,0.0,0.0,0.0,1.0,10.5,6.0,13.0,150,60,190,90,85,0,0.0,2.3,3.0,,172,0.0,,6.0,,2,16,81,0,1,1,1.0,,1,,1.0,,1,1,1,1
1,2,67,1,,4,160,1,286.0,,40.0,40.0,0.0,,1,2,3,5,81,0.0,1.0,0.0,0.0,0.0,1.0,9.5,6.0,13.0,108,64,160,90,90,1,0.0,1.5,2.0,,185,3.0,,3.0,,2,5,81,1,1,2,2.0,,2,,1.0,,1,1,1,1
2,3,67,1,,4,120,1,229.0,,20.0,35.0,0.0,,1,2,2,19,81,0.0,1.0,0.0,0.0,0.0,1.0,8.5,6.0,10.0,129,78,140,80,80,1,0.0,2.6,2.0,,150,2.0,,7.0,,2,20,81,1,1,1,1.0,,1,,1.0,,2,2,1,1
3,4,37,1,,3,130,0,250.0,,0.0,0.0,0.0,,1,0,2,13,81,0.0,1.0,0.0,0.0,0.0,1.0,13.0,13.0,17.0,187,84,195,68,78,0,0.0,3.5,3.0,,167,0.0,,3.0,,2,4,81,0,1,1,1.0,,1,,1.0,,1,1,1,1
4,6,41,0,,2,130,1,204.0,,0.0,0.0,0.0,,1,2,2,7,81,0.0,0.0,0.0,0.0,0.0,1.0,7.0,,9.0,172,71,160,74,86,0,0.0,1.4,1.0,,40,0.0,,3.0,,2,18,81,0,1,1,1.0,,1,,1.0,,1,1,1,1


In [None]:
# Inspect the data type pandas inferred for every column
df.dtypes

V1       int64
V3       int64
V4       int64
V5     float64
V6     float64
V7     float64
V8     float64
V9       int64
V10      int64
V11      int64
V12    float64
V13    float64
V14    float64
V15    float64
V16    float64
V17    float64
V18      int64
V19      int64
V20      int64
V21      int64
V22      int64
V23    float64
V24    float64
V25    float64
V26    float64
V27    float64
V28    float64
V29    float64
V30    float64
V31    float64
V32      int64
V33      int64
V34      int64
V35      int64
V37      int64
V38      int64
V39    float64
V40    float64
V41    float64
V42    float64
V43      int64
V44    float64
V47    float64
V48    float64
V49    float64
V50    float64
V51    float64
V52    float64
V53    float64
V54    float64
V55      int64
V56      int64
V57      int64
V58      int64
dtype: object

In [None]:
# Keep every column except those with the generic `object` dtype (strings etc.)
df = df.select_dtypes(exclude="object")

In [None]:
# Identify the columns in which every single value is NaN
all_null_cols = df.columns[df.isnull().all()]
print("Columns with all null values are")
print(all_null_cols)
# Spot-check one column (V5) through its summary statistics
print(df['V5'].describe())
# Remove the entirely empty columns; rebinding df avoids in-place mutation
df = df.drop(columns=all_null_cols)

Columns with all null values are
Index(['V5', 'V6', 'V7', 'V47', 'V48', 'V49', 'V52', 'V53'], dtype='object')
count    0.0
mean     NaN
std      NaN
min      NaN
25%      NaN
50%      NaN
75%      NaN
max      NaN
Name: V5, dtype: float64


In [None]:
# (rows, columns) of the table at this point
df.shape

(282, 46)

In [None]:
# Column names that remain in the table
df.columns

Index(['V1', 'V3', 'V4', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15',
       'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25',
       'V26', 'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35',
       'V37', 'V38', 'V39', 'V40', 'V41', 'V42', 'V43', 'V44', 'V50', 'V51',
       'V54', 'V55', 'V56', 'V57', 'V58'],
      dtype='object')

In [None]:
# Remove rows in which more than 50% of the values are missing.
print(df.isnull())
# Number of missing values in each row (computed once and reused below)
nulls_per_row = df.isnull().sum(axis=1)
print(nulls_per_row)
# A row is kept when at most half of its columns are missing
max_nulls = df.shape[1] / 2
# The former bare `df[... > ...]` expression was discarded (it was not the last
# statement of the cell), so it has been removed. `.copy()` makes the filtered
# table an independent frame, so later assignments to it do not raise
# SettingWithCopyWarning.
df = df[nulls_per_row <= max_nulls].copy()

        V1     V2     V3     V4    V8  ...   V54    V55    V56    V57    V58
0    False  False  False  False  True  ...  True  False  False  False  False
1    False  False  False  False  True  ...  True  False  False  False  False
2    False  False  False  False  True  ...  True  False  False  False  False
3    False  False  False  False  True  ...  True  False  False  False  False
4    False  False  False  False  True  ...  True  False  False  False  False
..     ...    ...    ...    ...   ...  ...   ...    ...    ...    ...    ...
277  False  False  False  False  True  ...  True  False  False  False  False
278  False  False  False  False  True  ...  True  False  False  False  False
279  False  False  False  False  True  ...  True  False  False  False  False
280  False  False  False  False  True  ...  True  False  False  False  False
281  False  False  False  False  True  ...  True  False  False  False  False

[282 rows x 48 columns]
0      6
1      6
2      6
3      6
4      7
      

In [None]:
# (rows, columns) of the table at this point
df.shape

(282, 48)

In [None]:
# Show the columns that contain at least one NaN value
df.columns[df.isna().any()]
# Alternative view, one boolean per column: df.isna().any()

Index(['V1', 'V8', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V21', 'V22',
       'V23', 'V24', 'V25', 'V26', 'V27', 'V30', 'V33', 'V41', 'V42', 'V44',
       'V50', 'V51', 'V54', 'V56', 'V57'],
      dtype='object')

In [None]:
# Fill missing values with each column's mode (its most frequent value).
# The previous form, `df[col].fillna(..., inplace=True)`, mutates a temporary
# Series: recent pandas versions warn about it, and under Copy-on-Write it
# leaves df unchanged. Rebinding df to the filled frame works everywhere.
column_modes = df.mode()  # row 0 holds the first mode of every column (NaNs ignored)
if not column_modes.empty:
    # fillna with a Series fills each column with the value stored under its label
    df = df.fillna(column_modes.iloc[0])
# Show the columns that still contain NaN values (an empty Index means none are left)
df.columns[df.isnull().any()]

Index([], dtype='object')

In [None]:
# Compare every row with the first row: a column shows True when at least one
# row differs from the first row, and False when all of its values are the same
(df != df.iloc[0]).any()
# Equivalent check with inverted meaning: (df == df.iloc[0]).all()

V1     True
V3     True
V4     True
V8     True
V9     True
V10    True
V11    True
V12    True
V13    True
V14    True
V15    True
V16    True
V18    True
V19    True
V20    True
V21    True
V22    True
V23    True
V24    True
V25    True
V26    True
V27    True
V28    True
V29    True
V30    True
V31    True
V32    True
V33    True
V34    True
V35    True
V36    True
V37    True
V38    True
V39    True
V40    True
V41    True
V42    True
V43    True
V44    True
V50    True
V51    True
V54    True
V55    True
V56    True
V57    True
V58    True
dtype: bool

In [None]:
# Drop the columns whose value never changes from one row to the next
first_row = df.iloc[0]
varies = df.ne(first_row).any()  # True for columns with at least one differing row
df = df.loc[:, varies]
df.shape

(282, 46)

In [None]:
#Remove the columns where all values are different
#Example columns V1
# for col in df.columns:
#   if len(df[col].unique()) == df.shape[0]:
#     df.drop(col,inplace=True,axis=1)
######
# Problem: a continuous variable (whose values can all be different) may get removed too, so this step is left disabled.
######

In [None]:
# Statistical description (count, mean, std, min, quartiles, max) of each column
df.describe(include= 'all')

Unnamed: 0,V1,V2,V3,V4,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V50,V51,V54,V55,V56,V57,V58
count,282.0,282.0,282.0,282.0,3.0,282.0,282.0,282.0,279.0,3.0,277.0,277.0,279.0,26.0,282.0,282.0,282.0,282.0,282.0,280.0,280.0,280.0,280.0,280.0,282.0,282.0,213.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,278.0,4.0,282.0,276.0,4.0,276.0,4.0,282.0,282.0,282.0,282.0
mean,151.51773,0.0,54.411348,0.677305,3.333333,4.432624,130.269504,2.822695,249.530466,16.666667,16.956679,15.043321,0.150538,1.0,0.609929,1.070922,6.5,15.939716,81.432624,0.032143,0.335714,0.246429,0.1,0.125,1.080496,8.405319,4.910798,11.303191,148.957447,75.950355,167.347518,79.085106,131.173759,84.035461,0.322695,0.036525,1.036525,1.582734,162.75,121.29078,0.65942,6.0,4.65942,6.5,6.570922,16.241135,81.152482,0.453901
std,87.131234,0.0,9.053083,0.468338,0.57735,12.365608,22.218225,21.932859,51.069479,15.275252,19.485546,15.34642,0.35824,0.0,0.488633,1.16378,3.927501,11.021812,8.513172,0.176695,0.473085,0.431703,0.300537,0.331311,0.778864,2.595755,3.491935,15.288021,23.998479,16.411447,24.713875,14.047702,18.321806,12.848369,0.468338,0.201717,1.141351,0.611553,40.623269,48.558442,0.93027,2.0,1.935304,4.654747,4.124867,11.754246,9.768211,0.498755
min,1.0,0.0,29.0,0.0,3.0,1.0,1.0,0.0,126.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.8,0.0,3.0,71.0,40.0,84.0,26.0,78.0,0.0,0.0,0.0,0.0,1.0,105.0,0.0,0.0,3.0,3.0,2.0,1.0,1.0,0.0,0.0
25%,75.25,0.0,48.0,0.0,3.0,3.0,120.0,0.0,213.0,10.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,8.0,82.0,0.0,0.0,0.0,0.0,0.0,1.0,6.5,3.0,7.0,132.0,65.0,152.0,70.0,120.0,80.0,0.0,0.0,0.0,1.0,153.75,90.25,0.0,6.0,3.0,2.75,3.0,8.0,82.0,0.0
50%,151.5,0.0,55.0,1.0,3.0,3.0,130.0,1.0,244.0,20.0,10.0,15.0,0.0,1.0,1.0,2.0,7.0,15.0,82.0,0.0,0.0,0.0,0.0,0.0,1.0,8.5,5.5,9.5,153.0,74.0,168.0,80.0,130.0,85.0,0.0,0.0,0.8,2.0,173.0,117.5,0.0,7.0,3.0,6.5,7.0,15.0,82.0,0.0
75%,227.75,0.0,61.0,1.0,3.5,4.0,140.0,1.0,277.5,25.0,30.0,30.0,0.0,1.0,1.0,2.0,10.0,22.0,83.0,0.0,1.0,0.0,0.0,0.0,1.0,10.075,7.5,12.0,165.0,85.0,183.5,85.0,140.0,90.0,1.0,0.0,1.6,2.0,182.0,150.0,1.0,7.0,7.0,10.25,10.0,23.0,83.0,1.0
max,298.0,0.0,77.0,1.0,4.0,130.0,200.0,253.0,564.0,30.0,99.0,54.0,1.0,1.0,1.0,11.0,23.0,82.0,84.0,1.0,1.0,1.0,1.0,1.0,9.0,15.0,15.0,175.0,202.0,190.0,232.0,130.0,200.0,110.0,1.0,1.8,6.2,3.0,200.0,270.0,3.0,7.0,7.0,11.0,29.0,82.0,84.0,1.0


In [None]:
# Min-max normalization of every column
from sklearn import preprocessing

column_names = df.columns
x = df.to_numpy()  # the table's values as a NumPy array
min_max_scaler = preprocessing.MinMaxScaler()
# Learn each column's min and max, then rescale the same data with them
x_scaled = min_max_scaler.fit(x).transform(x)
df = pd.DataFrame(data=x_scaled, columns=column_names)

In [None]:
# Summary statistics of every column, to inspect the effect of the scaling
df.describe(include= 'all')

Unnamed: 0,V1,V2,V3,V4,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V50,V51,V54,V55,V56,V57,V58
count,282.0,282.0,282.0,282.0,3.0,282.0,282.0,282.0,279.0,3.0,277.0,277.0,279.0,26.0,282.0,282.0,282.0,282.0,282.0,280.0,280.0,280.0,280.0,280.0,282.0,282.0,213.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,282.0,278.0,4.0,282.0,276.0,4.0,276.0,4.0,282.0,282.0,282.0,282.0
mean,-1.574784e-17,0.0,-1.20471e-16,7.795183e-17,-2.960595e-16,-1.392208e-16,2.008281e-16,1.506872e-16,1.668319e-16,-7.401487e-17,-1.306616e-16,7.97597e-17,5.1332890000000004e-17,0.0,1.283449e-16,2.645638e-16,-4.291288e-17,-9.094380000000001e-17,7.468415e-16,1.070572e-16,1.197455e-16,-2.561443e-16,7.077672e-16,1.106258e-16,1.8798990000000002e-17,-6.476301e-17,-1.188408e-16,5.2484610000000006e-17,-1.417306e-17,-8.267618e-18,-3.900544e-16,8.868005000000001e-17,-1.887773e-16,1.661398e-16,-3.936961e-17,6.358192e-17,3.03146e-16,2.619807e-16,5.5511150000000004e-17,8.503836000000001e-17,-1.826236e-16,5.5511150000000004e-17,-2.196311e-16,2.775558e-17,2.362177e-18,1.653524e-17,-2.877919e-16,2.1259590000000002e-17
std,1.001778,0.0,1.001778,1.001778,1.224745,1.001778,1.001778,1.001778,1.001797,1.224745,1.00181,1.00181,1.001797,0.0,1.001778,1.001778,1.001778,1.001778,1.001778,1.001791,1.001791,1.001791,1.001791,1.001791,1.001778,1.001778,1.002356,1.001778,1.001778,1.001778,1.001778,1.001778,1.001778,1.001778,1.001778,1.001778,1.001778,1.001803,1.154701,1.001778,1.001817,1.154701,1.001817,1.154701,1.001778,1.001778,1.001778,1.001778
min,-1.730554,0.0,-2.811918,-1.448758,-0.7071068,-0.278088,-5.828518,-0.1289259,-2.423217,-1.336306,-0.8717933,-0.9820238,-0.4209693,0.0,-1.250454,-0.9218463,-1.402871,-1.357878,-9.582491,-0.182237,-0.7108983,-0.5718516,-0.3333333,-0.3779645,-0.1035349,-2.549186,-1.409639,-0.544083,-3.254208,-2.19446,-3.378495,-3.785636,-2.907371,-6.552182,-0.6902462,-0.1813917,-0.9097704,-0.9545942,-1.641521,-2.502272,-0.7101361,-1.732051,-0.8590044,-1.116313,-1.352971,-1.298955,-8.322584,-0.9116846
25%,-0.8768763,0.0,-0.709454,-1.448758,-0.7071068,-0.1160615,-0.4630325,-0.1289259,-0.7165945,-0.5345225,-0.8717933,-0.9820238,-0.4209693,0.0,-1.250454,-0.9218463,-0.8927361,-0.7216446,0.06676531,-0.182237,-0.7108983,-0.5718516,-0.3333333,-0.3779645,-0.1035349,-0.7353184,-0.5484923,-0.2819751,-0.7078612,-0.668425,-0.6221122,-0.6478823,-0.6109454,-0.3146419,-0.6902462,-0.1813917,-0.9097704,-0.9545942,-0.2558215,-0.6403822,-0.7101361,5.5511150000000004e-17,-0.8590044,-0.9302605,-0.8672451,-0.7023663,0.08691708,-0.9116846
50%,-0.0002038536,0.0,0.06513792,0.6902462,-0.7071068,-0.1160615,-0.01215141,-0.08325113,-0.1084876,0.2672612,-0.3576636,-0.002828002,-0.4209693,0.0,0.7997092,0.7997475,0.1275337,-0.08541127,0.06676531,-0.182237,-0.7108983,-0.5718516,-0.3333333,-0.3779645,-0.1035349,0.03654011,0.1691297,-0.1181577,0.1687499,-0.1190524,0.02644839,0.06524342,-0.06417738,0.07520439,-0.6902462,-0.1813917,-0.2076007,0.6835366,0.2913522,-0.07820513,-0.7101361,0.5773503,-0.8590044,0.0,0.1042072,-0.1057781,0.08691708,-0.9116846
75%,0.8764686,0.0,0.7290738,0.6902462,0.3535534,-0.03504828,0.4387297,-0.08325113,0.5486603,0.6681531,0.670596,0.9763677,-0.4209693,0.0,0.7997092,0.7997475,0.8927361,0.5508221,0.1844392,-0.182237,1.406671,-0.5718516,-0.3333333,-0.3779645,-0.1035349,0.6443787,0.7432273,0.04565975,0.6696705,0.5524031,0.6547414,0.4218063,0.4825907,0.4650507,1.448758,-0.1813917,0.494569,0.6835366,0.5471737,0.5922813,0.3667736,0.5773503,1.211609,0.9302605,0.8327964,0.5760371,0.189472,1.096871
max,1.684157,0.0,2.49957,0.6902462,1.414214,10.17262,3.144016,11.42679,6.168745,1.069045,4.218091,2.543081,2.37547,0.0,0.7997092,8.546919,4.208613,6.004251,0.302113,5.487359,1.406671,1.748706,3.0,2.645751,10.1861,2.54508,2.896093,10.72656,2.214176,6.96175,2.620691,3.630872,3.763199,2.024436,1.448758,8.757874,4.532045,2.321667,1.058817,3.067924,2.520593,0.5773503,1.211609,1.116313,5.447195,5.604424,0.2920269,1.096871


In [None]:
from sklearn import preprocessing

# Standardize the feature columns with StandardScaler.
# The binarized decision column V58 is left out of the scaling: standardizing
# it would turn the 0/1 class labels into arbitrary floats.
label_cols = [c for c in df.columns if c == 'V58']
feature_cols = [c for c in df.columns if c != 'V58']

x = df[feature_cols].values  # NumPy array holding only the feature values
standard_scaler = preprocessing.StandardScaler()  # previously misnamed `min_max_scaler`
x_scaled_std = standard_scaler.fit_transform(x)

# Rebuild the table on the same row index so the label column aligns row by row
features_std = pd.DataFrame(x_scaled_std, columns=feature_cols, index=df.index)
# Re-attach the untouched label column and restore the original column order
df = pd.concat([features_std, df[label_cols]], axis=1)[list(df.columns)]

In [None]:
# Save the cleaned table to a CSV file; index=False leaves the row index out of the file
df.to_csv("cleaned_76hd.csv",index=False)

In [None]:
# Worked example of min-max normalization on a small list:
#     scaled = (element - min) / (max - min)
l = [12, 24, 10, 2]
lowest, highest = min(l), max(l)
n_l = [(value - lowest) / (highest - lowest) for value in l]
print(n_l)

[0.45454545454545453, 1.0, 0.36363636363636365, 0.0]
