# In [2]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.preprocessing import StandardScaler

# Preprocessing pipeline: load the training data, drop outlier rows
# (IQR rule for x1/x6, z-score rule for x2, x3, x4, x5, x7), then
# standardize all seven feature columns.
df = pd.read_csv('combined-training.csv')
scaler = StandardScaler()

# Columns screened with the z-score rule, and the full set of columns that
# gets standardized (order defines the column order of df_processed).
ZSCORE_COLUMNS = ['x2', 'x3', 'x4', 'x5', 'x7']
FEATURE_COLUMNS = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']

# Removing outliers from X1 and X6 using IQR method
Q1x1 = df['x1'].quantile(0.25)
Q3x1 = df['x1'].quantile(0.75)
IQRx1 = Q3x1 - Q1x1

Q1x6 = df['x6'].quantile(0.25)
Q3x6 = df['x6'].quantile(0.75)
IQRx6 = Q3x6 - Q1x6

# Fences at 1.5 * IQR beyond the quartiles; values strictly outside are outliers.
x1_lower, x1_upper = Q1x1 - 1.5 * IQRx1, Q3x1 + 1.5 * IQRx1
x6_lower, x6_upper = Q1x6 - 1.5 * IQRx6, Q3x6 + 1.5 * IQRx6

# Explicit < / > comparisons (rather than an inclusive range test) so that a
# NaN compares False on both sides and the row is NOT flagged by this step.
x1_is_outlier = (df['x1'] < x1_lower) | (df['x1'] > x1_upper)
x6_is_outlier = (df['x6'] < x6_lower) | (df['x6'] > x6_upper)

# A row is dropped if it is an outlier in x1 or in x6.
df_no_outliers_iqr = df[~(x1_is_outlier | x6_is_outlier)]

# Removing outliers from X2, X3, X4, X5, and X7 using Z-score method.
# nan_policy='omit' keeps a single missing value from turning an entire
# column of z-scores into NaN (the default 'propagate' does that, which makes
# every row fail the `< 3` test and leaves an empty frame for the scaler).
# With 'omit', only the rows that actually hold the NaN get a NaN z-score and
# are dropped below; results are unchanged when the columns contain no NaN.
z_scores = np.abs(
    stats.zscore(df_no_outliers_iqr[ZSCORE_COLUMNS], nan_policy='omit')
)

# Keep a row only when every screened column lies within 3 standard deviations.
df_no_outliers = df_no_outliers_iqr[(z_scores < 3).all(axis=1)]

# Standardizing the data (the scaler is fitted on the outlier-free rows only)
scaled_features = scaler.fit_transform(df_no_outliers[FEATURE_COLUMNS])

# Creating a new DataFrame with the standardized data.
# NOTE: no index is passed, so df_processed gets a fresh 0..n-1 index; its
# rows correspond to df_no_outliers by position, not by index label.
df_processed = pd.DataFrame(scaled_features, columns=FEATURE_COLUMNS)
