# In [None]:  (notebook cell marker kept as a comment so the file parses as Python)
import pandas as pd
import numpy as np
from scipy import stats

# Read the games ranking CSV (path is relative to the working directory).
df = pd.read_csv('games_ranking.csv')

# Quick look at the data: the first rows, then the column index.
for preview in (df.head(), df.columns):
    print(preview)

# Impute missing values column by column: int64/float64 columns receive the
# column mean, text columns receive the placeholder 'Unknown'. Columns of any
# other dtype are left untouched.
for col in df.columns:
    col_dtype = df[col].dtype
    if col_dtype in ['int64', 'float64']:  # For numerical columns
        # Assign the filled Series back to the frame. Calling
        # fillna(inplace=True) on df[col] is chained assignment: it operates
        # on a temporary Series, warns on pandas 2.x and leaves df unchanged
        # when Copy-on-Write is active.
        # NOTE: mean() skips NaN; if a column is entirely NaN its mean is NaN
        # too, so that column stays unfilled.
        df[col] = df[col].fillna(df[col].mean())
    elif col_dtype == 'object' or isinstance(col_dtype, pd.StringDtype):
        # For categorical (text) columns. Besides the classic object dtype,
        # also accept pandas' dedicated string dtype so text columns are not
        # silently skipped when pandas infers that dtype.
        df[col] = df[col].fillna('Unknown')

# 1. Z-score method to detect and remove outliers for numerical columns
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
# Absolute z-score of every numeric cell, computed per column.
z_scores = np.abs(np.asarray(stats.zscore(df[numerical_cols]), dtype=float))
# A column with zero spread (every value identical) has an undefined z-score,
# which comes back as NaN. NaN < 3 evaluates to False, so without the isnan
# guard a single constant column would discard every row. An undefined score
# is treated as "not an outlier" instead.
within_z_limit = (z_scores < 3) | np.isnan(z_scores)
# Keep only rows whose every numeric value has |z| < 3 (rows with any |z| >= 3
# are removed).
df_no_outliers_z = df[within_z_limit.all(axis=1)]

# 2. IQR method: drop rows holding any numeric value outside the fences
#    [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], evaluated column by column.
numeric_view = df[numerical_cols]
Q1 = numeric_view.quantile(0.25)
Q3 = numeric_view.quantile(0.75)
IQR = Q3 - Q1

# Per-column fences; the comparisons below align them on column labels.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# A cell is an outlier when it is strictly below the lower fence or strictly
# above the upper fence; a row is dropped when it has at least one such cell.
too_low = numeric_view.lt(lower_fence)
too_high = numeric_view.gt(upper_fence)
row_has_outlier = (too_low | too_high).any(axis=1)
df_no_outliers_iqr = df[~row_has_outlier]

# Report how many rows each outlier-removal strategy kept.
shape_report = (
    ("Original DataFrame shape:", df),
    ("DataFrame shape after Z-score outlier removal:", df_no_outliers_z),
    ("DataFrame shape after IQR outlier removal:", df_no_outliers_iqr),
)
for label, frame in shape_report:
    print(label, frame.shape)

# Preview the IQR-filtered result.
iqr_preview = df_no_outliers_iqr.head()
print(iqr_preview)

