In [None]:
def _winsorize_or_none(series, limit):
    """Winsorize *series* symmetrically at *limit*; return None if that raises."""
    try:
        return winsorize(series, limits=[limit, limit])
    except Exception:
        # Best-effort: a failed transformation is reported as NaN by callers.
        return None


def _skew_or_nan(values):
    """Return ``skew(values)``, or NaN if *values* is None or ``skew`` raises."""
    if values is None:
        return np.nan
    try:
        return skew(values)
    except Exception:
        return np.nan


def _shifted_yeojohnson_skew(values, series):
    """Return the skew of *values* after shifting and a Yeo-Johnson transform.

    The shift is ``abs(series.min()) + 1``, always taken from the original
    (un-winsorized) *series*. Returns NaN if *values* is None or if any step
    raises.
    """
    if values is None:
        return np.nan
    try:
        # Keep the additions in this order so floating-point results are stable.
        transformed, _ = yeojohnson(values + abs(series.min()) + 1)
        return skew(transformed)
    except Exception:
        return np.nan


def analyze_and_fix_skew(df, feature):
    """Compare the skewness of one column under several transformations.

    Computes the skew of ``df[feature]`` as-is and after: winsorizing at 5%
    and at 1% per tail, taking the cube root, a shifted Yeo-Johnson transform,
    and a shifted Yeo-Johnson transform of the 5%-winsorized data. It also
    counts outliers using the 1.5 * IQR rule.

    Args:
        df: Table holding the data; indexed as ``df[feature]``.
        feature: Name of the column to analyze.

    Returns:
        A dict with the keys ``"Feature"``, ``"Original Skew"``,
        ``"Outlier Count"``, ``"Winsorized Skew"``,
        ``"Winsorized Skew Different Limit"``, ``"Cube Root Skew"``,
        ``"Yeo-Johnson Skew"``, ``"Yeo-Johnson Skew After Winsor"`` and
        ``"Best Transformation"``. A winsorization or Yeo-Johnson step that
        raises is reported as NaN instead of aborting the analysis.
        ``"Best Transformation"`` names the candidate with the smallest
        absolute skew among the non-NaN ones (the first listed wins a tie),
        or ``"No valid transformation found"`` when every candidate is NaN.
        The original skew is not itself a candidate.
    """
    series = df[feature]

    original_skew = skew(series)

    # Outliers by the 1.5 * IQR rule.
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outlier_count = ((series < lower_bound) | (series > upper_bound)).sum()

    # The 5% winsorization is computed once and reused for the Yeo-Johnson
    # variant below.
    winsorized_5pct = _winsorize_or_none(series, 0.05)
    winsorized_skew = _skew_or_nan(winsorized_5pct)
    winsorized_skew2 = _skew_or_nan(_winsorize_or_none(series, 0.01))

    # Not guarded: an error here propagates to the caller.
    cbrt_skew = skew(np.cbrt(series))

    yeo_skew = _shifted_yeojohnson_skew(series, series)
    yeo_skew2 = _shifted_yeojohnson_skew(winsorized_5pct, series)

    candidates = {
        "Winsorization": winsorized_skew,
        "Winsorization2": winsorized_skew2,
        "Cube Root": cbrt_skew,
        "Yeo-Johnson": yeo_skew,
        "Yeo-Johnson2": yeo_skew2,
    }
    # Rank by distance from zero skew, ignoring candidates that produced NaN.
    ranked = {
        name: abs(value)
        for name, value in candidates.items()
        if not np.isnan(value)
    }
    if ranked:
        best_transformation = min(ranked, key=ranked.get)
    else:
        best_transformation = "No valid transformation found"

    return {
        "Feature": feature,
        "Original Skew": original_skew,
        "Outlier Count": outlier_count,
        "Winsorized Skew": winsorized_skew,
        "Winsorized Skew Different Limit": winsorized_skew2,
        "Cube Root Skew": cbrt_skew,
        "Yeo-Johnson Skew": yeo_skew,
        "Yeo-Johnson Skew After Winsor": yeo_skew2,
        "Best Transformation": best_transformation,
    }

In [None]:
# NOTE: a float cast of df_numeric used to precede this step; it is disabled.
# df_numeric = df_numeric.astype(float)

# Columns to run through the skew analysis.
problem_features = [
    "collection_count",
    "contributed_mod_count",
    "views",
    "all_wastebinned_mods",
    "all_under_moderation_mods",
    "has_collection",
]

# One result dict per column, in the order listed above.
fix_results = [
    analyze_and_fix_skew(df_numeric, column_name)
    for column_name in problem_features
]

# One row per analyzed column.
fix_results_df = pd.DataFrame(fix_results)

fix_results_df.head(6)