In [7]:
!pip -q install autoviz

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.5/67.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.5/175.5 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.9/255.9 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.5/87.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. Thi

In [10]:
# autoviz_marketing_campaign_fixed_v2.py
# 1.3.4 - AutoViz on Customer Personality Analysis (Marketing Campaign)

# Install once if needed:
# !pip -q install autoviz

import os
import numpy as np
import pandas as pd

# Optional: silence strict FP warnings globally (we still use safe math)
np.seterr(all="ignore")

# -----------------------
# 1) Robust loader + normalize
# -----------------------
CSV_PATHS = [
    "/kaggle/input/customer-personality-analysis/marketing_campaign.csv",
    "/kaggle/input/customer-personality-analysis/MarketingCampaign.csv",
    "data/marketing_campaign.csv",
    "marketing_campaign.csv",
]

def load_marketing(paths):
    last_err = None
    for p in paths:
        if not os.path.exists(p):
            continue
        try:
            # Try automatic delimiter inference
            df = pd.read_csv(p, sep=None, engine="python")
            if df.shape[1] == 1:
                # Fallback through common delimiters
                for sep in [";", "\t", ",", "|"]:
                    df2 = pd.read_csv(p, sep=sep)
                    if df2.shape[1] >= 10:
                        df = df2
                        break
            print(f"[OK] Loaded {p} with shape {df.shape}")
            return df, p
        except Exception as e:
            last_err = e
    raise RuntimeError(f"Failed to load any of {paths}. Last error: {last_err}")

df, used_path = load_marketing(CSV_PATHS)

# Normalize headers (strip spaces)
df.columns = [str(c).strip() for c in df.columns]
print("Parsed columns:", df.columns.tolist())

# -----------------------
# 2) Type fixes
# -----------------------
num_candidates = [
    "Year_Birth","Income","Kidhome","Teenhome","Recency","Complain",
    "MntWines","MntFruits","MntMeatProducts","MntFishProducts",
    "MntSweetProducts","MntGoldProds",
    "NumDealsPurchases","AcceptedCmp1","AcceptedCmp2","AcceptedCmp3",
    "AcceptedCmp4","AcceptedCmp5","Response",
    "NumWebPurchases","NumCatalogPurchases","NumStorePurchases","NumWebVisitsMonth",
    "Z_CostContact","Z_Revenue",
]
for c in num_candidates:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

if "Dt_Customer" in df.columns:
    df["Dt_Customer"] = pd.to_datetime(df["Dt_Customer"], errors="coerce", dayfirst=True)

# -----------------------
# 3) Light feature engineering
# -----------------------
YEAR_REF = 2025
if "Year_Birth" in df.columns:
    df["Age"] = YEAR_REF - df["Year_Birth"]

if set(["Kidhome","Teenhome"]).issubset(df.columns):
    df["Children"] = df["Kidhome"] + df["Teenhome"]
    df["HasChild"] = (df["Children"] > 0).astype("Int64")

mnt_cols = [c for c in df.columns if c.startswith("Mnt")]
if mnt_cols:
    df["MntTotal"] = df[mnt_cols].sum(axis=1)

purch_cols = [c for c in df.columns if c.startswith("Num") and "Visits" not in c]
if purch_cols:
    df["PurchasesTotal"] = df[purch_cols].sum(axis=1)

# ---- SAFE channel shares (fixes FloatingPointError) ----
channels = ["NumWebPurchases", "NumCatalogPurchases", "NumStorePurchases"]
present = [c for c in channels if c in df.columns]
if len(present) == 3:
    total = df[present].sum(axis=1, min_count=1)    # keep NaN if all NaN
    positive_den = total.fillna(0) > 0              # NaN-safe comparison
    den = total.where(positive_den, np.nan)         # only divide where > 0
    df["ShareWeb"]     = df["NumWebPurchases"].div(den)
    df["ShareCatalog"] = df["NumCatalogPurchases"].div(den)
    df["ShareStore"]   = df["NumStorePurchases"].div(den)

# Drop pure IDs if present
df_av = df.drop(columns=[c for c in ["ID"] if c in df.columns], errors="ignore").copy()

# -----------------------
# 4) AutoViz (version-proof return handling)
# -----------------------
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()

SAVE_DIR = "AutoViz_Marketing"
os.makedirs(SAVE_DIR, exist_ok=True)

depVar = "Response" if "Response" in df_av.columns else None

ret = AV.AutoViz(
    filename="",            # we pass the DF directly
    sep=",",
    depVar=depVar,          # None => unsupervised; "Response" => supervised
    dfte=df_av,
    header=0,
    verbose=2,
    lowess=False,
    chart_format="png",     # png / svg / html
    max_rows_analyzed=200000,
    max_cols_analyzed=200,
    save_plot_dir=SAVE_DIR
)

# Handle return type across AutoViz versions
dft = None
charts = None
if isinstance(ret, tuple):
    # Some versions return (df_trafo, charts)
    if len(ret) >= 2:
        dft, charts = ret[0], ret[1]
    elif len(ret) == 1:
        charts = ret[0]
else:
    charts = ret

print(f"\n[DONE] AutoViz complete. Plots saved to: ./{SAVE_DIR}/")
print("Mode:", "Supervised (depVar='Response')" if depVar else "Unsupervised (no 'Response' found)")
print("Return summary:", {"dft_type": type(dft).__name__ if dft is not None else None,
                          "charts_type": type(charts).__name__ if charts is not None else None})


[OK] Loaded /kaggle/input/customer-personality-analysis/marketing_campaign.csv with shape (2240, 29)
Parsed columns: ['ID', 'Year_Birth', 'Education', 'Marital_Status', 'Income', 'Kidhome', 'Teenhome', 'Dt_Customer', 'Recency', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1', 'AcceptedCmp2', 'Complain', 'Z_CostContact', 'Z_Revenue', 'Response']
Shape of your Data Set loaded: (2240, 36)
#######################################################################################
######################## C L A S S I F Y I N G  V A R I A B L E S  ####################
#######################################################################################
Classifying variables in data set...
HasChild of type=Int64 is not classified
  Printing up to 30 columns (max) in each category

Unnamed: 0,Data Type,Missing Values%,Unique Values%,Minimum Value,Maximum Value,DQ Issue
Year_Birth,int64,0.0,2.0,1893.0,1996.0,Column has 3 outliers greater than upper bound (2004.00) or lower than lower bound(1932.00). Cap them or remove them.
Education,object,0.0,0.0,,,No issue
Marital_Status,object,0.0,0.0,,,"3 rare categories: ['Alone', 'Absurd', 'YOLO']. Group them into a single category or drop the categories."
Income,float64,1.166181,,1730.0,666666.0,"24 missing values. Impute them with mean, median, mode, or a constant value such as 123., Column has 8 outliers greater than upper bound (117909.50) or lower than lower bound(-13900.50). Cap them or remove them."
Kidhome,int64,0.0,0.0,0.0,2.0,No issue
Teenhome,int64,0.0,0.0,0.0,2.0,No issue
Dt_Customer,datetime64[ns],0.0,32.0,,,Possible date-time colum: transform before modeling step.
Recency,int64,0.0,4.0,0.0,99.0,No issue
MntWines,int64,0.0,37.0,0.0,1493.0,Column has 33 outliers greater than upper bound (1225.50) or lower than lower bound(-698.50). Cap them or remove them.
MntFruits,int64,0.0,7.0,0.0,199.0,Column has 207 outliers greater than upper bound (81.00) or lower than lower bound(-47.00). Cap them or remove them.


Total Number of Scatter Plots = 10
All Plots are saved in AutoViz_Marketing/Response
Time to run AutoViz = 24 seconds 

[DONE] AutoViz complete. Plots saved to: ./AutoViz_Marketing/
Mode: Supervised (depVar='Response')
Return summary: {'dft_type': None, 'charts_type': 'DataFrame'}
