In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Example: Load your cleaned data
df = pd.read_csv('cleaned_marketing_data.csv')

# Define features and target
X = df.drop(['ID', 'Dt_Customer','Education','Marital_Status','Country','Age_Group'], axis=1)  # drop target and non-predictive IDs/dates
y = df[['Education','Marital_Status','Country','Age_Group']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [37]:
df.dtypes
test_df = df.tail()
print(test_df)
test_df = test_df.drop(['ID', 'Dt_Customer','Education','Marital_Status','Country','Age_Group'], axis=1)


         ID  Year_Birth   Education Marital_Status   Income  Kidhome  \
2235  10142        1976         Phd       Divorced  66476.0        0   
2236   5263        1977    2N Cycle        Married  31056.0        1   
2237     22        1976  Graduation       Divorced  46310.0        1   
2238    528        1978  Graduation        Married  65819.0        0   
2239   4070        1969         Phd        Married  94871.0        0   

      Teenhome Dt_Customer  Recency  MntWines  ...  Age  Total_Spending  \
2235         1  2013-03-07       99       372  ...   49           689.0   
2236         0  2013-01-22       99         5  ...   48            55.0   
2237         0  2012-12-03       99       185  ...   49           309.0   
2238         0  2012-11-29       99       267  ...   47          1383.0   
2239         2  2012-09-01       99       169  ...   56          1078.0   

      Total_Purchases  Education_enc  Marital_Status_enc  Country_enc  \
2235               18            1.0       

In [26]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib

# Candidate classifiers, keyed by the name used in the saved file names.
# Logistic regression sits behind a StandardScaler in a pipeline; the tree
# ensembles take the features unscaled.
# random_state is pinned on the stochastic ensembles so that re-running the
# notebook reproduces the same fitted models.
models = {
    'logistic_regression': Pipeline([
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression(max_iter=2000, solver='lbfgs'))
    ]),
    'random_forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'gradient_boosting': GradientBoostingClassifier(random_state=42)
}

# y_train is a DataFrame with one column per target. Each target gets its own
# single-output model per algorithm (a MultiOutputClassifier could fit all
# columns at once instead).
target_columns = y_train.columns

for target in target_columns:
    for name, model in models.items():
        # The same estimator object is refit for every target; joblib.dump
        # writes its state as of this fit, so each file holds its own model.
        model.fit(X_train, y_train[target])
        filename = f"{name}_{target}_model.joblib"
        joblib.dump(model, filename)
        print(f"Saved: {filename}")

Saved: logistic_regression_Education_model.joblib
Saved: random_forest_Education_model.joblib
Saved: gradient_boosting_Education_model.joblib
Saved: logistic_regression_Marital_Status_model.joblib
Saved: random_forest_Marital_Status_model.joblib
Saved: gradient_boosting_Marital_Status_model.joblib
Saved: logistic_regression_Country_model.joblib
Saved: random_forest_Country_model.joblib
Saved: gradient_boosting_Country_model.joblib
Saved: logistic_regression_Age_Group_model.joblib
Saved: random_forest_Age_Group_model.joblib
Saved: gradient_boosting_Age_Group_model.joblib


In [27]:
# Target columns and model names; together they rebuild the saved file names
# of the form "<model>_<target>_model.joblib".
# NOTE(review): `models` was bound to the dict of estimators in the training
# cell; from here on it is a list of name strings.
targets = [
    'Education',
    'Marital_Status',
    'Country',
    'Age_Group',
]
models = [
    'logistic_regression',
    'random_forest',
    'gradient_boosting',
]


In [36]:
from sklearn.exceptions import NotFittedError

# For every (target, model) pair, reload the saved model and print its first
# two predictions on test_df. The loop is best-effort: a failure for one file
# is reported and the remaining files are still processed.
for target in targets:
    print(f"\nPredictions for: {target}")
    for model_name in models:
        file_name = f"{model_name}_{target}_model.joblib"

        # Loading is handled separately from predicting so the message says
        # which step failed.
        # NOTE: joblib.load unpickles the file and can execute arbitrary code;
        # only load files written by this notebook or another trusted source.
        try:
            model = joblib.load(file_name)
        except FileNotFoundError:
            print(f"Model file not found: {file_name}")
            continue
        except Exception as e:
            print(f"Error loading {file_name}: {type(e).__name__}: {e}")
            continue

        try:
            prediction = model.predict(test_df)
        except NotFittedError:
            print(f"Model not fitted: {file_name}")
        except Exception as e:
            # Typically a mismatch between test_df's columns and the columns
            # the model was fitted on.
            print(f"Error predicting with {file_name}: {type(e).__name__}: {e}")
        else:
            print(f"{model_name} prediction (first 2): {prediction[:2]}")


Predictions for: Education
logistic_regression prediction (first 2): ['Phd' '2N Cycle']
random_forest prediction (first 2): ['Phd' '2N Cycle']
gradient_boosting prediction (first 2): ['Phd' '2N Cycle']

Predictions for: Marital_Status
logistic_regression prediction (first 2): ['Divorced' 'Married']
random_forest prediction (first 2): ['Divorced' 'Married']
gradient_boosting prediction (first 2): ['Divorced' 'Married']

Predictions for: Country
logistic_regression prediction (first 2): ['US' 'SP']
random_forest prediction (first 2): ['US' 'SP']
gradient_boosting prediction (first 2): ['US' 'SP']

Predictions for: Age_Group
logistic_regression prediction (first 2): ['Younger' 'Younger']
random_forest prediction (first 2): ['Younger' 'Younger']
gradient_boosting prediction (first 2): ['Younger' 'Younger']
