In [8]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

In [9]:
data = pd.DataFrame({
    'decision': ['Buy now','Buy now','Buy now','Buy later','Buy later',"Don't buy","Don't buy"],
    'gender': ['female','female','male','female','male','female','male'],
    'age': [22, 25, 18, 27, 48, 33, 40],
    'time_spent': [40, 78, 65, 28, 110, 65, 50]
})

In [10]:
data['gender_male'] = (data['gender'] == 'male').astype(int)
data['decision'] = data['decision'].astype('category')

In [11]:
X = data[['gender_male', 'age', 'time_spent']]
X = sm.add_constant(X)  # add intercept
y = data['decision']

In [12]:
model = sm.MNLogit(y, X.astype(float))
result = model.fit(method='newton', full_output=True, disp=False)

  eXB = np.column_stack((np.ones(len(X)), np.exp(X)))
  return eXB/eXB.sum(1)[:,None]


In [13]:
print(result.summary())

                          MNLogit Regression Results                          
Dep. Variable:               decision   No. Observations:                    7
Model:                        MNLogit   Df Residuals:                       -1
Method:                           MLE   Df Model:                            6
Date:                Wed, 10 Sep 2025   Pseudo R-squ.:                     nan
Time:                        06:00:06   Log-Likelihood:                    nan
converged:                       True   LL-Null:                       -7.5529
Covariance Type:            nonrobust   LLR p-value:                       nan
  decision=Buy now       coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                     nan        nan        nan        nan         nan         nan
gender_male               nan        nan        nan        nan         nan         nan
age                 

In [15]:
pred_probs = result.predict(X)

In [17]:
categories = y.cat.categories

In [18]:
pred_probs_df = pd.DataFrame(pred_probs, columns=categories)
data = pd.concat([data, pred_probs_df], axis=1)

In [20]:
print("\nPredicted probabilities:")
print(data[['decision'] + list(categories)])


Predicted probabilities:
    decision  Buy later  Buy now  Don't buy
0    Buy now        NaN      NaN        NaN
1    Buy now        NaN      NaN        NaN
2    Buy now        NaN      NaN        NaN
3  Buy later        NaN      NaN        NaN
4  Buy later        NaN      NaN        NaN
5  Don't buy        NaN      NaN        NaN
6  Don't buy        NaN      NaN        NaN


In [21]:
data['predicted'] = pred_probs_df.idxmax(axis=1)
print("\nPredicted decisions:")
print(data[['decision','predicted']])


Predicted decisions:
    decision predicted
0    Buy now       NaN
1    Buy now       NaN
2    Buy now       NaN
3  Buy later       NaN
4  Buy later       NaN
5  Don't buy       NaN
6  Don't buy       NaN


  data['predicted'] = pred_probs_df.idxmax(axis=1)
