# Analysis Summary
This data provides details on customer grocery spending habits and campaign response patterns. The data has 2,205 observations and 39 features. The key response variable is a binary indicator, with about 15% of customers responding. The data includes both categorical and continuous predictor variables, and neither type has any missing values or separation issues. The data does have some aggregated total variables, which present some multicollinearity concerns.

In [1]:
# import needed packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# load the marketing dataset from CSV into a DataFrame
ifood = pd.read_csv('ifood_df.csv')

# preview the first five rows (head() returns five rows by default)
print(ifood.head())

    Income  Kidhome  Teenhome  Recency  MntWines  MntFruits  MntMeatProducts  \
0  58138.0        0         0       58       635         88              546   
1  46344.0        1         1       38        11          1                6   
2  71613.0        0         0       26       426         49              127   
3  26646.0        1         0       26        11          4               20   
4  58293.0        1         0       94       173         43              118   

   MntFishProducts  MntSweetProducts  MntGoldProds  ...  marital_Together  \
0              172                88            88  ...                 0   
1                2                 1             6  ...                 0   
2              111                21            42  ...                 1   
3               10                 3             5  ...                 1   
4               46                27            15  ...                 0   

   marital_Widow  education_2n Cycle  education_Basic  e

In [9]:
# show the (rows, columns) shape, then the number of rows per Response value
print(ifood.shape, ifood['Response'].value_counts(), sep='\n')

(2205, 39)
Response
0    1872
1     333
Name: count, dtype: int64


# Variable Type Conversions

In [4]:
# predictor columns that should be treated as categorical rather than numeric
ifood_cat = [
    'AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5',
    'AcceptedCmpOverall',
    'education_2n Cycle', 'education_Basic', 'education_Graduation',
    'education_Master', 'education_PhD',
    'marital_Divorced', 'marital_Married', 'marital_Single',
    'marital_Together', 'marital_Widow',
    'Kidhome', 'Teenhome', 'Complain',
]

# store the target variable with the category dtype
ifood['Response'] = ifood['Response'].astype('category')

# cast every listed predictor to the category dtype as well
ifood[ifood_cat] = ifood[ifood_cat].astype('category')

# Variable Distributions

In [5]:
# print summary statistics for the numeric columns; describe() on a mixed-dtype
# frame reports numeric columns only by default, so the columns cast to the
# category dtype earlier in the notebook are left out of this table
desc = ifood.describe()
print(desc)

              Income      Recency     MntWines    MntFruits  MntMeatProducts  \
count    2205.000000  2205.000000  2205.000000  2205.000000      2205.000000   
mean    51622.094785    49.009070   306.164626    26.403175       165.312018   
std     20713.063826    28.932111   337.493839    39.784484       217.784507   
min      1730.000000     0.000000     0.000000     0.000000         0.000000   
25%     35196.000000    24.000000    24.000000     2.000000        16.000000   
50%     51287.000000    49.000000   178.000000     8.000000        68.000000   
75%     68281.000000    74.000000   507.000000    33.000000       232.000000   
max    113734.000000    99.000000  1493.000000   199.000000      1725.000000   

       MntFishProducts  MntSweetProducts  MntGoldProds  NumDealsPurchases  \
count      2205.000000       2205.000000   2205.000000        2205.000000   
mean         37.756463         27.128345     44.057143           2.318367   
std          54.824635         41.130468     51.

# Variable Anomalies

In [6]:
# count null entries per column, then keep only the columns that have any;
# an empty Series in the output means no column contains missing data
missing_values = ifood.isna().sum()
missing_columns = missing_values.loc[missing_values.gt(0)]
print(missing_columns)

Series([], dtype: int64)


In [7]:
# cross-tabulate Response against each categorical predictor; a zero cell in
# any table would point to a separation problem for that predictor
for var in ifood_cat:
    print(pd.crosstab(index=ifood['Response'], columns=ifood[var]))

AcceptedCmp1     0   1
Response              
0             1809  63
1              254  79
AcceptedCmp2     0   1
Response              
0             1862  10
1              313  20
AcceptedCmp3     0   1
Response              
0             1786  86
1              256  77
AcceptedCmp4     0    1
Response               
0             1770  102
1              271   62
AcceptedCmp5     0   1
Response              
0             1802  70
1              242  91
AcceptedCmpOverall     0    1   2   3   4
Response                                 
0                   1601  222  39   9   1
1                    146  100  42  35  10
education_2n Cycle     0    1
Response                     
0                   1696  176
1                    311   22
education_Basic     0   1
Response                 
0                1820  52
1                 331   2
education_Graduation    0    1
Response                      
0                     911  961
1                     181  152
education_Master    

# Correlations

In [None]:
# identify highly correlated variable pairs (r >= 0.75 or r <= -0.75)
pos_threshold = 0.75
neg_threshold = -0.75

# correlate the numeric columns only, so the result does not depend on how the
# installed pandas version treats category-dtype columns inside corr()
correlation_matrix = ifood.select_dtypes(include='number').corr()

# exclude self-correlations by position instead of comparing values with 1.0:
# a value test would also hide off-diagonal pairs that are perfectly correlated,
# which are the strongest multicollinearity cases
off_diagonal = ~np.eye(len(correlation_matrix), dtype=bool)
beyond_threshold = (correlation_matrix >= pos_threshold) | (correlation_matrix <= neg_threshold)

# where() blanks rejected cells to NaN; dropna() removes them explicitly in
# case stack() keeps NaN entries in the installed pandas version
high_corr_pairs = correlation_matrix.where(beyond_threshold & off_diagonal).stack().dropna()
print("\nHighly Correlated Pairs:")
print(high_corr_pairs)


Highly Correlated Pairs:
Income               MntTotal               0.823066
                     MntRegularProds        0.816879
MntWines             MntTotal               0.902310
                     MntRegularProds        0.901848
MntMeatProducts      MntTotal               0.861392
                     MntRegularProds        0.860663
NumCatalogPurchases  MntTotal               0.791187
                     MntRegularProds        0.778742
MntTotal             Income                 0.823066
                     MntWines               0.902310
                     MntMeatProducts        0.861392
                     NumCatalogPurchases    0.791187
                     MntRegularProds        0.996569
MntRegularProds      Income                 0.816879
                     MntWines               0.901848
                     MntMeatProducts        0.860663
                     NumCatalogPurchases    0.778742
                     MntTotal               0.996569
dtype: float64
