In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import KBinsDiscretizer
from mlxtend.frequent_patterns import apriori, association_rules

df = pd.read_csv('market.csv', sep="\t")

continuous_features = ['Income', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']
imputer = SimpleImputer(strategy='mean')
df[continuous_features] = imputer.fit_transform(df[continuous_features])
df[continuous_features] = df[continuous_features].apply(lambda x: x > 0).astype(int)

categorical_features = ['Education', 'Marital_Status']
df = pd.get_dummies(df, columns=categorical_features)

binary_columns = df.columns[df.isin([0, 1]).all()]

frequent_itemsets = apriori(df[binary_columns], min_support=0.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.5)

rules['leverage'] = rules['support'] - (rules['antecedent support'] * rules['consequent support'])
rules['conviction'] = (1 - rules['consequent support']) / (1 - rules['confidence'])

rules_sorted = rules.sort_values(by=['lift', 'confidence', 'support', 'leverage', 'conviction'], ascending=False)
rules_sorted.to_csv('association_rules.csv', index=False)

print(rules_sorted.head(10))



                                             antecedents  \
24600  (MntMeatProducts, Education_2n Cycle, MntFishP...   
30764  (MntMeatProducts, Education_2n Cycle, Income, ...   
30782  (MntMeatProducts, Education_2n Cycle, MntFishP...   
23345    (MntFishProducts, MntWines, Education_2n Cycle)   
28203  (MntFishProducts, MntWines, Education_2n Cycle...   
28221    (MntFishProducts, MntWines, Education_2n Cycle)   
32189  (MntMeatProducts, MntWines, Education_2n Cycle...   
32214    (MntFishProducts, MntWines, Education_2n Cycle)   
33909  (MntMeatProducts, MntWines, Income, Education_...   
33942  (MntMeatProducts, MntWines, Education_2n Cycle...   

                                             consequents  antecedent support  \
24600        (MntFruits, MntSweetProducts, MntGoldProds)            0.081696   
30764        (MntFruits, MntSweetProducts, MntGoldProds)            0.081696   
30782  (MntFruits, MntSweetProducts, MntGoldProds, In...            0.081696   
23345        (MntFr

In [2]:
rules_sorted = rules.sort_values(by=['lift'], ascending=False)
rules_sorted.to_csv('rules_by_lift.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
24600,"(MntMeatProducts, Education_2n Cycle, MntFishP...","(MntFruits, MntSweetProducts, MntGoldProds)",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
30782,"(MntMeatProducts, Education_2n Cycle, MntFishP...","(MntFruits, MntSweetProducts, MntGoldProds, In...",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
30764,"(MntMeatProducts, Education_2n Cycle, Income, ...","(MntFruits, MntSweetProducts, MntGoldProds)",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
28221,"(MntFishProducts, MntWines, Education_2n Cycle)","(MntFruits, MntSweetProducts, MntGoldProds, In...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
32214,"(MntFishProducts, MntWines, Education_2n Cycle)","(MntFruits, MntMeatProducts, MntGoldProds, Mnt...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33983,"(MntFishProducts, MntWines, Education_2n Cycle)","(MntMeatProducts, MntGoldProds, Income, MntSwe...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
32189,"(MntMeatProducts, MntWines, Education_2n Cycle...","(MntFruits, MntSweetProducts, MntGoldProds)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33909,"(MntMeatProducts, MntWines, Income, Education_...","(MntFruits, MntSweetProducts, MntGoldProds)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33942,"(MntMeatProducts, MntWines, Education_2n Cycle...","(MntFruits, MntSweetProducts, MntGoldProds, In...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
23345,"(MntFishProducts, MntWines, Education_2n Cycle)","(MntFruits, MntSweetProducts, MntGoldProds)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746


In [3]:
rules_sorted = rules.sort_values(by=['confidence'], ascending=False)
rules_sorted.to_csv('rules_by_confidence.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MntWines),(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
7151,"(Education_Graduation, MntWines, Marital_Statu...",(MntMeatProducts),0.052679,0.999554,0.052679,1.0,1.000447,2.4e-05,inf,0.000471
21582,"(Education_Graduation, Income, MntSweetProduct...",(MntMeatProducts),0.102232,0.999554,0.102232,1.0,1.000447,4.6e-05,inf,0.000497
3204,"(MntFruits, Education_Graduation, MntWines)",(MntMeatProducts),0.433036,0.999554,0.433036,1.0,1.000447,0.000193,inf,0.000787
7142,"(MntMeatProducts, Response, Income, Education_...",(MntWines),0.067857,0.994196,0.067857,1.0,1.005837,0.000394,inf,0.006226
7143,"(Education_Graduation, MntWines, Response, Inc...",(MntMeatProducts),0.067857,0.999554,0.067857,1.0,1.000447,3e-05,inf,0.000479
7144,"(MntMeatProducts, Response, Education_Graduation)","(MntWines, Income)",0.067857,0.994196,0.067857,1.0,1.005837,0.000394,inf,0.006226
7145,"(Education_Graduation, MntWines, Response)","(MntMeatProducts, Income)",0.067857,0.999554,0.067857,1.0,1.000447,3e-05,inf,0.000479
7146,"(Education_Graduation, Response, Income)","(MntMeatProducts, MntWines)",0.067857,0.994196,0.067857,1.0,1.005837,0.000394,inf,0.006226
3198,"(MntFruits, MntWines, Education_2n Cycle)",(MntMeatProducts),0.085268,0.999554,0.085268,1.0,1.000447,3.8e-05,inf,0.000488


In [4]:
rules_sorted = rules.sort_values(by=['support'], ascending=False)
rules_sorted.to_csv('rules_by_support.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(MntMeatProducts),(Income),0.999554,1.0,0.999554,1.0,1.0,0.0,,0.0
5,(Income),(MntMeatProducts),1.0,0.999554,0.999554,0.999554,1.0,0.0,1.0,0.0
0,(MntWines),(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
1,(Income),(MntWines),1.0,0.994196,0.994196,0.994196,1.0,0.0,1.0,0.0
151,"(MntWines, Income)",(MntMeatProducts),0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923
152,(MntMeatProducts),"(MntWines, Income)",0.999554,0.994196,0.994196,0.99464,1.000447,0.000444,1.08285,1.0
153,(MntWines),"(MntMeatProducts, Income)",0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923
154,(Income),"(MntMeatProducts, MntWines)",1.0,0.994196,0.994196,0.994196,1.0,0.0,1.0,0.0
149,"(MntMeatProducts, MntWines)",(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
29,(MntWines),(MntMeatProducts),0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923


In [5]:
rules_sorted = rules.sort_values(by=['leverage'], ascending=False)
rules_sorted.to_csv('rules_by_leverage.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
21930,"(MntFruits, MntFishProducts)","(MntMeatProducts, MntWines, MntGoldProds, MntS...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
25299,"(MntSweetProducts, MntWines, MntGoldProds)","(MntFruits, MntMeatProducts, Income, MntFishPr...",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
25249,"(MntMeatProducts, MntWines, MntGoldProds, MntS...","(MntFruits, MntFishProducts, Income)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
15676,"(MntFruits, MntFishProducts)","(MntSweetProducts, MntWines, MntGoldProds, Inc...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
15660,"(MntFruits, MntFishProducts, Income)","(MntSweetProducts, MntWines, MntGoldProds)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
11150,"(MntSweetProducts, MntWines, MntGoldProds)","(MntFruits, MntFishProducts)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
11169,"(MntFruits, MntFishProducts)","(MntSweetProducts, MntWines, MntGoldProds)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
25297,"(MntFruits, MntMeatProducts, MntFishProducts)","(MntSweetProducts, MntWines, MntGoldProds, Inc...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
21905,"(MntFruits, MntMeatProducts, MntFishProducts)","(MntSweetProducts, MntWines, MntGoldProds)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
25266,"(MntFruits, MntMeatProducts, Income, MntFishPr...","(MntSweetProducts, MntWines, MntGoldProds)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697


In [6]:
rules_sorted = rules.sort_values(by=['conviction'], ascending=True)
rules_sorted.to_csv('rules_by_conviction.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
21265,"(MntGoldProds, Income, MntSweetProducts, Educa...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
21274,"(MntSweetProducts, Education_2n Cycle, MntGold...","(MntMeatProducts, Income)",0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
4984,"(MntFishProducts, Education_2n Cycle, MntSweet...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
9446,"(MntSweetProducts, Education_2n Cycle, Income,...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
9452,"(MntSweetProducts, Education_2n Cycle, MntFish...","(MntMeatProducts, Income)",0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
13866,"(MntSweetProducts, Education_2n Cycle, MntGold...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
21406,"(MntSweetProducts, Marital_Status_Divorced, Mn...","(MntMeatProducts, Income)",0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
13930,"(MntSweetProducts, Marital_Status_Divorced, Mn...",(MntMeatProducts),0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
21397,"(Marital_Status_Divorced, MntGoldProds, Income...",(MntMeatProducts),0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
9510,"(MntSweetProducts, Marital_Status_Divorced, In...",(MntMeatProducts),0.079018,0.999554,0.078571,0.99435,0.994794,-0.000411,0.079018,-0.00565


In [7]:
numeric_df = df.select_dtypes(include=[int, float])
model = IsolationForest(contamination=0.1, random_state=42)
model.fit(numeric_df)

df['novelty'] = model.predict(numeric_df)
df['novelty'] = df['novelty'].map({1: 'NO', -1: 'YES'})

df.to_csv('novelty_analysis.csv', index=False)

print("DataFrame with Novelty Labels:")
print(df.head())

DataFrame with Novelty Labels:
     ID  Year_Birth  Income  Kidhome  Teenhome Dt_Customer  Recency  MntWines  \
0  5524        1957       1        0         0  04-09-2012       58         1   
1  2174        1954       1        1         1  08-03-2014       38         1   
2  4141        1965       1        0         0  21-08-2013       26         1   
3  6182        1984       1        1         0  10-02-2014       26         1   
4  5324        1981       1        1         0  19-01-2014       94         1   

   MntFruits  MntMeatProducts  ...  Education_PhD  Marital_Status_Absurd  \
0          1                1  ...          False                  False   
1          1                1  ...          False                  False   
2          1                1  ...          False                  False   
3          1                1  ...          False                  False   
4          1                1  ...           True                  False   

   Marital_Status_Alone  