In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import KBinsDiscretizer
from mlxtend.frequent_patterns import apriori, association_rules

df = pd.read_csv('market.csv', sep="\t")

continuous_features = ['Income', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']
imputer = SimpleImputer(strategy='mean')
df[continuous_features] = imputer.fit_transform(df[continuous_features])
df[continuous_features] = df[continuous_features].apply(lambda x: x > 0).astype(int)

categorical_features = ['Education', 'Marital_Status']
df = pd.get_dummies(df, columns=categorical_features)

binary_columns = df.columns[df.isin([0, 1]).all()]

frequent_itemsets = apriori(df[binary_columns], min_support=0.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.5)

rules['leverage'] = rules['support'] - (rules['antecedent support'] * rules['consequent support'])
rules['conviction'] = (1 - rules['consequent support']) / (1 - rules['confidence'])

rules_sorted = rules.sort_values(by=['lift', 'confidence', 'support', 'leverage', 'conviction'], ascending=False)
rules_sorted.to_csv('association_rules.csv', index=False)

print(rules_sorted.head(10))



                                             antecedents  \
24598  (MntMeatProducts, Education_2n Cycle, MntFishP...   
30761  (MntMeatProducts, Education_2n Cycle, MntFishP...   
30779  (MntMeatProducts, Education_2n Cycle, MntFishP...   
23344    (Education_2n Cycle, MntWines, MntFishProducts)   
28203  (Education_2n Cycle, MntWines, MntFishProducts...   
28219    (Education_2n Cycle, MntWines, MntFishProducts)   
32188  (MntMeatProducts, Education_2n Cycle, MntWines...   
32213    (Education_2n Cycle, MntWines, MntFishProducts)   
33909  (MntMeatProducts, MntFishProducts, MntWines, E...   
33940  (MntMeatProducts, Education_2n Cycle, MntWines...   

                                             consequents  antecedent support  \
24598        (MntGoldProds, MntSweetProducts, MntFruits)            0.081696   
30761        (MntGoldProds, MntSweetProducts, MntFruits)            0.081696   
30779  (MntGoldProds, MntFruits, MntSweetProducts, In...            0.081696   
23344        (MntGo

In [2]:
rules_sorted = rules.sort_values(by=['lift'], ascending=False)
rules_sorted.to_csv('rules_by_lift.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
30779,"(MntMeatProducts, Education_2n Cycle, MntFishP...","(MntGoldProds, MntFruits, MntSweetProducts, In...",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
30761,"(MntMeatProducts, Education_2n Cycle, MntFishP...","(MntGoldProds, MntSweetProducts, MntFruits)",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
24598,"(MntMeatProducts, Education_2n Cycle, MntFishP...","(MntGoldProds, MntSweetProducts, MntFruits)",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
32213,"(Education_2n Cycle, MntWines, MntFishProducts)","(MntGoldProds, MntMeatProducts, MntSweetProduc...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
32188,"(MntMeatProducts, Education_2n Cycle, MntWines...","(MntGoldProds, MntSweetProducts, MntFruits)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33959,"(Education_2n Cycle, MntWines, MntFishProducts...","(MntGoldProds, MntMeatProducts, MntSweetProduc...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
28203,"(Education_2n Cycle, MntWines, MntFishProducts...","(MntGoldProds, MntSweetProducts, MntFruits)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33981,"(Education_2n Cycle, MntWines, MntFishProducts)","(MntMeatProducts, MntSweetProducts, MntGoldPro...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33940,"(MntMeatProducts, Education_2n Cycle, MntWines...","(MntGoldProds, MntFruits, MntSweetProducts, In...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33909,"(MntMeatProducts, MntFishProducts, MntWines, E...","(MntGoldProds, MntSweetProducts, MntFruits)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746


In [10]:
rules_sorted = rules.sort_values(by=['confidence'], ascending=False)
rules_sorted.to_csv('rules_by_confidence.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MntWines),(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
3942,"(Marital_Status_Married, Education_Master, Mnt...",(MntMeatProducts),0.061607,0.999554,0.061607,1.0,1.000447,2.8e-05,inf,0.000476
10749,"(AcceptedCmp1, MntSweetProducts, MntFruits)","(MntMeatProducts, MntWines)",0.052232,0.994196,0.052232,1.0,1.005837,0.000303,inf,0.006123
3966,"(AcceptedCmp5, MntFishProducts, MntSweetProducts)",(MntWines),0.061161,0.994196,0.061161,1.0,1.005837,0.000355,inf,0.006182
10757,"(MntMeatProducts, Response, MntSweetProducts, ...",(MntWines),0.119643,0.994196,0.119643,1.0,1.005837,0.000694,inf,0.006592
10759,"(MntWines, Response, MntSweetProducts, MntFruits)",(MntMeatProducts),0.119643,0.999554,0.119643,1.0,1.000447,5.3e-05,inf,0.000507
33235,"(MntFishProducts, MntSweetProducts, Marital_St...","(MntWines, MntGoldProds)",0.083929,0.966964,0.083929,1.0,1.034164,0.002773,inf,0.036062
10764,"(Response, MntSweetProducts, MntFruits)","(MntMeatProducts, MntWines)",0.119643,0.994196,0.119643,1.0,1.005837,0.000694,inf,0.006592
10774,"(Education_2n Cycle, MntWines, MntSweetProduct...",(MntMeatProducts),0.078571,0.999554,0.078571,1.0,1.000447,3.5e-05,inf,0.000484
25102,"(MntFishProducts, MntSweetProducts, Marital_St...",(MntGoldProds),0.083929,0.972768,0.083929,1.0,1.027994,0.002286,inf,0.029727


In [12]:
rules_sorted = rules.sort_values(by=['support'], ascending=False)
rules_sorted.to_csv('rules_by_support.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(MntMeatProducts),(Income),0.999554,1.0,0.999554,1.0,1.0,0.0,,0.0
5,(Income),(MntMeatProducts),1.0,0.999554,0.999554,0.999554,1.0,0.0,1.0,0.0
0,(MntWines),(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
1,(Income),(MntWines),1.0,0.994196,0.994196,0.994196,1.0,0.0,1.0,0.0
151,"(MntWines, Income)",(MntMeatProducts),0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923
152,(MntMeatProducts),"(MntWines, Income)",0.999554,0.994196,0.994196,0.99464,1.000447,0.000444,1.08285,1.0
153,(MntWines),"(MntMeatProducts, Income)",0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923
154,(Income),"(MntMeatProducts, MntWines)",1.0,0.994196,0.994196,0.994196,1.0,0.0,1.0,0.0
149,"(MntMeatProducts, MntWines)",(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
29,(MntWines),(MntMeatProducts),0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923


In [14]:
rules_sorted = rules.sort_values(by=['leverage'], ascending=False)
rules_sorted.to_csv('rules_by_leverage.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
25287,"(MntMeatProducts, MntFishProducts, MntFruits)","(MntGoldProds, MntWines, MntSweetProducts, Inc...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
25278,"(MntGoldProds, MntWines, MntSweetProducts, Inc...","(MntMeatProducts, MntFishProducts, MntFruits)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
15651,"(MntFruits, MntFishProducts, Income)","(MntGoldProds, MntWines, MntSweetProducts)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
15652,"(MntGoldProds, MntWines, MntSweetProducts)","(MntFruits, MntFishProducts, Income)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
11163,"(MntFishProducts, MntFruits)","(MntGoldProds, MntWines, MntSweetProducts)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
25258,"(MntGoldProds, MntMeatProducts, MntWines, MntS...","(MntFruits, MntFishProducts, Income)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
25257,"(MntFruits, MntMeatProducts, MntFishProducts, ...","(MntGoldProds, MntWines, MntSweetProducts)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
25307,"(MntFruits, MntFishProducts, Income)","(MntGoldProds, MntMeatProducts, MntWines, MntS...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
21924,"(MntFishProducts, MntFruits)","(MntGoldProds, MntMeatProducts, MntWines, MntS...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
21887,"(MntGoldProds, MntMeatProducts, MntWines, MntS...","(MntFishProducts, MntFruits)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576


In [17]:
rules_sorted = rules.sort_values(by=['conviction'], ascending=True)
rules_sorted.to_csv('rules_by_conviction.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
13866,"(MntGoldProds, Education_2n Cycle, MntFishProd...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
21265,"(MntFishProducts, MntSweetProducts, MntGoldPro...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
21272,"(MntGoldProds, Education_2n Cycle, MntFishProd...","(MntMeatProducts, Income)",0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
9450,"(Education_2n Cycle, MntFishProducts, MntSweet...","(MntMeatProducts, Income)",0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
9446,"(Education_2n Cycle, MntFishProducts, MntSweet...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
4984,"(Education_2n Cycle, MntFishProducts, MntSweet...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
21404,"(MntGoldProds, Marital_Status_Divorced, MntFis...","(MntMeatProducts, Income)",0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
13930,"(MntGoldProds, Marital_Status_Divorced, MntFis...",(MntMeatProducts),0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
21397,"(MntFishProducts, MntSweetProducts, Marital_St...",(MntMeatProducts),0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
9514,"(Marital_Status_Divorced, MntFishProducts, Mnt...","(MntMeatProducts, Income)",0.079018,0.999554,0.078571,0.99435,0.994794,-0.000411,0.079018,-0.00565


In [18]:
numeric_df = df.select_dtypes(include=[int, float])
model = IsolationForest(contamination=0.1, random_state=42)
model.fit(numeric_df)

df['novelty'] = model.predict(numeric_df)
df['novelty'] = df['novelty'].map({1: 'NO', -1: 'YES'})

df.to_csv('novelty_analysis.csv', index=False)

print("DataFrame with Novelty Labels:")
print(df.head())

DataFrame with Novelty Labels:
     ID  Year_Birth  Income  Kidhome  Teenhome Dt_Customer  Recency  MntWines  \
0  5524        1957       1        0         0  04-09-2012       58         1   
1  2174        1954       1        1         1  08-03-2014       38         1   
2  4141        1965       1        0         0  21-08-2013       26         1   
3  6182        1984       1        1         0  10-02-2014       26         1   
4  5324        1981       1        1         0  19-01-2014       94         1   

   MntFruits  MntMeatProducts  ...  Education_PhD  Marital_Status_Absurd  \
0          1                1  ...          False                  False   
1          1                1  ...          False                  False   
2          1                1  ...          False                  False   
3          1                1  ...          False                  False   
4          1                1  ...           True                  False   

   Marital_Status_Alone  