In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import KBinsDiscretizer
from mlxtend.frequent_patterns import apriori, association_rules

df = pd.read_csv('market.csv', sep="\t")

continuous_features = ['Income', 'MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']
imputer = SimpleImputer(strategy='mean')
df[continuous_features] = imputer.fit_transform(df[continuous_features])
df[continuous_features] = df[continuous_features].apply(lambda x: x > 0).astype(int)

categorical_features = ['Education', 'Marital_Status']
df = pd.get_dummies(df, columns=categorical_features)

binary_columns = df.columns[df.isin([0, 1]).all()]

frequent_itemsets = apriori(df[binary_columns], min_support=0.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.5)

rules['leverage'] = rules['support'] - (rules['antecedent support'] * rules['consequent support'])
rules['conviction'] = (1 - rules['consequent support']) / (1 - rules['confidence'])

rules_sorted = rules.sort_values(by=['lift', 'confidence', 'support', 'leverage', 'conviction'], ascending=False)
rules_sorted.to_csv('association_rules.csv', index=False)

print(rules_sorted.head(10))



                                             antecedents  \
24601  (MntFishProducts, MntMeatProducts, Education_2...   
30762  (MntFishProducts, MntMeatProducts, Education_2...   
30783  (MntFishProducts, MntMeatProducts, Education_2...   
23346    (MntWines, MntFishProducts, Education_2n Cycle)   
28201  (MntWines, MntFishProducts, Education_2n Cycle...   
28226    (MntWines, MntFishProducts, Education_2n Cycle)   
32190  (MntWines, MntFishProducts, MntMeatProducts, E...   
32215    (MntWines, MntFishProducts, Education_2n Cycle)   
33907  (MntMeatProducts, Income, MntWines, Education_...   
33947  (MntWines, MntFishProducts, MntMeatProducts, E...   

                                             consequents  antecedent support  \
24601        (MntFruits, MntGoldProds, MntSweetProducts)            0.081696   
30762        (MntFruits, MntGoldProds, MntSweetProducts)            0.081696   
30783  (MntFruits, MntGoldProds, MntSweetProducts, In...            0.081696   
23346        (MntFr

In [2]:
rules_sorted = rules.sort_values(by=['lift'], ascending=False)
rules_sorted.to_csv('rules_by_lift.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
30762,"(MntFishProducts, MntMeatProducts, Education_2...","(MntFruits, MntGoldProds, MntSweetProducts)",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
30783,"(MntFishProducts, MntMeatProducts, Education_2...","(MntFruits, MntGoldProds, MntSweetProducts, In...",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
24601,"(MntFishProducts, MntMeatProducts, Education_2...","(MntFruits, MntGoldProds, MntSweetProducts)",0.081696,0.713393,0.074107,0.907104,1.271535,0.015825,3.085242,0.232547
32215,"(MntWines, MntFishProducts, Education_2n Cycle)","(MntFruits, MntMeatProducts, MntGoldProds, Mnt...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
32190,"(MntWines, MntFishProducts, MntMeatProducts, E...","(MntFruits, MntGoldProds, MntSweetProducts)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
28226,"(MntWines, MntFishProducts, Education_2n Cycle)","(MntFruits, MntGoldProds, MntSweetProducts, In...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
28201,"(MntWines, MntFishProducts, Education_2n Cycle...","(MntFruits, MntGoldProds, MntSweetProducts)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33947,"(MntWines, MntFishProducts, MntMeatProducts, E...","(MntFruits, MntGoldProds, MntSweetProducts, In...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33907,"(MntMeatProducts, Income, MntWines, Education_...","(MntFruits, MntGoldProds, MntSweetProducts)",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746
33957,"(MntWines, MntFishProducts, Education_2n Cycle...","(MntFruits, MntMeatProducts, MntGoldProds, Mnt...",0.080357,0.713393,0.072768,0.905556,1.269364,0.015442,3.034664,0.230746


In [3]:
rules_sorted = rules.sort_values(by=['confidence'], ascending=False)
rules_sorted.to_csv('rules_by_confidence.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MntWines),(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
2,(MntFruits),(Income),0.821429,1.0,0.821429,1.0,1.0,0.0,,0.0
4,(MntMeatProducts),(Income),0.999554,1.0,0.999554,1.0,1.0,0.0,,0.0
6,(MntFishProducts),(Income),0.828571,1.0,0.828571,1.0,1.0,0.0,,0.0
13162,"(MntFruits, Education_PhD, MntGoldProds, MntFi...",(MntMeatProducts),0.11875,0.999554,0.11875,1.0,1.000447,5.3e-05,inf,0.000507
13177,"(MntFruits, MntFishProducts, MntGoldProds, Mar...",(MntMeatProducts),0.077679,0.999554,0.077679,1.0,1.000447,3.5e-05,inf,0.000484
13111,"(MntFruits, MntFishProducts, MntMeatProducts, ...",(MntGoldProds),0.079911,0.972768,0.079911,1.0,1.027994,0.002176,inf,0.029597
13113,"(MntFruits, MntFishProducts, MntGoldProds, Edu...",(MntMeatProducts),0.079911,0.999554,0.079911,1.0,1.000447,3.6e-05,inf,0.000485
13118,"(MntFruits, MntFishProducts, Education_2n Cycle)","(MntMeatProducts, MntGoldProds)",0.079911,0.972321,0.079911,1.0,1.028466,0.002212,inf,0.030082
37,(AcceptedCmp4),(MntWines),0.074554,0.994196,0.074554,1.0,1.005837,0.000433,inf,0.006271


In [4]:
rules_sorted = rules.sort_values(by=['support'], ascending=False)
rules_sorted.to_csv('rules_by_support.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(MntMeatProducts),(Income),0.999554,1.0,0.999554,1.0,1.0,0.0,,0.0
5,(Income),(MntMeatProducts),1.0,0.999554,0.999554,0.999554,1.0,0.0,1.0,0.0
1,(Income),(MntWines),1.0,0.994196,0.994196,0.994196,1.0,0.0,1.0,0.0
0,(MntWines),(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
152,(MntWines),"(MntMeatProducts, Income)",0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923
149,"(MntWines, MntMeatProducts)",(Income),0.994196,1.0,0.994196,1.0,1.0,0.0,,0.0
153,(MntMeatProducts),"(MntWines, Income)",0.999554,0.994196,0.994196,0.99464,1.000447,0.000444,1.08285,1.0
151,"(MntMeatProducts, Income)",(MntWines),0.999554,0.994196,0.994196,0.99464,1.000447,0.000444,1.08285,1.0
150,"(MntWines, Income)",(MntMeatProducts),0.994196,0.999554,0.994196,1.0,1.000447,0.000444,inf,0.076923
154,(Income),"(MntWines, MntMeatProducts)",1.0,0.994196,0.994196,0.994196,1.0,0.0,1.0,0.0


In [5]:
rules_sorted = rules.sort_values(by=['leverage'], ascending=False)
rules_sorted.to_csv('rules_by_leverage.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
11168,"(MntFruits, MntFishProducts)","(MntWines, MntGoldProds, MntSweetProducts)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
21907,"(MntWines, MntGoldProds, MntSweetProducts)","(MntFruits, MntFishProducts, MntMeatProducts)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
11151,"(MntWines, MntGoldProds, MntSweetProducts)","(MntFruits, MntFishProducts)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
21882,"(MntWines, MntMeatProducts, MntGoldProds, MntS...","(MntFruits, MntFishProducts)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
21904,"(MntFruits, MntFishProducts, MntMeatProducts)","(MntWines, MntGoldProds, MntSweetProducts)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
15628,"(MntWines, MntGoldProds, MntSweetProducts, Inc...","(MntFruits, MntFishProducts)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
15650,"(MntFruits, MntFishProducts, Income)","(MntWines, MntGoldProds, MntSweetProducts)",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
15653,"(MntWines, MntGoldProds, MntSweetProducts)","(MntFruits, MntFishProducts, Income)",0.792411,0.735714,0.646875,0.816338,1.109586,0.063887,1.438979,0.47576
21929,"(MntFruits, MntFishProducts)","(MntWines, MntMeatProducts, MntGoldProds, MntS...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697
15675,"(MntFruits, MntFishProducts)","(MntWines, MntGoldProds, MntSweetProducts, Inc...",0.735714,0.792411,0.646875,0.879248,1.109586,0.063887,1.719131,0.373697


In [6]:
rules_sorted = rules.sort_values(by=['conviction'], ascending=True)
rules_sorted.to_csv('rules_by_conviction.csv', index=False)
rules_sorted.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
21275,"(MntFishProducts, MntGoldProds, MntSweetProduc...","(MntMeatProducts, Income)",0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
21265,"(Income, MntSweetProducts, MntGoldProds, Educa...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
4983,"(MntFishProducts, MntSweetProducts, Education_...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
13866,"(MntFishProducts, MntGoldProds, MntSweetProduc...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
9452,"(MntFishProducts, MntSweetProducts, Education_...","(MntMeatProducts, Income)",0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
9446,"(MntFishProducts, Education_2n Cycle, MntSweet...",(MntMeatProducts),0.075446,0.999554,0.075,0.994083,0.994527,-0.000413,0.075446,-0.005917
13930,"(MntFishProducts, MntGoldProds, MntSweetProduc...",(MntMeatProducts),0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
21407,"(MntFishProducts, MntGoldProds, MntSweetProduc...","(MntMeatProducts, Income)",0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
21397,"(Income, MntSweetProducts, MntGoldProds, MntFi...",(MntMeatProducts),0.077679,0.999554,0.077232,0.994253,0.994697,-0.000412,0.077679,-0.005747
9516,"(MntFishProducts, MntSweetProducts, Marital_St...","(MntMeatProducts, Income)",0.079018,0.999554,0.078571,0.99435,0.994794,-0.000411,0.079018,-0.00565


In [7]:
rules["leverage"] = rules["support"] - (
    rules["antecedent support"] * rules["consequent support"]
)
rules["conviction"] = (1 - rules["consequent support"]) / (1 - rules["confidence"])


weight_lift = 0.5
weight_confidence = 0.4
weight_support = 0.1

rules["combined_score"] = (
    weight_lift * rules["lift"]
    + weight_confidence * rules["confidence"]
    + weight_support * rules["support"]
)

top_overall_rules = rules.sort_values(by="combined_score", ascending=False)
top_overall_rules.to_csv("top_overall_rules.csv", index=False)

In [8]:
numeric_df = df.select_dtypes(include=[int, float])
model = IsolationForest(contamination=0.1, random_state=42)
model.fit(numeric_df)

df['novelty'] = model.predict(numeric_df)
df['novelty'] = df['novelty'].map({1: 'NO', -1: 'YES'})

df.to_csv('novelty_analysis.csv', index=False)

print("DataFrame with Novelty Labels:")
print(df.head())

DataFrame with Novelty Labels:
     ID  Year_Birth  Income  Kidhome  Teenhome Dt_Customer  Recency  MntWines  \
0  5524        1957       1        0         0  04-09-2012       58         1   
1  2174        1954       1        1         1  08-03-2014       38         1   
2  4141        1965       1        0         0  21-08-2013       26         1   
3  6182        1984       1        1         0  10-02-2014       26         1   
4  5324        1981       1        1         0  19-01-2014       94         1   

   MntFruits  MntMeatProducts  ...  Education_PhD  Marital_Status_Absurd  \
0          1                1  ...          False                  False   
1          1                1  ...          False                  False   
2          1                1  ...          False                  False   
3          1                1  ...          False                  False   
4          1                1  ...           True                  False   

   Marital_Status_Alone  