In [1]:
# ==============================
# 1. IMPORT LIBRARIES
# ==============================

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering

from mlxtend.frequent_patterns import apriori, association_rules


# ==============================
# 2. LOAD DATASET
# ==============================

df = pd.read_csv("Finance_data.csv")
print("Dataset Loaded Successfully")
print(df.head())
print(df.info())


# ==============================
# 3. SELECT ALL NUMERICAL COLUMNS
# ==============================

num_df = df.select_dtypes(include=['int64', 'float64'])
print("\nNumerical Columns Used for Clustering:")
print(num_df.columns)


# ==============================
# 4. FEATURE SCALING
# ==============================

scaler = StandardScaler()
scaled_data = scaler.fit_transform(num_df)


# ==============================
# 5. K-MEANS CLUSTERING
# ==============================

kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(scaled_data)

df['KMeans_Cluster'] = kmeans_labels

print("\nK-Means Cluster Distribution:")
print(df['KMeans_Cluster'].value_counts())


# ==============================
# 6. DBSCAN CLUSTERING
# ==============================

dbscan = DBSCAN(eps=1.5, min_samples=3)
dbscan_labels = dbscan.fit_predict(scaled_data)

df['DBSCAN_Cluster'] = dbscan_labels

print("\nDBSCAN Cluster Distribution (-1 = Noise):")
print(df['DBSCAN_Cluster'].value_counts())


# ==============================
# 7. HIERARCHICAL CLUSTERING
# ==============================

hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hierarchical.fit_predict(scaled_data)

df['Hierarchical_Cluster'] = hierarchical_labels

print("\nHierarchical Cluster Distribution:")
print(df['Hierarchical_Cluster'].value_counts())


# ==============================
# 8. APRIORI ALGORITHM
# ==============================

# Investment-related columns
investment_cols = [
    'Mutual_Funds',
    'Equity_Market',
    'Debentures',
    'Government_Bonds',
    'Fixed_Deposits',
    'PPF',
    'Gold'
]

apriori_df = df[investment_cols]

# Convert to binary (required for Apriori)
apriori_df = apriori_df.applymap(lambda x: 1 if x > 0 else 0)

print("\nBinary Investment Data (Apriori Input):")
print(apriori_df.head())


# ==============================
# 9. FREQUENT ITEMSETS
# ==============================

frequent_itemsets = apriori(
    apriori_df,
    min_support=0.3,
    use_colnames=True
)

print("\nFrequent Itemsets:")
print(frequent_itemsets)


# ==============================
# 10. ASSOCIATION RULES
# ==============================

rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.6
)

print("\nAssociation Rules:")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


# ==============================
# 11. FINAL DATASET WITH CLUSTERS
# ==============================

print("\nFinal Dataset with Cluster Labels:")
print(df.head())


Dataset Loaded Successfully
   gender  age Investment_Avenues  Mutual_Funds  Equity_Market  Debentures  \
0  Female   34                Yes             1              2           5   
1  Female   23                Yes             4              3           2   
2    Male   30                Yes             3              6           4   
3    Male   22                Yes             2              1           3   
4  Female   24                 No             2              1           3   

   Government_Bonds  Fixed_Deposits  PPF  Gold  ...           Duration  \
0                 3               7    6     4  ...          1-3 years   
1                 1               5    6     7  ...  More than 5 years   
2                 2               5    1     7  ...          3-5 years   
3                 7               6    4     5  ...   Less than 1 year   
4                 6               4    5     7  ...   Less than 1 year   

  Invest_Monitor   Expect       Avenue What are your savin

  apriori_df = apriori_df.applymap(lambda x: 1 if x > 0 else 0)
  cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
