In [None]:
%pip install pandas mlxtend
%pip install mlxtend


In [14]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import fpgrowth

In [29]:
# Read the price-history CSV (path is relative to the notebook's working
# directory) into a DataFrame; later cells use its 'Date' and 'Close' columns.
file_path = './NVDA_1999-01-01_2024-12-04.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [23]:
# Preview the bin edges for discretising closing prices: five 0.02-wide
# buckets up to 0.1, followed by one bucket that ends at the highest close.
price_bins = [0, 0.02, 0.04, 0.06, 0.08, 0.1] + [data['Close'].max()]
price_bins

[0, 0.02, 0.04, 0.06, 0.08, 0.1, np.float64(148.8800048828125)]

In [5]:
# Step 1: Preprocess data
# Bin 'Close' prices into six labelled ranges for transactions: five 0.02-wide
# buckets up to 0.1, plus one bucket from 0.1 to the highest close in the data.
price_bins = [0, 0.02, 0.04, 0.06, 0.08, 0.1, data['Close'].max()]
price_labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High', 'Extremely High']
# include_lowest=True closes the first interval on the left, so a close of exactly 0 is binned.
data['Close_Bin'] = pd.cut(data['Close'], bins=price_bins, labels=price_labels, include_lowest=True)

# Create transactions: one list of price-bin labels per Date.
# NOTE(review): if every Date appears only once in the CSV, each transaction
# holds a single item, so no multi-item itemsets (and therefore no association
# rules) can ever be found -- confirm this transaction design is intended.
transactions = data[['Date', 'Close_Bin']].groupby('Date')['Close_Bin'].apply(list).reset_index()

# Explode transactions for One-Hot Encoding
exploded_transactions = transactions.explode('Close_Bin')
# crosstab yields integer counts. Cast to bool because mlxtend's frequent-pattern
# miners accept only 0/1 or True/False: a repeated (Date, bin) pair would
# otherwise produce a count above 1 and be rejected.
one_hot_data = pd.crosstab(exploded_transactions['Date'], exploded_transactions['Close_Bin']).astype(bool)

# Define thresholds
min_support = 0.1  # Hypothetical minimum support
min_confidence = 0.5  # Hypothetical minimum confidence

In [6]:
# Step 2: Apply Apriori (odd student IDs)
# Mine every itemset whose support reaches min_support; use_colnames=True keeps
# the bin labels as item names instead of column indices.
frequent_itemsets_apriori = apriori(one_hot_data, min_support=min_support, use_colnames=True)
# Convert relative support into an absolute number of transactions.
frequent_itemsets_apriori['support_count'] = frequent_itemsets_apriori['support'] * len(one_hot_data)
# association_rules expects the number of transactions in the original input
# data here, not the number of frequent itemsets that were found.
num_itemsets = len(one_hot_data)
rules_apriori = association_rules(frequent_itemsets_apriori, metric="confidence", min_threshold=min_confidence, num_itemsets=num_itemsets)



In [8]:
# Step 3: Apply MaxFP-growth (even student IDs)
# NOTE(review): this calls `fpgrowth`, which returns all frequent itemsets; if
# only maximal itemsets are wanted, confirm whether a different miner is intended.
frequent_itemsets_fpgrowth = fpgrowth(one_hot_data, min_support=min_support, use_colnames=True)
# Pass the number of transactions in the original data, computed here so this
# cell does not depend on a variable set by the Apriori cell.
rules_fpgrowth = association_rules(frequent_itemsets_fpgrowth, metric="confidence", min_threshold=min_confidence, num_itemsets=len(one_hot_data))



In [9]:
# Output results
# Output results: heading line followed by the first rows of the Apriori itemsets.
print("Frequent Itemsets (Apriori):", frequent_itemsets_apriori.head(), sep="\n")

Frequent Itemsets (Apriori):
    support          itemsets  support_count
0  0.943617  (Extremely High)         6142.0


In [10]:
# Heading line followed by the first rows of the rules derived from the Apriori itemsets.
print("Association Rules (Apriori):", rules_apriori.head(), sep="\n")

Association Rules (Apriori):
Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []


In [11]:
# Heading line followed by the first rows of the itemsets mined with fpgrowth.
print("Frequent Itemsets (MaxFP-growth):", frequent_itemsets_fpgrowth.head(), sep="\n")

Frequent Itemsets (MaxFP-growth):
    support          itemsets
0  0.943617  (Extremely High)


In [12]:
# Heading line followed by the first rows of the rules derived from the fpgrowth itemsets.
print("Association Rules (MaxFP-growth):", rules_fpgrowth.head(), sep="\n")

Association Rules (MaxFP-growth):
Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []
