In [5]:
import yfinance as yf
import pandas as pd
import matplotlib

In [6]:
tickers = ['JPM', 'V', 'PG', 'CVX', 'ABBV', 'BAC', 'UNH', 'DIS', 'VZ', 'KO']
start_date = '2023-01-01'
end_date = '2024-01-31'

dataframes = []

for ticker in tickers:
    ticker_data = yf.Ticker(ticker)
    ticker_df = ticker_data.history(start=start_date, end=end_date)
    ticker_df['Ticker'] = ticker
    dataframes.append(ticker_df)
    
combined_df = pd.concat(dataframes)
combined_df.reset_index(inplace=True)

In [10]:
final_df = combined_df[['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']]

In [12]:
print(final_df.head())

                       Date Ticker        Open        High         Low  \
0 2023-01-03 00:00:00-05:00    JPM  129.048598  130.479927  127.760397   
1 2023-01-04 00:00:00-05:00    JPM  129.764253  131.376871  129.363483   
2 2023-01-05 00:00:00-05:00    JPM  130.405550  130.453616  128.521459   
3 2023-01-06 00:00:00-05:00    JPM  130.857322  133.020174  129.280844   
4 2023-01-09 00:00:00-05:00    JPM  133.231671  133.500824  131.578289   

        Close    Volume  
0  128.934082  11054800  
1  130.136398  11687600  
2  130.107559   8381300  
3  132.597214  10029100  
4  132.049301   8482300  


In [14]:
final_df.to_csv('Financial_data.csv', index=False)

### Preparing the Data for Apriori Analysis

In [17]:
import pandas as pd

# Load financial data from CSV
df = pd.read_csv('Financial_data.csv')
transactions = []
for date, group in df.groupby('Date'):
    purchased = group[group['Close'] > group['Open']]['Ticker'].tolist()
    if purchased:
        transactions.append(purchased)

In [19]:
def calculate_support(itemset, transactions):
    count = sum(1 for transaction in transactions if set(itemset).issubset(transaction))
    return count / len(transactions)

In [21]:
def generate_new_combinations(old_combinations, k, items):
    new_combinations = set()
    for combination in old_combinations:
        for item in items:
            if item not in combination:
                potential_new_combination = tuple(sorted(set(combination) | {item}))
                if len(potential_new_combination) == k:
                    new_combinations.add(potential_new_combination)
    return new_combinations

In [25]:
def apriori(transactions, support_threshold):
    items = set(item for transaction in transactions for item in transaction)  # unique items
    current_itemsets = [{item} for item in items]
    frequent_itemsets = []
    k = 1

    while current_itemsets:
            print(f"Calculating frequent itemsets of size {k}")
            itemset_support = [(itemset, calculate_support(itemset, transactions)) for itemset in current_itemsets]
            current_itemsets = [itemset for itemset, support in itemset_support if support >= support_threshold]
            frequent_itemsets.extend(current_itemsets)
            current_itemsets = generate_new_combinations(current_itemsets, k + 1, items)
            k += 1

    return frequent_itemsets

In [99]:
support_threshold = 0.2  # just for example, typically this would be much lower
frequent_itemsets = apriori(transactions, support_threshold)

for itemset in frequent_itemsets:
    print(itemset)

Calculating frequent itemsets of size 1
Calculating frequent itemsets of size 2
Calculating frequent itemsets of size 3
Calculating frequent itemsets of size 4
Calculating frequent itemsets of size 5
{'ABBV'}
{'PG'}
{'V'}
{'JPM'}
{'VZ'}
{'CVX'}
{'DIS'}
{'UNH'}
{'BAC'}
{'KO'}
('DIS', 'PG')
('JPM', 'VZ')
('KO', 'VZ')
('CVX', 'UNH')
('BAC', 'CVX')
('DIS', 'VZ')
('CVX', 'V')
('CVX', 'JPM')
('BAC', 'UNH')
('ABBV', 'CVX')
('CVX', 'KO')
('BAC', 'V')
('BAC', 'JPM')
('BAC', 'KO')
('ABBV', 'UNH')
('ABBV', 'V')
('PG', 'VZ')
('UNH', 'V')
('ABBV', 'JPM')
('ABBV', 'KO')
('JPM', 'UNH')
('KO', 'UNH')
('JPM', 'V')
('KO', 'V')
('CVX', 'PG')
('DIS', 'UNH')
('JPM', 'KO')
('DIS', 'V')
('BAC', 'PG')
('V', 'VZ')
('DIS', 'JPM')
('DIS', 'KO')
('CVX', 'VZ')
('CVX', 'DIS')
('ABBV', 'PG')
('PG', 'UNH')
('BAC', 'VZ')
('BAC', 'DIS')
('PG', 'V')
('ABBV', 'VZ')
('ABBV', 'DIS')
('JPM', 'PG')
('UNH', 'VZ')
('KO', 'PG')
('ABBV', 'BAC')
('ABBV', 'BAC', 'JPM')
('JPM', 'KO', 'UNH')
('DIS', 'JPM', 'PG')
('ABBV', 'JPM', 'KO'

In [101]:
def calculate_confidence_and_lift(frequent_itemsets, transactions):
    result = []
    for itemset in frequent_itemsets:
        support_itemset = calculate_support(itemset, transactions)
        
        for item in itemset:
            consequent = set(itemset) - {item}
            support_consequent = calculate_support(consequent, transactions)
            confidence = support_itemset / support_consequent
            
            support_antecedent = calculate_support({item}, transactions)
            lift = confidence / support_antecedent
            
            rule = {
                'antecedent': {item},
                'consequent': consequent,
                'support': support_itemset,
                'confidence': confidence,
                'lift': lift
            }
            result.append(rule)
    return result

In [103]:
association_rules = calculate_confidence_and_lift(frequent_itemsets, transactions)

strong_associations = [rule for rule in association_rules if rule['lift'] > 1]
for rule in strong_associations:
    print(f"{rule['antecedent']} -> {rule['consequent']}, "
          f"support: {rule['support']:.2f}, "
          f"confidence: {rule['confidence']:.2f}, "
          f"lift: {rule['lift']:.2f}")

{'DIS'} -> {'PG'}, support: 0.31, confidence: 0.59, lift: 1.04
{'PG'} -> {'DIS'}, support: 0.31, confidence: 0.55, lift: 1.04
{'JPM'} -> {'VZ'}, support: 0.32, confidence: 0.61, lift: 1.05
{'VZ'} -> {'JPM'}, support: 0.32, confidence: 0.56, lift: 1.05
{'KO'} -> {'VZ'}, support: 0.30, confidence: 0.56, lift: 1.12
{'VZ'} -> {'KO'}, support: 0.30, confidence: 0.60, lift: 1.12
{'CVX'} -> {'UNH'}, support: 0.30, confidence: 0.52, lift: 1.03
{'UNH'} -> {'CVX'}, support: 0.30, confidence: 0.59, lift: 1.03
{'BAC'} -> {'CVX'}, support: 0.30, confidence: 0.60, lift: 1.14
{'CVX'} -> {'BAC'}, support: 0.30, confidence: 0.58, lift: 1.14
{'DIS'} -> {'VZ'}, support: 0.35, confidence: 0.66, lift: 1.17
{'VZ'} -> {'DIS'}, support: 0.35, confidence: 0.62, lift: 1.17
{'CVX'} -> {'V'}, support: 0.33, confidence: 0.57, lift: 1.11
{'V'} -> {'CVX'}, support: 0.33, confidence: 0.65, lift: 1.11
{'CVX'} -> {'JPM'}, support: 0.33, confidence: 0.56, lift: 1.11
{'JPM'} -> {'CVX'}, support: 0.33, confidence: 0.64, l

In [105]:
strong_associations

[{'antecedent': {'DIS'},
  'consequent': {'PG'},
  'support': 0.31153846153846154,
  'confidence': 0.5869565217391305,
  'lift': 1.0381543921916594},
 {'antecedent': {'PG'},
  'consequent': {'DIS'},
  'support': 0.31153846153846154,
  'confidence': 0.5510204081632654,
  'lift': 1.0381543921916594},
 {'antecedent': {'JPM'},
  'consequent': {'VZ'},
  'support': 0.3230769230769231,
  'confidence': 0.6086956521739131,
  'lift': 1.0480852289087244},
 {'antecedent': {'VZ'},
  'consequent': {'JPM'},
  'support': 0.3230769230769231,
  'confidence': 0.5562913907284768,
  'lift': 1.0480852289087244},
 {'antecedent': {'KO'},
  'consequent': {'VZ'},
  'support': 0.29615384615384616,
  'confidence': 0.5579710144927537,
  'lift': 1.1245927423884958},
 {'antecedent': {'VZ'},
  'consequent': {'KO'},
  'support': 0.29615384615384616,
  'confidence': 0.5968992248062015,
  'lift': 1.1245927423884956},
 {'antecedent': {'CVX'},
  'consequent': {'UNH'},
  'support': 0.3,
  'confidence': 0.5234899328859061,


In [82]:
def transaction_reduction(transactions, frequent_itemsets):
    """
    Reduces the transaction list by removing transactions that do not contain any
    of the frequent itemsets.
    """
    new_transactions = []
    for transaction in transactions:
        if any(frequent_itemset.issubset(transaction) for frequent_itemset in frequent_itemsets):
            new_transactions.append(transaction)
    return new_transactions

transactions = transaction_reduction(transactions, frequent_itemsets)

In [84]:
transactions

[['PG', 'ABBV', 'BAC', 'VZ'],
 ['JPM', 'V', 'PG', 'CVX', 'ABBV', 'BAC', 'DIS', 'VZ'],
 ['CVX', 'ABBV', 'BAC', 'DIS', 'VZ'],
 ['JPM', 'V', 'PG', 'ABBV', 'BAC', 'DIS', 'VZ', 'KO'],
 ['DIS'],
 ['JPM', 'V', 'BAC', 'DIS', 'VZ'],
 ['JPM', 'V', 'BAC', 'UNH', 'DIS'],
 ['V', 'CVX', 'UNH', 'DIS', 'VZ'],
 ['JPM', 'V', 'PG', 'CVX', 'ABBV', 'BAC', 'UNH', 'DIS', 'VZ', 'KO'],
 ['V', 'CVX'],
 ['V', 'CVX', 'UNH', 'DIS', 'VZ'],
 ['V', 'PG', 'CVX', 'ABBV', 'BAC', 'UNH', 'DIS', 'KO'],
 ['JPM', 'BAC', 'DIS'],
 ['JPM', 'V', 'PG', 'BAC', 'DIS', 'VZ', 'KO'],
 ['JPM', 'V', 'PG', 'ABBV', 'BAC', 'UNH', 'DIS', 'KO'],
 ['JPM', 'CVX', 'BAC', 'DIS'],
 ['V', 'BAC', 'DIS', 'VZ'],
 ['PG', 'BAC', 'VZ', 'KO'],
 ['JPM', 'V', 'PG', 'CVX', 'ABBV', 'BAC', 'UNH', 'DIS', 'VZ', 'KO'],
 ['JPM', 'V', 'PG', 'BAC', 'DIS', 'KO'],
 ['BAC', 'DIS', 'VZ'],
 ['JPM', 'V', 'BAC'],
 ['JPM', 'V', 'CVX', 'BAC', 'UNH', 'KO'],
 ['JPM', 'V', 'PG', 'CVX', 'ABBV', 'BAC', 'UNH', 'DIS', 'KO'],
 ['BAC', 'UNH'],
 ['ABBV', 'UNH'],
 ['JPM', 'PG', 'CVX',

### Moving Forward with Efficient Analysis

In [107]:
import pandas as pd

# Load pre-processed financial transactions from CSV
df = pd.read_csv('financial_data.csv')

In [109]:
# Prepare the transaction data from stock price movements
transactions = []
for date, group in df.groupby('Date'):
    purchased = group[group['Close'] > group['Open']]['Ticker'].tolist()
    if purchased:
        transactions.append(purchased)

In [111]:
# Apply the Apriori algorithm to find frequent itemsets with reduced transaction database
frequent_itemsets = apriori(transactions, support_threshold=0.2)  # For demonstrative purposes
transactions = transaction_reduction(transactions, frequent_itemsets)

Calculating frequent itemsets of size 1
Calculating frequent itemsets of size 2
Calculating frequent itemsets of size 3
Calculating frequent itemsets of size 4
Calculating frequent itemsets of size 5


In [113]:
# Calculate confidence and lift for the frequent itemsets
association_rules = calculate_confidence_and_lift(frequent_itemsets, transactions)

# Identify strong associations based on lift
strong_associations = [rule for rule in association_rules if rule['lift'] > 1]
for rule in strong_associations:
    print(f"{rule['antecedent']} -> {rule['consequent']}, "
          f"support: {rule['support']:.2f}, "
          f"confidence: {rule['confidence']:.2f}, "
          f"lift: {rule['lift']:.2f}")

{'DIS'} -> {'PG'}, support: 0.31, confidence: 0.59, lift: 1.04
{'PG'} -> {'DIS'}, support: 0.31, confidence: 0.55, lift: 1.04
{'JPM'} -> {'VZ'}, support: 0.32, confidence: 0.61, lift: 1.05
{'VZ'} -> {'JPM'}, support: 0.32, confidence: 0.56, lift: 1.05
{'KO'} -> {'VZ'}, support: 0.30, confidence: 0.56, lift: 1.12
{'VZ'} -> {'KO'}, support: 0.30, confidence: 0.60, lift: 1.12
{'CVX'} -> {'UNH'}, support: 0.30, confidence: 0.52, lift: 1.03
{'UNH'} -> {'CVX'}, support: 0.30, confidence: 0.59, lift: 1.03
{'BAC'} -> {'CVX'}, support: 0.30, confidence: 0.60, lift: 1.14
{'CVX'} -> {'BAC'}, support: 0.30, confidence: 0.58, lift: 1.14
{'DIS'} -> {'VZ'}, support: 0.35, confidence: 0.66, lift: 1.17
{'VZ'} -> {'DIS'}, support: 0.35, confidence: 0.62, lift: 1.17
{'CVX'} -> {'V'}, support: 0.33, confidence: 0.57, lift: 1.11
{'V'} -> {'CVX'}, support: 0.33, confidence: 0.65, lift: 1.11
{'CVX'} -> {'JPM'}, support: 0.33, confidence: 0.56, lift: 1.11
{'JPM'} -> {'CVX'}, support: 0.33, confidence: 0.64, l

In [115]:
strong_associations

[{'antecedent': {'DIS'},
  'consequent': {'PG'},
  'support': 0.31153846153846154,
  'confidence': 0.5869565217391305,
  'lift': 1.0381543921916594},
 {'antecedent': {'PG'},
  'consequent': {'DIS'},
  'support': 0.31153846153846154,
  'confidence': 0.5510204081632654,
  'lift': 1.0381543921916594},
 {'antecedent': {'JPM'},
  'consequent': {'VZ'},
  'support': 0.3230769230769231,
  'confidence': 0.6086956521739131,
  'lift': 1.0480852289087244},
 {'antecedent': {'VZ'},
  'consequent': {'JPM'},
  'support': 0.3230769230769231,
  'confidence': 0.5562913907284768,
  'lift': 1.0480852289087244},
 {'antecedent': {'KO'},
  'consequent': {'VZ'},
  'support': 0.29615384615384616,
  'confidence': 0.5579710144927537,
  'lift': 1.1245927423884958},
 {'antecedent': {'VZ'},
  'consequent': {'KO'},
  'support': 0.29615384615384616,
  'confidence': 0.5968992248062015,
  'lift': 1.1245927423884956},
 {'antecedent': {'CVX'},
  'consequent': {'UNH'},
  'support': 0.3,
  'confidence': 0.5234899328859061,


Source: 

https://python.plainenglish.io/pioneering-portfolio-management-with-market-basket-analysis-and-the-apriori-algorithm-96530e75755c