In [26]:
import pandas as pd
import warnings
from apyori import apriori
warnings.filterwarnings('ignore')

In [27]:
# Load the critical-actions dataset. Later cells read its 'Type of Bug'
# column and the 'Data', 'Neural Network', 'Logs', 'Hyperparameters' and
# 'Training Code' columns.
critical_actions_csv = '../Dataset/Dataset_CriticalActions.csv'
df = pd.read_csv(critical_actions_csv)

In [28]:
# Collapse every bug-type label in 'Type of Bug' to a one-letter code:
#   Training Bug -> T, Model Bug -> M, API Bug -> A,
#   Tensor and Input Bug -> I, GPU Bug -> G, Mixed Bug -> X.
# Labels that are not listed here are left untouched.
bug_type_codes = {
    'Training Bug': 'T',
    'Model Bug': 'M',
    'API Bug': 'A',
    'Tensor and Input Bug': 'I',
    'GPU Bug': 'G',
    'Mixed Bug': 'X',
}
for bug_label, bug_code in bug_type_codes.items():
    df.loc[df['Type of Bug'] == bug_label, 'Type of Bug'] = bug_code

In [29]:
# Turn the indicator columns into item labels: a value of 1 becomes the
# column's one-letter code and a value of 0 becomes an empty string (empty
# strings are dropped when the transactions are built).
indicator_codes = {
    'Data': 'D',
    'Neural Network': 'N',
    'Logs': 'L',
    'Hyperparameters': 'H',
    'Training Code': 'C',
}
for indicator_column, item_code in indicator_codes.items():
    flagged = df[indicator_column].replace(1, item_code)
    df[indicator_column] = flagged.replace(0, '')

In [30]:
def _frame_to_transactions(frame):
    """Convert each row of ``frame`` into a list of its non-empty cell strings.

    Every cell is passed through ``str``; cells whose string form is ``''``
    are dropped. All columns of ``frame`` take part, in column order, so the
    bug-type code and any other column present end up as items too.

    NOTE(review): a missing value stringifies to ``'nan'``, which is not
    empty and is therefore kept as an item.

    Args:
        frame: DataFrame whose rows are to become transactions.

    Returns:
        A list with one list of strings per row of ``frame``.
    """
    # Materialise the cell array once. Asking for ``frame.values`` for every
    # single cell, as the copy-pasted loops this helper replaces did, can
    # rebuild the whole array on each access.
    cell_rows = frame.values
    return [
        [text for text in map(str, cells) if text != '']
        for cells in cell_rows
    ]


# One slice of the dataset per bug-type code.
training_bugs = df[df['Type of Bug'] == 'T']
gpu_bugs = df[df['Type of Bug'] == 'G']
api_bugs = df[df['Type of Bug'] == 'A']
model_bugs = df[df['Type of Bug'] == 'M']
tensor_bugs = df[df['Type of Bug'] == 'I']
mixed_bugs = df[df['Type of Bug'] == 'X']

# Transaction lists consumed by apriori(): one per bug type, plus one that
# covers the whole dataset.
training_transactions = _frame_to_transactions(training_bugs)
gpu_transactions = _frame_to_transactions(gpu_bugs)
api_transactions = _frame_to_transactions(api_bugs)
model_transactions = _frame_to_transactions(model_bugs)
tensor_transactions = _frame_to_transactions(tensor_bugs)
mixed_transactions = _frame_to_transactions(mixed_bugs)
transactions = _frame_to_transactions(df)

In [31]:
import csv 

# Path of the GitHub-derived CSV that process_csv() reads (despite the name,
# this is a file path, not a list of transactions).
github_transactions = '../Dataset/Dataset_Github_CriticalInformation.csv'

# CSV column headers that process_csv() inspects: a row contributes a column's
# letter as an item when that column holds the string '1'.
# NOTE(review): presumably the same D/N/H/L/C codes used for the Data, Neural
# Network, Hyperparameters, Logs and Training Code columns above — confirm.
considered_columns = ['D', 'N', 'H', 'L', 'C']

def process_csv(file_path):
    """Append transactions read from a CSV file to the per-bug-type lists.

    A row becomes a transaction when at least one of the columns named in
    ``considered_columns`` holds the string ``'1'`` and its ``type`` column is
    one of 'Training', 'GPU', 'API', 'Model' or 'Tensor and Input'. The
    transaction is ``[bug_code] + marked_columns`` and is appended to the
    matching module-level list (``training_transactions``,
    ``gpu_transactions``, ``api_transactions``, ``model_transactions`` or
    ``tensor_transactions``). Rows with no marked column, or with any other
    ``type`` value, are skipped.

    Args:
        file_path: Path of the CSV file. It needs a ``type`` column and one
            column for each entry of ``considered_columns``.

    Returns:
        None. Results are delivered by mutating the module-level lists, so
        calling this twice on the same file appends every row twice.

    Raises:
        KeyError: If the file's header lacks a considered column, or lacks
            ``type`` while a row has marked columns.
    """
    # Row 'type' value -> (one-letter bug code, list that receives the row).
    # Replaces five copy-pasted branches that differed only in these two values.
    targets = {
        'Training': ('T', training_transactions),
        'GPU': ('G', gpu_transactions),
        'API': ('A', api_transactions),
        'Model': ('M', model_transactions),
        'Tensor and Input': ('I', tensor_transactions),
    }

    with open(file_path, mode='r', newline='') as file:
        for row in csv.DictReader(file):
            # Letters of the considered columns that are flagged in this row.
            marked_columns = [col for col in considered_columns if row[col] == '1']
            if not marked_columns:
                continue

            target = targets.get(row['type'])
            if target is None:
                # Unrecognised bug type: ignored, as before.
                continue

            bug_code, bucket = target
            bucket.append([bug_code] + marked_columns)

# Merge the GitHub rows into the per-bug-type transaction lists. process_csv()
# appends in place, so re-running this cell without first rebuilding the lists
# adds the same rows a second time.
process_csv(github_transactions)

In [32]:
def get_apriori_results(rules, character):
    """Print the itemsets that contain ``character`` and the rules it implies.

    For every record in ``rules`` whose ``items`` include ``character``, the
    itemset and its support are printed. If the record has rules whose
    antecedent (``items_base``) contains ``character``, an
    "Association Rules:" header follows, then each such rule with its
    confidence. The header is omitted when no rule qualifies, so it never
    appears with nothing beneath it.

    Items are printed in sorted order: the item collections are sets, whose
    iteration order for strings can differ from one interpreter session to
    the next, which would make the printed output unstable.

    Args:
        rules: Iterable of records exposing ``items``, ``support`` and
            ``ordered_statistics`` (as yielded by ``apyori.apriori``); each
            ordered statistic exposes ``items_base``, ``items_add`` and
            ``confidence``. A one-shot generator is fine; it is consumed.
        character: Item (e.g. a one-letter bug-type code) to filter on.

    Returns:
        None. Output goes to stdout.
    """
    for result in rules:
        if character not in result.items:
            continue

        items = ", ".join(sorted(result.items))
        print(f"Items: {{{items}}}")
        print(f"Support: {result.support:.4f}")

        # Keep only rules of the form {character, ...} => {...}.
        matching_rules = [
            rule
            for rule in (result.ordered_statistics or [])
            if character in rule.items_base
        ]
        if not matching_rules:
            continue

        print("Association Rules:")
        for rule in matching_rules:
            antecedent = ", ".join(sorted(rule.items_base))
            consequent = ", ".join(sorted(rule.items_add))
            print(f"  {{{antecedent}}} => {{{consequent}}}")
            print(f"Confidence: {rule.confidence:.4f}")

In [33]:
# Mine the training-bug transactions for itemsets of at most two items and
# print the ones involving the 'T' code.
get_apriori_results(
    apriori(training_transactions, max_length=2),
    'T',
)

Items: {T}
Support: 1.0000
Association Rules:
Items: {T, C}
Support: 0.8600
Association Rules:
  {T} => {C}
Confidence: 0.8600
Items: {T, D}
Support: 0.8200
Association Rules:
  {T} => {D}
Confidence: 0.8200
Items: {H, T}
Support: 0.7200
Association Rules:
  {T} => {H}
Confidence: 0.7200
Items: {T, L}
Support: 0.7600
Association Rules:
  {T} => {L}
Confidence: 0.7600
Items: {N, T}
Support: 0.5800
Association Rules:
  {T} => {N}
Confidence: 0.5800


In [34]:
# Mine the model-bug transactions for itemsets of at most two items and
# print the ones involving the 'M' code.
get_apriori_results(
    apriori(model_transactions, max_length=2),
    'M',
)

Items: {M}
Support: 1.0000
Association Rules:
Items: {M, C}
Support: 0.7143
Association Rules:
  {M} => {C}
Confidence: 0.7143
Items: {M, D}
Support: 0.5476
Association Rules:
  {M} => {D}
Confidence: 0.5476
Items: {H, M}
Support: 0.6429
Association Rules:
  {M} => {H}
Confidence: 0.6429
Items: {M, L}
Support: 0.7857
Association Rules:
  {M} => {L}
Confidence: 0.7857
Items: {M, N}
Support: 0.6429
Association Rules:
  {M} => {N}
Confidence: 0.6429


In [35]:
# Mine the GPU-bug transactions for itemsets of at most two items and
# print the ones involving the 'G' code.
get_apriori_results(
    apriori(gpu_transactions, max_length=2),
    'G',
)

Items: {G}
Support: 1.0000
Association Rules:
Items: {G, C}
Support: 0.6667
Association Rules:
  {G} => {C}
Confidence: 0.6667
Items: {G, L}
Support: 1.0000
Association Rules:
  {G} => {L}
Confidence: 1.0000


In [36]:
# Mine the tensor-and-input-bug transactions for itemsets of at most two
# items and print the ones involving the 'I' code.
get_apriori_results(
    apriori(tensor_transactions, max_length=2),
    'I',
)

Items: {I}
Support: 1.0000
Association Rules:
Items: {I, C}
Support: 0.7241
Association Rules:
  {I} => {C}
Confidence: 0.7241
Items: {I, D}
Support: 0.9655
Association Rules:
  {I} => {D}
Confidence: 0.9655
Items: {H, I}
Support: 0.4138
Association Rules:
  {I} => {H}
Confidence: 0.4138
Items: {I, L}
Support: 0.9310
Association Rules:
  {I} => {L}
Confidence: 0.9310
Items: {I, N}
Support: 0.5172
Association Rules:
  {I} => {N}
Confidence: 0.5172


In [37]:
# Mine the API-bug transactions for itemsets of at most two items and
# print the ones involving the 'A' code.
get_apriori_results(
    apriori(api_transactions, max_length=2),
    'A',
)

Items: {A}
Support: 1.0000
Association Rules:
Items: {A, C}
Support: 0.7500
Association Rules:
  {A} => {C}
Confidence: 0.7500
Items: {A, D}
Support: 0.5500
Association Rules:
  {A} => {D}
Confidence: 0.5500
Items: {H, A}
Support: 0.2500
Association Rules:
  {A} => {H}
Confidence: 0.2500
Items: {A, L}
Support: 0.8500
Association Rules:
  {A} => {L}
Confidence: 0.8500
Items: {N, A}
Support: 0.7000
Association Rules:
  {A} => {N}
Confidence: 0.7000
