In [56]:
import pandas as pd
import warnings
from apyori import apriori
warnings.filterwarnings('ignore')

In [57]:
# Read the edit-actions dataset and drop the columns listed below in one step.
df = pd.read_csv('../Dataset/Dataset_EditActions.csv').drop(
    columns=[
        'Architecture',
        'Reproducible?',
        'Code Snippet Present?',
        'System Configuration Present?',
        'Data Description Present?',
        'Framework',
    ]
)

In [58]:
# Recode the 'Type of Bug' column to one-letter item codes:
#   Training Bug -> T, Model Bug -> M, API Bug -> A,
#   Tensor and Input Bug -> I, GPU Bug -> G, Mixed Bug -> X
bug_type_codes = {
    'Training Bug': 'T',
    'Model Bug': 'M',
    'API Bug': 'A',
    'Tensor and Input Bug': 'I',
    'GPU Bug': 'G',
    'Mixed Bug': 'X',
}
for bug_label, bug_code in bug_type_codes.items():
    # Rows whose label matches are overwritten in place; other rows are untouched.
    df.loc[df['Type of Bug'] == bug_label, 'Type of Bug'] = bug_code

In [59]:
# One-letter item code for every edit-action indicator column.
# NOTE(review): 'M' is also the code given to 'Model Bug' in the 'Type of Bug'
# column, so the two cannot be told apart inside a transaction - confirm that
# this overlap is intended.
edit_action_codes = {
    'Input Data Generation': 'D',
    'Neural Network Definition': 'N',
    'Obsolete Parameter Removal': 'O',
    'Framework Migration': 'F',
    'Dataset Procurement': 'P',
    'Downloading Models and Tokenizers': 'M',
    'Logging': 'L',
    'Import Addition and Dependency Resolution': 'R',
    'Compiler Error Resolution': 'C',
    'Hyperparameter Initialization': 'H',
}

# A flag of 1 becomes the column's letter; a flag of 0 becomes an empty string
# (empty strings are what the transaction-building step filters out).
for action_column, action_code in edit_action_codes.items():
    df[action_column] = df[action_column].replace(1, action_code).replace(0, '')

In [60]:
def _rows_to_transactions(frame):
    """Turn every row of ``frame`` into a list of its non-empty cells.

    Each cell is converted with ``str``; cells whose string form is ``''`` are
    left out. One list is produced per row, in row order.

    NOTE(review): a missing value (NaN) stringifies to ``'nan'`` and is
    therefore kept as an item - confirm the dataset has no missing cells.
    """
    # Read ``.values`` once instead of inside the nested loop: the attribute
    # is looked up on every access and may rebuild the whole array each time.
    cells = frame.values
    return [[str(cell) for cell in row if str(cell) != ''] for row in cells]


# Per-bug-type subsets, selected by the one-letter code in 'Type of Bug'.
training_bugs = df[df['Type of Bug'] == 'T']
gpu_bugs = df[df['Type of Bug'] == 'G']
api_bugs = df[df['Type of Bug'] == 'A']
model_bugs = df[df['Type of Bug'] == 'M']
tensor_bugs = df[df['Type of Bug'] == 'I']
mixed_bugs = df[df['Type of Bug'] == 'X']

# One transaction list per bug type, plus one over the whole dataset.
training_transactions = _rows_to_transactions(training_bugs)
gpu_transactions = _rows_to_transactions(gpu_bugs)
api_transactions = _rows_to_transactions(api_bugs)
model_transactions = _rows_to_transactions(model_bugs)
tensor_transactions = _rows_to_transactions(tensor_bugs)
mixed_transactions = _rows_to_transactions(mixed_bugs)
transactions = _rows_to_transactions(df)

In [61]:
import csv 

# CSV file holding the manually reproduced GitHub bugs.
github_transactions = '../Dataset/ManualReproduction_Github_Bugs.csv'

# One-letter column headers that are checked for a '1' mark in each CSV row.
considered_columns = [
    'D', 'N', 'H', 'R', 'L',
    'O', 'C', 'P', 'M', 'V',
]

# Function to process the CSV file and extract transactions dynamically
def process_csv(file_path, columns=None, buckets=None):
    """Append one transaction per marked CSV row to the matching per-type list.

    For every row, the headers in ``columns`` whose cell equals the string
    ``'1'`` are collected. Rows with no marked column, or whose ``type`` value
    has no entry in ``buckets``, are skipped. Otherwise the transaction
    ``[code] + marked_columns`` is appended to that type's list.

    Parameters
    ----------
    file_path : str
        Path of the CSV file. It must have a ``type`` column and one column
        per entry of ``columns``; a missing column raises ``KeyError``.
    columns : list of str, optional
        Headers to inspect. Defaults to the notebook-level
        ``considered_columns``.
    buckets : dict, optional
        Maps a ``type`` value to ``(code, target_list)``. Defaults to the
        notebook-level ``*_transactions`` lists for Training, GPU, API, Model
        and Tensor and Input bugs.

    Returns
    -------
    None
        The target lists are extended in place, so calling this twice with the
        same file appends the same rows twice.

    NOTE(review): with the defaults, a 'Model' row marked in column 'M' yields
    a transaction containing 'M' twice - confirm the intended meaning of 'M'.
    """
    if columns is None:
        columns = considered_columns
    if buckets is None:
        buckets = {
            'Training': ('T', training_transactions),
            'GPU': ('G', gpu_transactions),
            'API': ('A', api_transactions),
            'Model': ('M', model_transactions),
            'Tensor and Input': ('I', tensor_transactions),
        }

    with open(file_path, mode='r', newline='') as file:
        for row in csv.DictReader(file):
            # Headers flagged with the literal string '1' in this row.
            marked_columns = [col for col in columns if row[col] == '1']
            if not marked_columns:
                continue

            target = buckets.get(row['type'])
            if target is None:
                # Unrecognised bug type: nothing is recorded for this row.
                continue

            code, sink = target
            # Transaction format: bug-type code first, e.g. ['T', 'D', 'R'].
            sink.append([code] + marked_columns)

# Fold the GitHub bugs into the per-type transaction lists.
# Re-running this cell appends the same rows again, so rebuild the lists first.
process_csv(file_path=github_transactions)

In [62]:
def get_apriori_results(rules, character):
    """Print the association rules whose antecedent contains ``character``.

    Parameters
    ----------
    rules : iterable
        Records such as those yielded by ``apriori``. Each record must expose
        ``items``, ``support`` and ``ordered_statistics``; each ordered
        statistic must expose ``items_base``, ``items_add``, ``confidence``
        and ``lift``.
    character : str
        Item that must appear in a rule's antecedent for it to be reported.

    Returns
    -------
    None
        Matching rules are printed, ordered by support and then confidence,
        both descending.
    """
    # Keep only the statistics whose antecedent contains the requested item.
    matching = [
        {
            "items": record.items,
            "support": record.support,
            "antecedent": stat.items_base,
            "consequent": stat.items_add,
            "confidence": stat.confidence,
            "lift": stat.lift,
        }
        for record in rules
        if character in record.items
        for stat in record.ordered_statistics
        if character in stat.items_base
    ]

    # Highest support first; confidence breaks ties.
    matching.sort(key=lambda rule: (rule["support"], rule["confidence"]), reverse=True)

    for rule in matching:
        # Item sets are unordered; sort the names so that the printed text is
        # identical from one kernel session to the next.
        items = ", ".join(sorted(rule["items"]))
        antecedent = ", ".join(sorted(rule["antecedent"]))
        consequent = ", ".join(sorted(rule["consequent"]))
        print(f"Items: {{{items}}}")
        print(f"Support: {rule['support']:.4f}")
        print("Association Rules:")
        print(f"  {{{antecedent}}} => {{{consequent}}}")
        print(f"Confidence: {rule['confidence']:.4f}")
        print(f"Lift: {rule['lift']:.4f}")

In [63]:
# Training bugs: report rules (max_length = 2) whose antecedent contains 'T'.
# NOTE(review): every training transaction carries 'T', which is consistent
# with the recorded output showing Lift 1.0000 and Confidence == Support.
get_apriori_results(
    apriori(training_transactions, max_length=2),
    'T',
)

Items: {T, D}
Support: 0.5625
Association Rules:
  {T} => {D}
Confidence: 0.5625
Lift: 1.0000
Items: {R, T}
Support: 0.4583
Association Rules:
  {T} => {R}
Confidence: 0.4583
Lift: 1.0000
Items: {C, T}
Support: 0.3750
Association Rules:
  {T} => {C}
Confidence: 0.3750
Lift: 1.0000
Items: {T, P}
Support: 0.3542
Association Rules:
  {T} => {P}
Confidence: 0.3542
Lift: 1.0000
Items: {T, H}
Support: 0.3333
Association Rules:
  {T} => {H}
Confidence: 0.3333
Lift: 1.0000
Items: {O, T}
Support: 0.2292
Association Rules:
  {T} => {O}
Confidence: 0.2292
Lift: 1.0000
Items: {F, T}
Support: 0.1875
Association Rules:
  {T} => {F}
Confidence: 0.1875
Lift: 1.0000
Items: {L, T}
Support: 0.1458
Association Rules:
  {T} => {L}
Confidence: 0.1458
Lift: 1.0000


In [64]:
# Model bugs: report rules (max_length = 2) whose antecedent contains 'M'.
get_apriori_results(
    apriori(model_transactions, max_length=2),
    'M',
)

Items: {M, H}
Support: 0.5122
Association Rules:
  {M} => {H}
Confidence: 0.5122
Lift: 1.0000
Items: {M, P}
Support: 0.4390
Association Rules:
  {M} => {P}
Confidence: 0.4390
Lift: 1.0000
Items: {C, M}
Support: 0.4146
Association Rules:
  {M} => {C}
Confidence: 0.4146
Lift: 1.0000
Items: {R, M}
Support: 0.4146
Association Rules:
  {M} => {R}
Confidence: 0.4146
Lift: 1.0000
Items: {N, M}
Support: 0.3659
Association Rules:
  {M} => {N}
Confidence: 0.3659
Lift: 1.0000
Items: {M, D}
Support: 0.3171
Association Rules:
  {M} => {D}
Confidence: 0.3171
Lift: 1.0000
Items: {V, M}
Support: 0.2439
Association Rules:
  {M} => {V}
Confidence: 0.2439
Lift: 1.0000
Items: {L, M}
Support: 0.2195
Association Rules:
  {M} => {L}
Confidence: 0.2195
Lift: 1.0000
Items: {O, M}
Support: 0.1951
Association Rules:
  {M} => {O}
Confidence: 0.1951
Lift: 1.0000


In [65]:
# Tensor and Input bugs: report rules (max_length = 2) whose antecedent contains 'I'.
get_apriori_results(
    apriori(tensor_transactions, max_length=2),
    'I',
)

Items: {I, H}
Support: 0.5517
Association Rules:
  {I} => {H}
Confidence: 0.5517
Lift: 1.0000
Items: {D, I}
Support: 0.5172
Association Rules:
  {I} => {D}
Confidence: 0.5172
Lift: 1.0000
Items: {R, I}
Support: 0.5172
Association Rules:
  {I} => {R}
Confidence: 0.5172
Lift: 1.0000
Items: {C, I}
Support: 0.4138
Association Rules:
  {I} => {C}
Confidence: 0.4138
Lift: 1.0000
Items: {O, I}
Support: 0.3448
Association Rules:
  {I} => {O}
Confidence: 0.3448
Lift: 1.0000
Items: {L, I}
Support: 0.3103
Association Rules:
  {I} => {L}
Confidence: 0.3103
Lift: 1.0000
Items: {P, I}
Support: 0.4138
Association Rules:
  {I} => {P}
Confidence: 0.4138
Lift: 1.0000
Items: {V, I}
Support: 0.1034
Association Rules:
  {I} => {V}
Confidence: 0.1034
Lift: 1.0000


In [67]:
# API bugs: report rules (max_length = 2) whose antecedent contains 'A'.
get_apriori_results(
    apriori(api_transactions, max_length=2),
    'A',
)

Items: {A, R}
Support: 0.3500
Association Rules:
  {A} => {R}
Confidence: 0.3500
Lift: 1.0000
Items: {A, D}
Support: 0.4000
Association Rules:
  {A} => {D}
Confidence: 0.4000
Lift: 1.0000
Items: {A, H}
Support: 0.4000
Association Rules:
  {A} => {H}
Confidence: 0.4000
Lift: 1.0000
Items: {A, P}
Support: 0.1500
Association Rules:
  {A} => {P}
Confidence: 0.1500
Lift: 1.0000
Items: {C, A}
Support: 0.1500
Association Rules:
  {A} => {C}
Confidence: 0.1500
Lift: 1.0000
Items: {O, A}
Support: 0.2500
Association Rules:
  {A} => {O}
Confidence: 0.2500
Lift: 1.0000
Items: {A, L}
Support: 0.2500
Association Rules:
  {A} => {L}
Confidence: 0.2500
Lift: 1.0000
Items: {N, A}
Support: 0.1000
Association Rules:
  {A} => {N}
Confidence: 0.1000
Lift: 1.0000
Items: {A, V}
Support: 0.1000
Association Rules:
  {A} => {V}
Confidence: 0.1000
Lift: 1.0000
