**Import Libraries**

In [None]:
# Import the Python libraries needed for association rule mining.
import pandas as pd
import csv
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

**Helper Functions to Perform Association Rule Mining**

In [None]:
# Turn a binary "1"/"0" value into a label that also names its column.
def distinct_categorical(curr, header):
  """Build a column-specific label for a binary value.

  Args:
    curr: The raw cell value. Only the exact string "1" counts as "yes".
    header: The column name appended to the label.

  Returns:
    "yes_<header>" when curr is "1", otherwise "no_<header>".
  """
  if curr == "1":
    prefix = "yes_"
  else:
    prefix = "no_"
  return prefix + header

In [None]:
# Encode the transactions into the tabular form used by the apriori step.
def prepare_data_for_apriori(dataset):
  """Encode a list of transactions with TransactionEncoder and print the result.

  Args:
    dataset: The transactions to encode (each one a list of item labels).

  Returns:
    A pandas DataFrame built from the encoded array, with the encoder's
    learned item names as column labels.
  """
  print("Prepare for Apriori:")
  encoder = TransactionEncoder()
  encoder.fit(dataset)
  encoded = encoder.transform(dataset)
  frame = pd.DataFrame(encoded, columns=encoder.columns_)
  # Show the encoded table followed by one blank separator line.
  print(frame, end="\n\n")
  return frame

In [None]:
# Finds the frequent itemsets in the encoded data using the apriori algorithm.
def frequent_itemsets(df, min_supp):
  """Run apriori on the encoded DataFrame, print the itemsets and return them.

  Args:
    df: The encoded transaction DataFrame passed to apriori.
    min_supp: Value forwarded to apriori as min_support.

  Returns:
    The result of apriori, with itemsets expressed as column names
    (use_colnames=True).
  """
  print("Group Frequent Itemsets:")
  itemsets = apriori(df, min_support=min_supp, use_colnames=True)
  # Show the itemsets followed by one blank separator line.
  print(itemsets, end="\n\n")
  return itemsets

In [None]:
# Derives association rules from the frequent itemsets and prints them.
def determine_associations(frequent_itemsets, min_thresh):
  """Print the association rules that meet a confidence threshold.

  Args:
    frequent_itemsets: The frequent itemsets passed to association_rules.
    min_thresh: Minimum value of the 'confidence' metric for a rule to be kept.

  Returns:
    None. The rules are only printed.
  """
  shown_columns = ['antecedents', 'consequents', 'support', 'confidence']
  print("Determine Associations:")
  found_rules = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=min_thresh,
  )
  # Only the columns listed above are displayed, then a blank separator line.
  print(found_rules[shown_columns])
  print()

In [None]:
# Dump every row so the dataset can be checked by eye.
def print_rows(dataset):
  """Print each element of dataset on its own line.

  Args:
    dataset: Any iterable of rows. Nothing is printed when it is empty.
  """
  for rendered in map(str, dataset):
    print(rendered)

**Association Between Resume Variables**

In [None]:
# Goes through all data in 'file', cleansing the data to do association rule mining.
def gather_resume_data(file):
  """Read the callback and resume columns of a CSV file as labelled transactions.

  The first row of the file is treated as the header row. For every other
  non-blank row, the callback column (index 6) and the resume columns
  (indices 7 through 12) are converted with distinct_categorical, using the
  matching header as the label suffix.

  Args:
    file: Path of the CSV file to read.

  Returns:
    A list with one entry per non-blank data row; each entry is a list of
    "yes_<header>" / "no_<header>" labels, callback column first. An empty
    file yields an empty list.

  Raises:
    IndexError: If the header row or a non-blank data row has no column at
      index 6.
  """
  # Positions of the relevant columns in the original data.
  callback_col = 6
  resume_cols = slice(7, 13)

  # Stores all cleansed rows of data.
  dataset = []

  # newline='' is what the csv module expects, so that newlines embedded in
  # quoted fields are parsed correctly.
  with open(file, newline='') as f:
    reader = csv.reader(f)

    # Column headers are used to turn binary values into distinct categories.
    headers = next(reader, None)
    if headers is None:
      # Empty file: there is no header row and therefore nothing to label.
      return dataset
    combined_headers = [headers[callback_col]] + headers[resume_cols]

    for row in reader:
      # csv.reader yields an empty list for a blank line; skip those.
      if not row:
        continue

      # Only keep the columns relevant for association.
      combined_data = [row[callback_col]] + row[resume_cols]

      # Give each categorical resume variable a distinct name.
      dataset.append([
        distinct_categorical(value, header)
        for value, header in zip(combined_data, combined_headers)
      ])

  # Return cleaned dataset.
  return dataset

In [None]:
# Load the original dataset and reduce it to labelled callback/resume transactions.
resume_dataset = gather_resume_data(file='data/resume data modified OG.csv')

# Mine the transactions: encode them, group frequent itemsets (min support 0.8),
# then print the rules with confidence of at least 0.8.
df = prepare_data_for_apriori(dataset=resume_dataset)
freq = frequent_itemsets(df=df, min_supp=0.8)
determine_associations(frequent_itemsets=freq, min_thresh=0.8)

In [None]:
# Load the modified dataset (with extra rows) as labelled callback/resume transactions.
resume_dataset_extra_rows = gather_resume_data(file='data/resume data modified Extra Rows.csv')

# Mine the transactions: encode them, group frequent itemsets (min support 0.8),
# then print the rules with confidence of at least 0.8.
df = prepare_data_for_apriori(dataset=resume_dataset_extra_rows)
freq = frequent_itemsets(df=df, min_supp=0.8)
determine_associations(frequent_itemsets=freq, min_thresh=0.8)

**Association Between Job Variables**

In [None]:
# Goes through all data in 'file', cleansing the data to do association rule mining.
def gather_job_data(file):
  """Read the callback and job columns of a CSV file as labelled transactions.

  The first row of the file is treated as the header row. For every other
  non-blank row, the callback column (index 6) and the job columns
  (indices 1 through 5) are collected. The callback column and job columns
  2, 4 and 5 are converted with distinct_categorical; job columns 1 and 3
  (job type and job ownership) are kept exactly as read.

  Args:
    file: Path of the CSV file to read.

  Returns:
    A list with one entry per non-blank data row, each of the form
    [callback label, job type, federal contractor label, job ownership,
    communication label, organization label]. An empty file yields an
    empty list.

  Raises:
    IndexError: If the header row or a non-blank data row has no column at
      index 6.
  """
  # Positions of the relevant columns in the original data.
  callback_col = 6
  job_cols = slice(1, 6)

  # Stores all cleansed rows of data.
  dataset = []

  # newline='' is what the csv module expects, so that newlines embedded in
  # quoted fields are parsed correctly.
  with open(file, newline='') as f:
    reader = csv.reader(f)

    # Column headers are used to turn binary values into distinct categories.
    headers = next(reader, None)
    if headers is None:
      # Empty file: there is no header row and therefore nothing to label.
      return dataset
    combined_headers = [headers[callback_col]] + headers[job_cols]
    # Headers of the two pass-through columns are not needed.
    callback_hdr, _, fed_hdr, _, comm_hdr, org_hdr = combined_headers

    for row in reader:
      # csv.reader yields an empty list for a blank line; skip those.
      if not row:
        continue

      # Only keep the columns relevant for association.
      combined_data = [row[callback_col]] + row[job_cols]
      (callback, job_type, fed_contractor,
       job_ownership, req_communication, req_organization) = combined_data

      # Add the cleansed row: binary fields get distinct names, while job type
      # and job ownership are stored unchanged (presumably they already hold
      # descriptive categories; confirm against the data).
      dataset.append([
        distinct_categorical(callback, callback_hdr),
        job_type,
        distinct_categorical(fed_contractor, fed_hdr),
        job_ownership,
        distinct_categorical(req_communication, comm_hdr),
        distinct_categorical(req_organization, org_hdr),
      ])

  # Return cleaned dataset.
  return dataset

In [None]:
# Load the original dataset and reduce it to callback/job transactions.
job_dataset = gather_job_data(file='data/resume data modified OG.csv')

# Mine the transactions: encode them, group frequent itemsets (min support 0.8),
# then print the rules with confidence of at least 0.8.
df = prepare_data_for_apriori(dataset=job_dataset)
freq = frequent_itemsets(df=df, min_supp=0.8)
determine_associations(frequent_itemsets=freq, min_thresh=0.8)

In [None]:
# Load the modified dataset (with extra rows) as callback/job transactions.
job_dataset_extra_rows = gather_job_data(file='data/resume data modified Extra Rows.csv')

# Mine the transactions: encode them, group frequent itemsets (min support 0.8),
# then print the rules with confidence of at least 0.8.
df = prepare_data_for_apriori(dataset=job_dataset_extra_rows)
freq = frequent_itemsets(df=df, min_supp=0.8)
determine_associations(frequent_itemsets=freq, min_thresh=0.8)