In [2]:
%matplotlib inline

import math
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

from collections import defaultdict
from scipy.stats.stats import pearsonr

import fim
from fim import apriori

In [3]:
help(fim.apriori)

Help on built-in function apriori in module fim:

apriori(...)
    apriori (tracts, target='s', supp=10, zmin=1, zmax=None, report='a',
             eval='x', agg='x', thresh=10, prune=None, algo='b', mode='',
             border=None)
    Find frequent item sets with the Apriori algorithm.
    tracts  transaction database to mine (mandatory)
            The database must be an iterable of transactions;
            each transaction must be an iterable of items;
            each item must be a hashable object.
            If the database is a dictionary, the transactions are
            the keys, the values their (integer) multiplicities.
    target  type of frequent item sets to find     (default: s)
            s/a   sets/all   all     frequent item sets
            c     closed     closed  frequent item sets
            m     maximal    maximal frequent item sets
            g     gens       generators
            r     rules      association rules
    supp    minimum support of an i

In [4]:
# Calling external C function

import subprocess

# Helper function to call 'apriori' from within a linux executable file
def call_apriori(fileinput, fileoutput, delimiter=',', target_type='s', 
                 min_nbr_items=1, min_sup=2, min_conf=2):
    # apriori
    # -t# {m: maximal, c: closed, s: frequent, r: association rules}
    # -m# minimum number of items per item set/association rule
    # -s# minimum support of an item set, positive: percentage, negative: absolute
    # -c# minimum confidence rule percentage
    # -b# line delimiter (,)
    # The default additional information output format for rules is " (%X, %C)"
    # %X relative body set support as a percentage
    # %C rule confidence as a percentage
    # %L lift

    if target_type == 'r':
        call_cmd = ['./apriori', '-b%s' % delimiter, '-t%s' % target_type, '-m%s' % min_nbr_items, 
                    '-s%s' % min_sup, '-c%s' % min_conf, '-v (%X, %C, %L)', 
                    fileinput, fileoutput]
    else:
        call_cmd = ['./apriori', '-b%s' % delimiter, '-t%s' % target_type, 
                           '-m%s' % min_nbr_items, '-s%s' % min_sup, fileinput, fileoutput]

    ret = subprocess.call(call_cmd,  stdout=open('apriori_stdout.txt', 'w'), 
                          stderr=open('apriori_stderr.txt', 'w'))
    return ret

In [5]:
def read_freq_patterns(filename):
    
    data = open(filename, 'r')
    freq_patterns = list()
    
    for row in data:
        fields = row.rstrip('\n\r').split(' ')
        
        support = float(fields[len(fields) - 1].split('(')[1].split(')')[0])
        freq_pattern = {
            'itemset': fields[:len(fields) - 1],
            'support': support
        }
        freq_patterns.append(freq_pattern)
    data.close()
    return freq_patterns

In [6]:
# Helper function to better visualize association rules returned by the call to 'apriori' (with target_type='r')

def read_rules(filename):
    data = open(filename, 'r')
    rules = list()
    for row in data:
        fileds = row.rstrip('\n\r').split(' <- ')
        cons = fileds[0]
        other = fileds[1].split(' (')
        ant = other[0].split(' ')
        other2 = other[1].split(', ')
        sup = float(other2[0])
        conf = float(other2[1])
        lift = float(other2[2].replace(')', ''))
        rule = {
            'ant': ant,
            'cons': cons,
            'sup': sup,
            'conf': conf,
            'lift': lift
        }
        rules.append(rule)
    data.close()
    return rules

In [7]:
############### STARTING WITH ASSOCIATION RULES/FREQUENT PATTERNS MINING ON HR ###############

In [8]:
df = pd.read_csv("../HR.csv")
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [10]:
############################### DISCRETIZATION OF NUMERICAL FEATURES ###############################

# IMPORTANT: INTERVALS FOR BINS MAY BE CHOSEN BY LOOKING AT THE DISTRIBUTION OF THE FEATURE!
# NOTE: WE MAY DISCUSS ON THE MOST APPROPRIATE NUMBER/WIDTH OF BINS FOR EACH SEPARATE FEATURE!

# Discretize 'satisfaction_level'
# (Moltiplico per 100 perché range(0, 1, 0.1) dà errore, visto che range() si aspetta parametri interi)

df['satisfaction_level_group'] = pd.cut(df['satisfaction_level'] * 100, 
                                        bins=range(0, 125, 25), 
                                        right=False, 
                                        #labels=range(0, 120, 20)
                                        labels=['very_low', 'low', 'medium', 'high'])

# Discretize 'last_evaluation'
df['last_evaluation_group'] = pd.cut(df['last_evaluation'] * 100, 
                                     bins=range(30, 130, 20), 
                                     right=False, 
                                     labels=range(30, 110, 20))

# Discretize 'average_montly_hours'
df['average_montly_hours_group'] = pd.cut(df['average_montly_hours'], 
                                          bins=range(90, 340, 30), 
                                          right=False, 
                                          labels=range(90, 320, 30))


# 
# Question: shall we further discretize 'number_project' and 'time_spend_company'? 
# Even small bins would do, as I don't think leaving them as they are makes much sense
# If we don't further discretize, we may end up getting sorts of 'duplicated' rules
#     Answer: YES

# Discretize 'number_project'
df['number_project_group'] = pd.cut(df['number_project'], 
                                    bins=range(2, 12, 2),
                                    right=False, 
                                    labels=range(2, 10, 2))

# Discretize 'time_spend_company'
df['time_spend_company_group'] = pd.cut(df['time_spend_company'], 
                                 bins=[2, 4, 7, 11], 
                                 right=False, 
                                 labels=['2_to_3', '4_to_6', '7_to_10'])


df.drop(['satisfaction_level', 'last_evaluation', 'average_montly_hours', 
         'number_project', 'time_spend_company'], axis=1, inplace=True)

In [11]:
df.head()

Unnamed: 0,Work_accident,left,promotion_last_5years,sales,salary,satisfaction_level_group,last_evaluation_group,average_montly_hours_group,number_project_group,time_spend_company_group
0,0,1,0,sales,low,low,50,150,2,2_to_3
1,0,1,0,sales,medium,high,70,240,4,4_to_6
2,0,1,0,sales,medium,very_low,70,270,6,4_to_6
3,0,1,0,sales,low,medium,70,210,4,4_to_6
4,0,1,0,sales,low,low,50,150,2,2_to_3


In [12]:
df2 = df.copy()
df2['Work_accident'] = df['Work_accident'].astype(str) + '_WA'
df2['left'] = df['left'].astype(str) + '_L'
df2['promotion_last_5years'] = df['promotion_last_5years'].astype(str) + '_PL5'
df2['satisfaction_level_group'] = df['satisfaction_level_group'].astype(str) + '_SAT'
df2['salary'] = df['salary'].astype(str) + '_SAL'
df2['last_evaluation_group'] = df['last_evaluation_group'].astype(str) + '_LE'
df2['average_montly_hours_group'] = df['average_montly_hours_group'].astype(str) + '_AMH'
df2['number_project_group'] = df['number_project_group'].astype(str) + '_NP'
df2['time_spend_company_group'] = df['time_spend_company_group'].astype(str) + '_TSC'

In [13]:
df2.head()

Unnamed: 0,Work_accident,left,promotion_last_5years,sales,salary,satisfaction_level_group,last_evaluation_group,average_montly_hours_group,number_project_group,time_spend_company_group
0,0_WA,1_L,0_PL5,sales,low_SAL,low_SAT,50_LE,150_AMH,2_NP,2_to_3_TSC
1,0_WA,1_L,0_PL5,sales,medium_SAL,high_SAT,70_LE,240_AMH,4_NP,4_to_6_TSC
2,0_WA,1_L,0_PL5,sales,medium_SAL,very_low_SAT,70_LE,270_AMH,6_NP,4_to_6_TSC
3,0_WA,1_L,0_PL5,sales,low_SAL,medium_SAT,70_LE,210_AMH,4_NP,4_to_6_TSC
4,0_WA,1_L,0_PL5,sales,low_SAL,low_SAT,50_LE,150_AMH,2_NP,2_to_3_TSC


In [14]:
df2.to_csv('hr_for_pattern_mining.csv', sep=',', header=False)

In [15]:
###############################################
############## FREQUENT ITEMSETS ##############
###############################################

delimiter=','
target_type='s' # Meaning that I want frequent itemsets
min_nbr_items=3
min_sup=10
min_conf=100 # Meaningless here


ret_val = call_apriori('hr_for_pattern_mining.csv', 
                       'Frequent/frequent_patterns.txt', 
                       delimiter, target_type, min_nbr_items, min_sup, min_conf)

In [16]:
###############################################
############### CLOSED ITEMSETS ###############
###############################################

delimiter=','
target_type='c' # Meaning that I want closed frequent itemsets
min_nbr_items=3
min_sup=10
min_conf=100 # Meaningless here


ret_val = call_apriori('hr_for_pattern_mining.csv', 
                       'Closed/closed_patterns.txt', 
                       delimiter, target_type, min_nbr_items, min_sup, min_conf)

In [17]:
###############################################
############### MAXIMAL ITEMSETS ##############
###############################################

delimiter=','
target_type='m' # Meaning that I want maximal frequent itemsets
min_nbr_items=2
min_sup=10
min_conf=100 # Meaningless here


ret_val = call_apriori('hr_for_pattern_mining.csv', 
                       'Maximal/maximal_patterns.txt', 
                       delimiter, target_type, min_nbr_items, min_sup, min_conf)
    

In [18]:
def write_freq_patterns(filename, freq_pts):
    
    f = open(filename, 'w')
    for pt in sorted(freq_pts, key=lambda p: p['support'], reverse=True):
        f.write('{} support = {}\n'.format(pt['itemset'], pt['support']))
        
    f.close()
    return

In [19]:
# Obtaining frequent itemsets with different values of support

freq_patterns = read_freq_patterns('Frequent/frequent_patterns.txt')
previous_min_sup = 101;

for min_sup in [50, 40, 30, 20, 10]:    
    pts = [fp for fp in freq_patterns 
           if fp['support'] >= min_sup 
           and fp['support'] < previous_min_sup]    
    write_freq_patterns('Frequent/freq_{}_to_{}_sup.txt'.format(min_sup, previous_min_sup), pts)
    previous_min_sup = min_sup


In [20]:
# Obtaining closed itemsets with different values of support

freq_patterns = read_freq_patterns('Closed/closed_patterns.txt')
previous_min_sup = 101;

for min_sup in [50, 40, 30, 20, 10]:    
    pts = [fp for fp in freq_patterns 
           if fp['support'] >= min_sup 
           and fp['support'] < previous_min_sup]    
    write_freq_patterns('Closed/closed_{}_to_{}_sup.txt'.format(min_sup, previous_min_sup), pts)
    previous_min_sup = min_sup
    

In [21]:
# Obtaining maximal itemsets with different values of support

freq_patterns = read_freq_patterns('Maximal/maximal_patterns.txt')
previous_min_sup = 101;

for min_sup in [20, 15, 10]:    
    pts = [fp for fp in freq_patterns 
           if fp['support'] >= min_sup 
           and fp['support'] < previous_min_sup]    
    write_freq_patterns('Maximal/maximal_{}_to_{}_sup.txt'.format(min_sup, previous_min_sup), pts)
    previous_min_sup = min_sup


In [22]:
###############################################
############## ASSOCIATION RULES ##############
###############################################

delimiter=','
target_type='r' # Meaning that I want association rules
min_nbr_items=3
min_sup=5
min_conf=70


ret_val = call_apriori('hr_for_pattern_mining.csv', 
                       'Rules/association_rules.txt', 
                       delimiter, target_type, min_nbr_items, min_sup, min_conf)

In [23]:
def write_rules(filename, assoc_rules):
    
    f = open(filename, 'w')
    for rl in sorted(assoc_rules, key=lambda r: (r['lift'], r['conf']), reverse=True):
        f.write('{} -> {}, lift = {}, confidence = {}\n'.format(rl['ant'], rl['cons'], rl['lift'], rl['conf']))
        
    f.close()
    return

In [24]:
assoc_rules = read_rules('Rules/association_rules.txt')

# Rules whose consequences tell me something about the 'left' attribute
rls_left_true = [rl for rl in assoc_rules if rl['cons'].endswith('1_L')]
rls_left_false = [rl for rl in assoc_rules if rl['cons'].endswith('0_L') and rl['sup'] > 20]

# Rules whose consequences tell me something about an attribute that is not 'left'
rls_other = [rl for rl in assoc_rules if not(rl['cons'].endswith('_L')) and rl['sup'] > 30]

In [25]:
# Obtaining association rules with different values of confidence

# RULES TELLING SOMETHING ABOUT 'left', IN PARTICULAR left=1
previous_min_conf = 101;
for min_conf in [90, 80, 70]:    
    rls = [rl for rl in rls_left_true
           if rl['conf'] >= min_conf 
           and rl['conf'] < previous_min_conf]  
    write_rules('Rules/Rules_about_left/Rules_left_true/rules_{}_to_{}_conf.txt'
                .format(min_conf, previous_min_conf), rls)
    previous_min_conf = min_conf
    
# RULES TELLING SOMETHING ABOUT 'left', IN PARTICULAR left=0
previous_min_conf = 101;
for min_conf in [90, 80, 70]:    
    rls = [rl for rl in rls_left_false
           if rl['conf'] >= min_conf 
           and rl['conf'] < previous_min_conf]  
    write_rules('Rules/Rules_about_left/Rules_left_false/rules_{}_to_{}_conf.txt'
                .format(min_conf, previous_min_conf), rls)
    previous_min_conf = min_conf    
    
# GENERAL RULES    
previous_min_conf = 101;
for min_conf in [90, 80, 70]:    
    rls = [rl for rl in rls_other
           if rl['conf'] >= min_conf 
           and rl['conf'] < previous_min_conf]  
    write_rules('Rules/Rules_not_about_left/rules_{}_to_{}_conf.txt'.format(min_conf, previous_min_conf), rls)
    previous_min_conf = min_conf    

In [26]:
##########################################################################
# USING THE MOST MEANINGFUL RULES TO PREDICT (ARTIFICIAL) MISSING VALUES #
##########################################################################

In [27]:

target_attr = 'promotion_last_5years'

# I randomly take 5% of the rows and try to predict the feature 'promotion_last_5years'
subset = df2.sample(frac=0.1).reset_index(drop=True)

# I sort the rules according to the confidence value, then I extract a bunch of them
conf_sorted_rules = sorted(rls_other, key=lambda r: (r['conf'], r['lift']), reverse=True)
rules_pool = conf_sorted_rules[:15]

affected_rows = 0
correctly_guessed = 0
for i in range(len(subset.index)):
    df_row = subset.iloc[[i]] # Fetch i-th row
    rule = list() # Empty list
    for r in rules_pool:
        if set(r['ant']) < set(df_row.values[0]):
            rule = r
            break
    if rule: # If I found a rule
        affected_rows = affected_rows + 1
        if rule['cons'] == df_row[target_attr].values[0]:
            correctly_guessed = correctly_guessed + 1
            
accuracy = correctly_guessed / float(affected_rows)
accuracy
        

0.9735413839891451

In [28]:
###############################################################################
# USING THE MOST MEANINGFUL RULES TO PREDICT IF AN EMPLOYEE WILL LEAVE OR NOT #
###############################################################################

In [29]:
# I sort the rules according to the confidence value, then I extract a bunch of them
rls_left_true_sorted = sorted(rls_left_true, key=lambda r: (r['conf'], r['lift']), reverse=True)
rls_left_false_sorted = sorted(rls_left_false, key=lambda r: (r['conf'], r['lift']), reverse=True)
rules_pool = rls_left_true_sorted[:5] + rls_left_false_sorted[:5]

affected_rows = 0
correctly_guessed = 0
for i in range(len(df2.index)):
    emp = df2.iloc[[i]] # Fetch i-th row
    rule = list() # Empty list
    for r in rules_pool:
        if set(r['ant']) < set(emp.values[0]):
            rule = r
            break
    if rule: # If I found a rule
        affected_rows = affected_rows + 1
        if rule['cons'] == emp['left'].values[0]:
            correctly_guessed = correctly_guessed + 1
            
accuracy = correctly_guessed / float(affected_rows)
accuracy

0.972880983750549

In [None]:
# THE CODE BELOW CAN BE IGNORED

In [188]:
rules = read_rules('Rules/association_rules.txt')
for r in rules[:3]:
    print r['ant'], '-->', r['cons'], ' lift', r['lift'], ' conf', r['conf']

['accounting', '0_PL5'] --> 0_L  lift 95.6906  conf 72.9084
['accounting', '0_PL5'] --> 0_WA  lift 102.157  conf 87.3838
['RandD', '0_PL5'] --> 0_L  lift 110.352  conf 84.0789


In [189]:
# Collect those rules whose consequences tell something about the 'left' attribute
rules_cons_L = list()
for r in rules:
    if r['cons'].endswith('_L'):
        rules_cons_L.append(r)

In [190]:
sorted_rules_cons_L = sorted(rules_cons_L, key=lambda r: r['conf'], reverse=True)

In [191]:
for r in sorted_rules_cons_L[:10]:
    print r['ant'], '-->', r['cons'], ' lift', r['lift'], ' conf', r['conf']

['180_AMH', '4_NP', '2_to_3_TSC'] --> 0_L  lift 130.935  conf 99.7613
['180_AMH', '4_NP', '2_to_3_TSC', '0_PL5'] --> 0_L  lift 130.928  conf 99.7567
['80_SL', '4_NP', '2_to_3_TSC'] --> 0_L  lift 130.904  conf 99.7382
['80_SL', '4_NP', '2_to_3_TSC', '0_PL5'] --> 0_L  lift 130.892  conf 99.7292
['80_SL', '4_NP', '2_to_3_TSC', '0_WA'] --> 0_L  lift 130.834  conf 99.6848
['80_SL', '4_NP', '2_to_3_TSC', '0_WA', '0_PL5'] --> 0_L  lift 130.82  conf 99.6737
['60_SL', '4_NP', '2_to_3_TSC', '0_PL5'] --> 0_L  lift 130.75  conf 99.621
['80_SL', 'low', '2_to_3_TSC', '0_WA'] --> 0_L  lift 130.746  conf 99.6176
['80_SL', 'low', '2_to_3_TSC', '0_WA', '0_PL5'] --> 0_L  lift 130.741  conf 99.6139
['80_SL', 'low', '2_to_3_TSC'] --> 0_L  lift 130.733  conf 99.6075


In [192]:
rules_did_leave = list()
for r in rules:
    if r['cons'].endswith('1_L'):
        rules_did_leave.append(r)
        
sorted_rules_did_leave = sorted(rules_did_leave, key=lambda r: r['conf'], reverse=True)

for r in sorted_rules_did_leave:
    print r['ant'], '-->', r['cons'], ' lift', r['lift'], ' conf', r['conf']

['0_SL', '6_NP', '4_to_6_TSC', '0_WA'] --> 1_L  lift 395.143  conf 94.0767
['0_SL', '6_NP', '4_to_6_TSC', '0_WA', '0_PL5'] --> 1_L  lift 395.056  conf 94.0559
['0_SL', '6_NP', '4_to_6_TSC', '0_PL5'] --> 1_L  lift 388.705  conf 92.5439
['0_SL', '6_NP', '4_to_6_TSC'] --> 1_L  lift 387.96  conf 92.3664
['0_SL', '6_NP', '0_WA', '0_PL5'] --> 1_L  lift 379.465  conf 90.3441
['0_SL', '6_NP', '0_WA'] --> 1_L  lift 377.928  conf 89.978
['0_SL', '6_NP', '0_PL5'] --> 1_L  lift 372.332  conf 88.6458
['0_SL', '6_NP'] --> 1_L  lift 370.175  conf 88.1321
['120_AMH', '40_SL', '2_NP', '2_to_3_TSC', '0_PL5'] --> 1_L  lift 343.124  conf 81.6919
['120_AMH', '40_SL', '2_NP', '2_to_3_TSC'] --> 1_L  lift 341.695  conf 81.3517
['120_AMH', '40_SL', '2_NP', '0_WA', '0_PL5'] --> 1_L  lift 333.531  conf 79.408
['120_AMH', '40_SL', '2_NP', '0_WA'] --> 1_L  lift 332.161  conf 79.0816
['6_NP', '4_to_6_TSC', '0_WA', '0_PL5'] --> 1_L  lift 328.021  conf 78.0961
['6_NP', '4_to_6_TSC', '0_WA'] --> 1_L  lift 327.672  con

In [193]:
df2.values[0]

array(['0_WA', '1_L', '0_PL5', 'sales', 'low', '20_SL', '50_LE', '150_AMH',
       '2_NP', '2_to_3_TSC'], dtype=object)

In [194]:
employee_test = df2.values[0]

In [195]:
for r in rules:
    if set(r['ant']) < set(employee_test) and r['cons'].endswith('_L'):
        print r['ant'], '-->', r['cons']

['20_SL', '2_NP', '2_to_3_TSC', '0_PL5'] --> 1_L
['20_SL', '2_NP', '2_to_3_TSC'] --> 1_L
['150_AMH', 'sales', '0_PL5'] --> 0_L
['150_AMH', 'sales'] --> 0_L
['150_AMH', '50_LE', '0_WA', '0_PL5'] --> 0_L
['150_AMH', '50_LE', '0_WA'] --> 0_L
['150_AMH', '50_LE', '0_PL5'] --> 0_L
['150_AMH', '50_LE'] --> 0_L
['150_AMH', 'low', '2_to_3_TSC', '0_PL5'] --> 0_L
['150_AMH', 'low', '2_to_3_TSC'] --> 0_L
['150_AMH', 'low', '0_WA', '0_PL5'] --> 0_L
['150_AMH', 'low', '0_WA'] --> 0_L
['150_AMH', 'low', '0_PL5'] --> 0_L
['150_AMH', 'low'] --> 0_L
['150_AMH', '2_to_3_TSC', '0_WA', '0_PL5'] --> 0_L
['150_AMH', '2_to_3_TSC', '0_WA'] --> 0_L
['150_AMH', '2_to_3_TSC', '0_PL5'] --> 0_L
['150_AMH', '2_to_3_TSC'] --> 0_L
['150_AMH', '0_WA', '0_PL5'] --> 0_L
['150_AMH', '0_WA'] --> 0_L
['150_AMH', '0_PL5'] --> 0_L
['sales', '50_LE', 'low', '0_PL5'] --> 0_L
['sales', '50_LE', 'low'] --> 0_L
['sales', '50_LE', '2_to_3_TSC', '0_WA', '0_PL5'] --> 0_L
['sales', '50_LE', '2_to_3_TSC', '0_WA'] --> 0_L
['sales', '50

In [35]:
hr_baskets_list = list()
for row in df2.values:
    hr_baskets_list.append(list(row))

In [36]:
rules = apriori(hr_baskets_list, supp=5, zmin=2, target='r', conf=80, report='ascl') 

In [37]:
for rule in rules:
    if rule[0] == '1_L': # i.e. If left=1 is the consequence of this association rule
        print rule

('1_L', ('0_SL', '6_NP', '4_to_6_TSC', '0_WA', '0_PL5'), 807, 0.05380358690579372, 0.9405594405594405, 3.950560360949608)
('1_L', ('0_SL', '6_NP', '4_to_6_TSC', '0_WA'), 810, 0.054003600240016, 0.9407665505226481, 3.951430269193279)
('1_L', ('0_SL', '6_NP', '4_to_6_TSC', '0_PL5'), 844, 0.05627041802786852, 0.9254385964912281, 3.887049428387547)
('1_L', ('0_SL', '6_NP', '4_to_6_TSC'), 847, 0.0564704313620908, 0.9236641221374046, 3.8795962385715295)
('1_L', ('0_SL', '6_NP', '0_WA', '0_PL5'), 814, 0.05427028468564571, 0.9034406215316315, 3.794653005419474)
('1_L', ('0_SL', '6_NP', '0_WA'), 817, 0.05447029801986799, 0.8997797356828194, 3.7792764647176162)
('1_L', ('0_SL', '6_NP', '0_PL5'), 851, 0.05673711580772051, 0.8864583333333333, 3.7233235904975266)
('1_L', ('0_SL', '6_NP'), 854, 0.056937129141942794, 0.8813209494324046, 3.701745427201522)
('1_L', ('120_AMH', '40_SL', '2_NP', '2_to_3_TSC', '0_PL5'), 647, 0.04313620908060537, 0.8169191919191919, 3.431243617921019)
('1_L', ('120_AMH', '