In [7]:
import sys

from itertools import chain, combinations
from collections import defaultdict
from optparse import OptionParser
from openpyxl import load_workbook
from operator import mul
import pandas as pd

In [8]:
def subsets(arr):
    """Return an iterator over every non-empty combination of arr.

    Combinations are produced as tuples, grouped by size: first all the
    1-element combinations, then the 2-element ones, and so on up to the
    combination that contains every element of arr.
    """
    # One combinations() iterator per size 1..len(arr), built eagerly.
    by_size = [combinations(arr, size) for size, _ in enumerate(arr, 1)]
    return chain.from_iterable(by_size)


def returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet):
    """Keep the candidate itemsets that reach the minimum support.

    Every candidate in itemSet is counted once for each transaction that
    contains it.  The counts are added to freqSet (which is mutated in
    place) and the candidates whose share of all transactions is at least
    minSupport are returned as a new set.  A candidate contained in no
    transaction is neither recorded in freqSet nor returned.
    """
    occurrences = defaultdict(int)

    for candidate in itemSet:
        hits = sum(1 for transaction in transactionList
                   if candidate.issubset(transaction))
        if hits:
            freqSet[candidate] += hits
            occurrences[candidate] += hits

    return set(candidate
               for candidate, hits in occurrences.items()
               if float(hits) / len(transactionList) >= minSupport)


def joinSet(itemSet, length):
    """Build candidate itemsets of a given size by pairwise union.

    Every ordered pair of members of itemSet is united, and the unions
    that contain exactly `length` elements are collected into a set.
    """
    joined = set()
    for left in itemSet:
        for right in itemSet:
            merged = left.union(right)
            if len(merged) == length:
                joined.add(merged)
    return joined


def getItemSetTransactionList(data_iterator):
    """Collect the transactions and the 1-itemsets found in them.

    Returns a pair (itemSet, transactionList): transactionList holds one
    frozenset per record, in input order, and itemSet holds a
    single-element frozenset for every distinct item seen in any record.
    """
    transactionList = [frozenset(record) for record in data_iterator]
    # Generate the 1-itemsets from the already de-duplicated transactions.
    itemSet = set(frozenset([item])
                  for transaction in transactionList
                  for item in transaction)
    return itemSet, transactionList


def runApriori(data_iter, minSupport, minConfidence):
    """
    Run the Apriori algorithm. data_iter is a record iterator.

    Parameters:
     - data_iter: iterable of records, each an iterable of hashable items
     - minSupport: minimum fraction of transactions an itemset must
       appear in to be kept
     - minConfidence: minimum confidence a rule must reach to be kept

    Return both:
     - items: list of (tuple, support)
     - rules: list of ((pretuple, posttuple), confidence)
    """
    itemSet, transactionList = getItemSetTransactionList(data_iter)

    # Occurrence count of every candidate found in at least one transaction.
    freqSet = defaultdict(int)
    # Maps itemset size k -> set of k-itemsets which satisfy minSupport.
    largeSet = dict()

    currentLSet = returnItemsWithMinSupport(itemSet,
                                            transactionList,
                                            minSupport,
                                            freqSet)

    k = 2
    while currentLSet:
        largeSet[k-1] = currentLSet
        candidates = joinSet(currentLSet, k)
        currentLSet = returnItemsWithMinSupport(candidates,
                                                transactionList,
                                                minSupport,
                                                freqSet)
        k = k + 1

    def getSupport(item):
        """Local function which returns the support of an item."""
        return float(freqSet[item])/len(transactionList)

    # Walk the sizes in ascending order so the output order does not
    # depend on how the dict happens to order its keys.
    sizes = sorted(largeSet)

    toRetItems = []
    for size in sizes:
        toRetItems.extend([(tuple(item), getSupport(item))
                           for item in largeSet[size]])

    toRetRules = []
    for size in sizes:
        # A rule needs a non-empty antecedent and consequent, so the
        # 1-itemsets are skipped by size rather than by dict position.
        if size < 2:
            continue
        for item in largeSet[size]:
            for combo in subsets(item):
                element = frozenset(combo)
                remain = item.difference(element)
                if len(remain) > 0:
                    confidence = getSupport(item)/getSupport(element)
                    if confidence >= minConfidence:
                        toRetRules.append(((tuple(element), tuple(remain)),
                                           confidence))
    return toRetItems, toRetRules


def printResults(items, rules):
    """Print the itemsets sorted by support and the rules sorted by confidence.

    items: iterable of (itemset tuple, support) pairs.
    rules: iterable of ((antecedent tuple, consequent tuple), confidence).

    Each rule line also shows its lift, computed as
    support(A and B) / (support(A) * support(B)).  The support of the
    combined itemset falls back to 0 when it is not present in items.

    Raises KeyError when the antecedent or the consequent of a rule has
    no entry in items.
    """
    # Supports are keyed by frozenset: a tuple built from a set has no
    # guaranteed element order, so the same itemset may show up as
    # ('a', 'b') in one place and ('b', 'a') in another.
    supportBySet = {}
    for item, support in sorted(items, key=lambda pair: pair[1]):
        supportBySet[frozenset(item)] = support
        print("item: %s , %.3f" % (str(item), support))

    for rule, confidence in sorted(rules, key=lambda pair: pair[1]):
        pre, post = rule
        supportAimplikasiB = supportBySet.get(frozenset(pre + post), 0)
        lift = supportAimplikasiB/(supportBySet[frozenset(pre)]*supportBySet[frozenset(post)])
        print("Rule: %s ==> %s : [confidence is %.3f, lift is %.3f]" % (str(pre), str(post), confidence, lift))

def getSupportAimplikasiB(A, B):
    """Return the support of the itemset made of the items of rule A.

    ("A implikasi B" is Indonesian for "A implies B".)

    A: iterable with all items of the rule (antecedent plus consequent).
    B: dict mapping itemset tuples to their support.

    A key of B matches when every one of its items occurs in A and the key
    has at least as many items as A.  The support of the last matching key
    (in the dict's iteration order) is returned, or 0 when nothing matches.
    """
    rule = list(A)
    result = 0
    for itemset, support in B.items():
        members = list(itemset)
        longEnough = len(members) >= len(rule)
        # all() replaces the former reduce(mul, ...) == True product test,
        # which depended on the Python 2-only builtin reduce.
        if all(longEnough and member in rule for member in members):
            result = support

    return result

def dataFromFile(fname):
    """Read a comma-separated file and yield one record per line.

    Each line is stripped of surrounding whitespace and of trailing
    commas, split on ',' and yielded as a frozenset of its fields.
    The file is closed once the generator is exhausted or closed.
    """
    # The 'U' flag is only needed on Python 2; Python 3 already reads text
    # with universal newlines and rejects 'U' from version 3.11 on.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open(fname, mode) as file_iter:
        for line in file_iter:
            line = line.strip().rstrip(',')     # Remove trailing comma
            yield frozenset(line.split(','))

In [9]:
# Open the survey workbook (path is relative to the notebook's working
# directory) and pick the worksheet named 'Sheet1'.
wb = load_workbook(filename = 'Data Alumni pelatihan phiton.xlsx')
sheet_ranges = wb['Sheet1']

# Build a DataFrame straight from the raw cell values; no header row is
# applied, so the columns keep default integer labels.
df = pd.DataFrame(sheet_ranges.values)
# NOTE(review): the rendered frame contains personal data (names, birth
# dates, addresses) -- clear the output before sharing the notebook.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,DATA ALUMNI,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,No,Nim,Nama,Tempat & tanggal lahir,Fakultas/Ketrampilan,Alamat,Tahun Masuk,Tahun Lulus,Status Pekerjaan,Lama Masa Tunggu (bulan),...,,2. Apakah SDMmempengaruhi proses belajar (misa...,,3. Jika sudah bekerja apakah sesuai dengan ju...,,4. Apakah Kurikulum pada data-data kuliah sesu...,,,,
3,,,,,,,,,,,...,Tidak,Ya,Tidak,Ya,Tidak,Ya,Tidak,,,
4,1,9018263,Puguh Drajat E.P,"Purworejo, 4 November 1989",FTI / Teknik Iformatika,Tangerang Rt. 01/ 0,2009,2016,,2.4,...,,v,,v,,v,,,,
5,2,12022030,Arsyad Cahya Subrata,"Bantul, 21 juni 1994",FTI / Teknik Elektro,"Karangpule Rt.01 , Tirtonirmolo , Kasihan Bantul",2012,2016,,,...,,v,,v,,v,,,,
6,3,12020021,Yesi Novela,"Tempilang, 1 januari 1995",FTI,,2012,2016,Mahasiswa,2.4,...,,v,,v,,v,,,,
7,4,9018269,Pandu Herwijaya,"Lampung, 19 Desember 1990",FTI/ Teknik Infomatika,"Tamantirto, Kasihan ,Bantul",2009,2016,,2.4,...,,v,,v,,v,,,,
8,5,9018109,Anjar Prasetyo,"Gunungkidul, 27 April 1991",FTI /Teknik Informatika,"Umbulharjo , Yogyakarta",2009,2016,,,...,,v,,v,,v,,,,
9,6,9020026,Moh. Dadi Kurniawan,"Savanajaya, 27 Maret 1992",FTI / Futsal,Saren Rt. 02 Rw. 01 Caturtunggal,2009,2016,,2.4,...,,v,,,,,,,,


In [10]:
# Take rows 4..19 of the raw sheet and the eight answer columns 13..20.
# NOTE(review): presumably rows 4..19 are the respondent rows that follow
# the sheet's title/header rows -- confirm against the workbook.
d = df[4:20][[13,14,15,16,17,18,19,20]]

# One "yes" and one "no" column per question.
d.columns = ['A[Y]','A[T]','B[Y]','B[T]','C[Y]','C[T]','D[Y]','D[T]']
# A = facilities are adequate (fasilitas memadai)
# B = teaching staff / human resources have an influence (SDM berpengaruh)
# C = job matches the field of study (kerja sesuai jurusan)
# D = curriculum matches the working world (kurikulum sesuai dengan dunia kerja)
#---------------------------------------
# NOTE : Y = yes (ya), T = no (tidak), B = undecided (bingung)

d    

Unnamed: 0,A[Y],A[T],B[Y],B[T],C[Y],C[T],D[Y],D[T]
4,v,,v,,v,,v,
5,v,,v,,v,,v,
6,v,,v,,v,,v,
7,v,,v,,v,,v,
8,v,,v,,v,,v,
9,v,,v,,,,,
10,,,,,,,,
11,v,,v,,,v,,v
12,v,,v,,v,,v,
13,v,,v,,v,,v,


In [11]:
# Turn every respondent row into one transaction with a single label per
# question: '<Q>[Y]' (yes), '<Q>[T]' (no) or '<Q>[B]' (no box ticked).
list_data, temp_data = [], []

for index, data in d.iterrows():
    for question in ('A', 'B', 'C', 'D'):
        yes = data[question + '[Y]']
        no = data[question + '[T]']
        # pd.isnull treats both None and NaN as an empty cell, so a blank
        # answer is recognised however pandas stored it.
        if pd.isnull(yes) and pd.isnull(no):
            answer = 'B'
        elif yes == 'v':
            answer = 'Y'
        else:
            answer = 'T'
        temp_data.append('%s[%s]' % (question, answer))

    list_data.append(temp_data)
    temp_data = []

In [12]:
# Thresholds for the mining run: an itemset must occur in at least 10% of
# the transactions, and a rule must reach a confidence of at least 20%.
minSupport = 0.1
minConfidence = 0.2

# Mine the frequent itemsets and association rules from the encoded answers.
items, rules = runApriori(list_data, minSupport, minConfidence)

# Print itemsets (ascending support) and rules (ascending confidence, with lift).
printResults(items, rules)

item: ('C[B]',) , 0.125
item: ('A[T]',) , 0.125
item: ('C[T]',) , 0.125
item: ('D[T]',) , 0.125
item: ('D[B]',) , 0.125
item: ('C[Y]', 'A[T]') , 0.125
item: ('B[Y]', 'A[T]') , 0.125
item: ('B[Y]', 'C[T]') , 0.125
item: ('D[T]', 'B[Y]') , 0.125
item: ('D[B]', 'C[B]') , 0.125
item: ('A[Y]', 'C[T]') , 0.125
item: ('A[Y]', 'B[Y]', 'C[T]') , 0.125
item: ('B[Y]', 'C[Y]', 'A[T]') , 0.125
item: ('A[Y]', 'C[Y]') , 0.625
item: ('A[Y]', 'B[Y]', 'C[Y]') , 0.625
item: ('A[Y]', 'C[Y]', 'D[Y]') , 0.625
item: ('B[Y]', 'C[Y]', 'A[Y]', 'D[Y]') , 0.625
item: ('A[Y]', 'D[Y]') , 0.688
item: ('C[Y]', 'D[Y]') , 0.688
item: ('A[Y]', 'B[Y]', 'D[Y]') , 0.688
item: ('B[Y]', 'C[Y]', 'D[Y]') , 0.688
item: ('C[Y]',) , 0.750
item: ('D[Y]',) , 0.750
item: ('B[Y]', 'C[Y]') , 0.750
item: ('B[Y]', 'D[Y]') , 0.750
item: ('A[Y]',) , 0.812
item: ('A[Y]', 'B[Y]') , 0.812
item: ('B[Y]',) , 0.938
Rule: ('B[Y]',) ==> ('A[Y]', 'C[Y]') : [confidence is 0.667, lift is 1.067]
Rule: ('B[Y]',) ==> ('A[Y]', 'C[Y]', 'D[Y]') : [confide