# 11.2 示例：hello world！Apriori

In [28]:
def createC1(dataSet):
    C1 = []
    for transaction in dataSet:
        for item in transaction:
            if [item] not in C1:
                C1.append([item])
    C1.sort()
    return list(map(frozenset, C1))

def scanD(D, Ck, minSupport):
    ssCnt = {}
    for tid in D:
        for can in Ck:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1
    numItems = float(len(D))
    retList = []
    supportData = {}
    for key in ssCnt:
        support = ssCnt[key] / numItems
        if support >= minSupport:
            retList.append(key)
        supportData[key] = support
    return retList, supportData

def aprioriGen(Lk, k):
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            L1 = list(Lk[i])[:k-2]
            L2 = list(Lk[j])[:k-2]
            L1.sort()
            L2.sort()
            if L1 == L2:
                retList.append(Lk[i] | Lk[j])
    return retList

def apriori(dataSet, minSupport=0.5):
    C1 = createC1(dataSet)
    D = list(map(set, dataSet))
    L1, suppData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2

    while (len(L[k - 2]) > 0):
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        suppData.update(supK)
        L.append(Lk)
        k += 1
    return L, suppData

def calcConf(freqSet, H, supportData, brl, minConf=0.7):
    prunedH = []
    for conseq in H:
        conf = supportData[freqSet] / supportData[freqSet - conseq]
        if conf >= minConf:
            print(freqSet - conseq, '-->', conseq, 'conf:', conf)
            brl.append((freqSet - conseq, conseq, conf))
            prunedH.append(conseq)
    return prunedH

def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    m = len(H[0])

    if len(freqSet) > m + 1:
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)

        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)

def generateRules(L, supportData, minConf=0.7):
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]

            if i > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList

In [29]:
myDat = [ [1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5] ]
L, suppData = apriori(myDat, 0.5)

In [30]:
L

[[frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})],
 [frozenset({1, 3}), frozenset({2, 3}), frozenset({3, 5}), frozenset({2, 5})],
 [frozenset({2, 3, 5})],
 []]

In [31]:
suppData

{frozenset({1}): 0.5,
 frozenset({3}): 0.75,
 frozenset({4}): 0.25,
 frozenset({2}): 0.75,
 frozenset({5}): 0.75,
 frozenset({1, 3}): 0.5,
 frozenset({2, 3}): 0.5,
 frozenset({3, 5}): 0.5,
 frozenset({2, 5}): 0.75,
 frozenset({1, 2}): 0.25,
 frozenset({1, 5}): 0.25,
 frozenset({2, 3, 5}): 0.5}

In [32]:
rules = generateRules(L, suppData, minConf=0.7)
rules

frozenset({1}) --> frozenset({3}) conf: 1.0
frozenset({5}) --> frozenset({2}) conf: 1.0
frozenset({2}) --> frozenset({5}) conf: 1.0


[(frozenset({1}), frozenset({3}), 1.0),
 (frozenset({5}), frozenset({2}), 1.0),
 (frozenset({2}), frozenset({5}), 1.0)]

# 11.3 示例：使用Apriori算法挖掘XSS相关参数

In [42]:
from apriori import apriori
from apriori import generateRules
import re

#myDat = [ [ 1, 3, 4 ], [ 2, 3, 5 ], [ 1, 2, 3, 5 ], [ 2, 5 ] ]
myDat=[]
#L, suppData = apriori(myDat, 0.5)
#rules = generateRules(L, suppData, minConf=0.7)
#print 'rules:\n', rules
with open("../Data/xss-2000.txt") as f:
    for line in f:
        #/discuz?q1=0&q3=0&q2=0%3Ciframe%20src=http://xxooxxoo.js%3E
        print(line)
        index=line.find("?")
        if index>0:
            line=line[index+1:len(line)]
            tokens=re.split('\=|&|\?|\%3e|\%3c|\%3E|\%3C|\%20|\%22|<|>|\\n|\(|\)|\'|\"|;|:|,|\%28|\%29',line)
            myDat.append(tokens)
    f.close()

/0_1/?%22onmouseover='prompt(42873)'bad=%22%3E

/0_1/api.php?op=map&maptype=1&city=test%3Cscript%3Ealert%28/42873/%29%3C/script%3E

/0_1/api.php?op=map&maptype=1&defaultcity=%e5%22;alert%28/42873/%29;//

/0_1/api.php?op=map&maptype=1&defaultcity=%E5%8C%97%E4%BA%AC&api_key=%22%3E%3C/script%3E%3Cscript%3Ealert%28/42873/%29;%3C/script%3E

/0_1/api.php?op=map&maptype=1&defaultcity=%E5%8C%97%E4%BA%AC&field=%29%3C/script%3E%3Cscript%3Ealert%2842873%29%3C/script%3E//

/0_1/api.php?op=video_api&pc_hash=1&uid=1&snid=%3C/script%3E%3Cscript%3Ealert(/42873/)%3C/script%3E//&do_complete=1%20

/0_1/api.php?op=video_api&uid=1&snid=1&pc_hash=%3C/script%3E%3Cscript%3Ealert(/360/)%3C/script%3E//&do_complete=1

/0_1/?callback=%3Cscript%3Eprompt(42873)%3C/script%3E

/0_1/connect.php?receive=yes&mod=login&op=callback&referer=webscan%5Cu0027.replace(/.%2b/,/javascript:alert(42873)/.source);//

/0_1/connect.php?receive=yes&mod=login&op=callback&referer=webscan%bf%5Cu0027.replace(/.%2b/,/javascript:alert(42873

In [41]:
import pandas as pd

pd.DataFrame(myDat)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,910,911,912,913,914,915,916,917,918,919
0,,onmouseover,,prompt,42873,,bad,,,,...,,,,,,,,,,
1,op,map,maptype,1,city,test,script,alert,/42873/,,...,,,,,,,,,,
2,op,map,maptype,1,defaultcity,%e5,,alert,/42873/,,...,,,,,,,,,,
3,op,map,maptype,1,defaultcity,%E5%8C%97%E4%BA%AC,api_key,,,,...,,,,,,,,,,
4,op,map,maptype,1,defaultcity,%E5%8C%97%E4%BA%AC,field,,,/script,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,jsoncallback,,iframe/onload,alert,/webscan/,,,,,,...,,,,,,,,,,
1996,a,lists,agent,2,onmoonmouseoveruseover,alert,42873,,y,,...,,,,,,,,,,
1997,a,lists,agent,2,,onmouseover,alert,42873,,y,...,,,,,,,,,,
1998,a,lists,agent,,bedroom,4_100,onmouonmouseoverseover,alert,42873,,...,,,,,,,,,,


In [43]:
L, suppData = apriori(myDat, 0.1)
rules = generateRules(L, suppData, minConf=0.99)
rules

KeyboardInterrupt: 

In [None]:
L, suppData = apriori(myDat, 0.001)
rules = generateRules(L, suppData, minConf=0.99)
rules

# 11.5 示例：hello world！FP-growth

In [44]:
%%cmd

pip install pyfpgrowth

Microsoft Windows [�汾 10.0.22631.4460]
(c) Microsoft Corporation����������Ȩ����

(base) d:\Documents\��ؾ���\WebSecMLLearnNote\Code>
(base) d:\Documents\��ؾ���\WebSecMLLearnNote\Code>pip install pyfpgrowth
Collecting pyfpgrowth
  Downloading pyfpgrowth-1.0.tar.gz (1.6 MB)
     ---------------------------------------- 1.6/1.6 MB 110.7 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pyfpgrowth
  Building wheel for pyfpgrowth (setup.py): started
  Building wheel for pyfpgrowth (setup.py): finished with status 'done'
  Created wheel for pyfpgrowth: filename=pyfpgrowth-1.0-py2.py3-none-any.whl size=5488 sha256=eeabf3b99ceec634401011b8fc7f9d7b4d50f3a48e295d8eb9916e4a51996067
  Stored in directory: c:\users\坂田银时\appdata\local\pip\cache\wheels\30\bd\27\bbd99f16e2a89737066af54b00f0d3c1219416c24bcb0b962a
Successfully built pyfpgrowth
Installing collected packages: pyfpgrowth
Successfully



In [None]:
import pyfpgrowth

transactions = [[1, 2, 5],
                [2, 4],
                [2, 3],
                [1, 2, 4],
                [1, 3],
                [2, 3],
                [1, 3],
                [1, 2, 3, 5],
                [1, 2, 3]]

patterns = pyfpgrowth.find_frequent_patterns(transactions, 2)
rules = pyfpgrowth.generate_association_rules(patterns, 0.7)
rules

# 11.6 示例：使用FP-growth算法挖掘疑似僵尸主机

In [None]:
import pyfpgrowth


transactions=[]

with open("../Data/KnowledgeGraph/sample7.txt") as f:
    for line in f:
        line=line.strip('\n')
        ip,ua,target=line.split(',')
        print("Add (%s %s %s)" % (ip,ua,target))
        transactions.append([ip,ua,target])

In [None]:
transactions

In [None]:
patterns = pyfpgrowth.find_frequent_patterns(transactions, 3)
rules = pyfpgrowth.generate_association_rules(patterns, 0.9)
rules