In [1]:
# Print the version reported by `sc`. `sc` is not defined anywhere in this
# notebook - presumably it is the SparkContext injected by the hosting
# Spark environment (TODO confirm).
print(sc.version)

In [2]:
### Settings

# Name to use in extracted files (prefix of the stored graph snapshots)
_set_name = 'BBCN_02'

# True: threshold terms on their in-degree; False: threshold on their total degree
_in_degree = True

# Window size to use for graph-of-words extraction
_windowSize = 3

# Path to source files
_path_to_files = '/FileStore/data/bbc_news'
_path_to_test_files = '/FileStore/data/bbc_news_test'

# Path to outputs
_path_to_output = "/FileStore/data/bbc_output"

# A term is only emitted by FileToGraph when its degree is strictly greater
# than this threshold: windowSize-1 for in_degree counts, and
# (windowSize-1)*2 for degree counts
_degree_compare = (_windowSize-1)*2
if _in_degree == True:
    _degree_compare = _windowSize-1

    
# Normalised word limit - terms whose normalised degree is not above this
# value are not emitted by FileToGraph
_normalised_term_limit = 0.0

# Limits for connected term removal: terms linked to fewer than _lower_count
# files, or to more files than the _upper_percentile percentile, are removed
_upper_percentile = 99.7
_lower_count = 2

print("Done")

In [3]:
# Import required libraries
#from graphframes import *

# NetworkX graph library
import networkx as nx

# Random library
import random


import re
import nltk
#from nltk.corpus import stopwords
from nltk.stem.porter import *

stemmer = PorterStemmer()

# Time Library
from time import time

# Maths library
import math

# Scipy stats functions
import scipy.stats
import numpy as np

# KD Tree algo
from sklearn.neighbors import KDTree

# kmeans cluster algorithm 
from sklearn.cluster import KMeans

# Confusion matrix, precision, recall, F1
from sklearn.metrics import *

# Pandas library
import pandas as pd
from pandas_ml import ConfusionMatrix

# Operator
import operator

# File libraries
import os
import glob2
from pathlib import Path
import csv

# Persistent storage
from pyspark import StorageLevel

# cPickle library
import cPickle as pickle

# Setup DataFrames to receive nodes (vertices) and edges
from pyspark.sql.types import *


In [4]:
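# This is a combination of the English stopwords from the NLTK library
# and a list of Google stopwords from https://www.link-assistant.com/seo-stop-words.html
# Many entries are already Porter-stemmed (e.g. 'pleas', 'whenev') because
# FileToGraph filters words against this list after stemming them.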

cachedStopWords = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', 'couldn', 'didn', 'doesn', 'hadn', 'hasn', 'haven', 'isn', 'ma', 'mightn', 'mustn', 'needn', 'shan', 'shouldn', 'wasn', 'weren', 'won', 'wouldn', 'better', 'pleas', 'ye', "daren't", 'how', 'upward', 'for', 'could', 'yourselv', 'whenev', 'then', 'whereupon', 'so', 'anyth', 'as', 'themselv', 'thi', 'brief', 'undo', 'onli', 'backward', 'hi', 'six', 'by', 'k', 'quit', 'yourself', 'fairli', 'right', 'fifth', 'wish', 'somebodi', 'did', "they'v", 'whose', 'otherwis', 'inward', 'appear', "who'll", 'late', 'presum', 'reason', 'chang', 'her', 'outsid', 'clearli', 'sorri', 'new', 'accord', 'not', 'got', 'took', 'provid', 'each', 'selv', 'whether', 'plu', 'serious', 'inner', 'normal', 'me', 'gotten', 'noth', 'mr', 'nevertheless', 'co.', 'abroad', 'unfortun', 'rather', 'particularli', 'former', 'usual', 'away', 'up', 'abov', 'becaus', 'than', 'except', 'inc.', 'can', 'howev', 'via', 'someth', 'along', 'fewer', 'while', 
'directli', 'our', 'alon', 'neverless', 'c', 'either', 'must', "c'mon", 'differ', 'whilst', 'hundr', 'between', "mightn't", 'ever', 'z', 'apart', 'gone', 'low', 'came', 'let', 'like', 'in', 'u', 'asid', "needn't", 'again', 'therebi', "she'll", 'mainli', 'taken', 'itself', 'thank', 'need', 'sometim', 'until', 'she', 'no', 'say', "they'll", 'sensibl', 'us', 'place', "he'", 'amongst', "they'r", 'hither', "i'll", 'indic', 'thing', 'were', 'littl', 'whole', 'nine', 'thanx', 'under', 'an', 'versu', 'therein', 'go', 'appropri', 'would', 'exampl', 'entir', 'beyond', 'near', 'etc', "doesn't", 'sure', 's', 'veri', "shan't", 'someon', 'dure', 'elsewher', 'myself', 'therefor', 'they', 'about', "hasn't", 'ie', 'whichev', 'far', "i'd", 'avail', 'possibl', 'current', 'though', 'nd', 'thru', "isn't", 'a', 'furthermor', "haven't", 'nonetheless', "hadn't", 'never', 'cant', 'saw', 'viz', 'their', 'even', "what'll", "can't", 'am', 'onc', 'amidst', 'that', 'last', 'to', 'd', 'everi', "couldn't", 'upon', 'one', 'everywher', 'inde', 'round', 'more', 'amid', 'ha', 'hello', 'certainli', 'wa', 'himself', 'nobodi', 'regardless', 'use', 'well', 'notwithstand', "shouldn't", 'same', 'b', 'obvious', "there'r", 'j', 'anoth', 'nearli', 'self', 'these', 'mere', 'thu', "wasn't", 'meantim', 'with', 'but', 'abl', 'secondli', 'went', "a'", "what'", 'thereupon', 'thorough', 'appreci', 'dare', 'third', 'tend', 'befor', 'miss', "we'll", 'some', "oughtn't", "she'", "where'", 'perhap', 'seen', 'caus', "what'v", 'describ', 'think', 'underneath', 'lower', 'twice', 'th', 'few', 'associ', 'insofar', 'valu', 'anybodi', 'latterli', 'moreov', 'co', 'him', 'neither', 'okay', 'mayb', 'do', 'probabl', "there'd", 'rel', 'consequ', 'ask', 'howbeit', 'o', 'wherein', 'someday', 'next', 'noon', "we'v", 'mani', 'against', 'edu', 'throughout', 'actual', 'et', 'is', 'minu', 'unless', 'yet', "i'v", 'eight', 'still', 'name', 'inc', 'mine', 'sup', 'believ', 'somehow', 'you', 'eg', 'whi', 'been', 'where', 'get', 'three', 'g', 
'seven', 'everyon', 'made', 'may', 'ani', "mayn't", 'recent', 'none', 'be', 'further', 'help', 'toward', "you'll", 'consid', 'my', 'meanwhil', 'p', 'what', "you'r", "there'l", 'els', 'beforehand', 'there', 'becam', 'forth', 'thenc', 'way', 'exactli', 'four', 'hope', 'kept', 'necessari', 'sub', 'forev', 'unlik', 'among', 'at', 'q', 'ex', 'besid', 'such', 't', "she'd", 'overal', 'wherebi', 'goe', 'no-on', 'alreadi', 'r', 'nowher', 'togeth', 'whomev', 'keep', "wouldn't", 'tell', 'y', 'variou', 'onto', 'given', 'farther', 'sinc', 'later', 'had', 'afterward', "mustn't", 'or', 'ourselv', 'also', 'soon', 'see', "one'", 'best', 'somewher', "aren't", 'less', 'whither', 'lest', 'much', 'n', 'whereaft', 'should', 'although', 'around', "c'", 'seem', 'thereaft', "there'", 'concern', 'tri', 'que', 'becom', 'accordingli', 'l', 'greet', 'of', 'oh', 'down', 'five', 'seriou', 'them', "who'", 'allow', 'on', 'everybodi', "you'd", 'truli', 'wherev', 'regard', 'come', 'somewhat', 'ago', 'hereaft', 'shall', "weren't", 'e', "it'll", 'through', 'wherea', 'most', 'cannot', "we'r", 'begin', 'here', "don't", 'adj', 'per', 'v', 'whoever', 'out', 'we', "you'v", 'opposit', 'second', 'qv', 'past', 'want', 'both', 'likewis', "there'v", 're', 'h', 'especi', 'formerli', 'm', 'old', 'anyhow', 'after', 'back', 'definit', 'despit', 'thoroughli', 'anyon', 'done', 'often', 'know', 'w', "he'll", 'insid', "they'd", "ain't", 'latter', 'might', 'evermor', 'now', 'doe', 'caption', 'end', 'take', 'unto', 'certain', 'till', 'neverf', "here'", "won't", 'aw', 'eighti', 'own', 'correspond', 'henc', 'non', 'ahead', 'sent', 'all', 'ok', 'make', 'which', 'ignor', 'nor', 'give', 'welcom', 'other', 'almost', 'hereupon', 'who', 'wonder', "i'm", 'zero', 'specifi', 'rd', 'com', 'everyth', 'said', 'too', 'x', "didn't", 'immedi', 'he', 'i', 'realli', 'half', "it'd", 'particular', 'herebi', 'vs', 'inasmuch', 'those', 'the', "t'", 'contain', 'it', 'first', 'forward', 'herein', 'follow', 'instead', 'just', "that'v", 'over', 
'have', 'and', "we'd", 'nineti', 'when', 'below', "that'", 'f', 'if', 'happen', 'behind', 'whom', 'are', 'alongsid', 'will', 'from', 'into', 'enough', 'whatev', 'least', 'your', 'anyway', 'respect', 'found', 'sever', 'un', 'herself', "let'", 'ought', 'within', "it'", 'novel', 'thirti', 'known', 'look', 'off', 'cours', "that'll", 'across', 'whenc', 'anywher', 'downward', 'alway', 'mean', 'two', 'without', 'hardli', "who'd", "he'd", 'ltd', 'mostli']

In [5]:
# Functions used in the model build


# VB style text parsing functions
def left(s, amount):
    """Return the leading slice of `s` that stops at index `amount`."""
    return s[0:amount]
def right(s, amount):
    """Return the last `amount` items of `s` (VB-style Right).

    `amount == 0` yields an empty sequence of the same type as `s`. The
    plain slice `s[-0:]` is the same as `s[0:]` and would wrongly return
    the whole of `s`, so that case is handled explicitly.
    """
    if amount == 0:
        # s[:0] keeps the type of s (str, unicode, list, ...)
        return s[:0]
    return s[-amount:]
def mid(s, offset, amount):
    """Return the slice of `s` that starts at index `offset` and spans `amount` items."""
    stop = offset + amount
    return s[offset:stop]

### CleanWord
# Normalises a piece of text so that only lower-case letters a-z and single
# spaces remain
p1 = re.compile(r'[^a-z ]', re.UNICODE)
p2 = re.compile(r' +', re.UNICODE)
def CleanWord(w):
    """Lower-case `w`, blank out non-letters and squeeze the whitespace.

    Every character that is not a-z or a space becomes a space, runs of
    spaces collapse to one, and leading/trailing spaces are stripped.
    """
    lettersOnly = p1.sub(' ', w.lower())
    return p2.sub(' ', lettersOnly).strip()

### StoreGraphToFile
# Writes a graph to the output folder: first its nodes, then its edges
def StoreGraphToFile(Graph, Desc):
    """Store the nodes of `Graph` as JSON, then delegate the edges.

    The nodes (with their attribute data) are loaded into a two-column
    DataFrame ("id", "data"), reduced to a single partition and written
    to '<_path_to_output>/<Desc>.nodes'. The edges are written by
    StoreEdgesToFile using the same `Desc`.
    """
    nodePath = _path_to_output + '/' + Desc + '.nodes'
    nodeRows = Graph.nodes(data=True)
    nodeFrame = sqlContext.createDataFrame(nodeRows, ["id", "data"])
    nodeFrame.repartition(1).write.json(nodePath)
    StoreEdgesToFile(Graph, Desc)
    
def StoreEdgesToFile(Graph, Desc):
    """Store the edges of `Graph` as JSON.

    The edges (with their attribute data) are loaded into a three-column
    DataFrame ("src", "dst", "data"), reduced to a single partition and
    written to '<_path_to_output>/<Desc>.edges'.
    """
    filename = _path_to_output + '/' + Desc + '.edges'
    # Use the graph that was passed in; the previous code read the global
    # `Gr` and therefore ignored the `Graph` argument entirely.
    edges = sqlContext.createDataFrame(Graph.edges(data=True), ["src", "dst", "data"])
    edges.repartition(1).write.json(filename)


### FileToGraph
# Builds a sliding-window graph-of-words for one document and returns the
# terms whose degree passes the configured thresholds
def FileToGraph(fileTuple):
    """Extract the significant terms of one document.

    Parameters
    ----------
    fileTuple : sequence
        fileTuple[0] is the filename, fileTuple[1] is the document text.

    Returns
    -------
    list of str
        One "filename<TAB>term<TAB>normalised degree" string per term whose
        normalised degree is above _normalised_term_limit and whose raw
        degree is above _degree_compare. The list is empty when the
        document has fewer than _windowSize-1 usable words.

    The text is cleaned with CleanWord, words of 1-2 characters are dropped,
    the rest are stemmed and then filtered against cachedStopWords. Every
    word is linked (directed, weight = number of co-occurrences) to the
    _windowSize-1 words that follow it; a word is never linked to itself.
    """
    # Array to return from function
    rArr = []

    # Take the filename from the tuple
    filename = fileTuple[0]

    # remove non letters from the text in the tuple
    r = CleanWord(fileTuple[1])

    # split into an array, stem, and remove words in the stopword list.
    # A set gives constant-time membership tests instead of scanning the
    # whole stop-word list for every word.
    stopWords = set(cachedStopWords)
    arr = [stemmer.stem(y) for y in r.split(" ") if len(y)>2]
    arr = [word for word in arr if word not in stopWords]

    arrLen = len(arr)
    span = _windowSize-1

    # Too few words to fill a window: nothing to report
    if arrLen < span:
        return rArr

    # Create a new directed graph with one node per distinct word,
    # added in order of first appearance
    G = nx.DiGraph()
    G.add_nodes_from(arr)

    # Link every word to the `span` words that follow it. Near the end of
    # the document the slice simply gets shorter, which covers what used to
    # be a separate, duplicated "tail" loop.
    for i in range(arrLen):
        src = arr[i]
        for tgt in arr[i+1:i+1+span]:
            # Only add edge if src is not equal to target
            if src != tgt:
                if G.has_edge(src, tgt):
                    # we added this one before, just increase the weight by one
                    G[src][tgt]['weight'] += 1
                else:
                    # new edge. add with weight=1
                    G.add_edge(src, tgt, weight=1)

    # Get the "indegree" or "degree" of the terms
    if _in_degree == True:
        d = G.in_degree()
    else:
        d = G.degree()
    # Some networkx versions return a degree view rather than a dict;
    # normalise to a dict so the lookups below work either way
    if not isinstance(d, dict):
        d = dict(d)

    # No nodes at all (only possible with an empty word list): nothing to report
    if not d:
        return rArr

    dMax = max(d.values())

    # Now add the qualifying words to the return array with the filename
    # and the normalised degree
    for term in d:
        if dMax > 0:
            normD = float(d[term])/float(dMax)
        else:
            normD = 0

        if normD > _normalised_term_limit and int(d[term]) > _degree_compare:
            rArr.append(filename + "\t" + term + "\t" + str(normD))

    # Return the array of "filename<TAB>term<TAB>normalised degree" strings
    return rArr



In [6]:
#### TEST - Run FileToGraph on a fixed piece of text and print what it returns
text = (
    'in natural language processing (NLP) a text graph is a graph representation of a text item '
    '(document, passage or sentence) it is typically created as a preprocessing step to support '
    'NLP tasks such as text condensation term disambiguation (topic based) text summarization '
    '(summarize large text collections) and relation extraction (extract relations from unstructured text)'
)

# FileToGraph expects a (filename, text) pair
d = [r'name.ext', text]

# Show the thresholds in force, then every term record produced
print(" ".join(["Testing:", str(_degree_compare), str(_normalised_term_limit)]))
x = FileToGraph(d)
for y in x:
    print(y)

In [7]:
#################  TEST TEST TEST

# Push one known file through the Spark pipeline and print each term record
filename = _path_to_files + '/sport/005.txt'
text_files = sc.wholeTextFiles(filename).flatMap(FileToGraph)

for x in text_files.collect():

    # Records are tab separated: filename, term, normalised degree
    y = x.split("\t")
    filename, term, degree = y[0], y[1], y[2]

    # The label is the name of the folder that holds the file
    label = os.path.basename(os.path.dirname(filename))

    print("".join([label, ': ', filename, ', ', term, ', ', degree]))

In [8]:

##### Load files #####

# PYSPARK IMPLEMENTATION:
# read every file one folder level below the source path and expand each
# (filename, text) pair into its FileToGraph term records
text_files = sc.wholeTextFiles(_path_to_files + "/*/*").flatMap(FileToGraph)





In [9]:

##### Create initial graph #####
# Builds an undirected graph linking every file node to the term nodes that
# FileToGraph reported for it, prunes terms that are too rare or too common,
# and counts the files per label.
Gr=nx.Graph()

# fArr will hold the array of filenames (in first-seen order)
fArr = []
fTest = []

# tArr will hold the array of terms added to the graph (in first-seen order)
tArr = []
tTest = []

# tArr1 is declared for terms seen at least once; nothing in this cell fills it
tArr1 = []

# Sets mirroring fArr / tArr so that the "seen before?" checks are
# constant-time instead of scanning an ever-growing list for every record
seenFiles = set()
seenTerms = set()

for x in text_files.collect():

    # Split out the tab separated fields: filename, term, normalised degree
    y = x.split("\t")
    filename = y[0]
    term = y[1]
    degree = y[2]

    # The label is derived from the sub-folder name
    label = os.path.basename(os.path.dirname(filename))

    # If the filename is not already added, add it to the graph
    if filename not in seenFiles:

        # Placeholder positions; the real layout is computed later
        xR = 0 #random.random()*1000
        yR = 0 #random.random()*1000

        # Add the file node to the graph
        Gr.add_node(filename, type="file", x=xR, y = yR, label = label)

        # Add the filename to the file array
        fArr.append(filename)
        seenFiles.add(filename)

    # If we have not seen this term before, add a node to the graph
    if term not in seenTerms:
        # Placeholder positions; the real layout is computed later
        xR2 = 0 #random.random()*1000
        yR2 = 0 #random.random()*1000

        # Add the term to the graph
        Gr.add_node(term, type="term", x=xR2, y=yR2)
        tArr.append(term)
        seenTerms.add(term)

    # Add the edge to the graph (the weight stays a string here; readers
    # convert it with float())
    Gr.add_edge(filename, term, weight=degree)


# Remove terms with fewer than _lower_count connections or more connections
# than the _upper_percentile percentile
d = Gr.degree()
# Some networkx versions return a degree view rather than a dict; take a
# snapshot so the values stay valid while nodes are removed below
if not isinstance(d, dict):
    d = dict(d)

# List to allow calculation of percentile
degreeList = [d[t] for t in tArr]

# Degree at the upper percentile; NaN (never exceeded) when there are no terms
perc = np.percentile(degreeList, _upper_percentile) if degreeList else float("nan")

# Build the list of surviving terms instead of calling tArr.remove() inside a
# loop over tArr: removing while iterating makes the loop skip the element
# that follows each removed one, so some terms were never pruned.
keptTerms = []
for t in tArr:
    n = d[t]
    if n < _lower_count or n > perc:
        # remove_node also removes every edge attached to the node
        Gr.remove_node(t)
    else:
        keptTerms.append(t)

# Update in place so tArr stays the same list object
tArr[:] = keptTerms

# Create the label arrays: lArr lists the labels in first-seen order,
# lDict counts the files carrying each label
lArr = []
lDict = dict()

for f in fArr:
    # Get the label
    label = Gr.node[f]["label"]
    if label not in lDict:
        lArr.append(label)
        lDict[label]=1
    else:
        lDict[label]+=1
    


In [10]:

# Initial layout: the labels share out a circle in proportion to their number
# of files, and every file starts at the middle of its label's arc
clustRad = 1000

# lAngle is the slice of the full circle (in radians) owned by a single file
lAngle = 2*math.pi / len(fArr)
lastEnd = 0
lStart = dict()
lEnd = dict()

# Lay the arcs end to end; each arc spans (files in class) * lAngle
for l in lArr:
    print(l)
    lStart[l] = lastEnd
    lastEnd = lastEnd + (lDict[l]*lAngle)
    lEnd[l] = lastEnd

# Move every file onto the circle at the mid-angle of its label's arc
for f in fArr:
    fileNode = Gr.node[f]

    # The label selects the arc
    c = fileNode["label"]

    # half the arc width, then the absolute angle of the arc centre
    cCentre = (lEnd[c]-lStart[c])*.5
    clustAng = lStart[c] + cCentre

    # The circle has radius clustRad and is centred on (clustRad, clustRad)
    fileNode["x"] = clustRad+(math.cos(clustAng)*clustRad)
    fileNode["y"] = clustRad+(math.sin(clustAng)*clustRad)



In [11]:
########## Build the Model

# Step 1 of 2: reposition terms (the files follow in the next step)

# Each term moves to the weighted centroid of the files it is linked to,
# using the edge weights as the weights
for t in tArr:
    # (weight, file node attributes) for every file linked to this term
    pulls = [(float(Gr.edge[f][t]["weight"]), Gr.node[f]) for f in Gr.neighbors(t)]

    sumw = sum(w for w, fileNode in pulls)
    sumx = sum(float(fileNode["x"])*w for w, fileNode in pulls)
    sumy = sum(float(fileNode["y"])*w for w, fileNode in pulls)

    if sumw > 0:
        Gr.node[t]["x"] = sumx/sumw
        Gr.node[t]["y"] = sumy/sumw
    else:
        # Nothing pulls on this term, so its position is left unchanged
        print("0 weight on term " + t)

## Store results for visualisation
StoreGraphToFile(Gr, _set_name + ".1.Reposition Terms")

# Running totals of the new file coordinates
pSumx = 0
pSumy = 0


# Each file moves to the weighted centroid of the terms it is linked to
for f in fArr:
    sumx = sumy = sumw = 0

    for t in Gr.neighbors(f):
        termNode = Gr.node[t]
        w = float(Gr.edge[f][t]["weight"])
        sumw += w
        sumx += float(termNode["x"])*w
        sumy += float(termNode["y"])*w

    # A file without any weighted term keeps its current position
    if not (sumw > 0):
        continue

    Gr.node[f]["x"] = sumx/sumw
    pSumx += sumx/sumw
    Gr.node[f]["y"] = sumy/sumw
    pSumy += sumy/sumw

## Store results for visualisation
StoreGraphToFile(Gr, _set_name + ".2.Reposition Files")

# Collect the file positions and, in the same order, their labels;
# Tree_label[i] is the label of the point Tree_Array[i]
Tree_Array = [[Gr.node[f]["x"], Gr.node[f]["y"]] for f in fArr]
Tree_label = [Gr.node[f]["label"] for f in fArr]

# Index the file positions for nearest-neighbour queries
tree = KDTree(Tree_Array)

# Persist the model: the KD-tree, the graph and the list of retained terms.
# Each file is opened in a `with` block so the handle is flushed and closed
# as soon as the dump finishes (the handles were previously never closed).
with open("PredictionTree.pkl","wb") as modelFile:
    pickle.dump(tree, modelFile)
with open("PredictionGraph.pkl","wb") as modelFile:
    pickle.dump(Gr, modelFile)
with open("PredictionTermArray.pkl","wb") as modelFile:
    pickle.dump(tArr, modelFile)

## Load model from Pickle
## NOTE: unpickling executes code from the file - only load files you trust
# with open("PredictionTree.pkl","rb") as modelFile:
#     tree = pickle.load(modelFile)



In [13]:
## Create the model graph as a DataFrames (GraphFrame not necessary)
#vertices = sqlContext.createDataFrame(Gr.nodes(data=True), ["id", "data"])

# Persist data frames
#vertices.persist(StorageLevel.MEMORY_AND_DISK)


In [14]:
def predict(fileTuple, test_k=13, single_value_only=True):
    """Predict the label of one document from the model graph and KD-tree.

    Parameters
    ----------
    fileTuple : sequence
        fileTuple[0] is the filename, fileTuple[1] is the document text.
        The actual label is taken from the name of the folder in the filename.
    test_k : int, optional
        Number of nearest training files that vote on the label (default 13).
        It is capped at the number of files in the tree.
    single_value_only : bool, optional
        True (default): report only the label with the most votes.
        False: report every voted label with its share of the votes, as
        comma separated "label:share" pairs, most votes first.

    Returns
    -------
    list of str
        A single "filename<TAB>actual label<TAB>prediction" string. The
        prediction is "UNK" when none of the document's terms is in tArr.

    Reads the module-level model objects Gr, tArr, tree and Tree_label.
    """
    # Use the filename of the document being predicted. The previous code
    # read a module-level `filename` left over from an earlier loop, so every
    # prediction was reported against the wrong file.
    filename = fileTuple[0]
    label = os.path.basename(os.path.dirname(filename))

    sumx=0
    sumy=0
    sumw=0

    # Weighted centroid of the document's terms that the model knows about
    for r in FileToGraph(fileTuple):
        ts = r.split('\t')
        t = ts[1]
        w = float(ts[2])

        if t in tArr:
            sumw+=w
            # Get the x,y of the term from the model graph
            sumx+=float(Gr.node[t]["x"])*w
            sumy+=float(Gr.node[t]["y"])*w

    pred_lab = dict()

    if sumw > 0:
        # Get the centroid
        x = sumx/sumw
        y = sumy/sumw
        # The tree cannot return more neighbours than it holds points
        k = min(test_k, len(Tree_label))
        # Query the tree for the nearest neighbours and count their labels
        dist,ind = tree.query([[x,y]], k=k)
        for i in ind[0]:
            pred_lab[Tree_label[i]] = pred_lab.get(Tree_label[i], 0) + 1
    else:
        pred_lab["UNK"] = 1

    # Labels ordered by number of votes, highest first
    ranked = sorted(pred_lab.items(), key=operator.itemgetter(1), reverse=True)

    if single_value_only:
        # Set the return value to be the highest matched label
        rVal = ranked[0][0]
    else:
        # Report every label with its share of the votes. This is rendered as
        # text because the result is concatenated into a string below.
        votes = float(sum(pred_lab.values()))
        rVal = ",".join(l + ":" + str(n / votes) for l, n in ranked)

    return [filename + "\t" + label + "\t" + rVal]



In [15]:
#### TEST - Test the predict function on fixed text

# Double-quoted literals are used where the text contains an apostrophe.
# The previous 'England''s' form is two adjacent literals that Python joins
# together, which silently dropped the apostrophe.
cricketText = (
    'Runs and wickets from Australian paceman James Pattinson '
    "and England's Stuart Broad put Nottinghamshire firmly in command at Leicestershire. "
    'Ben Raine (6-66) had helped reduce the visitors to 167-7, '
    "still 84 runs adrift of the hosts' first-innings 251. "
    'But Broad (52) and Pattinson (89) added 122 for the eighth wicket to help Notts to a 78-run lead. '
    'Luke Fletcher then piled in with three wickets as the hosts slumped to 51-6 at the close. '
    'Making his Notts debut, Pattinson hit 14 boundaries and two sixes '
    'in his 108-ball knock, and then had opener Harry Dearden caught behind '
    "by Chris Read with the ninth ball of Leicestershire's second innings. "
    'Fletcher replaced Broad, who trapped Paul Horton lbw for two, and '
    'produced an excellent spell of line and length to dismantle '
    "Leicestershire's middle order, putting Notts on the brink of victory. "
)

nlpText = (
    'in natural language processing (NLP) a text graph is a graph representation of a text item '
    '(document, passage or sentence) it is typically created as a preprocessing step to support '
    'NLP tasks such as text condensation term disambiguation (topic based) text summarization '
    '(summarize large text collections) and relation extraction (extract relations from unstructured text)'
)

# (filename, text) pairs; predict() reads the actual label from the folder
# name in the filename ("test1" / "test2" here)
samples = [
    (r'path/test1/name1.ext', cricketText),
    (r'path/test2/name2.ext', nlpText),
]

for samplePath, text in samples:
    d = [samplePath, text]

    # Print the raw result list, then each record on its own line
    x = predict(d)
    print(x)
    for y in x:
        print(y)

In [16]:
# PYSPARK IMPLEMENTATION:
# run predict over every file one folder level below the test path
predictions = sc.wholeTextFiles(_path_to_test_files + "/*/*").flatMap(predict)
  

In [17]:

# Actual and predicted label per test file, plus every distinct label seen
Actual = []
Pred = []
Labels = []

# Each collected record is "filename<TAB>actual label<TAB>predicted label"
for x in predictions.collect():

    y = x.split("\t")
    filename, label, pred = y[0], y[1], y[2]

    Actual.append(label)
    Pred.append(pred)

    # Register labels in first-seen order, the actual one before the predicted one
    for seenLabel in (label, pred):
        if seenLabel not in Labels:
            Labels.append(seenLabel)

# Overall scores; precision, recall and F1 use the 'weighted' average
accuracy = accuracy_score(Actual, Pred)
precision = precision_score(Actual, Pred, average='weighted')
recall = recall_score(Actual, Pred, average='weighted')
f1 = f1_score(Actual, Pred, average='weighted')

cm = ConfusionMatrix(Actual, Pred, Labels)

# Widen the pandas display limits before printing the confusion matrix
for optionName, optionValue in (('display.max_rows', 500),
                                ('display.max_columns', 500),
                                ('display.width', 400)):
    pd.set_option(optionName, optionValue)

print(cm)
print("")
for caption, score in (("Accuracy\t", accuracy),
                       ("Precision\t", precision),
                       ("Recall\t", recall),
                       ("F1\t", f1)):
    print(caption + str(score))



