### **IMPORT THƯ VIỆN**

In [1]:
import os
import re
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

### **1.LOAD DỮ LIỆU TRAIN**

In [2]:
df = pd.read_csv(r'DATA/emails2.csv')
df.head(1)

Unnamed: 0,Email No.,the,to,ect,and,for,of,a,you,hou,...,connevey,jay,valued,lay,infrastructure,military,allowing,ff,dry,Prediction
0,Email 1,0,0,1,0,0,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0


### **2.TRAIN MÔ HÌNH VÀ ĐÁNH GIÁ**

In [3]:
X = df.drop(columns=['Prediction', 'Email No.'])
y = df['Prediction']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = [
    ('Naive Bayes', MultinomialNB()),
    ('Logistic Regression', LogisticRegression(max_iter=1000)),
    ('SVM', SVC(kernel='linear')),
    ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('Gradient Boosting', GradientBoostingClassifier(random_state=42))
]

model_dir = 'MODEL'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

results = []

for model_name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)

    results.append({
        'Model': model_name,
        'Accuracy': acc,
        'Precision': report['weighted avg']['precision'],
        'Recall': report['weighted avg']['recall'],
        'F1-Score': report['weighted avg']['f1-score']
    })
    joblib.dump(model, os.path.join(model_dir, f'{model_name.lower().replace(" ", "_")}_model1.joblib'))
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Naive Bayes,0.954589,0.956655,0.954589,0.95508
1,Logistic Regression,0.971981,0.972175,0.971981,0.97205
2,SVM,0.95942,0.959739,0.95942,0.959541
3,Random Forest,0.977778,0.977802,0.977778,0.977789
4,Gradient Boosting,0.971981,0.972175,0.971981,0.97205


### **3.TESING TRÊN NHỮNG BỘ DATASET KHÁC**

#### **Lấy ra các kí tự trong bộ train**

In [4]:
col = df.drop(columns=['Prediction', 'Email No.']).columns.tolist()
with open("words.txt", 'w') as file:
    for word in col:
        file.write(word + '\n')
print(col)



#### **Count từ trong đoạn text**

In [25]:
def count_words(df):
    words = ['the', 'to', 'ect', 'and', 'for', 'of', 'a', 'you', 'hou', 'in', 'on', 'is', 'this', 'enron', 'i', 'be', 'that', 'will', 'have', 'with', 'your', 'at', 'we', 's', 'are', 'it', 'by', 'com', 'as', 'from', 'gas', 'or', 'not', 'me', 'deal', 'if', 'meter', 'hpl', 'please', 're', 'e', 'any', 'our', 'corp', 'can', 'd', 'all', 'has', 'was', 'know', 'need', 'an', 'forwarded', 'new', 't', 'may', 'up', 'j', 'mmbtu', 'should', 'do', 'am', 'get', 'out', 'see', 'no', 'there', 'price', 'daren', 'but', 'been', 'company', 'l', 'these', 'let', 'so', 'would', 'm', 'into', 'xls', 'farmer', 'attached', 'us', 'information', 'they', 'message', 'day', 'time', 'my', 'one', 'what', 'only', 'http', 'th', 'volume', 'mail', 'contract', 'which', 'month', 'more', 'robert', 'sitara', 'about', 'texas', 'nom', 'energy', 'pec', 'questions', 'www', 'deals', 'volumes', 'pm', 'ena', 'now', 'their', 'file', 'some', 'email', 'just', 'also', 'call', 'change', 'other', 'here', 'like', 'b', 'flow', 'net', 'following', 'p', 'production', 'when', 'over', 'back', 'want', 'original', 'them', 'below', 'o', 'ticket', 'c', 'he', 'could', 'make', 'inc', 'report', 'march', 'contact', 'were', 'days', 'list', 'nomination', 'system', 'who', 'april', 'number', 'sale', 'don', 'its', 'first', 'thanks', 'business', 'help', 'per', 'through', 'july', 'forward', 'font', 'free', 'daily', 'use', 'order', 'today', 'r', 'had', 'fw', 'set', 'plant', 'statements', 'go', 'gary', 'oil', 'line', 'sales', 'w', 'effective', 'well', 'tenaska', 'take', 'june', 'x', 'within', 'nbsp', 'she', 'how', 'north', 'america', 'being', 'under', 'next', 'week', 'than', 'january', 'last', 'two', 'service', 'purchase', 'name', 'less', 'height', 'off', 'agreement', 'k', 'work', 'tap', 'group', 'year', 'based', 'transport', 'after', 'think', 'made', 'each', 'available', 'changes', 'due', 'f', 'h', 'services', 'smith', 'send', 'management', 'stock', 'sent', 'll', 'co', 'office', 'needs', 'cotten', 'did', 'actuals', 'u', 'money', 'before', 'looking', 'then', 'pills', 'online', 'request', 'look', 'desk', 'ami', 'his', 'same', 'george', 'chokshi', 'point', 'delivery', 'friday', 'does', 'size', 'august', 'product', 'pat', 'width', 'iv', 'noms', 'address', 'above', 'sure', 'give', 'october', 'future', 'find', 'market', 'n', 'mary', 'vance', 'melissa', 'said', 'internet', 'still', 'account', 'those', 'down', 'link', 'hsc', 'rate', 'people', 'pipeline', 'best', 'actual', 'very', 'end', 'home', 'houston', 'tu', 'high', 'her', 'team', 'products', 'many', 'currently', 'spot', 'receive', 'good', 'such', 'going', 'process', 'feb', 'monday', 'info', 'david', 'lloyd', 'again', 'both', 'click', 'subject', 'jackie', 'december', 'total', 'na', 'lisa', 've', 'september', 'hours', 'until', 'resources', 'because', 'aol', 'february', 'where', 'g', 'investment', 'issue', 'duke', 'since', 'pay', 'show', 'way', 'global', 'computron', 'further', 'most', 'place', 'offer', 'natural', 'activity', 'eastrans', 'graves', 'right', 'prices', 'date', 'john', 'utilities', 'november', 'clynes', 'jan', 'securities', 'meeting', 'susan', 'hplc', 'julie', 'able', 'received', 'align', 'term', 'id', 'revised', 'thursday', 'pg', 'fee', 'hplno', 'trading', 'additional', 'site', 'txu', 'data', 'wellhead', 'reply', 'taylor', 'news', 'unify', 'michael', 'provide', 'note', 'much', 'access', 'lannou', 'every', 'between', 'keep', 'tuesday', 'review', 'great', 'tom', 'put', 'done', 'long', 'save', 'section', 'must', 'v', 'part', 'nd', 'million', 'check', 'trade', 'bob', 'created', 'steve', 'prior', 'copy', 'continue', 'numbers', 'via', 'world', 'demand', 'hanks', 'contracts', 'phone', 'transaction', 'customer', 'possible', 'pefs', 'meyers', 'months', 'special', 'without', 'used', 'regarding', 'software', 'howard', 'support', 'buy', 'young', 'meters', 'thru', 'believe', 'gcs', 'cec', 'entered', 'control', 'dec', 'face', 'create', 'weissman', 'st', 'color', 'come', 'supply', 'brian', 'hplo', 'own', 'correct', 'customers', 'web', 'allocation', 'soon', 'using', 'development', 'mark', 'low', 'power', 'problem', 'once', 'however', 'tickets', 'border', 'performance', 'manager', 'rates', 'center', 'companies', 'risk', 'details', 'needed', 'international', 'field', 'even', 'someone', 'doc', 'fuel', 'lee', 'paid', 'while', 'start', 'index', 'include', 'nominations', 'act', 'pricing', 'scheduled', 'gathering', 'type', 'href', 'during', 'aimee', 'anything', 'feel', 'fuels', 'getting', 'advice', 'why', 'increase', 'path', 'sell', 'works', 'issues', 'three', 'enronxgate', 'camp', 'either', 'form', 'security', 'interest', 'financial', 'family', 'xp', 'plan', 'current', 'top', 'another', 'src', 'spreadsheet', 'allen', 'wednesday', 'read', 'him', 'working', 'wynne', 'add', 'deliveries', 'buyback', 'allocated', 'firm', 'james', 'marketing', 'tx', 'results', 'got', 'stocks', 'calpine', 'might', 'operations', 'position', 'logistics', 'fax', 'cost', 'party', 'zero', 'pops', 'old', 'pt', 'scheduling', 'flowed', 'dollars', 'update', 'gco', 'katy', 'including', 'follow', 'yahoo', 'already', 'suite', 'error', 'past', 'page', 'stop', 'changed', 'book', 'program', 'few', 'better', 'operating', 'equistar', 'move', 'cotton', 'aep', 'y', 'state', 'ees', 'rita', 'provided', 'employees', 'period', 'morning', 'cd', 'hotmail', 'entex', 'swing', 'real', 'exchange', 'tomorrow', 'lst', 'counterparty', 'parker', 'person', 'follows', 'valid', 'visit', 'little', 'professional', 'quality', 'confirm', 'something', 'megan', 'brenda', 'around', 'windows', 'im', 'storage', 'accounting', 'called', 'ranch', 'tax', 'problems', 'case', 'teco', 'fact', 'always', 'too', 'unsubscribe', 'amount', 'coastal', 'never', 'rodriguez', 'love', 'acton', 'shut', 'pipe', 'project', 'hope', 'limited', 'invoice', 'credit', 'full', 'survey', 'ray', 'carlos', 'anyone', 'wanted', 'yet', 'ic', 'scott', 'years', 'charlie', 'soft', 'notice', 'advise', 'addition', 'donald', 'lsk', 'wish', 'katherine', 'website', 'hplnl', 'schumack', 'prescription', 'cover', 'shares', 'cash', 'imbalance', 'united', 'handle', 'big', 'everyone', 'style', 'clear', 'producer', 'weekend', 'city', 'requested', 'stone', 'left', 'payment', 'mobil', 'shows', 'small', 'confirmed', 'technology', 'meet', 'extend', 'life', 'intended', 'sherlyn', 'schedule', 'else', 'letter', 'box', 'bill', 'richard', 'lamphier', 'complete', 'ever', 'release', 'newsletter', 'anita', 'clem', 'having', 'herod', 'beginning', 'papayoti', 'try', 'mike', 'enter', 'estimates', 'location', 'cut', 'question', 'things', 'personal', 'feedback', 'cialis', 'found', 'area', 'dow', 'terms', 'central', 'necessary', 'man', 'run', 'reason', 'third', 'midcon', 'charge', 'president', 'de', 'listed', 'meds', 'thomas', 'thought', 'capital', 'added', 'ask', 'weeks', 'investing', 'commercial', 'star', 'several', 'easy', 'view', 'cannot', 'extended', 'lauri', 'beaumont', 'union', 'times', 'open', 'cause', 'monthly', 'action', 'offers', 'industry', 'states', 'side', 'mailto', 'probably', 'neal', 'second', 'stephanie', 'download', 'flash', 'agree', 'mcf', 'transfer', 'doing', 'important', 'basis', 'different', 'final', 'koch', 'exxon', 'remove', 'microsoft', 'interested', 'application', 'sept', 'mg', 'write', 'lp', 'east', 'requirements', 'code', 'value', 'thank', 'together', 'exploration', 'mid', 'dfarmer', 'everything', 'receipt', 'thu', 'afternoon', 'late', 'enserch', 'coming', 'bank', 'response', 'tell', 'shipping', 'night', 'events', 'cynthia', 'lsp', 'close', 'legal', 'country', 'direct', 'expected', 'ces', 'corporation', 'options', 'really', 'voip', 'nominated', 'etc', 'latest', 'potential', 'priced', 'edward', 'valero', 'material', 'stack', 'victor', 'redeliveries', 'loss', 'remember', 'baumbach', 'option', 'private', 'longer', 'aware', 'included', 'drugs', 'public', 'reinhardt', 'version', 'hesse', 'discuss', 'related', 'asked', 'say', 'viagra', 'revision', 'bgcolor', 'kind', 'pro', 'completed', 'health', 'ready', 'plans', 'registered', 'regards', 'carthage', 'zone', 'fill', 'away', 'computer', 'systems', 'industrial', 'mentioned', 'told', 'therefore', 'growth', 'sold', 'track', 'reports', 'south', 'rd', 'jim', 'costs', 'image', 'expect', 'return', 'physical', 'el', 'browser', 'donna', 'stacey', 'begin', 'china', 'duty', 'approximately', 'showing', 'unit', 'jones', 'hard', 'verify', 'updated', 'eol', 'cs', 'orders', 'talk', 'trying', 'base', 'given', 'server', 'source', 'pathed', 'strong', 'bryan', 'directly', 'risks', 'whole', 'major', 'users', 'purchases', 'oo', 'karen', 'luong', 'level', 'required', 'delivered', 'portfolio', 'riley', 'ali', 'easttexas', 'poorman', 'bellamy', 'assistance', 'nothing', 'gif', 'thing', 'retail', 'didn', 'valley', 'department', 'cleburne', 'allow', 'gpgfin', 'answer', 'items', 'paste', 'avila', 'taken', 'mm', 'nguyen', 'ensure', 'reference', 'hall', 'later', 'lone', 'user', 'methanol', 'facility', 'network', 'spoke', 'though', 'tabs', 'taking', 'status', 'considered', 'purchased', 'says', 'yourself', 'paliourg', 'dy', 'jeff', 'businesses', 'fred', 'transportation', 'apache', 'morris', 'nov', 'ltd', 'brand', 'federal', 'statement', 'oasis', 'reflect', 'assets', 'lamadrid', 'general', 'bridge', 'ability', 'oct', 'play', 'enrononline', 'compliance', 'spam', 'availability', 'king', 'understanding', 'chance', 'quick', 'effort', 'points', 'reliantenergy', 'fixed', 'short', 'hill', 'cheryl', 'aepin', 'key', 'understand', 'valign', 'capacity', 'game', 'took', 'bring', 'guys', 'god', 'green', 'care', 'withers', 'property', 'hub', 'johnson', 'employee', 'wants', 'albrecht', 'meaning', 'expectations', 'mx', 'moved', 'cernosek', 'matter', 'devon', 'calls', 'worldwide', 'records', 'removed', 'lose', 'large', 'referenced', 'walker', 'iferc', 'enw', 'ponton', 'eileen', 'ship', 'upon', 'enerfin', 'jennifer', 'looks', 'staff', 'pc', 'target', 'waha', 'making', 'cp', 'impact', 'partner', 'immediately', 'shall', 'channel', 'takes', 'sat', 'others', 'hear', 'went', 'travel', 'listing', 'approved', 'processing', 'early', 'enough', 'sally', 'starting', 'distribution', 'tejas', 'transactions', 'stay', 'earl', 'superty', 'doesn', 'reserves', 'includes', 'choose', 'adobe', 'publisher', 'paso', 'cornhusker', 'training', 'markets', 'content', 'solution', 'shell', 'jpg', 'print', 'drive', 'pain', 'password', 'half', 'herrera', 'saturday', 'moopid', 'hotlist', 'balance', 'super', 'vacation', 'sex', 'happy', 'excess', 'existing', 'fund', 'stella', 'share', 'sign', 'wells', 'won', 'four', 'text', 'card', 'tisdale', 'fwd', 'appreciate', 'non', 'experience', 'savings', 'settlements', 'draft', 'couple', 'informed', 'biz', 'watch', 'plus', 'sun', 'expense', 'images', 'land', 'occur', 'flowing', 'mar', 'terry', 'darren', 'cheap', 'weight', 'dynegy', 'activities', 'become', 'mr', 'format', 'attention', 'entire', 'photoshop', 'williams', 'instructions', 'neon', 'janet', 'contains', 'ago', 'friends', 'against', 'boas', 'music', 'certain', 'liz', 'svcs', 'record', 'fast', 'dave', 'held', 'mind', 'ua', 'publication', 'differ', 'comments', 'fun', 'rest', 'instant', 'agent', 'communications', 'director', 'partners', 'investors', 'expedia', 'kevin', 'assist', 'safe', 'approval', 'allocate', 'black', 'none', 'intrastate', 'document', 'eric', 'hakemack', 'expired', 'lower', 'active', 'secure', 'cc', 'five', 'determine', 'press', 'colspan', 'missing', 'jill', 'discussion', 'relief', 'respect', 'specific', 'technologies', 'al', 'holmes', 'white', 'yesterday', 'medical', 'pinion', 'sorry', 'men', 'leave', 'pass', 'video', 'gomes', 'doctor', 'projects', 'limit', 'air', 'knle', 'pharmacy', 'confirmation', 'opportunity', 'involve', 'notify', 'gtc', 'class', 'ken', 'started', 'outage', 'confidential', 'room', 'blue', 'estimated', 'officer', 'reach', 'messages', 'database', 'words', 'prc', 'tracked', 'transition', 'light', 'national', 'hot', 'offering', 'gulf', 'provides', 'iit', 'demokritos', 'mckay', 'average', 'wide', 'heard', 'files', 'dan', 'billed', 'mccoy', 'rc', 'exactly', 'middle', 'select', 'bruce', 'louisiana', 'receiving', 'california', 'event', 'roll', 'mops', 'william', 'appear', 'perfect', 'html', 'features', 'join', 'greater', 'sunday', 'pick', 'featured', 'cdnow', 'prize', 'reveffo', 'olsen', 'expects', 'estimate', 'near', 'common', 'package', 'title', 'whether', 'bought', 'evergreen', 'difference', 'elizabeth', 'history', 'monitor', 'advised', 'result', 'sources', 'school', 'unaccounted', 'paragraph', 'turn', 'kimberly', 'increased', 'communication', 'members', 'concerns', 'uncertainties', 'associated', 'reduce', 'committed', 'wi', 'asap', 'goes', 'trader', 'waiting', 'canada', 'worth', 'representative', 'claim', 'ceo', 'london', 'discussions', 'php', 'brazos', 'trevino', 'calling', 'involved', 'la', 'gift', 'southern', 'groups', 'hour', 'tufco', 'previously', 'voice', 'normally', 'resolve', 'efforts', 'nor', 'recent', 'purchasing', 'county', 'ok', 'express', 'generic', 'according', 'respond', 'situation', 'hold', 'lot', 'interconnect', 'word', 'came', 'west', 'role', 'opportunities', 'corporate', 'remain', 'similar', 'readers', 'suggestions', 'subscribers', 'projections', 'lead', 'learn', 'resolved', 'agreed', 'sec', 'head', 'enjoy', 'img', 'rnd', 'responsible', 'outstanding', 'member', 'panenergy', 'american', 'cass', 'register', 'promotions', 'parties', 'winfree', 'selling', 'usage', 'appropriate', 'assignment', 'media', 'believes', 'require', 'submit', 'model', 'spinnaker', 'copano', 'facilities', 'opinion', 'factors', 'identified', 'beverly', 'ews', 'gdp', 'deliver', 'job', 'profile', 'across', 'neuweiler', 'suggest', 'girls', 'manage', 'usa', 'local', 'bad', 'greg', 'vs', 'fees', 'digital', 'cf', 'strangers', 'registration', 'delta', 'rolex', 'goliad', 'hesco', 'success', 'primary', 'quarter', 'course', 'chairman', 'petroleum', 'notes', 'medications', 'ei', 'instead', 'fine', 'lake', 'pre', 'force', 'seek', 'recipient', 'gain', 'placed', 'age', 'least', 'body', 'asking', 'discussed', 'hanson', 'emails', 'nominate', 'ext', 'known', 'ones', 'ed', 'assigned', 'htmlimg', 'means', 'present', 'various', 'invoices', 'gd', 'agency', 'along', 'located', 'reflects', 'solutions', 'ex', 'house', 'cds', 'br', 'owner', 'apr', 'sullivan', 'basin', 'linda', 'worked', 'car', 'seen', 'properties', 'booked', 'higher', 'store', 'est', 'revenue', 'wait', 'women', 'far', 'met', 'wholesale', 'range', 'kcs', 'recorded', 'brown', 'lots', 'match', 'input', 'grant', 'providing', 'huge', 'investor', 'kelly', 'apply', 'paths', 'handling', 'pipes', 'advantage', 'analysis', 'focus', 'draw', 'red', 'origination', 'connection', 'planning', 'wilson', 'golf', 'summary', 'item', 'bankruptcy', 'expenses', 'pgev', 'encina', 'beaty', 'memo', 'initial', 'thousand', 'mills', 'penis', 'friend', 'conversation', 'multiple', 'martin', 'names', 'bit', 'dth', 'talked', 'behalf', 'preliminary', 'button', 'herein', 'gisb', 'coupon', 'sa', 'oi', 'appears', 'door', 'texaco', 'csikos', 'arrangements', 'cpr', 'expires', 'popular', 'sending', 'research', 'conditions', 'gb', 'board', 'ca', 'applications', 'tried', 'paying', 'acquisition', 'reporting', 'normal', 'maintenance', 'resume', 'announced', 'attachment', 'buyer', 'objectives', 'prod', 'represent', 'sandi', 'hplnol', 'government', 'committee', 'running', 'tetco', 'discount', 'jo', 'holding', 'earlier', 'positions', 'happen', 'mailing', 'decided', 'recently', 'chris', 'xanax', 'valium', 'broadband', 'individual', 'station', 'td', 'financing', 'somehow', 'pena', 'critical', 'attend', 'kristen', 'inform', 'highly', 'hl', 'phillips', 'minutes', 'titles', 'affiliate', 'wife', 'lonestar', 'charlotte', 'quickly', 'paper', 'test', 'comes', 'mobile', 'internal', 'privacy', 'ideas', 'live', 'gotten', 'floor', 'benefit', 'percent', 'ms', 'dr', 'ebs', 'msn', 'gave', 'dallas', 'enterprise', 'rx', 'spring', 'ftar', 'ooking', 'hawkins', 'exclusive', 'selected', 'baxter', 'actually', 'single', 'shop', 'nominates', 'guarantee', 'minute', 'correctly', 'unique', 'bid', 'building', 'stated', 'accept', 'assumptions', 'centana', 'senior', 'pill', 'kinsey', 'sap', 'immediate', 'goals', 'category', 'mitchell', 'acceptance', 'termination', 'sweeney', 'facts', 'amazon', 'arrangement', 'josey', 'funds', 'among', 'accuracy', 'mean', 'rather', 'kim', 'egmnom', 'indicate', 'updates', 'extra', 'adjustment', 'accounts', 'lowest', 'gold', 'purposes', 'remaining', 'talking', 'entry', 'road', 'load', 'simply', 'europe', 'lindley', 'understood', 'logos', 'hi', 'speed', 'profit', 'notified', 'jackson', 'z', 'vols', 'serve', 'additionally', 'shipped', 'connor', 'fontfont', 'q', 'kept', 'dollar', 'jr', 'almost', 'fri', 'paul', 'documents', 'analyst', 'crude', 'cap', 'shopping', 'aug', 'clearance', 'schneider', 'ftworth', 'father', 'anticipated', 'resellers', 'congress', 'counterparties', 'epgt', 'buying', 'san', 'invest', 'cartwheel', 'brandywine', 'wrong', 'mtbe', 'split', 'submitted', 'hull', 'gra', 'children', 'leader', 'true', 'baseload', 'mb', 'letters', 'billion', 'rights', 'mtr', 'heidi', 'clean', 'historical', 'asset', 'foreign', 'gr', 'entity', 'developed', 'maybe', 'jeffrey', 'transmission', 'outside', 'lost', 'membership', 'invitation', 'ocean', 'legislation', 'hernandez', 'pep', 'payments', 'wallis', 'rev', 'kenneth', 'seaman', 'annual', 'guess', 'bammel', 'lines', 'guadalupe', 'zivley', 'exception', 'example', 'pathing', 'revisions', 'pipelines', 'equity', 'budget', 'wed', 'dealers', 'window', 'juno', 'claims', 'bottom', 'standard', 'alternative', 'merchant', 'braband', 'topica', 'telephone', 'reliant', 'speculative', 'yes', 'en', 'morgan', 'cable', 'edmondson', 'participate', 'usb', 'throughout', 'checked', 'myself', 'contents', 'fat', 'investments', 'six', 'build', 'giving', 'calendar', 'inherent', 'edition', 'darial', 'hr', 'trip', 'pull', 'moving', 'concern', 'proposed', 'rm', 'deer', 'enquiries', 'alt', 'tammy', 'front', 'reduction', 'evening', 'concerning', 'gets', 'effect', 'isn', 'haven', 'cowboy', 'sea', 'dvd', 'launch', 'minimum', 'changing', 'built', 'avoid', 'chief', 'stephen', 'chad', 'manual', 'finally', 'strategy', 'executive', 'thousands', 'conflict', 'resulting', 'policy', 'commission', 'stand', 'positive', 'quantity', 'programs', 'airmail', 'texoma', 'prepared', 'austin', 'matt', 'intent', 'uae', 'citibank', 'jaquet', 'hol', 'harris', 'min', 'hplr', 'advance', 'weather', 'terminated', 'whom', 'sheet', 'venturatos', 'cellpadding', 'hotel', 'leading', 'guaranteed', 'idea', 'announce', 'pleased', 'award', 'operational', 'prepare', 'schedulers', 'child', 'sum', 'quote', 'adjusted', 'warning', 'issued', 'ga', 'cross', 'detail', 'pertaining', 'tess', 'owe', 'crow', 'availabilities', 'griffin', 'christy', 'crosstex', 'eel', 'itoy', 'heart', 'licensed', 'overnight', 'cal', 'otherwise', 'luck', 'stretch', 'generation', 'broker', 'construed', 'except', 'traders', 'carry', 'column', 'approx', 'main', 'alert', 'charges', 'step', 'revenues', 'games', 'gottlob', 'looked', 'individuals', 'beck', 'stuff', 'welcome', 'port', 'glover', 'description', 'daniel', 'quantities', 'park', 'managing', 'town', 'seller', 'summer', 'tina', 'dates', 'eff', 'dudley', 'ferc', 'robin', 'charles', 'customerservice', 'zonedubai', 'emirates', 'aeor', 'clickathome', 'materia', 'island', 'vaughn', 'sexual', 'eiben', 'forms', 'delete', 'realize', 'tailgate', 'behind', 'villarreal', 'lon', 'benoit', 'simple', 'tech', 'ahead', 'double', 'ordering', 'se', 'miss', 'law', 'eb', 'post', 'outlook', 'equipment', 'leslie', 'reeves', 'org', 'tools', 'cold', 'adjustments', 'contained', 'saw', 'edit', 'deciding', 'finance', 'patti', 'listbot', 'river', 'kathryn', 'holiday', 'successful', 'unable', 'advisor', 'pool', 'bryce', 'outages', 'adjust', 'screen', 'otc', 'brent', 'helps', 'auto', 'foot', 'region', 'links', 'contain', 'knowledge', 'yvette', 'dial', 'pressure', 'detailed', 'indicated', 'charged', 'sites', 'makes', 'female', 'mcmills', 'cook', 'mazowita', 'meredith', 'allocations', 'meetings', 'particular', 'environment', 'drug', 'search', 'mailings', 'designed', 'rock', 'measurement', 'art', 'corrected', 'kids', 'benefits', 'tv', 'seems', 'husband', 'fix', 'grow', 'decision', 'wireless', 'mo', 'conference', 'interview', 'levels', 'copies', 'cindy', 'urgent', 'regular', 'payroll', 'shown', 'consumers', 'reliable', 'tr', 'indicating', 'coast', 'greif', 'severson', 'tri', 'vicodin', 'liquids', 'significant', 'intend', 'usd', 'pager', 'avails', 'spencer', 'ce', 'charset', 'verdana', 'fully', 'flynn', 'da', 'personnel', 'multi', 'closed', 'vice', 'administration', 'gmt', 'midstream', 'eye', 'speckels', 'studio', 'cilco', 'likely', 'managers', 'structure', 'sit', 'parent', 'preparation', 'mix', 'mmbtus', 'timing', 'happening', 'lottery', 'killing', 'acquire', 'mack', 'pcx', 'fares', 'internationa', 'notification', 'swift', 'identify', 'areas', 'separate', 'unless', 'producers', 'allows', 'pretty', 'waste', 'joanie', 'drop', 'taxes', 'premium', 'teams', 'choice', 'largest', 'addressed', 'dolphin', 'ngo', 'self', 'davis', 'htm', 'ad', 'graphics', 'hit', 'competitive', 'thus', 'incorrect', 'ti', 'acts', 'previous', 'edu', 'proven', 'electric', 'pictures', 'charlene', 'benedict', 'chevron', 'treatment', 'lesson', 'player', 'sds', 'wc', 'intraday', 'assurance', 'sdsnom', 'rebecca', 'quit', 'netco', 'intra', 'whatever', 'lyondell', 'reviewed', 'solicitation', 'filings', 'log', 'noon', 'locations', 'joe', 'completely', 'rivers', 'language', 'street', 'automatically', 'ft', 'powerful', 'specials', 'alone', 'fyi', 'properly', 'proper', 'explode', 'decrease', 'medication', 'desks', 'impacted', 'anywhere', 'completion', 'banking', 'consider', 'certificate', 'exercise', 'zeroed', 'websites', 'tonight', 'diligence', 'education', 'club', 'vegas', 'affordable', 'sports', 'predictions', 'billing', 'diamond', 'posted', 'prayer', 'actions', 'nomad', 'resuits', 'jason', 'purpose', 'deposit', 'entertainment', 'materially', 'blank', 'resolution', 'anderson', 'nat', 'rom', 'soma', 'organization', 'aquila', 'solid', 'affected', 'transco', 'spend', 'responsibilities', 'assume', 'header', 'accountant', 'functionality', 'meant', 'killed', 'analysts', 'rick', 'rolled', 'noted', 'discovered', 'offices', 'torch', 'often', 'york', 'joint', 'briley', 'competition', 'guide', 'intercompany', 'son', 'settlement', 'presently', 'cart', 'tim', 'entries', 'russ', 'valadez', 'rules', 'molly', 'apple', 'atleast', 'scheduler', 'pi', 'hector', 'dell', 'opm', 'hottlist', 'yap', 'gone', 'heal', 'llc', 'setting', 'reached', 'proposal', 'hundred', 'trust', 'official', 'table', 'mcgee', 'written', 'operation', 'cellspacing', 'laptop', 'feature', 'ram', 'victoria', 'larry', 'units', 'requests', 'continued', 'external', 'pack', 'couldn', 'lateral', 'strictly', 'resource', 'although', 'sr', 'commodity', 'pulled', 'protocol', 'bed', 'generated', 'redmond', 'girl', 'apparently', 'tool', 'reviews', 'released', 'movies', 'inside', 'shareholder', 'rr', 'compensation', 'beliefs', 'foresee', 'lease', 'rule', 'marta', 'chemical', 'hillary', 'hp', 'tongue', 'adonis', 'advises', 'master', 'eight', 'wasn', 'itself', 'documentation', 'xl', 'humble', 'elsa', 'pics', 'hughes', 'brokered', 'distribute', 'consultation', 'sheri', 'lists', 'cannon', 'treated', 'factor', 'putting', 'verified', 'releases', 'enhanced', 'controls', 'craig', 'worksheet', 'conversion', 'max', 'hrs', 'helpful', 'hand', 'producing', 'dl', 'developing', 'design', 'woman', 'understands', 'standards', 'promotion', 'sarco', 'hospital', 'ffffff', 'respective', 'richmond', 'conoco', 'driver', 'easily', 'sean', 'den', 'gateway', 'holdings', 'brad', 'college', 'gains', 'adult', 'dated', 'em', 'mcloughlin', 'anticipates', 'henderson', 'julia', 'negotiations', 'sofftwaares', 'garrick', 'comstock', 'trochta', 'imceanotes', 'ecom', 'larger', 'nommensen', 'coordinate', 'partnership', 'otcbb', 'announces', 'louis', 'dealer', 'reliance', 'season', 'agua', 'dulce', 'offshore', 'gathered', 'forever', 'function', 'happened', 'sample', 'easier', 'aim', 'pa', 'expensive', 'thinks', 'maximum', 'war', 'mining', 'drilling', 'owned', 'todd', 'advanced', 'provider', 'pending', 'providers', 'silver', 'cherry', 'hundreds', 'thoughts', 'addresses', 'beach', 'baby', 'requires', 'caused', 'variance', 'extension', 'carbide', 'anytime', 'adding', 'triple', 'dawn', 'martinez', 'entering', 'login', 'bretz', 'ls', 'writeoff', 'locker', 'wiil', 'block', 'blood', 'romeo', 'responsibility', 'brennan', 'btu', 'venture', 'connected', 'nascar', 'opinions', 'executed', 'cell', 'flag', 'doctors', 'invoiced', 'marlin', 'coffey', 'nice', 'amazing', 'ii', 'determined', 'handled', 'keeping', 'touch', 'upgrade', 'shipment', 'brought', 'forwarding', 'confidence', 'hesitate', 'seem', 'electronic', 'appreciated', 'deadline', 'franklin', 'heather', 'reasons', 'passed', 'safety', 'procedures', 'payback', 'networks', 'utility', 'count', 'africa', 'exact', 'creating', 'loading', 'processed', 'court', 'tier', 'sender', 'att', 'mailbox', 'glad', 'buddy', 'profiles', 'portion', 'protection', 'compressor', 'okay', 'oba', 'finding', 'heads', 'bar', 'turned', 'remote', 'illustrator', 'oem', 'noticed', 'mails', 'darron', 'nick', 'urbanek', 'jerry', 'barrett', 'ehronline', 'und', 'abdv', 'egm', 'couid', 'technoiogies', 'owns', 'improved', 'eat', 'moment', 'owners', 'develop', 'installed', 'videos', 'frank', 'hearing', 'inches', 'busy', 'ref', 'valuable', 'et', 'un', 'url', 'shawna', 'iso', 'capture', 'extremely', 'ya', 'causing', 'consent', 'anyway', 'round', 'discrepancies', 'cheapest', 'confidentiality', 'disclosure', 'prohibited', 'vol', 'correction', 'communicate', 'processes', 'spain', 'shareholders', 'supported', 'smoking', 'mine', 'biggest', 'erections', 'platform', 'miles', 'exciting', 'association', 'die', 'restricted', 'ma', 'income', 'goal', 'bane', 'collection', 'nathan', 'wind', 'piece', 'familiar', 'gore', 'experiencing', 'pico', 'mai', 'dewpoint', 'tessie', 'hair', 'bussell', 'diane', 'delivering', 'originally', 'accurate', 'began', 'seven', 'tracking', 'randall', 'gay', 'emerging', 'prescriptions', 'story', 'arial', 'florida', 'space', 'ownership', 'european', 'sutton', 'concerned', 'male', 'spent', 'agreements', 'industries', 'picture', 'filled', 'continues', 'death', 'choate', 'majeure', 'device', 'hence', 'ten', 'campaign', 'massive', 'eyes', 'requesting', 'lives', 'reminder', 'eliminate', 'copied', 'consemiu', 'died', 'sound', 'offered', 'expressed', 'anti', 'duplicate', 'steps', 'books', 'improve', 'implementation', 'gives', 'ac', 'peggy', 'proprietary', 'ways', 'advertisement', 'published', 'earnings', 'mortgage', 'consumer', 'ct', 'tape', 'fl', 'cia', 'organizational', 'agenda', 'rental', 'carriere', 'moshou', 'church', 'trouble', 'medium', 'aggressive', 'smart', 'zajac', 'ail', 'participants', 'gap', 'earthlink', 'wire', 'trades', 'messaging', 'ut', 'wil', 'richardson', 'blvd', 'glo', 'seneca', 'pubiisher', 'imited', 'isc', 'contacts', 'sleep', 'kyle', 'cooperation', 'possibly', 'leaving', 'motor', 'hopefully', 'tie', 'speak', 'mi', 'suggested', 'canadian', 'uses', 'connect', 'pvr', 'rich', 'places', 'auction', 'po', 'spacer', 'client', 'recommended', 'royalty', 'amended', 'default', 'living', 'regardless', 'human', 'bringing', 'focused', 'stores', 'variety', 'netherlands', 'leaders', 'bowen', 'salary', 'signed', 'penny', 'loan', 'desktop', 'chase', 'pleasure', 'compare', 'session', 'overall', 'stranger', 'length', 'planned', 'sp', 'darrel', 'raise', 'palestinian', 'expiration', 'serial', 'premiere', 'suzanne', 'reduced', 'players', 'applicable', 'impotence', 'buckley', 'wayne', 'hansen', 'indicative', 'sabrae', 'dating', 'winners', 'marshall', 'highest', 'ea', 'presentation', 'allowed', 'square', 'danny', 'gepl', 'hydrocarbon', 'alpine', 'christmas', 'muscle', 'souza', 'relating', 'begins', 'ecf', 'forth', 'answers', 'audit', 'approve', 'lunch', 'types', 'starts', 'difficult', 'le', 'lasts', 'series', 'till', 'edge', 'growing', 'covered', 'shipper', 'sometime', 'republic', 'filter', 'sooner', 'increasing', 'nelson', 'percentage', 'returned', 'pop', 'interface', 'kin', 'experienced', 'prime', 'merger', 'obtain', 'ryan', 'servers', 'attachments', 'achieve', 'effects', 'gov', 'examples', 'procedure', 'explore', 'caribbean', 'rally', 'amounts', 'comfort', 'attempt', 'greatly', 'amelia', 'engel', 'delay', 'fare', 'der', 'cove', 'filing', 'fletcher', 'leth', 'undervalued', 'cents', 'esther', 'hlavaty', 'reid', 'lls', 'troy', 'palmer', 'metals', 'las', 'carter', 'luis', 'migration', 'brief', 'hess', 'therein', 'ur', 'pond', 'joanne', 'community', 'tglo', 'eogi', 'ml', 'wysak', 'felipe', 'errors', 'affect', 'convenient', 'minimal', 'boost', 'incremental', 'decide', 'reserve', 'superior', 'kerr', 'willing', 'quite', 'wild', 'unlimited', 'sans', 'mother', 'computers', 'unfortunately', 'ordered', 'satisfaction', 'priority', 'traded', 'testing', 'portal', 'ward', 'lets', 'aren', 'knows', 'refer', 'shot', 'fda', 'tue', 'saying', 'cancel', 'forecast', 'cousino', 'bass', 'permanent', 'phones', 'technical', 'whose', 'objective', 'cards', 'distributed', 'learning', 'fire', 'drill', 'towards', 'forget', 'explosion', 'gloria', 'formula', 'redelivery', 'audio', 'visual', 'encoding', 'approach', 'doubt', 'staffing', 'excite', 'corel', 'tm', 'enronavailso', 'contacting', 'alland', 'heavy', 'economic', 'nigeria', 'milwaukee', 'phillip', 'curve', 'returns', 'padre', 'kathy', 'buttons', 'sir', 'vary', 'sounds', 'disclose', 'authority', 'flw', 'straight', 'worldnet', 'beemer', 'ooo', 'defs', 'thorough', 'officers', 'flight', 'prefer', 'awesome', 'macintosh', 'feet', 'constitutes', 'formosa', 'porn', 'armstrong', 'driscoll', 'watches', 'newsietter', 'twenty', 'tommy', 'fields', 'method', 'setup', 'allocating', 'initially', 'missed', 'clarification', 'especially', 'dorcheus', 'del', 'millions', 'insurance', 'pooling', 'trial', 'tennessee', 'ellis', 'direction', 'bold', 'catch', 'performing', 'accepted', 'matters', 'batch', 'continuing', 'winning', 'symbol', 'offsystem', 'decisions', 'produced', 'ended', 'greatest', 'degree', 'solmonson', 'imbalances', 'fall', 'fear', 'hate', 'fight', 'reallocated', 'debt', 'reform', 'australia', 'plain', 'prompt', 'remains', 'ifhsc', 'enhancements', 'connevey', 'jay', 'valued', 'lay', 'infrastructure', 'military', 'allowing', 'ff', 'dry']
    word_counts = pd.DataFrame()
    for word in words:
        word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
    word_counts['label'] = df['label']
    word_counts.to_csv(f'dt1_split.csv')

In [None]:
def count_words(df, words_file='words.txt'):
    with open(words_file, 'r') as file:
        words = [line.strip() for line in file.readlines()]
    
    word_counts = pd.DataFrame()
    
    for word in words:
        word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
    
    word_counts['label'] = df['label']
    
    word_counts.to_csv('dt1_split.csv', index=False)

#### **DATA 1**

In [9]:
df = pd.read_csv(r'DATA/emails_test1.csv')
df.columns = ['email', 'label']
df.head(3)

Unnamed: 0,email,label
0,Subject: naturally irresistible your corporate...,1
1,Subject: the stock trading gunslinger fanny i...,1
2,Subject: unbelievable new homes made easy im ...,1


In [14]:
count_words(df)

  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['em

In [15]:
df = pd.read_csv('dt1_split.csv')
df.head(3)

Unnamed: 0.1,Unnamed: 0,the,to,ect,and,for,of,a,you,hou,...,connevey,jay,valued,lay,infrastructure,military,allowing,ff,dry,label
0,0,5,4,0,5,1,4,5,3,0,...,0,0,0,0,0,0,0,0,0,1
1,1,1,0,0,3,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2,1,4,0,2,1,1,3,5,0,...,0,0,0,0,0,0,0,0,0,1


In [16]:
model = joblib.load(r'MODEL/naive_bayes_model1.joblib')

In [17]:
df = df.iloc[:, 1:]
X = df.drop(columns=['label'])
y = df['label']  
y_pred = model.predict(X)

print(f"Độ chính xác của:", accuracy_score(y, y_pred))
print(f"Báo cáo phân loại của:\n", classification_report(y, y_pred))

Độ chính xác của: 0.9402932960893855
Báo cáo phân loại của:
               precision    recall  f1-score   support

           0       0.97      0.95      0.96      4360
           1       0.85      0.91      0.88      1368

    accuracy                           0.94      5728
   macro avg       0.91      0.93      0.92      5728
weighted avg       0.94      0.94      0.94      5728



#### **DATA 2**

In [21]:
df = pd.read_csv(r'DATA/SMS_test2.csv', encoding = 'latin-1')
df.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [22]:
df = df[['v1', 'v2']]
df.columns = ['label', 'email']
df.head()

Unnamed: 0,label,email
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [23]:
df['label'] = df['label'].apply(lambda x: 1 if x == 'spam' else 0)
df.head()

Unnamed: 0,label,email
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [26]:
count_words(df)

  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['em

In [55]:
df = pd.read_csv('dt1_split.csv')
df.head(3)

Unnamed: 0.1,Unnamed: 0,the,to,ect,and,for,of,a,you,hou,...,connevey,jay,valued,lay,infrastructure,military,allowing,ff,dry,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,3,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1


In [60]:
model = joblib.load(r'MODEL/naive_bayes_model1.joblib')

In [56]:
df = df.iloc[:, 1:]

In [61]:
X = df.drop(columns=['label'])
y = df['label']  
y_pred = model.predict(X)

print(f"Độ chính xác của:", accuracy_score(y, y_pred))
print(f"Báo cáo phân loại của:\n", classification_report(y, y_pred))

Độ chính xác của: 0.7347451543431442
Báo cáo phân loại của:
               precision    recall  f1-score   support

           0       0.93      0.75      0.83      4825
           1       0.28      0.62      0.39       747

    accuracy                           0.73      5572
   macro avg       0.60      0.69      0.61      5572
weighted avg       0.84      0.73      0.77      5572



#### **DATA 3**

In [64]:
df = pd.read_csv(r'DATA/emails_test2.csv')
df['label'] = df['label'].apply(lambda x: 1 if x == 'spam' else 0)
df.head()

Unnamed: 0,email,label
0,Upgrade to our premium plan for exclusive acce...,0
1,Happy holidays from our team! Wishing you joy ...,0
2,We're hiring! Check out our career opportuniti...,0
3,Your Amazon account has been locked. Click her...,1
4,Your opinion matters! Take our survey and help...,0


In [66]:
count_words(df)
df = pd.read_csv('dt1_split.csv')

  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['email'].apply(lambda x: x.lower().split().count(word))
  word_counts[word] = df['em

In [67]:
df = df.iloc[:, 1:]

In [68]:
X = df.drop(columns=['label'])
y = df['label']  
y_pred = model.predict(X)

print(f"Độ chính xác của:", accuracy_score(y, y_pred))
print(f"Báo cáo phân loại của:\n", classification_report(y, y_pred))

Độ chính xác của: 0.664804469273743
Báo cáo phân loại của:
               precision    recall  f1-score   support

           0       0.79      0.54      0.64       100
           1       0.59      0.82      0.68        79

    accuracy                           0.66       179
   macro avg       0.69      0.68      0.66       179
weighted avg       0.70      0.66      0.66       179

