In [2]:
##Importing Packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import plotly
import plotly.figure_factory as ff
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from imblearn.datasets import make_imbalance
from imblearn.over_sampling import SMOTE

#Models
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

#Model Result Analysis
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, ConfusionMatrixDisplay, precision_score, recall_score, f1_score, classification_report, roc_curve, plot_roc_curve, auc, precision_recall_curve, plot_precision_recall_curve, average_precision_score
from sklearn.model_selection import cross_val_score


#Read the transactions CSV (path is relative to the notebook's working directory)
csv_path = 'Transaction-Dataset.csv'
dataset = pd.read_csv(csv_path)

#Preview the first five rows as a quick sanity check on columns and values
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# row index that was saved into the CSV -- consider read_csv(..., index_col=0)
# if no later cell relies on that column. TODO confirm.
dataset.head(n=5)

Unnamed: 0,year,level_1,level_2,level_3,level_4,value
0,2013,Total,FOOD AND NON-ALCOHOLIC BEVERAGES,FOOD,Bread and Cereals,78.2
1,2013,Total,FOOD AND NON-ALCOHOLIC BEVERAGES,FOOD,Meat,66.3
2,2013,Total,FOOD AND NON-ALCOHOLIC BEVERAGES,FOOD,Fish and Seafood,64.3
3,2013,Total,FOOD AND NON-ALCOHOLIC BEVERAGES,FOOD,"Milk, Cheese and Eggs",49.4
4,2013,Total,FOOD AND NON-ALCOHOLIC BEVERAGES,FOOD,Oils and Fats,8.9


In [10]:
#Categories Level 2
# Distinct values of the "level_2" column, in order of first appearance
level_2_categories = dataset['level_2'].unique()
print(level_2_categories)



['FOOD AND NON-ALCOHOLIC BEVERAGES' 'ALCOHOLIC BEVERAGES AND TOBACCO'
 'CLOTHING AND FOOTWEAR' 'HOUSING AND UTILITIES'
 'FURNISHINGS, HOUSEHOLD EQUIPMENT AND ROUTINE HOUSEHOLD MAINTENANCE'
 'HEALTH' 'TRANSPORT' 'COMMUNICATION' 'RECREATION AND CULTURE'
 'EDUCATIONAL SERVICES' 'MISCELLANEOUS GOODS AND SERVICES']


In [9]:
#Categories Level 2
# Number of rows per "level_2" category, most frequent first
level_2_counts = dataset['level_2'].value_counts()
print(level_2_counts)

RECREATION AND CULTURE                                                152
TRANSPORT                                                             140
FOOD AND NON-ALCOHOLIC BEVERAGES                                      121
MISCELLANEOUS GOODS AND SERVICES                                      118
EDUCATIONAL SERVICES                                                   99
FURNISHINGS, HOUSEHOLD EQUIPMENT AND ROUTINE HOUSEHOLD MAINTENANCE     92
HEALTH                                                                 88
CLOTHING AND FOOTWEAR                                                  65
HOUSING AND UTILITIES                                                  44
ALCOHOLIC BEVERAGES AND TOBACCO                                        33
COMMUNICATION                                                          20
Name: level_2, dtype: int64


In [5]:
#Categories Level 3
# Distinct values of the "level_3" column, in order of first appearance
level_3_categories = dataset['level_3'].unique()
print(level_3_categories)

['FOOD' 'NON-ALCOHOLIC BEVERAGES' 'ALCOHOLIC BEVERAGES' 'CLOTHING'
 'FOOTWEAR' 'MAINTENANCE AND REPAIR OF DWELLING'
 'UTILITIES AND OTHER FUELS'
 'FURNITURE AND FURNISHINGS, CARPETS AND OTHER FLOOR COVERINGS'
 'HOUSEHOLD APPLIANCES' 'TOOLS AND EQUIPMENT FOR HOUSE AND GARDEN'
 'GOODS AND SERVICES FOR ROUTINE HOUSEHOLD MAINTENANCE'
 'MEDICAL PRODUCTS, APPLIANCES AND EQUIPMENT' 'OUTPATIENT SERVICES'
 'HOSPITAL, CONVALESCENT AND REHABILITATION SERVICES'
 'PURCHASE OF VEHICLES' 'OPERATION OF PERSONAL TRANSPORT EQUIPMENT'
 'LAND TRANSPORT SERVICES' 'OTHER TRANSPORT SERVICES'
 'POSTAL AND COURIER/DELIVERY SERVICES'
 'AUDIO-VISUAL, PHOTOGRAPHIC AND INFORMATION PROCESSING EQUIPMENT'
 'OTHER RECREATIONAL GOODS, GARDENS AND PETS'
 'RECREATIONAL AND CULTURAL SERVICES' 'NEWSPAPERS, BOOKS AND STATIONERY'
 'GENERAL, VOCATIONAL AND HIGHER EDUCATION'
 'PRIVATE TUITION AND OTHER EDUCATIONAL COURSES' 'PERSONAL CARE'
 'OTHER PERSONAL EFFECTS' 'INSURANCE' 'OTHER FINANCIAL SERVICES']


In [11]:
#Categories Level 3
# Number of rows per "level_3" category, most frequent first
level_3_counts = dataset['level_3'].value_counts()
print(level_3_counts)

FOOD                                                               99
GENERAL, VOCATIONAL AND HIGHER EDUCATION                           66
INSURANCE                                                          55
AUDIO-VISUAL, PHOTOGRAPHIC AND INFORMATION PROCESSING EQUIPMENT    55
CLOTHING                                                           44
NEWSPAPERS, BOOKS AND STATIONERY                                   44
OPERATION OF PERSONAL TRANSPORT EQUIPMENT                          44
LAND TRANSPORT SERVICES                                            33
ALCOHOLIC BEVERAGES                                                33
PERSONAL CARE                                                      33
PRIVATE TUITION AND OTHER EDUCATIONAL COURSES                      33
HOUSEHOLD APPLIANCES                                               33
RECREATIONAL AND CULTURAL SERVICES                                 33
MEDICAL PRODUCTS, APPLIANCES AND EQUIPMENT                         33
OUTPATIENT SERVICES 

In [6]:
#Categories Level 4
# Distinct values of the "level_4" column, in order of first appearance
level_4_categories = dataset['level_4'].unique()
print(level_4_categories)

['Bread and Cereals' 'Meat' 'Fish and Seafood' 'Milk, Cheese and Eggs'
 'Oils and Fats' 'Fruits' 'Vegetables'
 'Sugar, Jam, Honey, Chocolate and Confectionery' 'Food Products n.e.c'
 'Coffee, Tea and Cocoa'
 'Mineral Water, Soft Drinks, Fruit and Vegetable Juices' 'Spirits' 'Wine'
 'Beer' 'Clothing Materials' 'Garments'
 'Other Clothing and Clothing Accessories'
 'Cleaning, Repair and Hire of Clothing' 'Shoes and Other Footwear'
 'Repair and Hire of Footwear'
 'Materials for Maintenance and Repair of Dwelling'
 'Services for Maintenance and Repair of Dwelling'
 'Water Supply and Miscellaneous Services Related to Dwelling'
 'Electricity, Gas and Other Fuels' 'Furniture and Furnishings'
 'Repair of Furniture, Furnishings and Floor Coverings'
 'Major Household Appliances' 'Small Electrical Household Appliances'
 'Repair of Household Appliances' 'Major Tools and Equipment'
 'Small Tools and Miscellaneous Accessories' 'Non-Durable Household Goods'
 'Domestic Services and Household Services'