# 1. Necessary packages

In [11]:
""" System Processing """
import os
from time import time
""" Dataset Processing """
import csv
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
""" Image Processing """
import cv2
from PIL import Image
# In Google Colab: cv2_imshow() instead of cv2.imshow() OpenCV
# from google.colab.patches import cv2_imshow
# % matplotlib inline
""""""
import seaborn as sns
import matplotlib.pyplot as plt
""" Support Vector Machine """
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
""""""
import pickle

# 2. Helpers

## 2.1 Working with data set

In [1]:
def makeDataset(CSV_PATH, folders=None, image_size=28):
  """
    Append flattened, labelled image vectors to a CSV file.

    Every image that OpenCV can read in each folder is resized to
    image_size x image_size, flattened into one row and appended to CSV_PATH
    with the folder's numeric name as the first (label) column. Files that
    cannot be decoded or resized are skipped.

    Args:
      CSV_PATH (str): path to the CSV file; rows are appended to it
      folders (list): folders to scan, each named after its integer label;
        defaults to ['10', '20', '50', '100', '200', '500']
      image_size (int): side length, in pixels, of the resized image
    Raises:
      ValueError: if a folder name is not an integer label
      OSError: if a folder cannot be listed or the CSV cannot be written
  """
  if folders is None:
    folders = ['10', '20', '50', '100', '200', '500']
  # Open the output once instead of once per image. newline='' is what the
  # csv module expects; without it blank rows can appear on Windows.
  with open(CSV_PATH, 'a', newline='') as file:
    writer = csv.writer(file)
    for folder in folders:
      # The label is the folder's own name, so a path such as 'data/10' works.
      label = int(os.path.basename(os.path.normpath(folder)))
      for imgFile in os.listdir(folder):
        image = cv2.imread(os.path.join(folder, imgFile))
        if image is None:
          # cv2.imread returns None for anything it cannot decode.
          continue
        try:
          image = cv2.resize(image, (image_size, image_size))
        except cv2.error:
          # Best effort: skip this file, keep building the data set.
          continue
        writer.writerow([label, *image.reshape(-1).tolist()])

In [2]:
def getDataset(path, header, test_size=0.4, random_state=None):
  """
    Load a labelled CSV data set and split it into train and test parts.

    The first column is used as the label, every other column as a feature.

    Args:
      path (str): location of the CSV file, passed to pandas.read_csv
      header: passed to pandas.read_csv (None when there is no header row)
      test_size (float): fraction of the rows placed in the test part
      random_state: seed passed to train_test_split; give an int to get the
        same split on every run
    Returns:
      tuple: X_train, X_test, y_train, y_test as numpy arrays. The label
      arrays keep the column shape (n, 1). If the data cannot be loaded or
      split, the reason is printed and four empty arrays are returned.
  """
  try:
    dataset = pd.read_csv(path, header=header)

    features = dataset.iloc[:, 1:]
    label = dataset.iloc[:, :1]

    X_train, X_test, y_train, y_test = train_test_split(
        features, label, test_size=test_size, random_state=random_state)

    return X_train.to_numpy(), X_test.to_numpy(), y_train.to_numpy(), y_test.to_numpy()

  except Exception as error:
    # Keep the "empty arrays on failure" contract, but say what went wrong
    # and let KeyboardInterrupt / SystemExit propagate.
    print(f'Could not load dataset from {path}: {error!r}')
    return np.array([]), np.array([]), np.array([]), np.array([])

In [3]:
def printDatasetShape(X_train, X_test, y_train, y_test):
  """
    Print the shape of each part of a train/test split.

    Args:
      X_train, X_test, y_train, y_test: objects exposing a `.shape` attribute
    Output:
      One 'Shape of <name>: <shape>' line per argument followed by a blank
      line. If an argument has no usable shape, the lines printed so far are
      kept and an error line with the reason is printed instead of raising.
  """
  parts = (
      ('X_train', X_train),
      ('X_test', X_test),
      ('y_train', y_train),
      ('y_test', y_test),
  )
  try:
    for name, part in parts:
      print(f'Shape of {name}: {part.shape}')
    print()
  except Exception as error:
    # Report the cause instead of hiding it behind a bare except.
    print(f'Error occured: {error!r}')

## 2.2. Get currency by label

In [None]:
def getExchangeRate(path):
     """
     Read exchange rates from an XML file.

     Every `Exrate` element directly under the document root contributes one
     entry built from its `CurrencyName`, `CurrencyCode` and `Transfer`
     attributes.

     Args:
          path (str): path to the XML file
     Returns:
          list: dictionaries with the keys 'CurrencyName' (str),
          'CurrencyCode' (str) and 'CurrencyTransfer' (float, thousands
          separators removed)
     Raises:
          KeyError: if an Exrate element lacks one of the three attributes
          ValueError: if a Transfer attribute is not a number
     """
     # Imported here because the notebook's import cell does not provide ET.
     import xml.etree.ElementTree as ET

     # Parse the file the caller asked for rather than a hard-coded name.
     root = ET.parse(path).getroot()
     return [
          {
               'CurrencyName': x.attrib['CurrencyName'].strip(),
               'CurrencyCode': x.attrib['CurrencyCode'].strip(),
               'CurrencyTransfer': float(x.attrib['Transfer'].replace(',', '').strip())
          }
          for x in root.findall('Exrate')
     ]

In [None]:
def getCurrencyTransfer(code):
     """
     Look up the transfer rate of one currency.

     Args:
          code (str): currency code to search for
     Returns:
          The 'CurrencyTransfer' value of the first entry read from
          'exchangeRate_Vietcombank.xml' whose 'CurrencyCode' equals `code`,
          or None when no entry matches.
     """
     rates = getExchangeRate('exchangeRate_Vietcombank.xml')
     return next(
          (entry['CurrencyTransfer'] for entry in rates if entry['CurrencyCode'] == code),
          None,
     )

In [None]:
def getCurrencyById(id):
     """
     Translate a class label into a banknote name and its face value.

     Args:
          id (int): class label, one of 10, 20, 50, 100, 200, 500
     Returns:
          tuple: (name, denomination) for a known label, otherwise None
     """
     labels = [
          {  "id": 10, "value": "Ten Thousand Vietnam Dongs", 'denomination': 10000},
          # The 20,000 and 50,000 notes were previously named "Two Thousand"
          # and "Five Thousand", contradicting their denomination.
          {  "id": 20, "value": "Twenty Thousand Vietnam Dongs", 'denomination': 20000},
          {  "id": 50, "value": "Fifty Thousand Vietnam Dongs", 'denomination': 50000 },
          {  "id": 100, "value": "One Hundred Thousand VND", 'denomination': 100000},
          {  "id": 200, "value": "Two Hundred Thousand VND", 'denomination': 200000},
          {  "id": 500, "value": "Five Hundred Thousand VND", 'denomination': 500000},
     ]
     for i in labels:
          if i['id'] == id:
               return i['value'], i['denomination']
     return None

## 2.3. Working with model

In [7]:
def trainModelWithParameters(X, y, params):
  """
    Grid-search an SVC over `params` and return the fitted search object.

    Args:
      X: training features
      y: training labels
      params: parameter grid handed to GridSearchCV
    Returns:
      GridSearchCV: the fitted search (refit=True, verbose=10)
  """
  search = GridSearchCV(estimator=SVC(), param_grid=params, refit=True, verbose=10)
  search.fit(X, y)
  return search

In [8]:
def getBestParameters(model):
  """
    Return the best parameter combination found by a fitted search.

    Args:
      model: fitted search object exposing `best_params_`
        (for example the GridSearchCV returned by trainModelWithParameters)
    Returns:
      The value of `model.best_params_`
  """
  # The attribute carries a trailing underscore; `best_params` does not exist.
  return model.best_params_

## 2.4. Working with image

In [5]:
def getArrayFromImage(path, mode='RGB'):
  """
    Load an image file as a single scaled feature row.

    The image is read with OpenCV, resized to 28x28, flattened to shape
    (1, n_values) and divided by 255.0.

    Args:
      path (str): path to the image file
      mode (str): 'RGB' reads a colour image, any other value reads
        grayscale. Defaults to 'RGB'.
        NOTE(review): OpenCV is documented to return colour channels in BGR
        order; this matches makeDataset, which reads its images the same way.
    Returns:
      numpy.ndarray: float array of shape (1, n_values)
    Raises:
      ValueError: if the file cannot be read as an image
  """
  IMAGE_SIZE = (28, 28)
  flag = cv2.IMREAD_COLOR if mode == 'RGB' else cv2.IMREAD_GRAYSCALE
  image = cv2.imread(path, flag)
  if image is None:
    # cv2.imread signals failure with None; fail here with a clear message
    # instead of an opaque error inside cv2.resize.
    raise ValueError(f'Cannot read image: {path}')
  image = cv2.resize(image, IMAGE_SIZE)
  return image.reshape(1, -1) / 255.0

## 2.5 Working with confusion matrix

In [None]:
def confusionMatrix(y_val, y_pred):
     """
     Build the confusion matrix of true labels against predictions.

     Args:
          y_val: true labels
          y_pred: predicted labels
     Returns:
          The result of sklearn's confusion_matrix(y_val, y_pred)
     """
     return confusion_matrix(y_val, y_pred)

In [None]:
def drawConfusionMatrix(confusionMatrix, numberOfClass ):
     """
     Show a confusion matrix as an annotated heat map.

     Args:
          confusionMatrix: square matrix of counts
          numberOfClass (int): number of classes; rows and columns are
               labelled 0 .. numberOfClass - 1
     """
     print()
     classIndex = range(numberOfClass)
     frame = pd.DataFrame(confusionMatrix, index=classIndex, columns=classIndex)
     plt.figure(figsize=(10, 7))
     # annot_kws controls the font size of the numbers inside the cells
     sns.heatmap(frame, annot=True, annot_kws={"size": 12})
     plt.show()

# 3. Preprocessing data

## 3.1. Load data from CSV file on Github

In [4]:
# Location of the CSV data set; read with header=None by the loading cell.
URL = 'https://raw.githubusercontent.com/nguyenanhkhai/Vietnamese-Currency-Recognition/master/dataset/RGB.csv'

In [12]:
# x / y are the training features and labels, x_val / y_val the held-out part.
# getDataset returns empty arrays when loading fails, so check the printed
# shapes before running the following cells.
x, x_val, y, y_val = getDataset(URL, header=None)
printDatasetShape(x, x_val, y, y_val)

Shape of X_train: (551, 2352)
Shape of X_test: (368, 2352)
Shape of y_train: (551, 1)
Shape of y_test: (368, 1)



## 3.2. Normalize pixel values to [0, 1]

In [13]:
# Divide the raw values by 255.0 to rescale them.
# NOTE: this cell overwrites x and x_val, so running it a second time without
# reloading the data divides by 255 again.
x = x / 255.0

x_val = x_val / 255.0

## 3.3. Applying Principal Component Analysis on data set

In [14]:
# Number of principal components kept.
N=60

# For a matrix of this size scikit-learn's default solver policy picks the
# randomized SVD, so a fixed random_state is needed for repeatable projections.
pca = PCA(n_components=N, random_state=0)

# Fit on the training part only, then project both parts with the same basis.
pca.fit(x)

pcaX = pca.transform(x)

pcaX_val = pca.transform(x_val)

# 4. Training model

## 4.1. Declare params

In [15]:
# Shared candidate values. The numeric gamma values start at 0.1: gamma = 0
# reduces the rbf and poly kernels to a constant, so it is not a useful
# candidate.
GAMMA_GRID = ['auto', 'scale', *[i / 10 for i in range(1, 20)]]
C_GRID = list(range(1, 100))

# Exhaustive grid over the three kernels (slow: every combination is
# cross-validated by GridSearchCV).
params1 = [
    {
        'C': list(C_GRID),
        'degree': [1, 2, 3, 4, 5],
        'kernel': ['poly'],
        'gamma': list(GAMMA_GRID),
        'cache_size': [3000],
        'probability': [True],
    },
    {
        'C': list(C_GRID),
        'gamma': list(GAMMA_GRID),
        'kernel': ['rbf'],
        'cache_size': [3000],
        'probability': [True],
    },
    {
        'C': list(C_GRID),
        'kernel': ['linear'],
        'random_state': [0],
    },
]

# Smaller grid restricted to the rbf kernel.
rbfParams = [
    {
        'C': [1, 10, 100],
        'gamma': list(GAMMA_GRID),
        'kernel': ['rbf'],
        'cache_size': [3000],
        'probability': [True],
    }
]

# Smaller grid restricted to the polynomial kernel.
polyParams = [
    {
        'C': [1, 10, 100],
        'degree': [1, 2, 3, 4, 5],
        'kernel': ['poly'],
        'gamma': list(GAMMA_GRID),
        'cache_size': [3000],
        'probability': [True],
    }
]

## 4.2. Build a model with parameters

In [16]:
# Take the first element of every row of y to get a flat list of labels.
newY = [i[0] for i in y]

In [None]:
# Grid-search an SVC over params1 on the PCA-projected training data and time
# the run (time() reports seconds). params1 is the exhaustive grid, so this
# is the slowest cell of the notebook.
start = time()
model = trainModelWithParameters(pcaX, newY, params1)
end = time()

print(f'Train Time: { end-start }')

## 4.3. Find best parameters for model

In [None]:
# Parameter combination selected by the grid search.
print(f"Best Parameters is: {model.best_params_}")

# 5. Reports

## 5.1. Accuracy and FMeasure

### Train

In [None]:
# Predict on the data the model was fitted on.
y_pred_with_train = model.predict(pcaX)

# Accuracy and weighted F1 on the training part.
accuracyWithTrainSet = accuracy_score(y, y_pred_with_train)
f1ScoreWithTrainSet = f1_score(y, y_pred_with_train, average= "weighted")

In [None]:
# Accuracy is shown as a percentage, the F1 score as a raw fraction.
print(f"Accuracy: {accuracyWithTrainSet * 100} %")
print(f"f1 score: {f1ScoreWithTrainSet}")

### Test

In [None]:
# Predict on the held-out part.
y_pred_with_test = model.predict(pcaX_val)

# Accuracy and weighted F1 on the held-out part.
accuracyWithTestSet = accuracy_score(y_val, y_pred_with_test)
f1ScoreWithTestSet = f1_score(y_val, y_pred_with_test, average= "weighted")

In [None]:
# Accuracy is shown as a percentage, the F1 score as a raw fraction.
print(f"Accuracy: {accuracyWithTestSet * 100} %")
print(f"f1 score: {f1ScoreWithTestSet}")

## 5.2. Evaluating with 5-fold cross-validation

In [None]:
def trainUsingKFold(X, y, n_splits=5):
  """
    Cross-validate a fixed RBF-kernel SVC and print the accuracy of each fold.

    The data is shuffled and split into `n_splits` folds; for every fold a
    new SVC (C=100, gamma='auto') is fitted on the remaining folds and scored
    on the held-out one. Each fold accuracy and their average are printed.

    Args:
      X (numpy.ndarray): feature matrix, one row per sample
      y: labels, one per row of X; a flat sequence or a column of shape (n, 1)
      n_splits (int): number of folds
  """
  # SVC expects 1-D labels; flattening avoids the column-vector warning and
  # lets a plain list be indexed by the fold index arrays.
  y = np.ravel(y)
  kf = KFold(n_splits=n_splits, shuffle=True)
  accuracy = []
  for train_index, test_index in kf.split(X):
    model = SVC(C=100, cache_size=3000, gamma='auto', kernel='rbf', probability=True)
    model.fit(X[train_index], y[train_index])

    y_pred = model.predict(X[test_index])
    accuracy.append(accuracy_score(y[test_index], y_pred))

  for value in accuracy:
    print(f'Result:{value}')
  # Average over the folds actually run instead of a hard-coded 5.
  print(f'Avarage: {sum(accuracy) / len(accuracy)}')

In [None]:
# Cross-validate on the PCA-projected training part.
trainUsingKFold(pcaX, y)

In [None]:
# Same cross-validation, run on the held-out part only.
trainUsingKFold(pcaX_val, y_val)

## 5.3. Confusion Matrix

In [None]:
# Confusion matrix of the training labels against the training predictions.
train_cfs = confusionMatrix(y, y_pred_with_train)

In [None]:
# Confusion matrix of the held-out labels against the held-out predictions.
test_cfs = confusionMatrix(y_val, y_pred_with_test)

In [None]:
# Express every cell as a share of all held-out samples.
percent = test_cfs / test_cfs.sum()

ax = sns.heatmap(percent, annot=True, fmt='.2%', cmap='Blues')

# Title and axis captions in one call; the trailing ';' hides the return value.
ax.set(
    title='Seaborn Confusion Matrix with labels\n\n',
    xlabel='\nPredicted Values',
    ylabel='Actual Values ',
);

## 5.4. Precision, Recall and FMeasure index on test data set

In [None]:
# NOTE: the result of this score call is neither stored nor displayed,
# because it is not the last expression of the cell.
model.score(pcaX_val, y_val)
Y_score = model.predict(pcaX_val)
# average=None asks for one precision / recall / F-score / support value per class.
precision, recall, fscore, support = precision_recall_fscore_support(y_val, Y_score, average=None);

In [None]:
fig = plt.figure(figsize=(8, 6))
# Plot the per-class precision computed by precision_recall_fscore_support in
# the previous cell rather than numbers pasted from an earlier run.
data = np.asarray(precision) * 100
labels = ['10,000', '20,000', '50,000', '100,000', '200,000', '500,000']
# One bar per class label; a mismatch means a class is missing from y_val.
assert len(data) == len(labels), 'expected one precision value per class'
plt.xticks(range(len(data)), labels, rotation=50, fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel('Nhãn', fontsize=15)
plt.ylabel('Precision (%)', fontsize=15)
plt.title('Chỉ số Precision cho mỗi lớp', fontsize=15)
plt.bar(range(len(data)), data, color=['#597dbf', '#d98b5f', '#75bf71', '#c76e6e', '#9475ab', '#d08abb'])
plt.show()

In [None]:
fig = plt.figure(figsize=(8, 6))
# Plot the per-class recall computed by precision_recall_fscore_support in an
# earlier cell rather than numbers pasted from an earlier run.
data = np.asarray(recall) * 100
labels = ['10,000', '20,000', '50,000', '100,000', '200,000', '500,000']
# One bar per class label; a mismatch means a class is missing from y_val.
assert len(data) == len(labels), 'expected one recall value per class'
plt.xticks(range(len(data)), labels, rotation=50, fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel('Nhãn', fontsize=15)
plt.ylabel('Recall (%)', fontsize=15)
plt.title('Chỉ số Recall cho mỗi lớp', fontsize=15)
plt.bar(range(len(data)), data, color=['#597dbf', '#d98b5f', '#75bf71', '#c76e6e', '#9475ab', '#d08abb'])
plt.show()

In [None]:
fig = plt.figure(figsize=(8, 6))
# Plot the per-class F score computed by precision_recall_fscore_support in an
# earlier cell rather than numbers pasted from an earlier run.
data = np.asarray(fscore) * 100
labels = ['10,000', '20,000', '50,000', '100,000', '200,000', '500,000']
# One bar per class label; a mismatch means a class is missing from y_val.
assert len(data) == len(labels), 'expected one F score value per class'
plt.xticks(range(len(data)), labels, rotation=50, fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel('Nhãn', fontsize=15)
plt.ylabel('F Score (%)', fontsize=15)
plt.title('Chỉ số F score cho mỗi lớp', fontsize=15)
plt.bar(range(len(data)), data, color=['#597dbf', '#d98b5f', '#75bf71', '#c76e6e', '#9475ab', '#d08abb'])
plt.show()

# 6. Prediction

## 6.1. Image list for test

In [None]:
# Folder holding the images to classify.
store = '../test/'
# Build the paths with os.path.join (no doubled slash), keep regular files
# only, and sort so the results appear in the same order on every run.
imageList = [
    os.path.join(store, item)
    for item in sorted(os.listdir(store))
    if os.path.isfile(os.path.join(store, item))
]

## 6.2. Results

In [None]:
# Classify every test image. The former `ord` counter was dropped: it was
# never read and it shadowed the builtin ord() for the rest of the notebook.
for image in imageList:
  # Same pipeline as training: scaled pixel row -> PCA projection -> SVC.
  principalComponentAnalysis = pca.transform(getArrayFromImage(image, mode='RGB'))
  result = model.predict(principalComponentAnalysis)

  print(f"Name of image: {image}")
  print(f'Label: {getCurrencyById(result[0])[0]}')
  print()

# 10. Export model

In [None]:
PATH_TO_MODEL = '../app/model.pkl'
# NOTE(review): this was './app/model.pca' and never used; it now sits next
# to the model file. Confirm the directory the app loads it from.
PATH_TO_PKL = '../app/model.pca'

# Write each object to its own file. Previously the PCA was dumped to
# PATH_TO_MODEL as well, overwriting the model that had just been saved.
# The with-blocks make sure both files are flushed and closed.
with open(PATH_TO_MODEL, 'wb') as modelFile:
  pickle.dump(model, modelFile)
with open(PATH_TO_PKL, 'wb') as pcaFile:
  pickle.dump(pca, pcaFile)

# 11. Others

## 11.1 Download exchange rate file from Vietcombank

In [None]:
import urllib.request
# Save under the file name that getCurrencyTransfer and getExchangeRate read;
# the previous name 'exchangeRate.xml' was never opened by any helper.
urllib.request.urlretrieve('https://portal.vietcombank.com.vn/Usercontrols/TVPortal.TyGia/pXML.aspx', 'exchangeRate_Vietcombank.xml')