In [1]:
import numpy as np # 建立向量、矩陣等以進行高效率的大量資料運算
import matplotlib.pyplot as plt # 資料視覺化（製作圖表）

from collections import Counter # 提供特定目標的容器
import math # 處理數學運算（不適用於複數計算，若有關複數需使用需使用 cmath）

import tensorflow as tf # 機器學習大禮包
from tensorflow.keras.datasets import mnist # mnist 手寫數字資料集
(X_train, y_train), (X_test, y_test) = mnist.load_data() # 從 mnist 抓取資料 
print("Training: " + format(X_train.shape)) # 印出訓練資料大小
print("Test: " + format(X_test.shape)) # 印出測試資料大小

X_train = X_train / 255. # 除以灰階最大值進行標準化
X_train = np.where(X_train < 0.14, 0, 1) # 離散化，若小於 0.14 設為 0，大於 0.14 設為 1，讓照片只有 0 和 1，黑白更分明，去除灰灰的模糊區域


X_train = X_train.reshape(len(X_train), -1) # 把二維的圖片變成一維
print(X_train.shape) # 印出圖片格式大小

totalNum = X_train.shape[0] # 找出總共有幾張照片(60000)
classNumDic = Counter(y_train) # 排序並統計
prioriP = np.empty(10)
for i in range(10):
  prioriP[i] = classNumDic[i]/totalNum  # 標準化，算出從 0 到 9 各自的數量佔所有圖片的比例

posteriorNum = np.empty((10, X_train.shape[1])) # 建立一個 10*784 的陣列 posteriorNum
posteriorP = np.empty((10, X_train.shape[1])) # 建立一個 10*784 的陣列 posteriorP

for i in range(10):
    posteriorNum[i] = X_train[np.where(y_train == i)].sum(axis=0) # 把所有標籤等於 i 的圖片加起來
    posteriorP[i] = (posteriorNum[i] + 1) / (classNumDic[i] + 10) # 利用拉普拉斯平滑防止機率出現防止機率出現 0 的狀況


def bayesClassifier(test_x, prioriP, posteriorP):
    oldShape = test_x.shape # 記錄目前測試圖片大小
    test_x.resize(oldShape[0]*oldShape[1]) # 重新排列成 784X1
    classP = np.empty(10)
    for j in range(10): # 跑過每一張照片
        temp = 0
        for x in range(test_x.shape[0]): # 跑過每一個像素（共784個）
          # 取log讓值不會太大避免爆炸，所以公式中本來相乘的情況變成相加
          if test_x[x] == 0: # 背景（黑色處）
            temp += math.log(1-posteriorP[j][x]) # 因為之前都是對等於１的地方做平均，沒有對等於０的地方做平均，但離散化後的像素非１即０，所以用「１」減掉「等於１的機率」就是「等於０的機率」
          if test_x[x] == 1: # 線（白色處）
            temp += math.log(posteriorP[j][x])
        classP[j] = np.array(math.log(prioriP[j]) + temp) # 情況的機率乘以情況下各特徵的機率
    test_x.resize(oldShape) # 變回 28X28 的格式
    return np.argmax(classP) # 取機率最大的情況

def bayesClassifierProb(test_x, prioriP, posteriorP):
    oldShape = test_x.shape # 記錄目前測試圖片大小
    test_x.resize(oldShape[0]*oldShape[1]) # 重新排列成 784X1
    classP = np.empty(10)
    for j in range(10): # 跑過每一張照片
        temp = 0
        for x in range(test_x.shape[0]): # 跑過每一個像素（共784個）
          # 取log讓值不會太大避免爆炸，所以公式中本來相乘的情況變成相加
          if test_x[x] == 0: # 背景（黑色處）
            temp += math.log(1-posteriorP[j][x]) # 因為之前都是對等於１的地方做平均，沒有對等於０的地方做平均，但離散化後的像素非１即０，所以用「１」減掉「等於１的機率」就是「等於０的機率」
          if test_x[x] == 1: # 線（白色處）
            temp += math.log(posteriorP[j][x])
        classP[j] = np.array(math.log(prioriP[j]) + temp) # 情況的機率乘以情況下各特徵的機率
    test_x.resize(oldShape) # 變回 28X28 的格式
    return classP # 取機率最大的情況

X_test = X_test/255. # 除以灰階最大值進行標準化
X_test = np.where(X_test < 0.14, 0, 1) # 離散化，若小於 0.14 設為 0，大於 0.14 設為 1，讓照片只有 0 和 1，黑白更分明，去除灰灰的模糊區域

bayesClassifierRes = np.empty(X_test.shape[0]) # 算出在 X_test 中有幾筆資料(10000)
for i in range(X_test.shape[0]): # 跑過每一張測試圖片
    bayesClassifierRes[i] = bayesClassifier(X_test[i], prioriP, posteriorP) # 丟進貝氏分類器

(bayesClassifierRes == y_test).sum() / y_test.shape[0] # 將「經過貝氏分類器預測出的標籤與真實標籤是一樣的情況」的數量，除以「總共有幾張照片」，算出正確率

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Training: (60000, 28, 28)
Test: (10000, 28, 28)
(60000, 784)


0.8456

In [2]:
import sys # 執行環境的變數和函數的模組
import sklearn # 機器學習工具箱箱
import numpy as np # 建立向量、矩陣等以進行高效率的大量資料運算
import matplotlib.pyplot as plt # 資料視覺化（製作圖表）
from sklearn.metrics import accuracy_score # 在評估性能的各種測試方式中，計算精確度（匹配程度）
import tensorflow as tf # 機器學習大禮包
from tensorflow.keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data() # 從 mnist 抓取資料 

print("Training: " + format(X_train.shape)) # 印出訓練資料大小
print("Test: " + format(X_test.shape)) # 印出測試資料大小

X_train = X_train.reshape(len(X_train), -1)
print(X_train.shape) # 印出圖片格式大小

X_train = X_train.astype(np.float32) / 255.

from sklearn.cluster import MiniBatchKMeans # K-Means工具箱箱

n_digits = len(np.unique(y_train))
print(n_digits) # 印出去除重複資料後剩下的資料數量

kmeans = MiniBatchKMeans(n_clusters = n_digits)  # 請 MiniBatchKMeans 把資料分成 n_digit 群 (10群)
kmeans.fit(X_train) # 使用 X_train 開始訓練！

kmeans.labels_  # K-Means 分類器初步產生的標籤

def infer_cluster_labels(kmeans, actual_labels): # 幫每個分群找到最大共同特徵

    inferred_labels = {}  # 最後要回傳的字典

    for i in range(kmeans.n_clusters): # 跑過每個分群 

        labels = [] # 儲存所有位置的列表
        index = np.where(kmeans.labels_ == i) 

        labels.append(actual_labels[index]) # 把在剛剛找出的位置上的真實標籤放進列表

        if len(labels[0]) == 1: 
            counts = np.bincount(labels[0])
        else:
            counts = np.bincount(np.squeeze(labels))

        if np.argmax(counts) in inferred_labels:
            inferred_labels[np.argmax(counts)].append(i)
        else:
            inferred_labels[np.argmax(counts)] = [i]
        
    return inferred_labels # 回傳分好的字典


def infer_data_labels(X_labels, cluster_labels): # 幫分群找到對應的標籤
    
    predicted_labels = np.zeros(len(X_labels)).astype(np.uint8) # 最後要回傳的標籤列表
    
    for i, cluster in enumerate(X_labels): # 跑過所有預測的分群
        for key, value in cluster_labels.items(): # 跑過字典的所有內容
            if cluster  in value: # 如果遇到相同的圖片
                predicted_labels[i] = key # 把真實標籤記下來存進標籤列表
                
    return predicted_labels # 回傳對應好的標籤列表


cluster_labels = infer_cluster_labels(kmeans, y_train) # 利用訓練好的小堆和 mnist 的訓練標籤製作字典
X_clusters = kmeans.predict(X_train) # 利用 mnist 的訓練圖片預測分群
predicted_labels = infer_data_labels(X_clusters, cluster_labels) # 利用預測好的分群和製作好的字典進行配對
print(X_clusters[:20])
print(predicted_labels[:20])
print(y_train[:20])

X_test = X_test.reshape(len(X_test), -1) # 把二維的測試資料變成一維
X_test = X_test.astype(np.float32) / 255. # 除以灰階最大值進行標準化


kmeans = MiniBatchKMeans(n_clusters = 10) # 請 MiniBatchKMeans 把資料分成 n_digit 群 (10群)
kmeans.fit(X_test) # 使用 X_test 開始測試！

centroids = kmeans.cluster_centers_ # 找到分群的中心們

images = centroids.copy() # 複製一份中心來改變格式，以免之後發生遺憾的事
images = images.reshape(10, 28, 28) # 把照片變成十張，每張是 28X28 像素
images *= 255
images = images.astype(np.uint8) # 把型態設為整數

cluster_labels = infer_cluster_labels(kmeans, y_test) # 利用訓練好的小堆和 mnist 的測試標籤製作字典
prediction = infer_data_labels(kmeans.predict(X_test), cluster_labels) # 利用預測好的分群和製作好的字典進行配對

print('Accuracy: ' + format(accuracy_score(y_test, prediction))) # 印出預測準確率

Training: (60000, 28, 28)
Test: (10000, 28, 28)
(60000, 784)
10
[6 7 8 1 0 5 1 6 1 3 6 1 6 2 1 3 5 3 2 3]
[3 0 4 1 7 2 1 3 1 8 3 1 3 6 1 8 2 8 6 8]
[5 0 4 1 9 2 1 3 1 4 3 5 3 6 1 7 2 8 6 9]
Accuracy: 0.5702


In [3]:
!pip install flask-ngrok

!pip install pyngrok==4.1.1

!ngrok authtoken 'your own ngrok token'

!mkdir templates

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyngrok==4.1.1
  Downloading pyngrok-4.1.1.tar.gz (18 kB)
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-4.1.1-py3-none-any.whl size=15983 sha256=4eccf7578426d3b67b7d09262485cd9acec98a097283000dc404fb95df621674
  Stored in directory: /root/.cache/pip/wheels/b1/d9/12/045a042fee3127dc40ba6f5df2798aa2df38c414bf533ca765
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-4.1.1
Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [4]:
!ngrok help

NAME:
   ngrok - tunnel local ports to public URLs and inspect traffic

DESCRIPTION:
    ngrok exposes local networked services behinds NATs and firewalls to the
    public internet over a secure tunnel. Share local websites, build/test
    webhook consumers and self-host personal services.
    Detailed help for each command is available with 'ngrok help <command>'.
    Open http://localhost:4040 for ngrok's web interface to inspect traffic.

EXAMPLES:
    ngrok http 80                    # secure public URL for port 80 web server
    ngrok http -subdomain=baz 8080   # port 8080 available at baz.ngrok.io
    ngrok http foo.dev:80            # tunnel to host:port instead of localhost
    ngrok http https://localhost     # expose a local https server
    ngrok tcp 22                     # tunnel arbitrary TCP traffic to port 22
    ngrok tls -hostname=foo.com 443  # TLS traffic for foo.com to port 443
    ngrok start foo bar baz          # start tunnels from the configuration file

VERSI

In [5]:
import os
import cv2
import base64
import urllib.request
from flask import Flask, render_template, request
from flask_ngrok import run_with_ngrok
import numpy as np
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

target_url = "https://raw.githubusercontent.com/jotpalch/Digit_Recog_App/main/flask_app.html"
html = urllib.request.urlopen(target_url).read().decode("utf-8") 

HTML_file = open('templates/index.html', 'w')
HTML_file.write(html)
HTML_file.close()

app = Flask(__name__, static_folder='/templates')
run_with_ngrok(app)

def process(file):
    
    file = base64.b64decode(file)  
    img_array = np.fromstring(file,np.uint8) 
    image = cv2.imdecode(img_array,cv2.COLOR_BGR2RGB)  
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (28, 28))
    image = image/255.0
    image = 1-image
    return image

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/', methods=['GET', 'POST'])
def predict():
    if request.method == 'POST':
        t = request.values.get('type')

        image = process(request.values.get('payload').replace("data:image/jpeg;base64,",""))
        # print(request.values.get('payload').replace("data:image/jpeg;base64,",""))
        print('process done')
        prediction = "X"
        if t == "n" :
          image = np.where(image < 0.14, 0, 1)
          prediction = bayesClassifier(image, prioriP, posteriorP)
          predictionProb = bayesClassifierProb(image, prioriP, posteriorP)
        elif t == "k" :
          image = np.array([image]).astype(np.float32).reshape(1, -1)
          prediction = infer_data_labels(kmeans.predict(image), cluster_labels)[0]
       
        return render_template('index.html', number=prediction )

if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://f0d4-35-245-180-249.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [26/Jul/2022 18:11:00] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [26/Jul/2022 18:11:00] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
127.0.0.1 - - [26/Jul/2022 18:11:06] "[37mPOST / HTTP/1.1[0m" 200 -


process done


127.0.0.1 - - [26/Jul/2022 18:12:23] "[37mPOST / HTTP/1.1[0m" 200 -


process done


127.0.0.1 - - [26/Jul/2022 18:12:40] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [26/Jul/2022 18:12:50] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [26/Jul/2022 18:12:57] "[37mPOST / HTTP/1.1[0m" 200 -


process done


127.0.0.1 - - [26/Jul/2022 18:13:02] "[37mPOST / HTTP/1.1[0m" 200 -


process done


127.0.0.1 - - [26/Jul/2022 18:13:06] "[37mPOST / HTTP/1.1[0m" 200 -


process done
