<a href="https://colab.research.google.com/github/nhanhl0902/TLS/blob/main/neuralnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'mnist-in-csv:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F27352%2F34877%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240420%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240420T144348Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D5733f2583c66134ab0983ba5027708159ba3028e3a3e5b35cc9ef9798ec092b57f28f07232b95a163d71c60f0e866f1b615d1ef189d51d1e43ed8994e016dc960880a3944e50c18fc457611f1dfe8701c2f40d952819e357b55dd3c04a563b97d83d859ca21f5ccd74f3216d97dfaf3c1489507660fe6d179a7588efd81eb841d58e879319d27b6066015d7f01e25c228f3caf91e8fe84fb27e767110c5ec4a356186933f68ac6ad3c766a6b300f7cb449493bda662cf34049761a283b5bf4265e51cea3d9a998b1b89dc240b9347f04ae4c07fe0141bc7d937ad5245883b0276e7ac1c841b104d61fb8ceb34fc9e2c194093515cb8312072b785859fae0eaaa'

# Directories that mirror Kaggle's layout inside this notebook environment.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'  # not referenced by the code shown in this notebook

# Start from a clean input directory: try to unmount it (shell errors are
# discarded), delete whatever is left, then recreate both directories.
!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

# Create ../input and ../working symlinks pointing at the directories above;
# a link that already exists is left untouched.
try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<url-encoded URL>" entry of DATA_SOURCE_MAPPING
# and unpack the archive into KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so the directory name is separated from the
    # (percent-encoded) URL even if the URL part were to contain a colon.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header can be missing; without a total there is no
            # meaningful progress bar, so the bar simply stays empty.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here
                # would overwrite the module and break the next iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')

Downloading mnist-in-csv, 15970596 bytes compressed
Downloaded and uncompressed: mnist-in-csv
Data source import complete.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mnist-in-csv/mnist_test.csv
/kaggle/input/mnist-in-csv/mnist_train.csv


In [None]:
def check_grad(fn, gr, X, eps=1e-6):
    """Compare an analytic gradient with a central-difference estimate.

    Parameters
    ----------
    fn : callable
        Scalar-valued function taking an array shaped like ``X``.
    gr : callable
        Analytic gradient of ``fn``; its result must broadcast against ``X``.
    X : array_like
        Point at which both gradients are evaluated.
    eps : float, optional
        Half-width of the finite-difference step (1e-10 to 1e-6 is usually good).

    Returns
    -------
    float
        Norm of (numerical gradient - analytic gradient). The value is also
        printed, as before.
    """
    # Work on a private float copy. Perturbing an integer array by eps would be
    # truncated back to the original value and produce an all-zero gradient.
    point = np.array(X, dtype=np.float64, order='C')
    flat_point = point.reshape(-1)   # view: `point` is C-contiguous
    num_grad = np.zeros_like(point)  # numerical gradient, same shape as X
    flat_grad = num_grad.reshape(-1)

    # Perturb one coordinate at a time in place and restore it afterwards,
    # instead of copying the whole array twice per coordinate.
    for i in range(flat_point.shape[0]):
        saved = flat_point[i]
        flat_point[i] = saved + eps
        f_plus = fn(point)
        flat_point[i] = saved - eps
        f_minus = fn(point)
        flat_point[i] = saved
        flat_grad[i] = (f_plus - f_minus) / (2 * eps)

    diff = np.linalg.norm(num_grad - gr(point))
    print('Difference between two methods should be small:', diff)
    return diff


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The value is computed from exp(-|x|), which never overflows, so inputs of
    large magnitude no longer trigger overflow warnings. Array-like inputs are
    converted with ``np.asarray`` and the result is a NumPy array (a NumPy
    scalar for scalar input).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # lies in [0, 1] for every finite x
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same value
    # written so that the exponent is never positive.
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)
def gd_of_sigmoid(x):
    """Return s * (1 - s) with s = sigmoid(x), the derivative of the sigmoid at x."""
    s = sigmoid(x)
    return s * (1 - s)
# Fixed seed so the randomly drawn test parameters are reproducible.
np.random.seed(10)
# Parameters of a 784 -> 16 -> 17 -> 10 network used only for gradient checking:
# weights are stored as (outputs, inputs), biases as (1, outputs).
W0_test= np.random.rand(16,784)
B0_test=np.random.rand(1,16)
W1_test=np.random.rand(17,16)
B1_test=np.random.rand(1,17)
W2_test=np.random.rand(10,17)
B2_test=np.random.rand(1,10)
batch_size = 100
# NOTE: create_mini_batches, df and vector_to_one_hot are defined in a later
# cell of this notebook; that cell has to be run before this one (the recorded
# output of this cell is a NameError for create_mini_batches).
mini_batches = create_mini_batches(df, batch_size)
# The first mini-batch is the fixed data set for all gradient checks below.
label=mini_batches[0]['label'].values
X=mini_batches[0].drop(columns='label').values/255  # all non-label columns, divided by 255
onehot=vector_to_one_hot(label,10)
def fn1(W2_test):
    """Network loss as a function of the output-layer weights.

    Feeds the module-level ``X`` through the three sigmoid layers, using the
    supplied ``W2_test`` and the module-level values for every other
    parameter, and returns (1/200) * ||output - onehot||^2.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    residual_norm = np.linalg.norm(output - onehot)
    return (1/200) * (residual_norm ** 2)
def gr1(W2_test):
    """Analytic gradient of fn1 with respect to ``W2_test``.

    Returns the batch mean of the per-sample outer products between the
    output-layer error signal and the second hidden activation; the result
    has the shape of ``W2_test``.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    # Error signal at the output layer: sigmoid derivative times residual.
    delta_out = (output * (np.ones(10) - output)) * (output - onehot)
    per_sample = np.einsum('ij,ik->ikj', hidden1, delta_out)
    return np.mean(per_sample, axis=0)
def fn2(W1_test):
    """Network loss as a function of the second-layer weights.

    Feeds the module-level ``X`` through the three sigmoid layers, using the
    supplied ``W1_test`` and the module-level values for every other
    parameter, and returns (1/200) * ||output - onehot||^2.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    residual_norm = np.linalg.norm(output - onehot)
    return (1/200) * (residual_norm ** 2)
def gr2(W1_test):
    """Analytic gradient of fn2 with respect to ``W1_test``.

    Propagates the output-layer error signal back through ``W2_test`` and
    returns the batch mean of its outer product with the first hidden
    activation; the result has the shape of ``W1_test``.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    delta_out = (output * (np.ones(10) - output)) * (output - onehot)
    # Error signal carried back through the output weights.
    back_to_hidden1 = np.einsum('ij,ki->kj', W2_test, delta_out)
    delta_hidden1 = (hidden1 * (np.ones(17) - hidden1)) * back_to_hidden1
    per_sample = np.einsum('ij,ik->ikj', hidden0, delta_hidden1)
    return np.mean(per_sample, axis=0)
def fn3(W0_test):
    """Network loss as a function of the first-layer weights.

    Feeds the module-level ``X`` through the three sigmoid layers, using the
    supplied ``W0_test`` and the module-level values for every other
    parameter, and returns (1/200) * ||output - onehot||^2.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    residual_norm = np.linalg.norm(output - onehot)
    return (1/200) * (residual_norm ** 2)
def gr3(W0_test):
    """Analytic gradient of fn3 with respect to ``W0_test``.

    Propagates the output-layer error signal back through ``W2_test`` and
    ``W1_test`` and returns the batch mean of its outer product with the
    input ``X``; the result has the shape of ``W0_test``.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    delta_out = (output * (np.ones(10) - output)) * (output - onehot)
    back_to_hidden1 = np.einsum('ij,ki->kj', W2_test, delta_out)
    delta_hidden1 = (hidden1 * (np.ones(17) - hidden1)) * back_to_hidden1
    # One more step back, through the second-layer weights.
    back_to_hidden0 = np.einsum('ij,ki->kj', W1_test, delta_hidden1)
    delta_hidden0 = (hidden0 * (np.ones(16) - hidden0)) * back_to_hidden0
    per_sample = np.einsum('ij,ik->ikj', X, delta_hidden0)
    return np.mean(per_sample, axis=0)
def fn4(B2_test):
    """Network loss as a function of the output-layer bias.

    Feeds the module-level ``X`` through the three sigmoid layers, using the
    supplied ``B2_test`` and the module-level values for every other
    parameter, and returns (1/200) * ||output - onehot||^2.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    residual_norm = np.linalg.norm(output - onehot)
    return (1/200) * (residual_norm ** 2)
def gr4(B2_test):
    """Analytic gradient of fn4 with respect to ``B2_test``.

    Returns the batch mean of the output-layer error signal (one value per
    output unit).
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    delta_out = (output * (np.ones(10) - output)) * (output - onehot)
    return np.mean(delta_out, axis=0)
def fn5(B1_test):
    """Network loss as a function of the second-layer bias.

    Feeds the module-level ``X`` through the three sigmoid layers, using the
    supplied ``B1_test`` and the module-level values for every other
    parameter, and returns (1/200) * ||output - onehot||^2.
    """
    hidden0 = sigmoid(X @ W0_test.T + B0_test)
    hidden1 = sigmoid(hidden0 @ W1_test.T + B1_test)
    output = sigmoid(hidden1 @ W2_test.T + B2_test)
    residual_norm = np.linalg.norm(output - onehot)
    return (1/200) * (residual_norm ** 2)
def gr5(B1_test):
    """Analytic gradient of fn5 with respect to ``B1_test``.

    fn5 averages the squared error over the batch, so the bias gradient is
    the batch mean of the second-layer error signal. The previous version
    returned the un-averaged per-sample matrix (one row per sample), which
    cannot match the finite-difference gradient of fn5.

    Returns one value per unit of the second layer; the result broadcasts
    against ``B1_test``.
    """
    a0 = sigmoid(X @ W0_test.T + B0_test)
    a1 = sigmoid(a0 @ W1_test.T + B1_test)
    a2 = sigmoid(a1 @ W2_test.T + B2_test)
    # Output-layer error signal: sigmoid derivative times residual.
    delta_out = (a2 * (1 - a2)) * (a2 - onehot)
    # Carry it back through the output weights to the second layer.
    delta_hidden1 = (a1 * (1 - a1)) * (delta_out @ W2_test)
    return np.mean(delta_hidden1, axis=0)
def fn6(B0_test):
    """Network loss as a function of the first-layer bias.

    Feeds the module-level ``X`` through the three sigmoid layers, using the
    supplied ``B0_test`` and the module-level values for every other
    parameter, and returns (1/200) * ||output - onehot||^2. The scaling now
    matches fn1-fn5; the previous factor of 0.5 made this loss inconsistent
    with its sibling functions.
    """
    a0 = sigmoid(X @ W0_test.T + B0_test)
    a1 = sigmoid(a0 @ W1_test.T + B1_test)
    a2 = sigmoid(a1 @ W2_test.T + B2_test)
    return (1/200) * (np.linalg.norm(a2 - onehot) ** 2)
def gr6(B0_test):
    """Analytic gradient of fn6 with respect to ``B0_test``.

    Back-propagates the output-layer error signal through ``W2_test`` and
    ``W1_test`` and returns its batch mean (one value per unit of the first
    layer). The previous version returned the un-averaged per-sample matrix.
    """
    a0 = sigmoid(X @ W0_test.T + B0_test)
    a1 = sigmoid(a0 @ W1_test.T + B1_test)
    a2 = sigmoid(a1 @ W2_test.T + B2_test)
    # Error signals, from the output layer back to the first layer.
    delta_out = (a2 * (1 - a2)) * (a2 - onehot)
    delta_hidden1 = (a1 * (1 - a1)) * (delta_out @ W2_test)
    delta_hidden0 = (a0 * (1 - a0)) * (delta_hidden1 @ W1_test)
    return np.mean(delta_hidden0, axis=0)


# theoretical_derivative, estimated_derivative, error = check_derivative(fn4, gr4, x)

# print(f"Theoretical derivative: {theoretical_derivative}")
# print(f"Estimated derivative: {estimated_derivative}")
# print(f"Absolute error: {error}")

# Compare each analytic gradient with its finite-difference estimate.
check_grad(fn1,gr1,W2_test)
check_grad(fn2,gr2,W1_test)
check_grad(fn3,gr3,W0_test)
check_grad(fn4,gr4,B2_test)
# The checks for the two hidden-layer biases are disabled.
# check_grad(fn5,gr5,B1_test)
# check_grad(fn6,gr6,B0_test)
# a=fn1(W2_test)
# print(a.shape)

NameError: name 'create_mini_batches' is not defined

In [None]:
from keras.utils import to_categorical
# Locations of the two MNIST CSV files under /kaggle/input.
test='/kaggle/input/mnist-in-csv/mnist_test.csv'
train='/kaggle/input/mnist-in-csv/mnist_train.csv'
# Training set as a DataFrame; later code reads its 'label' column.
df=pd.read_csv(train)
def create_mini_batches(dataframe, batch_size):
    """Split ``dataframe`` into consecutive row slices of ``batch_size`` rows.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to split; rows keep their original order.
    batch_size : int
        Number of rows per mini-batch. Must be positive.

    Returns
    -------
    list
        ``dataframe.iloc`` slices in order. Every slice has ``batch_size``
        rows except possibly the last, which holds the remainder. An empty
        frame yields an empty list.

    Raises
    ------
    ValueError
        If ``batch_size`` is zero or negative. Previously zero raised an
        unrelated ZeroDivisionError and negative values silently produced
        meaningless slices.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    # Stepping by batch_size covers the full batches and the final partial one.
    return [
        dataframe.iloc[start:start + batch_size]
        for start in range(0, len(dataframe), batch_size)
    ]


# For a scalar
def scalar_to_one_hot(scalar, num_classes):
    """One-hot encode a single class index by delegating to ``to_categorical``."""
    encoded = to_categorical(scalar, num_classes=num_classes)
    return encoded
# For a vector
def vector_to_one_hot(vector, num_classes):
    """One-hot encode a sequence of class indices by delegating to ``to_categorical``."""
    encoded = to_categorical(vector, num_classes=num_classes)
    return encoded

# Split the training DataFrame `df` into consecutive mini-batches of 100 rows.
batch_size = 100
mini_batches = create_mini_batches(df, batch_size)

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The value is computed from exp(-|x|), which never overflows, so inputs of
    large magnitude no longer trigger overflow warnings. Array-like inputs are
    converted with ``np.asarray`` and the result is a NumPy array (a NumPy
    scalar for scalar input).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # lies in [0, 1] for every finite x
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same value
    # written so that the exponent is never positive.
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)
def gd_of_sigmoid(x):
    """Return s * (1 - s) with s = sigmoid(x), the derivative of the sigmoid at x."""
    s = sigmoid(x)
    return s * (1 - s)

In [None]:
def initialize_weights_xavier(input_size, output_size):
    """
    Initialise a weight matrix with the Xavier method.

    Parameters:
    input_size: number of neurons in the input layer.
    output_size: number of neurons in the output layer.

    Returns:
    An (input_size, output_size) array drawn from a normal distribution
    with mean 0 and standard deviation sqrt(2 / (input_size + output_size)).
    """
    # Standard deviation prescribed by the Xavier formula.
    fan_total = input_size + output_size
    spread = np.sqrt(2 / fan_total)
    # Random weights from N(0, spread^2).
    shape = (input_size, output_size)
    return np.random.normal(0, spread, shape)

# Trainable parameters of the 784 -> 16 -> 17 -> 10 network.
# Weights are transposed to (outputs, inputs); biases are drawn with
# np.random.rand and have shape (1, outputs).
w_0= initialize_weights_xavier(784,16).T
b_0=np.random.rand(1,16)
w_1=initialize_weights_xavier(16,17).T
b_1=np.random.rand(1,17)
w_2=initialize_weights_xavier(17,10).T
b_2=np.random.rand(1,10)
# Vectors of ones, one per layer width (output, second hidden, first hidden).
one_2=np.ones(10)
one_1=np.ones(17)
one_0=np.ones(16)

In [None]:
# Pick mini-batch 0 and split it into labels, inputs (non-label columns
# divided by 255) and one-hot targets with 10 classes.
index=0
label=mini_batches[index]['label'].values
X=mini_batches[index].drop(columns='label').values/255
onehot=vector_to_one_hot(label,10)

In [None]:
# Held-out test data: inputs divided by 255, integer labels, one-hot targets.
df_test = pd.read_csv(test)
X_test = df_test.drop(columns=['label']).values / 255
y_test = df_test['label'].values
oh_test = vector_to_one_hot(y_test, 10)

learning_rate = 10        # gradient-descent step size
steps_per_batch = 1000    # gradient steps taken on every mini-batch


def _forward(inputs):
    """Return (a_0, a_1, a_2), the three sigmoid activations for ``inputs`` under the current global weights."""
    hidden_0 = sigmoid(inputs @ w_0.T + b_0)
    hidden_1 = sigmoid(hidden_0 @ w_1.T + b_1)
    return hidden_0, hidden_1, sigmoid(hidden_1 @ w_2.T + b_2)


def _half_mse(outputs, targets):
    """Return ||outputs - targets||^2 / (2 * number of samples)."""
    return np.linalg.norm(outputs - targets) ** 2 / (2 * len(targets))


for index in range(len(mini_batches)):
    label = mini_batches[index]['label'].values
    X = mini_batches[index].drop(columns='label').values / 255
    onehot = vector_to_one_hot(label, 10)

    # Monitoring batch drawn uniformly from ALL training mini-batches
    # (randint's upper bound is exclusive, so len(mini_batches) is correct).
    validate_index = np.random.randint(0, len(mini_batches))
    validate_X = mini_batches[validate_index].drop(columns='label').values / 255
    validate_y = mini_batches[validate_index]['label'].values
    validate_one_hot = vector_to_one_hot(validate_y, 10)

    for _ in range(steps_per_batch):
        a_0, a_1, a_2 = _forward(X)
        dist = _half_mse(a_2, onehot)

        # Evaluation uses the same forward pass as training. The first layer
        # previously skipped the sigmoid here, so the reported test and
        # validation numbers described a different network.
        _, _, a_2_test = _forward(X_test)
        test_error = _half_mse(a_2_test, oh_test)
        # Accuracy comes from the forward pass that is computed anyway, so
        # the printed value reflects the current weights.
        predictions = np.argmax(a_2_test, axis=1)
        accuracy = np.mean(y_test == predictions)

        _, _, validate_a_2 = _forward(validate_X)
        validate_error = _half_mse(validate_a_2, validate_one_hot)

        print(dist,'Validate_error:',validate_error,'Error_test:',test_error,"Accuracy:",accuracy*100,'%')

        # Backpropagation: compute each layer's error signal once and reuse it.
        delta_2 = (a_2 * (1 - a_2)) * (a_2 - onehot)
        delta_1 = (a_1 * (1 - a_1)) * (delta_2 @ w_2)
        delta_0 = (a_0 * (1 - a_0)) * (delta_1 @ w_1)

        # delta.T @ activations / n equals the batch mean of the per-sample
        # outer products, without building a (batch, out, in) tensor.
        n_samples = X.shape[0]
        gd_of_w_2 = delta_2.T @ a_1 / n_samples
        gd_of_w_1 = delta_1.T @ a_0 / n_samples
        gd_of_w_0 = delta_0.T @ X / n_samples
        gd_of_b_2 = np.mean(delta_2, axis=0)
        gd_of_b_1 = np.mean(delta_1, axis=0)
        gd_of_b_0 = np.mean(delta_0, axis=0)

        w_0 = w_0 - learning_rate * gd_of_w_0
        w_1 = w_1 - learning_rate * gd_of_w_1
        w_2 = w_2 - learning_rate * gd_of_w_2
        b_0 = b_0 - learning_rate * gd_of_b_0
        b_1 = b_1 - learning_rate * gd_of_b_1
        b_2 = b_2 - learning_rate * gd_of_b_2

In [None]:
weights = [w_0, w_1, w_2, b_0, b_1, b_2]
weight_path = '/content/drive/MyDrive/weight_model/weight_{}.npy'

# Save each array to its own .npy file. Google Drive has to be mounted and the
# weight_model directory has to exist before this runs.
for i, weight in enumerate(weights):
    np.save(weight_path.format(i), weight)

# Read the files back to confirm the round trip.
# NOTE(review): allow_pickle=True is kept from the original call; these are
# plain numeric arrays, so it looks unnecessary and should not be used on
# files from an untrusted source.
loaded_weights = [np.load(weight_path.format(i), allow_pickle=True) for i in range(len(weights))]
print(loaded_weights)

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/weight_model/weight_0.npy'

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; code in this notebook reads and writes
# weight files under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
# df_test=pd.read_csv(test)
# X_test = df_test.drop(columns=['label']).values/255
# y_test = df_test['label'].values


# a_0_test=(X_test @ w_0.T+b_0)
# a_1_test=sigmoid(a_0_test @ w_1.T+b_1)
# a_2_test=sigmoid(a_1_test @ w_2.T+b_2)
# oh_test=vector_to_one_hot(y_test,10)

# predictions=np.argmax(a_2_test,axis=1)

# accuracy = np.mean(y_test == predictions)

# print("Accuracy:", accuracy*100,'%')
# test_error=(1/10000)*0.5*(np.linalg.norm(a_2_test-oh_test)**2)
# Evaluate the current weights on one randomly chosen training mini-batch.
# randint's upper bound is exclusive, so len(mini_batches) lets every batch be picked.
i=np.random.randint(0,len(mini_batches))
validate_X=mini_batches[i].drop(columns='label').values/255
validate_y=mini_batches[i]['label'].values
# The first layer goes through the sigmoid exactly as during training;
# without it the error below is measured on a different network.
validate_a_0=sigmoid(validate_X @ w_0.T+b_0)
validate_a_1=sigmoid(validate_a_0 @ w_1.T+b_1)
validate_a_2=sigmoid(validate_a_1 @ w_2.T+b_2)
validate_one_hot=vector_to_one_hot(validate_y,10)
# Half mean squared error over the batch (equals the former 1/200 factor for 100 rows).
validate_error=np.linalg.norm(validate_a_2-validate_one_hot)**2/(2*len(validate_y))
print(validate_error)


2.109665725306228
