# Linear Regression

In [2]:
# Install libraries if needed (this step is often unnecessary in Colab)
!pip install numpy pandas matplotlib

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Optional: Print versions to verify installations
print("NumPy version:", np.__version__)
print("Pandas version:", pd.__version__)



NumPy version: 1.26.4
Pandas version: 2.1.4


In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


data = pd.read_csv('https://raw.githubusercontent.com/ClupusVT/AI_machine_learning/97a67fa8cd900213b0b5b35bb82cba9855dd3613/1st_Self_learning/data_house_price.csv').values
N = data.shape[0]
x = data[:, 0].reshape(-1, 1)
y = data[:, 1].reshape(-1, 1)
plt.scatter(x, y)
plt.xlabel('mét vuông')
plt.ylabel('giá')

x = np.hstack((np.ones((N, 1)), x))

w = np.array([0.,1.]).reshape(-1,1)

numOfIteration = 1000
cost = np.zeros((numOfIteration,1))
learning_rate = 0.00001
for i in range(1, numOfIteration):
    r = np.dot(x, w) - y
    cost[i] = 0.5*np.sum(r*r)
    w[0] -= learning_rate*np.sum(r)
    # correct the shape dimension
    w[1] -= learning_rate*np.sum(np.multiply(r, x[:,1].reshape(-1,1)))
    print(cost[i])
predict = np.dot(x, w)
plt.plot((x[0][1], x[N-1][1]),(predict[0], predict[N-1]), 'r')
plt.show()

x1 = 50
y1 = w[0] + w[1] * 50
print('Giá nhà cho 50m^2 là : ', y1)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load data
data = pd.read_csv('https://raw.githubusercontent.com/ClupusVT/AI_machine_learning/97a67fa8cd900213b0b5b35bb82cba9855dd3613/1st_Self_learning/data_house_price.csv').values
N = data.shape[0]
x = data[:, 0].reshape(-1, 1)
y = data[:, 1].reshape(-1, 1)

# Plot data
plt.scatter(x, y)
plt.xlabel('mét vuông')
plt.ylabel('giá')

# Add bias term
X = np.hstack((np.ones((N, 1)), x))

# Initialize weights
w = np.zeros((2, 1))

# Set hyperparameters
num_iterations = 1000
learning_rate = 0.00001

# Gradient Descent
for _ in range(num_iterations):
    predictions = X @ w
    errors = predictions - y
    gradients = (X.T @ errors) / N
    w -= learning_rate * gradients

    # Optional: Calculate and print cost
    cost = (errors ** 2).sum() / (2 * N)
    print(f'Cost: {cost}')

# Plot the regression line
plt.plot(x, X @ w, 'r')
plt.show()

# Prediction
x1 = 50
y1 = w[0] + w[1] * x1
print(f'Giá nhà cho {x1}m^2 là : {y1[0]}')


# **How the code optimize the linear regression** **bold text**
1. Matrix Multiplication (X @ w and X.T @ errors):
X @ w: This performs a matrix multiplication between the feature matrix X and the weight vector w. In linear algebra, this computes the predicted values for all training examples in one operation. Matrix multiplication is optimized in NumPy and allows for parallel computation, making it much faster than iterating through each data point.
X.T @ errors: This computes the gradient of the cost function with respect to the weights in one operation. By taking the dot product of the transpose of X and the errors, it efficiently calculates the gradients without needing explicit loops.
2. Avoiding Explicit Loops:
The code avoids explicit loops for updating each weight individually. Instead, it uses vectorized operations which are inherently faster due to optimized low-level implementations in NumPy. These operations are highly optimized for performance and make use of efficient memory access patterns and parallel processing.
3. Gradient Calculation:
gradients = (X.T @ errors) / N: This computes the gradient of the cost function with respect to the weights. By dividing by N, it averages the gradient over all data points. This approach ensures that the gradient calculation is vectorized and leverages the efficiency of matrix operations.
4. Weight Update:
w -= learning_rate * gradients: This performs a single operation to update all weights simultaneously. The vectorized approach ensures that weight updates are done efficiently, avoiding the overhead of multiple individual updates.
5. Cost Calculation:
cost = (errors ** 2).sum() / (2 * N): Calculates the cost in a vectorized manner. This ensures that the cost function is computed quickly by summing the squared errors over all data points in one operation.
Summary of Benefits:
Speed: Matrix operations are highly optimized for performance. NumPy's underlying implementation leverages low-level optimizations, multi-threading, and vectorization to perform operations faster than loops.
Simplicity: Vectorized operations make the code cleaner and easier to understand. It avoids the need for nested loops and manual index management.
Scalability: Matrix operations scale better with larger datasets and higher-dimensional feature spaces, making them more suitable for real-world applications.

# Logistic Regression


In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 26 13:49:07 2019

@author: DELL
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Hàm sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Load data từ file csv
data = pd.read_csv('https://raw.githubusercontent.com/nttuan8/DL_Tutorial/master/L2/dataset.csv').values
N, d = data.shape
x = data[:, 0:d-1].reshape(-1, d-1)
y = data[:, 2].reshape(-1, 1)

# Vẽ data bằng scatter
x_cho_vay = x[y[:,0]==1]
x_tu_choi = x[y[:,0]==0]

plt.scatter(x_cho_vay[:, 0], x_cho_vay[:, 1], c='red', edgecolors='none', s=30, label='cho vay')
plt.scatter(x_tu_choi[:, 0], x_tu_choi[:, 1], c='blue', edgecolors='none', s=30, label='từ chối')
plt.legend(loc=1)
plt.xlabel('mức lương (triệu)')
plt.ylabel('kinh nghiệm (năm)')

# Thêm cột 1 vào dữ liệu x
x = np.hstack((np.ones((N, 1)), x))

w = np.array([0.,0.1,0.1]).reshape(-1,1)

# Số lần lặp bước 2
numOfIteration = 1000
cost = np.zeros((numOfIteration,1))
learning_rate = 0.01

for i in range(1, numOfIteration):

	 # Tính giá trị dự đoán
    y_predict = sigmoid(np.dot(x, w))
    cost[i] = -np.sum(np.multiply(y, np.log(y_predict)) + np.multiply(1-y, np.log(1-y_predict)))
    # Gradient descent
    w = w - learning_rate * np.dot(x.T, y_predict-y)
    print(cost[i])

# Vẽ đường phân cách.
t = 0.5
plt.plot((4, 10),(-(w[0]+4*w[1]+ np.log(1/t-1))/w[2], -(w[0] + 10*w[1]+ np.log(1/t-1))/w[2]), 'g')
plt.show()

# Neural Network

In [18]:
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical  # Updated import
from keras.datasets import mnist

# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Preprocess data
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32') / 255

# Convert labels to one-hot encoding
y_train = to_categorical(y_train, 10)  # Updated function
y_test = to_categorical(y_test, 10)    # Updated function

# Build the model
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, batch_size=128, epochs=10, validation_data=(X_test, y_test))

# Evaluate the model
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 67ms/step - accuracy: 0.8306 - loss: 0.5500 - val_accuracy: 0.9723 - val_loss: 0.0885
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 63ms/step - accuracy: 0.9643 - loss: 0.1226 - val_accuracy: 0.9786 - val_loss: 0.0672
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 70ms/step - accuracy: 0.9734 - loss: 0.0887 - val_accuracy: 0.9827 - val_loss: 0.0507
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 70ms/step - accuracy: 0.9772 - loss: 0.0738 - val_accuracy: 0.9843 - val_loss: 0.0448
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 68ms/step - accuracy: 0.9819 - loss: 0.0597 - val_accuracy: 0.9843 - val_loss: 0.0456
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 72ms/step - accuracy: 0.9834 - loss: 0.0540 - val_accuracy: 0.9851 - val_loss: 0.0417
Epoch 7/10
[1m4