From e4befffe0ba466366066cb77139fc441915e52f2 Mon Sep 17 00:00:00 2001
From: Saniya
Date: Mon, 19 Oct 2020 22:05:57 +0530
Subject: [PATCH] Added code related to linear algebra

---
 .../house_price.csv                           |  8 ++
 .../linear_regression.py                      | 75 +++++++++++++++++++
 .../pca_with_python.py                        | 74 ++++++++++++++++++
 3 files changed, 157 insertions(+)
 create mode 100644 linear-algebra-for-ml-and-deep-learning/house_price.csv
 create mode 100644 linear-algebra-for-ml-and-deep-learning/linear_regression.py
 create mode 100644 linear-algebra-for-ml-and-deep-learning/pca_with_python.py

diff --git a/linear-algebra-for-ml-and-deep-learning/house_price.csv b/linear-algebra-for-ml-and-deep-learning/house_price.csv
new file mode 100644
index 0000000..576933b
--- /dev/null
+++ b/linear-algebra-for-ml-and-deep-learning/house_price.csv
@@ -0,0 +1,8 @@
+square_feet,price
+150,6450
+200,7450
+250,8450
+300,9450
+350,11450
+400,15450
+600,18450
diff --git a/linear-algebra-for-ml-and-deep-learning/linear_regression.py b/linear-algebra-for-ml-and-deep-learning/linear_regression.py
new file mode 100644
index 0000000..e4c26c3
--- /dev/null
+++ b/linear-algebra-for-ml-and-deep-learning/linear_regression.py
@@ -0,0 +1,75 @@
+# import important libraries
+
+import pandas as pd
+import numpy as np
+
+df = pd.read_csv('house_price.csv')
+
+print(df.head())
+
+
+def get_mean(value):
+    """Return the arithmetic mean of the numbers in ``value``."""
+    return sum(value) / len(value)
+
+
+def get_variance(value):
+    """Return the sample variance of ``value`` (divides by ``n - 1``).
+
+    ``value`` must hold at least two elements, otherwise the ``n - 1``
+    denominator is not positive.
+    """
+    mean = get_mean(value)
+    squared_deviations = [(item - mean) ** 2 for item in value]
+    return sum(squared_deviations) / float(len(value) - 1)
+
+
+def get_covariance(value1, value2):
+    """Return the sample covariance of ``value1`` and ``value2``.
+
+    The inputs are paired by position with ``zip`` instead of being read
+    with ``value[i]``: on a pandas Series with an integer index that is a
+    label lookup, which fails once the labels are no longer 0..n-1 (for
+    example after filtering rows).
+
+    Raises:
+        ValueError: if the two inputs differ in length.
+    """
+    values_size = len(value1)
+    if len(value2) != values_size:
+        raise ValueError('value1 and value2 must have the same length')
+
+    value1_mean = get_mean(value1)
+    value2_mean = get_mean(value2)
+    covariance = sum(
+        (item1 - value1_mean) * (item2 - value2_mean)
+        for item1, item2 in zip(value1, value2)
+    )
+    return covariance / float(values_size - 1)
+
+
+def linear_regression(df):
+    """Fit ``price = w0 + w1 * square_feet`` by ordinary least squares.
+
+    Side effect: the fitted values are stored in a new
+    ``'price (prediction)'`` column of ``df``; that column is returned.
+    """
+    X = df['square_feet']
+    Y = df['price']
+
+    square_feet_mean = get_mean(X)
+    price_mean = get_mean(Y)
+
+    # slope = covariance(X, Y) / variance(X)
+    w1 = get_covariance(X, Y) / float(get_variance(X))
+    # intercept: the fitted line passes through (mean(X), mean(Y))
+    w0 = price_mean - w1 * square_feet_mean
+
+    # prediction --> Linear Equation
+    df['price (prediction)'] = w0 + w1 * X
+    return df['price (prediction)']
+
+
+predicted = linear_regression(df)
+
+print(predicted)
diff --git a/linear-algebra-for-ml-and-deep-learning/pca_with_python.py b/linear-algebra-for-ml-and-deep-learning/pca_with_python.py
new file mode 100644
index 0000000..28fe1a3
--- /dev/null
+++ b/linear-algebra-for-ml-and-deep-learning/pca_with_python.py
@@ -0,0 +1,74 @@
+# Import important libraries
+import numpy as np
+import pylab as plt
+import pandas as pd
+from sklearn import datasets
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
+
+load_iris = datasets.load_iris()
+# feature_names is already a flat list; wrapping it in another list would
+# build MultiIndex columns instead of plain column labels.
+iris_df = pd.DataFrame(load_iris.data, columns=load_iris.feature_names)
+
+print(iris_df.head())
+
+print(load_iris.data.shape)
+
+# Standardize every feature before computing the covariance matrix
+standardized_x = StandardScaler().fit_transform(load_iris.data)
+print(standardized_x[:2])
+
+print(standardized_x.T)
+
+# np.cov expects one variable per row, hence the transpose
+covariance_matrix_x = np.cov(standardized_x.T)
+print(covariance_matrix_x)
+
+eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix_x)
+
+print(eigenvalues)
+
+print(eigenvectors)
+
+# Percentage of the total variance carried by each component, largest first
+total_of_eigenvalues = sum(eigenvalues)
+explained_variance = [
+    (value / total_of_eigenvalues) * 100
+    for value in sorted(eigenvalues, reverse=True)
+]
+
+print(explained_variance)
+
+# np.linalg.eig returns eigenvectors as columns, i.e. rows of the transpose
+eigenpairs = [
+    (np.abs(value), vector)
+    for value, vector in zip(eigenvalues, eigenvectors.T)
+]
+
+# Sorting from Higher values to lower value
+eigenpairs.sort(key=lambda x: x[0], reverse=True)
+print(eigenpairs)
+
+# reshape(-1, 1) turns each eigenvector into a column whatever the number
+# of features, instead of hard-coding 4.
+matrix_weighing = np.hstack((eigenpairs[0][1].reshape(-1, 1),
+                             eigenpairs[1][1].reshape(-1, 1)))
+print(matrix_weighing)
+
+# Project the standardized data onto the two leading components
+Y = standardized_x.dot(matrix_weighing)
+print(Y)
+
+plt.figure()
+target_names = load_iris.target_names
+y = load_iris.target
+for color, (label, target_name) in zip("rgb", enumerate(target_names)):
+    plt.scatter(Y[y == label, 0], Y[y == label, 1],
+                c=color, label=target_name)
+
+plt.xlabel('PCA 1')
+plt.ylabel('PCA 2')
+plt.legend()
+plt.title('PCA')
+plt.show()