 # WEEK-7 (Feature Engineering/Preprocessing)

#### 1. Create a vector (array) of 1XN dimension representing N-dimensional feature vector of a sample. Write a program to compute the mean and variance of the elements present in the array.

In [1]:
### population variance formula - \sigma^2 = \frac{1}{N} \sum_{i=1}^{N} (x_i - \bar{x})^2

### sample variance formula - s^2 = \frac{1}{N-1} \sum_{i=1}^{N} (x_i - \bar{x})^2

import numpy as np

def compute_mean(array):
    """Return the arithmetic mean of all elements of a feature vector.

    Parameters
    ----------
    array : array_like
        Numeric values, e.g. a flat length-N sequence or a 1xN row vector.

    Returns
    -------
    numpy.float64
        Sum of the elements divided by the number of elements.

    Raises
    ------
    ValueError
        If ``array`` contains no elements.
    """
    values = np.asarray(array, dtype=float)
    if values.size == 0:
        raise ValueError("cannot compute the mean of an empty array")
    # Divide by the element count rather than len(): len() of a (1, N) row
    # vector is 1, which would turn the "mean" into the plain sum.
    return np.sum(values) / values.size

def compute_sample_variance(array):
    """Return the unbiased sample variance of a feature vector.

    The sum of squared deviations from the mean is divided by ``N - 1``,
    where ``N`` is the number of elements.

    Parameters
    ----------
    array : array_like
        Numeric values, e.g. a flat length-N sequence or a 1xN row vector.

    Returns
    -------
    numpy.float64
        The sample variance.

    Raises
    ------
    ValueError
        If ``array`` has fewer than two elements, because the ``N - 1``
        divisor would be zero (or negative).
    """
    values = np.asarray(array, dtype=float)
    count = values.size
    if count < 2:
        raise ValueError("sample variance requires at least two elements")
    # The mean is computed from the element count so that a (1, N) row
    # vector is handled the same way as a flat length-N vector.
    mean = np.sum(values) / count
    squared_diff = np.sum((values - mean) ** 2)
    return squared_diff / (count - 1)

def compute_population_variance(array):
    """Return the population variance of a feature vector.

    The sum of squared deviations from the mean is divided by ``N``, the
    number of elements.

    Parameters
    ----------
    array : array_like
        Numeric values, e.g. a flat length-N sequence or a 1xN row vector.

    Returns
    -------
    numpy.float64
        The population variance.

    Raises
    ------
    ValueError
        If ``array`` contains no elements.
    """
    values = np.asarray(array, dtype=float)
    count = values.size
    if count == 0:
        raise ValueError("population variance requires at least one element")
    # Use the element count rather than len() so that a (1, N) row vector
    # is not mistaken for a single-element input.
    mean = np.sum(values) / count
    squared_diff = np.sum((values - mean) ** 2)
    return squared_diff / count

def main():
    """Prompt for an N-element vector and print its mean and variances.

    Reads the dimension N and then N values from standard input, one value
    per prompt, and prints the vector, its mean, its sample variance and its
    population variance. A non-positive dimension is rejected with an error
    message instead of being passed on to the computations.
    """
    N = int(input("Enter the dimension of the vector: "))
    if N < 1:
        # np.zeros rejects negative sizes and an empty vector has no mean.
        print("Error: The dimension must be a positive integer.")
        return

    sample_vector = np.zeros(N)
    for i in range(N):
        sample_vector[i] = float(input(f"Enter element {i+1}: "))

    print("Sample Vector:", sample_vector)
    mean = compute_mean(sample_vector)
    population_variance = compute_population_variance(sample_vector)

    print("Mean:", mean)
    if N > 1:
        print("Sample Variance:", compute_sample_variance(sample_vector))
    else:
        # The N - 1 divisor is zero for a single element, so there is no
        # sample variance to report.
        print("Sample Variance: undefined (requires at least 2 elements)")
    print("Population Variance:", population_variance)

# Start the interactive prompts only when this cell/script is executed
# directly, not when the code is imported as a module.
if __name__ == "__main__":
    main()


Enter the dimension of the vector: 5
Enter element 1: 1
Enter element 2: 3
Enter element 3: 6
Enter element 4: 8
Enter element 5: 9
Sample Vector: [1. 3. 6. 8. 9.]
Mean: 5.4
Sample Variance: 11.3
Population Variance: 9.040000000000001


#### 2. Create two vectors, each of dimension 1×N and each representing the N-dimensional feature vector of a sample. Write a program to compute the covariance between them.

In [8]:
### cov(X, Y) = Σ((Xi - X̄) * (Yi - Ȳ)) / (n - 1)


import numpy as np

def compute_mean(array):
    """Return the arithmetic mean of all elements of ``array``.

    Parameters
    ----------
    array : array_like
        Numeric values, e.g. a flat length-N sequence or a 1xN row vector.

    Returns
    -------
    numpy.float64
        Sum of the elements divided by the number of elements.

    Raises
    ------
    ValueError
        If ``array`` contains no elements.
    """
    values = np.asarray(array, dtype=float)
    if values.size == 0:
        raise ValueError("cannot compute the mean of an empty array")
    # The element count is used instead of len(), which is 1 for a (1, N)
    # row vector and would make the result equal to the sum.
    return np.sum(values) / values.size

def compute_covariance(vector1, vector2):
    """Return the sample covariance of two equal-length vectors.

    Computes ``sum((x_i - mean(x)) * (y_i - mean(y))) / (n - 1)``.

    Parameters
    ----------
    vector1, vector2 : array_like
        Numeric vectors with the same number of elements. Row or column
        vectors are flattened before use.

    Returns
    -------
    numpy.float64
        The sample covariance.

    Raises
    ------
    ValueError
        If the vectors differ in length, or have fewer than two elements
        (the ``n - 1`` divisor would be zero).
    """
    x = np.asarray(vector1, dtype=float).ravel()
    y = np.asarray(vector2, dtype=float).ravel()
    if x.size != y.size:
        # Without this check NumPy would broadcast a length-1 vector against
        # a longer one and return a meaningless number instead of failing.
        raise ValueError(
            f"vectors must have the same length, got {x.size} and {y.size}"
        )
    if x.size < 2:
        raise ValueError("sample covariance requires at least two elements")

    mean1 = np.sum(x) / x.size
    mean2 = np.sum(y) / y.size
    return np.sum((x - mean1) * (y - mean2)) / (x.size - 1)

def main():
    """Read two vectors from standard input and print their covariance.

    Prompts for the dimension of each vector, then for each vector as one
    line of whitespace-separated numbers. The covariance is only computed
    when both dimensions agree, are at least 2, and match the number of
    values actually entered; otherwise an error message is printed.
    """
    M = int(input("Enter the dimension of vector 1: "))
    N = int(input("Enter the dimension of vector 2: "))

    # Covariance pairs the elements up, so the two dimensions must agree.
    if M != N:
        print("Error: Both vectors must have the same dimension.")
        return
    if M < 2:
        # The n - 1 divisor is zero for fewer than two elements.
        print("Error: At least 2 elements are required to compute the covariance.")
        return

    vector1 = np.array(input(f"Enter {M} elements for vector 1: ").split(), dtype=float)

    vector2 = np.array(input(f"Enter {N} elements for vector 2: ").split(), dtype=float)

    # Make sure the number of values typed matches the announced dimension.
    if len(vector1) != M or len(vector2) != N:
        print(f"Error: Vectors must be of length {M}.")
        return

    print("Vector 1:", vector1)
    print("Vector 2:", vector2)

    covariance = compute_covariance(vector1, vector2)

    print("Covariance between Vector 1 and Vector 2:", covariance)

# Run the covariance prompt only when this cell/script is executed directly,
# not when the code is imported as a module.
if __name__ == "__main__":
    main()


Enter the dimension of vector 1: 3
Enter the dimension of vector 2: 3
Enter 3 elements for vector 1: 2 4 6
Enter 3 elements for vector 2: 7 5 3
Vector 1: [2. 4. 6.]
Vector 2: [7. 5. 3.]
Covariance between Vector 1 and Vector 2: -4.0


#### 3. Create two vectors, each of dimension 1×N. Write a program to compute the correlation between them.

In [15]:
### r = Σ((Xi - X̄) * (Yi - Ȳ)) / √(Σ(Xi - X̄)² * Σ(Yi - Ȳ)²)


import numpy as np

def compute_mean(array):
    """Return the arithmetic mean of the elements of ``array``.

    Parameters
    ----------
    array : array_like
        Numeric values, e.g. a flat length-N sequence or a 1xN row vector.

    Returns
    -------
    numpy.float64
        Sum of the elements divided by the number of elements.

    Raises
    ------
    ValueError
        If ``array`` contains no elements.
    """
    values = np.asarray(array, dtype=float)
    if values.size == 0:
        raise ValueError("cannot compute the mean of an empty array")
    # len() counts only the first axis (1 for a (1, N) row vector), so the
    # total element count is used as the divisor instead.
    return np.sum(values) / values.size

def compute_correlation(vector1, vector2):
    """Return the Pearson correlation coefficient of two equal-length vectors.

    Computes ``sum(dx * dy) / (sqrt(sum(dx**2)) * sqrt(sum(dy**2)))`` where
    ``dx`` and ``dy`` are the deviations of each vector from its own mean.

    Parameters
    ----------
    vector1, vector2 : array_like
        Numeric vectors with the same number of elements. Row or column
        vectors are flattened before use.

    Returns
    -------
    float
        The correlation coefficient, or ``nan`` when it is undefined
        because at least one vector has zero spread (constant, single
        element or empty).

    Raises
    ------
    ValueError
        If the vectors differ in length.
    """
    x = np.asarray(vector1, dtype=float).ravel()
    y = np.asarray(vector2, dtype=float).ravel()
    if x.size != y.size:
        # Without this check NumPy would broadcast a length-1 vector against
        # a longer one and return a meaningless number instead of failing.
        raise ValueError(
            f"vectors must have the same length, got {x.size} and {y.size}"
        )
    if x.size == 0:
        return float("nan")

    dx = x - np.sum(x) / x.size
    dy = y - np.sum(y) / y.size
    # The 1/n factors of the covariance and of the two standard deviations
    # cancel, so the raw sums can be used directly.
    spread = np.sqrt(np.sum(dx ** 2)) * np.sqrt(np.sum(dy ** 2))
    if spread == 0:
        # A vector without variation has no defined correlation; report nan
        # explicitly instead of dividing zero by zero.
        return float("nan")
    return np.sum(dx * dy) / spread

def main():
    """Read two N-element vectors from standard input and print their correlation.

    Prompts for the common dimension N, then for each vector as one line of
    whitespace-separated numbers. If either vector does not contain exactly
    N values, an error message is printed and nothing is computed.
    """
    N = int(input("Enter the dimension of the vectors: "))

    # Each vector is typed on a single line; split() tokenises on whitespace.
    first_line = input(f"Enter {N} elements for vector 1: ")
    vector1 = np.array(first_line.split(), dtype=float)

    second_line = input(f"Enter {N} elements for vector 2: ")
    vector2 = np.array(second_line.split(), dtype=float)

    lengths_ok = len(vector1) == N and len(vector2) == N
    if not lengths_ok:
        print(f"Error: Vectors must be of length {N}.")
        return

    print("Vector 1:", vector1)
    print("Vector 2:", vector2)
    print(
        "Correlation between Vector 1 and Vector 2:",
        compute_correlation(vector1, vector2),
    )

# Run the correlation prompt only when this cell/script is executed
# directly, not when the code is imported as a module.
if __name__ == "__main__":
    main()


Enter the dimension of the vectors: 4 
Enter 4 elements for vector 1: 3 4 5 6
Enter 4 elements for vector 2: 4 2 5 8
Vector 1: [3. 4. 5. 6.]
Vector 2: [4. 2. 5. 8.]
Correlation between Vector 1 and Vector 2: 0.7745966692414833


#### 4. Create a matrix of dimension M×N representing the M-dimensional feature vectors of N samples, i.e., the (i, j)th entry of the matrix is the ith feature of the jth sample. Write a program to compute the covariance matrix and the correlation matrix. Comment on the takeaways from these matrices.

In [14]:
import numpy as np

# 3x3 integer example matrix used by the covariance/correlation demo below.
data = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [9, 8, 7],
    ]
)

def compute_covariance(data, rowvar=False):
    """Return the sample covariance matrix of a feature matrix.

    Parameters
    ----------
    data : array_like
        Numeric matrix; intended for 2-D input.
    rowvar : bool, optional
        Orientation of ``data``. With the default ``False`` every column is
        one feature and every row one sample, which is how this function
        has always interpreted its input. Pass ``True`` when every row is a
        feature and every column a sample (the "(i, j) = i-th feature of
        j-th sample" layout).

    Returns
    -------
    numpy.ndarray
        Covariance matrix with one row and one column per feature,
        normalised by ``number_of_samples - 1`` (the ``np.cov`` default).
    """
    values = np.asarray(data, dtype=float)
    # np.cov expects one feature per row, so the column-feature layout is
    # transposed first.
    if not rowvar:
        values = values.T
    # np.cov subtracts each feature's mean itself; centering the data
    # beforehand is redundant.
    return np.cov(values)

def compute_correlation(data):
    """Return the correlation matrix derived from ``compute_covariance(data)``.

    Every covariance entry (i, j) is divided by the product of the standard
    deviations of features i and j, where the standard deviations are the
    square roots of the covariance matrix's diagonal.
    """
    cov = compute_covariance(data)
    # The diagonal of a covariance matrix holds the per-feature variances.
    sigma = np.diag(cov) ** 0.5
    # scale[i, j] == sigma[i] * sigma[j], built by broadcasting a column
    # against a row.
    scale = sigma[:, np.newaxis] * sigma[np.newaxis, :]
    return cov / scale

# Show the input first so the matrices printed below can be read against it.
print("Sample data matrix:", data, sep="\n")

# Covariance: the diagonal holds each feature's variance, the off-diagonal
# entries show how two features vary together (sign = direction).
covariance_matrix = compute_covariance(data)
print("\nCovariance matrix:", covariance_matrix, sep="\n")

# Correlation: the covariance rescaled by the standard deviations, so the
# diagonal is 1 and the entries are comparable across feature pairs.
correlation_matrix = compute_correlation(data)
print("\nCorrelation matrix:", correlation_matrix, sep="\n")


Sample data matrix:
[[1 2 3]
 [4 5 6]
 [9 8 7]]

Covariance matrix:
[[16.33333333 12.          7.66666667]
 [12.          9.          6.        ]
 [ 7.66666667  6.          4.33333333]]

Correlation matrix:
[[1.         0.98974332 0.91129318]
 [0.98974332 1.         0.96076892]
 [0.91129318 0.96076892 1.        ]]
