In [None]:
# Problem Statement
# NOTE(review): the text below is a bare string expression, not a comment;
# it has no effect on the analysis. Narrative like this normally belongs in
# a Markdown cell rather than a code cell.
'''The company wants to optimize its marketing strategy by understanding
the purchasing behavior of customers across various product categories. 
However, the dataset is high-dimensional, making it challenging to analyze 
effectively. The goal is to apply Principal Component Analysis (PCA) to 
reduce the dimensionality of the data while retaining the most critical
information. This will help the company gain insights into the primary
factors influencing customer purchasing behavior and improve the 
effectiveness of targeted marketing campaigns.'''
# Solution: the cells below standardize the data and apply
# Principal Component Analysis (PCA).

In [2]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [3]:
# Seed NumPy's global generator so the synthetic data set is reproducible.
np.random.seed(42)
# Synthetic sample: 100 rows x 5 columns, uniformly distributed in [5, 15).
data = 5 + 10 * np.random.rand(100, 5)

In [4]:
# One label per column of `data`; the order here fixes the feature order
# used by every later step.
columns = [
    'Product A',
    'Product B',
    'Product C',
    'Product D',
    'Product E',
]
# Wrap the raw array in a labelled DataFrame.
df = pd.DataFrame(data=data, columns=columns)

In [12]:
# Standardize each product column before PCA. The fitted scaler is kept so
# that new observations can later be transformed with the same statistics.
scaler = StandardScaler()
scaled_data = scaler.fit(df).transform(df)


In [7]:
# Fit PCA on the standardized data and project it onto the leading components.
num_components = 2  # number of principal components to keep
pca = PCA(n_components=num_components)
principal_components = pca.fit_transform(scaled_data)

In [9]:
# Read the per-component explained-variance ratios from the fitted PCA
# (one entry per retained component) and print them.
explained_variance_ratio_ = pca.explained_variance_ratio_
print(explained_variance_ratio_)

[0.29005782 0.23214006]


In [10]:
# Build one column label per retained component (PC1, PC2, ...). The labels
# are derived from the width of the projected array rather than hard-coded,
# so this cell keeps working if the number of components is changed.
pc_labels = [f"PC{i}" for i in range(1, principal_components.shape[1] + 1)]
principal_df = pd.DataFrame(data=principal_components, columns=pc_labels)
print("Principal Components: ")
# Show only the first rows; the full frame has one row per sample.
print(principal_df.head())

Principal Components: 
        PC1       PC2
0 -1.485848 -0.105171
1 -0.090878 -1.465542
2 -2.802347 -0.297502
3 -0.549662 -0.022572
4  0.846820  0.759975


In [13]:
# user input: a single observation with one value per product column,
# given in the same column order as `df`
user_input = np.array([[8, 6, 4, 7, 9]])
# The scaler was fitted on a DataFrame, so it recorded the feature names.
# Passing a bare array makes scikit-learn warn that X has no valid feature
# names; wrapping the input with the training columns avoids the warning and
# makes the expected column order explicit.
user_input_df = pd.DataFrame(user_input, columns=df.columns)
# Reuse the already-fitted scaler and PCA (transform only, no refitting) so
# the new point lands in the same component space as the training data.
scaled_user_input = scaler.transform(user_input_df)
user_principal_components = pca.transform(scaled_user_input)



In [15]:
# Display the principal-component coordinates of the user input.
# Column labels (PC1, PC2, ...) are derived from the width of the projected
# array so they always match the number of components the PCA produced.
user_pc_labels = [
    f"PC{i}" for i in range(1, user_principal_components.shape[1] + 1)
]
user_principal_df = pd.DataFrame(data=user_principal_components, columns=user_pc_labels)
print(user_principal_df)

        PC1       PC2
0  0.804304  0.986471
