###  PCA (Principal Component Analysis)
**Principal Component Analysis (PCA)** is a **statistical and machine learning technique** used to **reduce the number of features (dimensions)** in a dataset while **preserving as much information (variance) as possible**.
It is mainly used when data has **many features**, which makes models slow, complex, or noisy.



In [None]:
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt


# Load the Iris dataset bundled with scikit-learn.
iris = load_iris()

# Keep only the first two principal components so the data can be drawn in 2-D.
pca = PCA(n_components=2)

# Fit PCA on the feature matrix and project every sample onto those components.
iris_pca = pca.fit_transform(iris.data)

# Scatter the projected samples: first component on x, second on y,
# with each point colored by its value in iris.target.
plt.scatter(iris_pca[:, 0], iris_pca[:, 1], c=iris.target)
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title('PCA of Iris Dataset')
plt.show()


# **PCA on Tips Data**

In [None]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA

# Load the "tips" example dataset through seaborn.
df = sns.load_dataset('tips')

# Preprocessing: replace every categorical column with integer codes so the
# whole frame is numeric and can be standardized.
# NOTE: the same LabelEncoder is refit on each column, so after the loop `le`
# only holds the classes of the last column it saw.
le = LabelEncoder()
cat_feature = df.select_dtypes(include=['category']).columns
for col in cat_feature:
    df[col] = le.fit_transform(df[col])

# Standardize all columns so each one contributes on a comparable scale.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# PCA with no n_components given, so the explained-variance curve below
# covers every component that was fitted.
pca = PCA()

# Fit PCA and project the standardized data onto the components.
df_pca = pca.fit_transform(df_scaled)

# Plot the cumulative explained variance ratio. The x values are given
# explicitly as 1..n so each point lines up with the number of components
# retained (the implicit index would start at 0 and be off by one).
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
component_counts = np.arange(1, len(cumulative_variance) + 1)
plt.plot(component_counts, cumulative_variance)
plt.xticks(component_counts)
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance')
plt.show()