In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.preprocessing import StandardScaler

In [None]:
# Data: load the demand table, index it by ID and clean it.
# NOTE(review): the filename "PoductDemand.csv" looks like a typo of
# "ProductDemand.csv" — confirm against the file on disk before renaming.

data = pd.read_csv("PoductDemand.csv").set_index("ID")
data.index.name = None  # drop the "ID" label so it does not show up in displays

# Fill missing "Total Price" entries with the column mean, then remove rows
# whose column values are exact duplicates of an earlier row.
data = data.fillna({"Total Price": data["Total Price"].mean()}).drop_duplicates()

In [None]:
# Plot: pairwise relationships between all columns.
#
# sns.pairplot is a figure-level function that builds its own figure, so no
# plt.figure() call precedes it (that would only leave an empty figure
# behind). Grid lines are enabled on every panel explicitly, because
# plt.grid() would only affect whichever axes happens to be current.

pair_grid = sns.pairplot(data)
for panel in pair_grid.axes.flat:
    panel.grid(True)
plt.show()

In [None]:
# Heatmap: pairwise correlation between the columns of the cleaned data.

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(data.corr(), ax=ax)
ax.set_title("Correlation")
plt.show()

In [None]:
# Regplot: scatter of "Total Price" against "Base Price" with a fitted
# regression line.

fig, ax = plt.subplots(figsize=(8, 6))
sns.regplot(data=data, x="Base Price", y="Total Price", ax=ax)
ax.set_title("Regression")
plt.show()

In [None]:
# Boxplot: one box per column of the cleaned data, to eyeball spread and
# outliers.

fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=data, ax=ax)
ax.set_title("Boxplot")
plt.show()

In [None]:
# Histogram: overlaid value distributions of every column of the cleaned data.

fig, ax = plt.subplots(figsize=(12, 8))
sns.histplot(data=data, ax=ax)
ax.set_title("Histogram")
plt.show()

In [None]:
# Standardization: rescale every column with StandardScaler and keep the
# original column names on the resulting frame.

scaler = StandardScaler().fit(data)
scaled_data = pd.DataFrame(data=scaler.transform(data), columns=data.columns)

In [None]:
# Comparison: histograms of the data before (left) and after (right)
# standardization.
#
# figsize is (width, height): with two panels in a single row the canvas is
# made wider than tall so each panel gets a readable width.

fig, ax = plt.subplots(1, 2, figsize=(12, 6))

sns.histplot(data, ax=ax[0])
ax[0].set_title("Histogram")

sns.histplot(scaled_data, ax=ax[1])
ax[1].set_title("Histogram after Standardization")

plt.show()

In [None]:
# Decomposition: project the feature columns (everything except the
# "Units Sold" target) onto their first two principal components.
#
# The resulting frame keeps the row labels of `data`, so df_pca stays
# label-aligned with data["Units Sold"]; without `index=` it would get a
# fresh 0..n-1 index and any label-based pandas operation combining the two
# would silently misalign rows.
#
# NOTE(review): PCA is fitted on the unscaled features even though a
# standardized copy (scaled_data) exists; PCA is sensitive to feature scale —
# confirm this is intended.

features = data.drop("Units Sold", axis=1)

pca = PCA(n_components=2)
df_pca = pd.DataFrame(
    pca.fit_transform(features),
    columns=["PC1", "PC2"],
    index=features.index,
)

df_pca.head()

In [None]:
# Heatmap: correlation between the two principal components.

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(df_pca.corr(), cmap="viridis", ax=ax)
ax.set_title("Correlation after PCA")
plt.show()

In [None]:
# Regplot: PC1 against PC2 with a fitted regression line.

fig, ax = plt.subplots(figsize=(8, 6))
sns.regplot(data=df_pca, x="PC2", y="PC1", ax=ax)
ax.set_title("No correlation regression plot")
plt.show()

In [None]:
# Boxplot: one box per principal component.

fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df_pca, ax=ax)
ax.set_title("Boxplot after PCA")
plt.show()

In [None]:
# Histogram: overlaid value distributions of the two principal components.

fig, ax = plt.subplots(figsize=(12, 8))
sns.histplot(data=df_pca, ax=ax)
ax.set_title("Histogram after PCA")
plt.show()

In [None]:
# Standardization: rescale the principal components with a new
# StandardScaler (this rebinds `scaler`, replacing the one fitted on `data`).

scaler = StandardScaler().fit(df_pca)
scaled_pca = pd.DataFrame(data=scaler.transform(df_pca), columns=df_pca.columns)

In [None]:
# Comparison: histograms of the principal components before (left) and
# after (right) standardization.
#
# The left panel must plot the unscaled df_pca; plotting scaled_pca in both
# panels would show the same picture twice and hide the effect of scaling.

fig, ax = plt.subplots(1, 2, figsize=(12, 6))

sns.histplot(df_pca, ax=ax[0])
ax[0].set_title("Histogram")

sns.histplot(scaled_pca, ax=ax[1])
ax[1].set_title("Histogram after Standardization")

plt.show()

In [None]:
# Model: linear regression of "Units Sold" on the two principal components,
# trained on 80% of the rows with the remaining 20% held out for evaluation.
# NOTE(review): the PCA behind df_pca was fitted on all rows before this
# split, so the held-out rows influenced the components — confirm acceptable.

x, y = df_pca, data["Units Sold"]

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

model = LinearRegression()
model.fit(x_train, y_train)

In [None]:
# Evaluation: score the fitted model on the held-out rows using RMSE and R2.

y_pred = model.predict(x_test)

r2 = r2_score(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)

print(f"Root Mean Squared Error: {rmse:.2f}\nR2 Score: {r2:.2f}")