In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the CSV and re-key it by a datetime 'Date' index built from 'Month'.
file_path = "shampoo_sales.csv"

# NOTE(review): "%m-%y" reads each 'Month' value as month, then two-digit year.
# Confirm against the CSV that the values are month-year and not year-month;
# a mismatch would parse without error but give a non-chronological index.
shampoo_data = (
    pd.read_csv(file_path)
    .assign(Date=lambda frame: pd.to_datetime(frame['Month'], format="%m-%y"))
    .set_index('Date')
    .drop(columns=['Month'])
)
shampoo_data.head()

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted an extra "None" line.
shampoo_data.info()
# Descriptive statistics for the columns of the frame.
print(shampoo_data.describe())

In [None]:
# Line chart of the 'Sales' column against the frame's index, with a marker
# on every observation.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(shampoo_data['Sales'], marker='o', linestyle='-')
ax.set_title("Shampoo Sales Over Time")
plt.show()

In [None]:
# Scatter of 'Sales' against the index, drawn on the current axes (created on
# demand when none exists).
ax = plt.gca()
ax.scatter(shampoo_data.index, shampoo_data['Sales'])
ax.set_title("Scatter Plot of Shampoo Sales")
plt.show()

In [None]:
# Draw the autocorrelation plot, then the partial autocorrelation plot, of the
# 'Sales' column; each is shown as its own figure.
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(shampoo_data['Sales'])
    plt.show()

In [None]:
# 20-bin histogram of 'Sales' with a KDE curve overlaid and black bar edges.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(shampoo_data['Sales'], bins=20, kde=True, edgecolor='black', ax=ax)
ax.set_title("Histogram of Shampoo Sales")
plt.show()

In [None]:
# Filled kernel-density estimate of 'Sales'; the title is set on the Axes that
# seaborn returns.
density_ax = sns.kdeplot(shampoo_data['Sales'], fill=True)
density_ax.set_title("Density Plot of Shampoo Sales")
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between the frame's columns.
# NOTE(review): if 'Sales' is the only remaining column, this is a 1x1 matrix
# containing 1.0 -- confirm the CSV has other numeric columns.
fig, ax = plt.subplots(figsize=(8, 6))
correlations = shampoo_data.corr()
sns.heatmap(correlations, annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# Resample to daily frequency and fill the newly created rows by linear
# interpolation between the existing observations.
upsampled = (
    shampoo_data
    .resample('D')
    .interpolate(method='linear')
)

# Plot the daily 'Sales' series.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(upsampled['Sales'], label='Linear Interpolation', color='blue')
ax.set_title("Upsampled Data (Daily)")
ax.legend()
plt.show()

In [None]:
# Resample to daily frequency, this time filling the new rows with the
# 'quadratic' interpolation method.
upsampled_quadratic = (
    shampoo_data
    .resample('D')
    .interpolate(method='quadratic')
)

# Plot the daily 'Sales' series.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(upsampled_quadratic['Sales'], label='Quadratic Interpolation', color='red')
ax.set_title("Upsampled Data (Quadratic)")
ax.legend()
plt.show()

In [None]:
def adf_test(series, alpha=0.05):
    """Run the Augmented Dickey-Fuller test on a series and print the outcome.

    Missing values are dropped before testing. The statistic and p-value are
    printed, followed by "Stationary" when the p-value is below ``alpha`` and
    "Non-Stationary" otherwise.

    Args:
        series: Object exposing ``dropna()`` (e.g. a pandas Series) holding the
            observations to test.
        alpha: Significance level used for the printed verdict. Defaults to
            0.05, the threshold that was previously hard-coded.

    Returns:
        None. Results are written to stdout only.
    """
    # Only the first two entries of the adfuller result are reported.
    adf_stat, p_value = adfuller(series.dropna())[:2]
    print("ADF Statistic:", adf_stat)
    print("p-value:", p_value)
    print("Stationary" if p_value < alpha else "Non-Stationary")

# Print a header (preceded by a blank line), then the ADF report for 'Sales'.
print("\nADF Test Results:")
adf_test(series=shampoo_data['Sales'])

def kpss_test(series, alpha=0.05, regression='c'):
    """Run the KPSS test on a series and print the outcome.

    Missing values are dropped before testing. The statistic and p-value are
    printed, followed by "Stationary" when the p-value is above ``alpha`` and
    "Non-Stationary" otherwise (the comparison is the reverse of ``adf_test``).

    Args:
        series: Object exposing ``dropna()`` (e.g. a pandas Series) holding the
            observations to test.
        alpha: Significance level used for the printed verdict. Defaults to
            0.05, the threshold that was previously hard-coded.
        regression: Passed through to ``kpss`` as its ``regression`` argument.
            Defaults to 'c', the value that was previously hard-coded.

    Returns:
        None. Results are written to stdout only.
    """
    # NOTE(review): per the statsmodels documentation the KPSS p-value is
    # looked up from a table and bounded to [0.01, 0.1], so an ``alpha``
    # outside that interval cannot give a meaningful verdict -- confirm
    # against the installed statsmodels version.
    kpss_stat, p_value = kpss(series.dropna(), regression=regression)[:2]
    print("KPSS Statistic:", kpss_stat)
    print("p-value:", p_value)
    print("Stationary" if p_value > alpha else "Non-Stationary")

# Print a header (preceded by a blank line), then the KPSS report for 'Sales'.
print("\nKPSS Test Results:")
kpss_test(series=shampoo_data['Sales'])