# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans

# Read the raw retail transactions from the CSV in the working directory.
df = pd.read_csv("retail_sales.csv")

# Parse the Date column into datetime values so the .dt accessor can be
# used on it below.
df['Date'] = pd.to_datetime(df['Date'])

# Append the derived columns in one step:
#   TotalSales - Quantity multiplied by PricePerUnit for each row
#   Month      - month number taken from the parsed Date
df = df.assign(
    TotalSales=df['Quantity'] * df['PricePerUnit'],
    Month=df['Date'].dt.month,
)

# ---------------------------
# SALES PREDICTION
# ---------------------------
# Features are the units sold and the month number; the target is the
# per-row TotalSales value.
X = df[['Quantity', 'Month']]
y = df['TotalSales']

# Hold out 20% of the rows as a test set. The fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest too: its bootstrap sampling is random, so without a
# random_state the fitted model (and therefore `predictions`) would differ
# between runs even though the train/test split is fixed.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predictions for the held-out rows.
# NOTE(review): nothing in this section compares them against y_test, so
# model quality is not reported here.
predictions = model.predict(X_test)

print("Sales Prediction Completed")

# ---------------------------
# CUSTOMER SEGMENTATION
# ---------------------------
# One row per customer with the sum of that customer's TotalSales.
customer_sales = df.groupby('CustomerID')['TotalSales'].sum().reset_index()

# Split customers into 3 groups on total spend alone.
# random_state fixes the centroid initialisation so the same customer gets
# the same cluster label on every run. n_init is pinned explicitly because
# the library default has changed across scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
customer_sales['Cluster'] = kmeans.fit_predict(customer_sales[['TotalSales']])

print("Customer Segments:")
print(customer_sales)

# ---------------------------
# VISUALIZATION
# ---------------------------
# Scatter of total sales per customer. Points are coloured by the cluster
# label assigned in the segmentation step, so the segments are visible on
# the chart instead of only in the printed table.
plt.figure()
plt.scatter(
    customer_sales['CustomerID'],
    customer_sales['TotalSales'],
    c=customer_sales['Cluster'],
)
plt.title("Customer Sales Distribution")
plt.xlabel("CustomerID")
plt.ylabel("TotalSales")
# Rotate the x tick labels to vertical.
plt.xticks(rotation=90)
plt.show()
