In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Load the raw sales records and preview the first rows before any encoding.
data = pd.read_csv("mobile_sales_data.csv")
print(data.head())

# Replace every object-dtype (text) column with integer codes.
# Each column gets its OWN LabelEncoder, kept in `label_encoders`, so the codes
# can later be mapped back with `label_encoders[col].inverse_transform(...)`
# or re-applied to new rows with `.transform(...)`. A single shared encoder is
# refit on every column and ends up remembering only the last column's labels.
label_encoders = {}
for col in data.select_dtypes(include=['object']).columns:
    # `le` stays bound to the most recently fitted encoder after the loop.
    le = LabelEncoder()
    # Cast to str first so the encoder sees uniformly typed values.
    data[col] = le.fit_transform(data[col].astype(str))
    label_encoders[col] = le

# Preview again to confirm the text columns now hold integer codes.
print(data.head())

# Target (y) and features (X).
# 'Quantity Sold' is the column being predicted; every other column is used as
# a feature. (The example this was adapted from used a "Purchased" column as
# the target instead.)
y = data["Quantity Sold"]
X = data.drop(columns="Quantity Sold")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

# ---- Decision Tree Classifier ----
# NOTE: the target, 'Quantity Sold', is numeric, so the classifier treats each
# distinct quantity as a separate class and the accuracy below only counts
# exact matches. A Decision Tree Regressor might be more appropriate for this
# target.
dt = DecisionTreeClassifier(
    criterion="entropy",
    random_state=42,
).fit(x_train, y_train)
y_dt_pred = dt.predict(x_test)

# Share of held-out rows whose predicted quantity equals the true quantity.
accuracy_dt = accuracy_score(y_test, y_dt_pred)
print("Decision Tree Accuracy:", accuracy_dt)


# ---- Linear Regression (just to compare) ----
# Fit ordinary least squares on the same training split and predict the
# held-out rows.
lrr = LinearRegression().fit(x_train, y_train)
y_lrr_pred = lrr.predict(x_test)

# Regression metrics on the test split. These are on a different scale from
# the classifier's accuracy, so compare them with care.
r2 = r2_score(y_test, y_lrr_pred)
mse = mean_squared_error(y_test, y_lrr_pred)

print("Linear Regression MSE:", mse)
print("Linear Regression R2:", r2)

        Product      Brand Product Code  \
0  Mobile Phone   Motorola     88EB4558   
1        Laptop       Oppo     416DFEEB   
2  Mobile Phone    Samsung     9F975B08   
3        Laptop       Sony     73D2A7CC   
4        Laptop  Microsoft     CCE0B80D   

                               Product Specification   Price Inward Date  \
0  Site candidate activity company there bit insi...   78570  2023-08-02   
1            Beat put care fight affect address his.   44613  2023-10-03   
2           Energy special low seven place audience.  159826  2025-03-19   
3  Friend record hard contain minute we role sea ...   20911  2024-02-06   
4   Program recently feeling save tree hotel people.   69832  2023-08-10   

  Dispatch Date  Quantity Sold  Customer Name Customer Location   Region  \
0    2023-08-03              6   William Hess      South Kelsey  Central   
1    2023-10-06              1    Larry Smith        North Lisa    South   
2    2025-03-20              5  Leah Copeland        Sou