In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Load the raw sales data (path is relative to the current working directory).
data = pd.read_excel('10000_sales_data.xlsx')

# Predictors and the class label to predict. `.copy()` yields an independent
# frame, so the column assignments below never write back into `data`.
features = data[['Season', 'Category', 'Price', 'Age', 'Rating']].copy()
target = data['Style Attributes']

# Encode the categorical columns as integer codes.
# - One encoder per column, so every fitted mapping stays available afterwards
#   (e.g. for inverse_transform) instead of being overwritten by the next fit.
# - Plain column assignment replaces the column, so the encoded columns take
#   the integer dtype of the codes; `.loc[:, col] = ...` may instead write in
#   place and leave the column as object dtype, depending on the pandas version.
season_encoder = LabelEncoder()
category_encoder = LabelEncoder()
label_encoder = LabelEncoder()  # target encoder; name kept for any later use
features['Season'] = season_encoder.fit_transform(features['Season'])
features['Category'] = category_encoder.fit_transform(features['Category'])
target_encoded = label_encoder.fit_transform(target)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features, target_encoded, test_size=0.2, random_state=42
)

# Fit a gradient-boosting classifier with default hyperparameters.
classifier = GradientBoostingClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Score on the held-out rows and report accuracy as a percentage.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy*100)


Accuracy: 9.85


In [None]:
# Split the dataset by age range and run a gradient-boosting classifier on an age group to check whether prediction accuracy improves.
# Specific example: predicting the style attribute from the young-customer data only.

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Load the raw sales data (path is relative to the current working directory).
data = pd.read_excel('10000_sales_data.xlsx')

# Bucket customers by age. With right=False the intervals are half-open:
# [0, 25) -> Young, [25, 50) -> Middle-aged, [50, 100) -> Senior.
# Ages outside [0, 100) get a missing 'Age Group' and so match no group.
bins = [0, 25, 50, 100]
labels = ['Young', 'Middle-aged', 'Senior']
data['Age Group'] = pd.cut(data['Age'], bins=bins, labels=labels, right=False)
young_data = data[data['Age Group'] == 'Young']

# Predictors and class label for the young subset. 'Age' is left out of the
# predictors here. `.copy()` yields an independent frame, so the column
# assignments below never write back into `young_data`.
features_young = young_data[['Season', 'Category', 'Price', 'Rating']].copy()
target_young = young_data['Style Attributes']

# Encode the categorical columns as integer codes.
# - One encoder per column, so every fitted mapping stays available afterwards
#   (e.g. for inverse_transform) instead of being overwritten by the next fit.
# - Plain column assignment replaces the column, so the encoded columns take
#   the integer dtype of the codes; `.loc[:, col] = ...` may instead write in
#   place and leave the column as object dtype, depending on the pandas version.
season_encoder = LabelEncoder()
category_encoder = LabelEncoder()
label_encoder = LabelEncoder()  # target encoder; name kept for any later use
features_young['Season'] = season_encoder.fit_transform(features_young['Season'])
features_young['Category'] = category_encoder.fit_transform(features_young['Category'])
target_young_encoded = label_encoder.fit_transform(target_young)

# Hold out 20% of the young-customer rows; the fixed seed keeps the split reproducible.
X_train_young, X_test_young, y_train_young, y_test_young = train_test_split(
    features_young, target_young_encoded, test_size=0.2, random_state=42
)

# Fit a gradient-boosting classifier with default hyperparameters.
classifier_young = GradientBoostingClassifier(random_state=42)
classifier_young.fit(X_train_young, y_train_young)

# Score on the held-out rows and report accuracy as a percentage.
y_pred_young = classifier_young.predict(X_test_young)
accuracy_young = accuracy_score(y_test_young, y_pred_young)
print("Accuracy:", accuracy_young*100)


Accuracy: 10.069444444444445


In [None]:
# Split the dataset by season and run a gradient-boosting classifier on a single season to check whether prediction accuracy improves.
# Specific example: predicting the style attribute from the Fall/Winter rows only.

In [15]:

# NOTE: this cell has no imports of its own; it relies on pd, LabelEncoder,
# train_test_split, GradientBoostingClassifier and accuracy_score having been
# imported by an earlier cell.

# Load the raw sales data (path is relative to the current working directory)
# and keep only the rows whose Season is exactly 'Fall/Winter'.
data = pd.read_excel('10000_sales_data.xlsx')
season_data = data[data['Season'] == 'Fall/Winter']

# Predictors and class label for the seasonal subset. 'Season' is constant
# after the filter, so it is not used as a predictor. `.copy()` yields an
# independent frame, so the assignment below never writes back into `season_data`.
features = season_data[['Age', 'Price', 'Category', 'Rating']].copy()
target = season_data['Style Attributes']

# Encode the categorical column and the target as integer codes.
# - Separate encoders keep both fitted mappings available afterwards instead
#   of the target fit overwriting the Category fit.
# - Plain column assignment replaces the column, so 'Category' takes the
#   integer dtype of the codes; `.loc[:, col] = ...` may instead write in
#   place and leave the column as object dtype, depending on the pandas version.
category_encoder = LabelEncoder()
label_encoder = LabelEncoder()  # target encoder; name kept for any later use
features['Category'] = category_encoder.fit_transform(features['Category'])
target_encoded = label_encoder.fit_transform(target)

# Hold out 20% of the Fall/Winter rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features, target_encoded, test_size=0.2, random_state=42
)

# Fit a gradient-boosting classifier with default hyperparameters.
classifier = GradientBoostingClassifier(random_state=42)
classifier.fit(X_train, y_train)

# Score on the held-out rows and report accuracy as a percentage.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy * 100)


Accuracy: 9.467455621301776
