In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load and prepare data
data = pd.read_excel('10000_sales_data.xlsx')

# Prepare features and target.
# .copy() gives an independent frame, so the encoding below never writes into `data`.
categorical_columns = ['Season', 'Category']
features = data[['Season', 'Category', 'Price', 'Age', 'Rating']].copy()
target = data['Style Attributes']

# Encode categorical data.
# Each categorical feature gets its own encoder, kept in `feature_encoders`,
# so every fitted mapping (encoder.classes_) stays available afterwards
# instead of being overwritten by the next fit.
feature_encoders = {}
for column in categorical_columns:
    column_encoder = LabelEncoder()
    # Plain column assignment replaces the column with the integer codes;
    # `features.loc[:, column] = ...` would write them into the existing
    # object-dtype column (pandas 2.x in-place setting semantics).
    features[column] = column_encoder.fit_transform(features[column])
    feature_encoders[column] = column_encoder

# `label_encoder` ends up fitted on the target, so
# label_encoder.inverse_transform(...) maps predictions back to style labels.
label_encoder = LabelEncoder()
target_encoded = label_encoder.fit_transform(target)

# Splitting the dataset: 20% of the rows are held out for testing; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features, target_encoded, test_size=0.2, random_state=42
)

# Model training
random_forest_classifier = RandomForestClassifier(random_state=42)
random_forest_classifier.fit(X_train, y_train)

# Predictions and accuracy (printed as a percentage)
y_pred = random_forest_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy * 100)


Accuracy: 9.35


In [None]:
# Split the dataset by age range and run the random forest model on each age group to check whether prediction accuracy improves.
# Specific example: predicting the style attribute from the young-customer data.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load data
data = pd.read_excel('10000_sales_data.xlsx')

# Define age groups as left-closed intervals (right=False):
# [0, 25) -> 'Young', [25, 50) -> 'Middle-aged', [50, inf) -> 'Senior'.
# The open-ended top edge keeps ages of 100 or more in 'Senior'; with a top
# edge of 100 and right=False those rows would get a NaN age group.
bins = [0, 25, 50, float('inf')]
labels = ['Young', 'Middle-aged', 'Senior']
data['Age Group'] = pd.cut(data['Age'], bins=bins, labels=labels, right=False)

# Filter data for the 'Young' age group
young_data = data[data['Age Group'] == 'Young']

# Prepare features and target.
# 'Age' is not used as a feature in this cell; the subset is already limited
# to a single age group. .copy() keeps the encoding below from writing into
# `young_data`.
categorical_columns = ['Season', 'Category']
features = young_data[['Season', 'Category', 'Price', 'Rating']].copy()
target = young_data['Style Attributes']

# Encode categorical data.
# Each categorical feature gets its own encoder, kept in `feature_encoders`,
# so every fitted mapping (encoder.classes_) stays available afterwards
# instead of being overwritten by the next fit.
feature_encoders = {}
for column in categorical_columns:
    column_encoder = LabelEncoder()
    # Plain column assignment replaces the column with the integer codes;
    # `features.loc[:, column] = ...` would write them into the existing
    # object-dtype column (pandas 2.x in-place setting semantics).
    features[column] = column_encoder.fit_transform(features[column])
    feature_encoders[column] = column_encoder

# `label_encoder` ends up fitted on the target, so
# label_encoder.inverse_transform(...) maps predictions back to style labels.
label_encoder = LabelEncoder()
target_encoded = label_encoder.fit_transform(target)

# Splitting the dataset: 20% of the rows are held out for testing; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features, target_encoded, test_size=0.2, random_state=42
)

# Model training
random_forest_classifier = RandomForestClassifier(random_state=42)
random_forest_classifier.fit(X_train, y_train)

# Predictions and accuracy (printed as a percentage)
y_pred = random_forest_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy * 100)


Accuracy: 8.680555555555555


In [None]:
# Split the dataset by season and run the random forest model on each season to check whether prediction accuracy improves.
# Specific example: predicting the style attribute from the Fall/Winter data.

In [3]:


# Load data
# NOTE: this cell has no imports of its own; it uses pd, LabelEncoder,
# train_test_split, RandomForestClassifier and accuracy_score as imported by
# the earlier cells of this notebook.
data = pd.read_excel('10000_sales_data.xlsx')

# Filter data for the 'Fall/Winter' season
fall_winter_data = data[data['Season'] == 'Fall/Winter']

# Prepare features and target.
# 'Season' is not used as a feature in this cell; it is constant within the
# filtered subset. .copy() keeps the encoding below from writing into
# `fall_winter_data`.
features = fall_winter_data[['Age', 'Price', 'Category', 'Rating']].copy()
target = fall_winter_data['Style Attributes']

# Encode categorical data.
# 'Category' gets its own encoder so its fitted mapping (classes_) is not
# overwritten when the target is encoded.
category_encoder = LabelEncoder()
# Plain column assignment replaces the column with the integer codes;
# `features.loc[:, 'Category'] = ...` would write them into the existing
# object-dtype column (pandas 2.x in-place setting semantics).
features['Category'] = category_encoder.fit_transform(features['Category'])

# `label_encoder` ends up fitted on the target, so
# label_encoder.inverse_transform(...) maps predictions back to style labels.
label_encoder = LabelEncoder()
target_encoded = label_encoder.fit_transform(target)

# Splitting the dataset: 20% of the rows are held out for testing; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features, target_encoded, test_size=0.2, random_state=42
)

# Model training
random_forest_classifier = RandomForestClassifier(random_state=42)
random_forest_classifier.fit(X_train, y_train)

# Predictions and accuracy (printed as a percentage)
y_pred = random_forest_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy * 100)


Accuracy: 8.579881656804734
