In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline

# Read the raw sales records from the Excel workbook
data = pd.read_excel('10000_sales_data.xlsx')

# Integer-encode the categorical columns in place. One encoder is re-fit for
# each column in turn, so after the loop it holds the mapping of the last
# column processed ('Style Attributes', the prediction target).
label_encoder = LabelEncoder()
for categorical_column in ('Season', 'Category', 'Style Attributes'):
    data[categorical_column] = label_encoder.fit_transform(data[categorical_column])

# Feature matrix and target vector
feature_columns = ['Season', 'Category', 'Price', 'Age', 'Rating']  # Example features
X = data[feature_columns]
y = data['Style Attributes']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features, then fit an RBF-kernel support vector classifier
scaler = StandardScaler()
classifier = SVC(kernel='rbf', C=1, gamma='auto')
pipeline = make_pipeline(scaler, classifier)
pipeline.fit(X_train, y_train)

# Score the held-out rows and report the accuracy as a percentage
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy * 100)


Accuracy: 9.950000000000001


In [None]:
# Split the dataset by age range and run a support vector classifier on each age group to check whether prediction accuracy improves.
# The specific example below predicts the style attribute using only the young-customer data.

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline

# Load data
data = pd.read_excel('10000_sales_data.xlsx')

# Bucket customers into age groups. With right=False the intervals are
# left-closed: [0, 25) -> 'Young', [25, 50) -> 'Middle-aged', [50, 100) -> 'Senior'.
# Ages outside [0, 100) receive a missing (NaN) group.
bins = [0, 25, 50, 100]
labels = ['Young', 'Middle-aged', 'Senior']
data['Age Group'] = pd.cut(data['Age'], bins=bins, labels=labels, right=False)

# Keep only the 'Young' rows. The explicit .copy() makes young_data an
# independent frame, so assigning columns below cannot trigger
# SettingWithCopyWarning.
young_data = data[data['Age Group'] == 'Young'].copy()

# Integer-encode the categorical columns.
# Plain column assignment replaces the whole column, so the encoded values get
# an integer dtype. `young_data.loc[:, col] = ...` instead writes into the
# existing object-dtype column on pandas >= 2.0, which leaves the target as an
# object column of ints that scikit-learn rejects ("Unknown label type").
# The single encoder is re-fit per column; after the loop it holds the mapping
# for 'Style Attributes'.
label_encoder = LabelEncoder()
for column in ['Season', 'Category', 'Style Attributes']:
    young_data[column] = label_encoder.fit_transform(young_data[column])

# Prepare features and target
X = young_data[['Season', 'Category', 'Price', 'Age', 'Rating']]  # Example features
y = young_data['Style Attributes']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features, then fit an RBF-kernel support vector classifier
pipeline = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1, gamma='auto'))
pipeline.fit(X_train, y_train)

# Predict the held-out rows and report the accuracy as a percentage
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy * 100)


Accuracy: 13.541666666666666


In [None]:
# Split the dataset by season and run a support vector classifier on each season to check whether prediction accuracy improves.
# The specific example below predicts the style attribute using only the Fall/Winter data.

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline

# Load data
data = pd.read_excel('10000_sales_data.xlsx')

# Filter data for the 'Fall/Winter' season and explicitly copy it to avoid SettingWithCopyWarning
season_data = data[data['Season'] == 'Fall/Winter'].copy()

# Preparing data using best practices with .loc
label_encoder = LabelEncoder()
season_data.loc[:, 'Season'] = label_encoder.fit_transform(season_data['Season'])
season_data.loc[:, 'Category'] = label_encoder.fit_transform(season_data['Category'])
season_data.loc[:, 'Style Attributes'] = label_encoder.fit_transform(season_data['Style Attributes'])

# Prepare features and target
X = season_data[['Season', 'Category', 'Price', 'Age', 'Rating']]
y = season_data['Style Attributes']

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating a pipeline for scaling and model training
pipeline = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1, gamma='auto'))
pipeline.fit(X_train, y_train)

# Predicting the Test set results and calculating accuracy
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy * 100)


Accuracy: 11.242603550295858
