<a href="https://colab.research.google.com/github/lipa894/MLModelLipa/blob/main/SVMModel_Scikitlearn_pickle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

# Load the dataset
dataset = pd.read_csv('shopping_trends.csv')

# Drop irrelevant columns (e.g., Customer ID)
dataset = dataset.drop(columns=['Customer ID'])

# Encode every object-dtype (text) column as integer codes. One fitted
# LabelEncoder is kept per column so the codes can be mapped back to the
# original strings later. If the last column is text, this also encodes
# the target.
categorical_columns = dataset.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    dataset[col] = le.fit_transform(dataset[col])
    label_encoders[col] = le

# Define features (X) and target variable (y).
# The split is positional: whatever column is last in the CSV is the target.
X = dataset.iloc[:, :-1]  # All columns except the last as features
y = dataset.iloc[:, -1]   # The last column as the target

# Split the dataset into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale input data. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the RandomForestClassifier (fixed seed for reproducible results)
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
# NOTE(review): the recorded run scored ~0.14 accuracy over 7 classes, which
# is about chance level -- these features may carry little signal for this
# target; confirm the target choice before relying on the saved model.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the model and scaler. Context managers close (and flush) the files
# deterministically instead of leaving the handles to the garbage collector.
with open("model.sav", "wb") as model_file:
    pickle.dump(clf, model_file)
with open("scaler.sav", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# Save label encoders if needed for decoding predictions
with open("label_encoders.pkl", "wb") as le_file:
    pickle.dump(label_encoders, le_file)


Accuracy: 0.1359
Classification Report:
              precision    recall  f1-score   support

           0       0.20      0.20      0.20       121
           1       0.14      0.17      0.15        96
           2       0.12      0.17      0.14       109
           3       0.09      0.09      0.09        98
           4       0.13      0.12      0.13       117
           5       0.19      0.14      0.16       129
           6       0.08      0.06      0.07       110

    accuracy                           0.14       780
   macro avg       0.14      0.13      0.13       780
weighted avg       0.14      0.14      0.14       780



In [3]:
import pandas as pd

# Load the uploaded dataset to examine its structure and verify correctness.
# The path is relative (the same file name the training cells read) so the
# notebook does not depend on Colab's /content directory layout.
file_path = 'shopping_trends.csv'
dataset = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
dataset.head()


Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Payment Method,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Preferred Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,Yes,Credit Card,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,3.1,Yes,Bank Transfer,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,3.1,Yes,Cash,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,3.5,Yes,PayPal,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,2.7,Yes,Cash,Free Shipping,Yes,Yes,31,PayPal,Annually


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

# Load the dataset
dataset = pd.read_csv('shopping_trends.csv')

# Drop rows with missing values (if any). Reassigning instead of using
# inplace=True keeps the step explicit and chain-friendly.
dataset = dataset.dropna()

# NOTE(review): 'Customer ID' is not dropped in this cell, so it is used as a
# feature. Dropping it changes the number of features the saved scaler/model
# expect, so any consumer of model.sav / scaler.sav must be updated with it.

# Encode every object-dtype (text) column as integer codes. One fitted
# LabelEncoder is kept per column so the codes can be mapped back to the
# original strings later. If the last column is text, this also encodes
# the target.
categorical_columns = dataset.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    dataset[col] = le.fit_transform(dataset[col])
    label_encoders[col] = le

# Define features (X) and target variable (y).
# The split is positional: whatever column is last in the CSV is the target.
X = dataset.iloc[:, :-1]  # All columns except the last as features
y = dataset.iloc[:, -1]   # The last column as the target

# Split the dataset into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale input data. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the RandomForestClassifier (fixed seed for reproducible results)
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the model and scaler. Context managers close (and flush) the files
# deterministically instead of leaving the handles to the garbage collector.
with open("model.sav", "wb") as model_file:
    pickle.dump(clf, model_file)
with open("scaler.sav", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# Save label encoders for later use (optional)
with open("label_encoders.pkl", "wb") as le_file:
    pickle.dump(label_encoders, le_file)


Accuracy: 0.1385
Classification Report:
              precision    recall  f1-score   support

           0       0.14      0.13      0.14       121
           1       0.15      0.20      0.17        96
           2       0.13      0.17      0.15       109
           3       0.15      0.17      0.16        98
           4       0.10      0.09      0.10       117
           5       0.19      0.12      0.15       129
           6       0.12      0.09      0.10       110

    accuracy                           0.14       780
   macro avg       0.14      0.14      0.14       780
weighted avg       0.14      0.14      0.14       780



In [12]:
import pickle
import numpy as np

# Custom method for generating predictions
def getPredictions(model, scalermodel, feature_values):
    """Scale one sample's features and return the model's prediction for it.

    Args:
        model: Object exposing ``predict(batch)``.
        scalermodel: Object exposing ``transform(batch)``.
        feature_values: The feature values of a single sample.

    Returns:
        Whatever ``model.predict`` returns for the one-row batch.
    """
    # Both the scaler and the model work on batches, so wrap the single
    # sample in a list to form a one-row batch.
    single_row_batch = [feature_values]
    scaled_batch = scalermodel.transform(single_row_batch)
    return model.predict(scaled_batch)


def build_feature_vector(feature_names, provided, default=0):
    """Lay out one model-input row in the column order used at training time.

    Args:
        feature_names: Column names, in order, that the scaler/model were
            fitted on.
        provided: Mapping of column name -> value for the features supplied
            by the user.
        default: Placeholder used for every column missing from ``provided``.

    Returns:
        A list holding one value per entry of ``feature_names``.

    Raises:
        KeyError: If ``provided`` names a column that is not in
            ``feature_names``.
    """
    names = [str(name) for name in feature_names]
    unknown = [name for name in provided if name not in names]
    if unknown:
        raise KeyError(f"Unknown feature(s) {unknown}; expected a subset of {names}")
    return [provided.get(name, default) for name in names]


# Main function
if __name__ == "__main__":
    # Load the model and scaler; `with` closes the file handles promptly.
    # NOTE: pickle.load can execute arbitrary code -- only load files that
    # this notebook produced itself.
    with open("model.sav", "rb") as model_file:
        model = pickle.load(model_file)
    with open("scaler.sav", "rb") as scaler_file:
        scalermodel = pickle.load(scaler_file)

    # The scaler was fitted on a DataFrame, so it records the training column
    # order. Using it avoids hard-coding the feature count and guarantees each
    # input lands in the column the model was trained on.
    feature_names = getattr(scalermodel, "feature_names_in_", None)
    if feature_names is None:
        raise RuntimeError(
            "scaler.sav does not record feature names; refit the scaler on a "
            "DataFrame so inputs can be matched to their training columns."
        )

    print("Enter the following inputs for prediction:")

    # Dict entries are evaluated top to bottom, so the prompts appear in the
    # order written here.
    provided = {
        "Age": float(input("Age: ")),
        "Purchase Amount (USD)": float(input("Purchase Amount (USD): ")),
        "Review Rating": float(input("Review Rating: ")),
        "Previous Purchases": int(input("Previous Purchases: ")),
    }

    # Features that are not prompted for keep the placeholder value 0.
    feature_values = build_feature_vector(feature_names, provided)

    # Get the prediction
    result = getPredictions(model, scalermodel, feature_values)

    # Print the result
    print(f"Prediction: {result}")


Enter the following inputs for prediction:
Age: 25
Purchase Amount (USD): 150
Review Rating: 4.2
Previous Purchases: 3
Prediction: [0]


