<a href="https://colab.research.google.com/github/sanjida583/MLModelPythonTamanna/blob/main/MLModelPython.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer


# Load the dataset (customer purchase records; the last column is the target)
dataset = pd.read_csv('dataset.csv')

# Define features and target variable
X = dataset.iloc[:, :-1]  # Features (all columns except the last)
y = dataset.iloc[:, -1]   # Target variable (last column)

# Split the dataset into training and test sets (80% train, 20% test).
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Columns that get one-hot encoded.
# NOTE(review): only 'Gender' is listed; with ColumnTransformer's default
# `remainder` setting, every other non-numeric column is presumably dropped
# from the model input -- confirm this is intended.
categorical_features = ['Gender']  # Replace with actual categorical feature names

# Every numeric column is standardised.
# NOTE(review): this includes identifier-like numeric columns such as
# 'Customer ID' if present -- confirm they should be used as features.
numerical_features = X_train.select_dtypes(include=['number']).columns

# Create a ColumnTransformer to handle numerical and categorical features
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),  # Scale numerical features
        ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features)  # Encode categorical features
    ])


# Fit the preprocessor on the training data only, then apply the same fitted
# transformation to the test data so no test statistics leak into training.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)


# Initialize the RandomForestClassifier (seeded for reproducibility)
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the model and the fitted preprocessor. The `with` blocks guarantee the
# files are flushed and closed even if pickling raises.
with open("model" + ".sav", "wb") as model_file:
    pickle.dump(clf, model_file)
with open("preprocessormodel" + ".sav", "wb") as preprocessor_file:
    pickle.dump(preprocessor, preprocessor_file)  # Needed to transform new inputs at prediction time

Accuracy: 0.1397
Classification Report:
                precision    recall  f1-score   support

      Annually       0.19      0.22      0.21       121
     Bi-Weekly       0.11      0.11      0.11        96
Every 3 Months       0.13      0.17      0.15       109
   Fortnightly       0.08      0.08      0.08        98
       Monthly       0.13      0.11      0.12       117
     Quarterly       0.15      0.12      0.13       129
        Weekly       0.17      0.15      0.16       110

      accuracy                           0.14       780
     macro avg       0.14      0.14      0.14       780
  weighted avg       0.14      0.14      0.14       780



In [None]:
import pandas as pd

# Load the uploaded dataset to examine its structure and verify correctness.
# A path relative to the working directory is used (the same 'dataset.csv' the
# training cells read) so the notebook is not tied to one machine's layout.
# NOTE(review): on Colab the working directory is presumably /content, so this
# resolves to the same file as before -- confirm.
file_path = 'dataset.csv'
dataset = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
dataset.head()

Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Payment Method,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Preferred Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,Yes,Credit Card,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,3.1,Yes,Bank Transfer,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,3.1,Yes,Cash,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,3.5,Yes,PayPal,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,2.7,Yes,Cash,Free Shipping,Yes,Yes,31,PayPal,Annually


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

# Load the dataset
dataset = pd.read_csv('dataset.csv')

# Drop rows with missing values (if any)
dataset.dropna(inplace=True)

# Encode every string-typed column as integer codes, keeping one fitted
# LabelEncoder per column so the codes can be mapped back to labels later.
# The target column is encoded here too when it is string-typed, which is why
# the classification report below shows integer class ids.
categorical_columns = dataset.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    dataset[col] = le.fit_transform(dataset[col])
    label_encoders[col] = le

# Define features (X) and target variable (y)
X = dataset.iloc[:, :-1]  # All columns except the last as features
y = dataset.iloc[:, -1]   # The last column as the target

# Split the dataset into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training data only, then reuse it for the test data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the RandomForestClassifier (seeded for reproducibility)
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the model and scaler. `with` closes each file deterministically, so the
# pickles are fully flushed to disk before any later cell loads them.
with open("model.sav", "wb") as model_file:
    pickle.dump(clf, model_file)
with open("scaler.sav", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# Save label encoders for later use (optional)
with open("label_encoders.pkl", "wb") as le_file:
    pickle.dump(label_encoders, le_file)

Accuracy: 0.1385
Classification Report:
              precision    recall  f1-score   support

           0       0.14      0.13      0.14       121
           1       0.15      0.20      0.17        96
           2       0.13      0.17      0.15       109
           3       0.15      0.17      0.16        98
           4       0.10      0.09      0.10       117
           5       0.19      0.12      0.15       129
           6       0.12      0.09      0.10       110

    accuracy                           0.14       780
   macro avg       0.14      0.14      0.14       780
weighted avg       0.14      0.14      0.14       780



In [None]:
import pickle
import numpy as np

# Custom method for generating predictions
def getPredictions(model, scalermodel, feature_values):
    """Scale one sample and return the model's prediction for it.

    Args:
        model: Object exposing ``predict(rows)``.
        scalermodel: Object exposing ``transform(rows)``.
        feature_values: Sequence of feature values describing a single sample.

    Returns:
        Whatever ``model.predict`` returns for the one-row batch.
    """
    # Both calls operate on batches, so wrap the single sample in a list.
    single_row_batch = [feature_values]
    scaled_batch = scalermodel.transform(single_row_batch)
    return model.predict(scaled_batch)

# Main function
if __name__ == "__main__":
    # Interactive driver: load the saved model and scaler, prompt for a few
    # feature values, and print the model's prediction for that sample.
    try:
        # Load the model and scaler; `with` closes each file even if
        # unpickling fails.
        # SECURITY: pickle.load can execute arbitrary code -- only load files
        # produced by this notebook, never files from an untrusted source.
        with open("model.sav", "rb") as model_file:
            model = pickle.load(model_file)
        with open("scaler.sav", "rb") as scaler_file:
            scalermodel = pickle.load(scaler_file)
    except FileNotFoundError as e:
        print(f"Error: {e}")
        print("Ensure 'model.sav' and 'scaler.sav' exist in the current directory.")
        # SystemExit replaces the interactive-only exit() helper.
        raise SystemExit(1) from None

    # Feature prompts (ensure all required features are included)
    print("Enter the following inputs for prediction:")

    try:
        # Replace these with all required features from your dataset
        age = float(input("Age: "))
        purchase_amount = float(input("Purchase Amount (USD): "))
        review_rating = float(input("Review Rating: "))
        previous_purchases = int(input("Previous Purchases: "))
    except ValueError:
        print("Invalid input. Please enter numeric values.")
        raise SystemExit(1) from None

    # Map each prompted value to the training column it belongs to.
    user_inputs = {
        "Age": age,
        "Purchase Amount (USD)": purchase_amount,
        "Review Rating": review_rating,
        "Previous Purchases": previous_purchases,
    }

    # The scaler expects values in the exact column order it was fitted on.
    # When it recorded that order, place each input in its own column and use
    # the placeholder 0 for every feature that is not prompted for.
    feature_names = list(getattr(scalermodel, "feature_names_in_", []))
    if all(name in feature_names for name in user_inputs):
        feature_values = [user_inputs.get(name, 0) for name in feature_names]
    else:
        # Column names unavailable or not matching: keep the previous
        # positional layout (inputs first, then placeholders up to 18).
        additional_features = [0] * (18 - len(user_inputs))  # Adjust to match total feature count
        feature_values = list(user_inputs.values()) + additional_features

    # Get the prediction
    try:
        result = getPredictions(model, scalermodel, feature_values)
        print(f"Prediction: {result}")
    except Exception as e:
        print(f"An error occurred during prediction: {e}")


Enter the following inputs for prediction:
Age: 25
Purchase Amount (USD): 150
Review Rating: 4.2
Previous Purchases: 3
Prediction: [0]


