In [33]:
import pandas as pd
import pickle
import json

In [34]:
# Load the exported models
with open('models.pkl', 'rb') as file:
    models = pickle.load(file)

model_discount = models['discount%']
model_free_delivery = models['free_delivery']

In [35]:
#  Load the JSON file
with open('scalers.json', 'r') as file:
    scalars = json.load(file)

scalars

{'rating_stars': [1.0, 5.0],
 'number_of_reviews': [2787.5377777777776, 14490.092269458171],
 'bought_last_month': [544.7777777777778, 1199.744987545194],
 'original_mrp': [57794.962222222224, 46894.61746496768],
 'discount%': [0.0, 65.0],
 'storage_in_GB': [212.69333333333333, 128.52294067840032],
 'RAM_in_GB': [7.6066666666666665, 2.458284479778313]}

In [36]:
final_features = {
    "discount%": [
        "brand_apple","brand_xiaomi","rating_stars","original_mrp","storage_in_GB","color_Natural Titanium","color_Teal","color_Ultramarine"
    ],
    "free_delivery": ["brand_apple","brand_xiaomi","rating_stars","original_mrp","bought_last_month","brand_samsung","color_Black","RAM_in_GB"
    ]
}

print(final_features['discount%'])
print(final_features['free_delivery'])

['brand_apple', 'brand_xiaomi', 'rating_stars', 'original_mrp', 'storage_in_GB', 'color_Natural Titanium', 'color_Teal', 'color_Ultramarine']
['brand_apple', 'brand_xiaomi', 'rating_stars', 'original_mrp', 'bought_last_month', 'brand_samsung', 'color_Black', 'RAM_in_GB']


In [37]:
data = pd.read_csv(r'C:\Users\Dell\Desktop\Programs\Scaraping\py-scraping\Combined_csvs\final_cleaned_smartphones2.csv')

In [38]:
data = data.sample(1)
data

Unnamed: 0,product_name,rating_stars,number_of_reviews,bought_last_month,discounted_mrp,original_mrp,discount%,free_delivery,brand,product,color,storage_in_GB,RAM_in_GB
129,"OnePlus Nord CE4 Lite 5G (Super , RAM, Storage)",4.1,1437,5000,17999.0,20999,14.0,1,oneplus,OnePlus Nord CE4 Lite 5G,Silver,128,8


In [39]:
def process_and_predict(data, scalars, model_discount, model_free_delivery):
    def scale_down(data):
        df = data.copy()
        df.drop(["product_name", "product", "discounted_mrp"], axis=1, inplace=True)

        # Scaling rating_stars
        min_val = scalars["rating_stars"][0]
        max_val = scalars["rating_stars"][1]
        df["rating_stars"] = -1 + (df["rating_stars"] - min_val) / (max_val - min_val) * 2
        df["rating_stars"] = df["rating_stars"].round(6)

        # Standardizing number_of_reviews
        means = scalars["number_of_reviews"][0]
        std_devs = scalars["number_of_reviews"][1]
        df["number_of_reviews"] = (df["number_of_reviews"] - means) / std_devs

        # Standardizing bought_last_month
        means = scalars["bought_last_month"][0]
        std_devs = scalars["bought_last_month"][1]
        df["bought_last_month"] = (df["bought_last_month"] - means) / std_devs

        # Standardizing original_mrp
        means = scalars["original_mrp"][0]
        std_devs = scalars["original_mrp"][1]
        df["original_mrp"] = (df["original_mrp"] - means) / std_devs

        # Scaling discount%
        min_val = scalars["discount%"][0]
        max_val = scalars["discount%"][1]
        df["discount%"] = -1 + (df["discount%"] - min_val) / (max_val - min_val) * 2
        df["discount%"] = df["discount%"].round(6)

        # Standardizing storage_in_GB
        means = scalars["storage_in_GB"][0]
        std_devs = scalars["storage_in_GB"][1]
        df["storage_in_GB"] = (df["storage_in_GB"] - means) / std_devs

        # Standardizing RAM_in_GB
        means = scalars["RAM_in_GB"][0]
        std_devs = scalars["RAM_in_GB"][1]
        df["RAM_in_GB"] = (df["RAM_in_GB"] - means) / std_devs

        # Replacing brand names
        df["brand"].replace({'iphone': 'apple', 'redmi': 'xiaomi'}, inplace=True)
        df = pd.get_dummies(df, columns=['brand'], prefix='brand')
        df = pd.get_dummies(df, columns=['color'], prefix='color')

        # Ensure all specified columns are present
        required_columns = ['rating_stars', 'number_of_reviews', 'bought_last_month', 'original_mrp', 'discount%', 'free_delivery', 'storage_in_GB', 'RAM_in_GB', 'brand_apple', 'brand_oneplus', 'brand_oppo', 'brand_realme', 'brand_samsung', 'brand_vivo', 'brand_xiaomi', 'color_Black', 'color_Black Titanium', 'color_Blue', 'color_Blue Titanium', 'color_Desert Titanium', 'color_Gold', 'color_Graphite', 'color_Gray', 'color_Green', 'color_Midnight', 'color_Natural Titanium', 'color_Pacific Blue', 'color_Pink', 'color_Purple', 'color_Red', 'color_Silver', 'color_Space Black', 'color_Starlight', 'color_Teal', 'color_Ultramarine', 'color_White', 'color_White Titanium', 'color_Yellow']

        for col in required_columns:
            if col not in df.columns:
                df[col] = 0

        cols_to_convert = ['brand_apple', 'brand_oneplus', 'brand_oppo', 'brand_realme', 'brand_samsung', 'brand_vivo', 'brand_xiaomi', 'color_Black', 'color_Black Titanium', 'color_Blue', 'color_Blue Titanium', 'color_Desert Titanium', 'color_Gold', 'color_Graphite', 'color_Gray', 'color_Green', 'color_Midnight', 'color_Natural Titanium', 'color_Pacific Blue', 'color_Pink', 'color_Purple', 'color_Red', 'color_Silver', 'color_Space Black', 'color_Starlight', 'color_Teal', 'color_Ultramarine', 'color_White', 'color_White Titanium', 'color_Yellow']

        df[cols_to_convert] = df[cols_to_convert].astype(int)
        return df

    def model_feed(sample):
        discount_sample = sample[['brand_apple', 'brand_xiaomi', 'rating_stars', 'original_mrp', 'storage_in_GB', 'color_Natural Titanium', 'color_Teal', 'color_Ultramarine']]
        free_delivery_sample = sample[['brand_apple', 'brand_xiaomi', 'rating_stars', 'original_mrp', 'bought_last_month', 'brand_samsung', 'color_Black', 'RAM_in_GB']]

        # Predict discount percentage
        discount_prediction = model_discount.predict(discount_sample)

        # Predict free delivery
        free_delivery_prediction = model_free_delivery.predict(free_delivery_sample)

        return {
            "discount%": discount_prediction[0],
            "free_delivery": free_delivery_prediction[0]
        }

    def upscale_predictions(predictions, scalars):
        upscaled_predictions = predictions.copy()

        # Upscale discount%
        min_val = scalars["discount%"][0]
        max_val = scalars["discount%"][1]
        upscaled_predictions["discount%"] = ((upscaled_predictions["discount%"] + 1) / 2) * (max_val - min_val) + min_val

        # Convert free_delivery from -1/1 to 0/1
        upscaled_predictions["free_delivery"] = 1 if upscaled_predictions["free_delivery"] == 1 else 0

        return upscaled_predictions

    def categorize_discount(discount):
        bins = list(range(0, 101, 10))
        for i in range(len(bins) - 1):
            if bins[i] <= discount < bins[i + 1]:
                return f"{bins[i]} to {bins[i + 1]}%"
        return "100%+"

    # Normalize the input data
    data_in = scale_down(data.copy())

    # Feed the normalized data to the model
    predictions = model_feed(data_in)

    # Upscale the model predictions
    upscaled_predictions = upscale_predictions(predictions, scalars)

    # Categorize the discount
    upscaled_predictions["discount%"] = categorize_discount(upscaled_predictions["discount%"])

    return upscaled_predictions

# Example usage
result = process_and_predict(data, scalars, model_discount, model_free_delivery)
print(result)

{'discount%': '10 to 20%', 'free_delivery': 1}


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["brand"].replace({'iphone': 'apple', 'redmi': 'xiaomi'}, inplace=True)


In [40]:
# Extract the actual discount percentage from the input data
actual_discount = data['discount%'].values[0]
# Extract the predicted discount percentage from the result
predicted_discount = result['discount%']

# Define the bins
bins = list(range(0, 101, 10))

# Function to categorize discount
def categorize_discount(discount):
    for i in range(len(bins) - 1):
        if bins[i] <= discount < bins[i + 1]:
            return f"{bins[i]} to {bins[i + 1]}%"
    return "100%+"

# Categorize the actual discount
actual_discount_category = categorize_discount(actual_discount)

# Calculate if the discount prediction is within the same bin
is_discount_correct = (predicted_discount == actual_discount_category)

# Extract the actual free delivery value from the input data
actual_free_delivery = data['free_delivery'].values[0]
# Extract the predicted free delivery value from the result
predicted_free_delivery = result['free_delivery']
# Calculate if the free delivery prediction is correct
is_free_delivery_correct = (predicted_free_delivery == actual_free_delivery)

# Calculate accuracy for discount
discount_accuracy = 1 if is_discount_correct else 0
# Calculate accuracy for free delivery
free_delivery_accuracy = 1 if is_free_delivery_correct else 0

print("Actual vs Predicted")
print(f"Discount: {actual_discount}% vs {predicted_discount}")
print(f"Free Delivery: {actual_free_delivery} vs {predicted_free_delivery}")
print()
print("Accuracy Scores")
print(f"Discount Prediction is within the same bin: {is_discount_correct}")
print(f"Discount Accuracy: {discount_accuracy * 100}%")
print(f"Free Delivery Prediction is correct: {is_free_delivery_correct}")
print(f"Free Delivery Accuracy: {free_delivery_accuracy * 100}%")

Actual vs Predicted
Discount: 14.0% vs 10 to 20%
Free Delivery: 1 vs 1

Accuracy Scores
Discount Prediction is within the same bin: True
Discount Accuracy: 100%
Free Delivery Prediction is correct: True
Free Delivery Accuracy: 100%
