In [1]:
from google.colab import files
uploaded = files.upload()

Saving flipkart_air_conditioner.csv to flipkart_air_conditioner.csv


In [48]:
import pandas as pd
import numpy as np
import re
import ast

from sklearn.preprocessing import MinMaxScaler

In [26]:
# Load the dataset
df = pd.read_csv("flipkart_air_conditioner.csv", encoding="latin-1")

In [33]:
# Basic overview
print(df.shape)
df.head()

(316, 13)


Unnamed: 0,Name,Brand,Ratings,No_of_ratings,No_of_reviews,Product_features,MSP,MRP,Discount,Power_Consumption_W,Room_Size_SqFt,Electricity_Consumption_per_SqFt,Final_Price
0,CARRIER 1.5 Ton 5 Star Split Inverter AC - White,CARRIER,4.3,23,3,"['Power Consumption: 1260 W', 'Room Size: 111 ...",39999,76090,47,1260.0,,,40327.7
1,IFB 7 Stage Air Treatment 1.5 Ton 3 Star Split...,IFB,4.3,1999,306,"['Power Consumption: 1088.64 kWh', 'Room Size:...",37990,49590,23,,,,38184.3
2,SAMSUNG Convertible 1 Ton 3 Star Split Inverte...,SAMSUNG,4.1,1128,121,"['Power Consumption: 624.48 W', 'Room Size: 90...",29999,50990,41,48.0,,,30084.1
3,Voltas 1.5 Ton 5 Star Split Inverter AC - White,Voltas,4.2,4734,494,"['Power Consumption: 1450 W', 'Room Size: 111 ...",37490,73990,49,1450.0,,,37734.9
4,LG Super Convertible 5-in-1 Cooling 1.5 Ton 3 ...,LG,4.1,1660,167,"['Power Consumption: 1050.84 kWh', 'Room Size:...",36990,68990,46,,,,37254.6


In [36]:
# Drop duplicates
df.drop_duplicates(inplace=True)

# Strip text columns
df['Name'] = df['Name'].str.strip()
df['Brand'] = df['Brand'].str.strip()

In [38]:
# Convert MRP and Discount to float
df['MRP'] = df['MRP'].astype(float)
df['Discount'] = df['Discount'].astype(float)

# Calculate Final Price (MRP - discount %)
df['Final_Price'] = df['MRP'] * (1 - df['Discount'] / 100)

In [40]:
# Convert string list to actual list
def safe_parse_features(x):
    try:
        return ast.literal_eval(x)
    except:
        return []

df['Parsed_features'] = df['Product_features'].apply(safe_parse_features)

In [41]:
# Extract room size
def extract_room_size(features):
    for item in features:
        if 'Room Size' in item:
            match = re.search(r'(\d+)\s*to\s*(\d+)', item)
            if match:
                return (int(match.group(1)) + int(match.group(2))) / 2
    return None

# Extract power consumption
def extract_power_watt(features):
    for item in features:
        if 'Power Consumption' in item:
            match = re.search(r'(\d+\.?\d*)\s*(kW|W)', item, re.IGNORECASE)
            if match:
                val = float(match.group(1))
                return val * 1000 if match.group(2).lower() == 'kw' else val
    return None

df['Room_Size_SqFt'] = df['Parsed_features'].apply(extract_room_size)
df['Power_Consumption_W'] = df['Parsed_features'].apply(extract_power_watt)

In [42]:
#Electricity consumption per feet
df['Electricity_Consumption_per_SqFt'] = df['Power_Consumption_W'] / df['Room_Size_SqFt']

In [44]:
#convert rating into numeric form

df['Ratings'] = pd.to_numeric(df['Ratings'], errors='coerce')

df['No_of_ratings'] = df['No_of_ratings'].astype(str).str.replace(",", "")
df['No_of_ratings'] = pd.to_numeric(df['No_of_ratings'], errors='coerce')

In [47]:
# Drop rows with essential fields missing
df.dropna(subset=['Name', 'Brand', 'Final_Price'], inplace=True)

# Handle missing values (safe version)
df['Ratings'] = df['Ratings'].fillna(df['Ratings'].mean())
df['No_of_ratings'] = df['No_of_ratings'].fillna(0)
df['Room_Size_SqFt'] = df['Room_Size_SqFt'].fillna(df['Room_Size_SqFt'].median())
df['Power_Consumption_W'] = df['Power_Consumption_W'].fillna(df['Power_Consumption_W'].median())

# Recalculate this after filling
df['Electricity_Consumption_per_SqFt'] = df['Power_Consumption_W'] / df['Room_Size_SqFt']

In [49]:
#build recommender system

def recommend_acs(df, user_budget_max, user_room_size_sqft, user_brand_priority=None, num_recommendations=5):
    """
    Recommend top N air conditioners based on user preferences.
    """

    #  Filter based on user inputs

    filtered_df = df.copy()

    if user_brand_priority:
        filtered_df = filtered_df[filtered_df['Brand'].str.contains(user_brand_priority, case=False, na=False)]

    filtered_df = filtered_df[
        (filtered_df['Final_Price'] <= user_budget_max) &
        (filtered_df['Room_Size_SqFt'] >= user_room_size_sqft)
    ]

    if filtered_df.empty:
        print("⚠️ No ACs found matching your criteria. Try changing budget/brand/room size.")
        return pd.DataFrame()

    #  Fill NaNs for scoring features (just in case)
    filtered_df['Ratings'] = filtered_df['Ratings'].fillna(df['Ratings'].mean())
    filtered_df['Discount'] = filtered_df['Discount'].fillna(0)
    filtered_df['Electricity_Consumption_per_SqFt'] = filtered_df['Electricity_Consumption_per_SqFt'].fillna(
        df['Electricity_Consumption_per_SqFt'].median()
    )

    #  Normalize features for scoring
    scaler = MinMaxScaler()
    filtered_df['Norm_Rating'] = scaler.fit_transform(filtered_df[['Ratings']])
    filtered_df['Norm_Discount'] = scaler.fit_transform(filtered_df[['Discount']])
    filtered_df['Norm_Efficiency'] = 1 - scaler.fit_transform(filtered_df[['Electricity_Consumption_per_SqFt']])

    #  Relevance Score Calculation
    w_rating = 0.4
    w_discount = 0.3
    w_efficiency = 0.3

    filtered_df['Relevance_Score'] = (
        w_rating * filtered_df['Norm_Rating'] +
        w_discount * filtered_df['Norm_Discount'] +
        w_efficiency * filtered_df['Norm_Efficiency']
    )

    #  Sort and return top N
    top_df = filtered_df.sort_values(by='Relevance_Score', ascending=False).head(num_recommendations)

    return top_df[[
        'Name', 'Brand', 'Ratings', 'No_of_ratings', 'Discount',
        'MSP', 'MRP', 'Final_Price',
        'Power_Consumption_W', 'Room_Size_SqFt', 'Electricity_Consumption_per_SqFt',
        'Relevance_Score'
    ]]

In [50]:
user_budget = 40000
user_room_size = 120
user_brand = "LG"  # try None for no brand filter

recommendations = recommend_acs(df, user_budget, user_room_size, user_brand, num_recommendations=5)

if not recommendations.empty:
    print("✅ Top AC Recommendations:\n")
    print(recommendations.to_string(index=False))

✅ Top AC Recommendations:

                                                                                                     Name Brand  Ratings  No_of_ratings  Discount   MSP     MRP  Final_Price  Power_Consumption_W  Room_Size_SqFt  Electricity_Consumption_per_SqFt  Relevance_Score
                                            LG 1.5 Ton 5 Star Window Smart AC with Wi-fi Connect  - White    LG      4.3           2461      33.0 36999 55990.0      37513.3               5000.0           130.5                         38.314176         0.748944
                                                          LG 1 Ton 3 Star Split Dual Inverter AC  - White    LG      4.4           3901      20.0 37500 46990.0      37592.0               1080.0           130.5                          8.275862         0.700000
LG Super Convertible 6-in-1 Cooling 1 Ton 5 Star Split Dual Inverter AI, 4 Way Swing, HD Filter with A...    LG      4.2           1666      38.0 37990 61990.0      38433.8             57199

In [51]:
def run_recommender():
    print("🔷 Welcome to AC Recommender 🔷\n")

    try:
        budget = int(input("Enter your maximum budget (₹): "))
        room_size = float(input("Enter your room size in sq.ft: "))
        brand = input("Enter preferred brand (optional, press Enter to skip): ").strip()

        if brand == "":
            brand = None

        n = int(input("How many ACs would you like to see? (default 5): ") or "5")

        recs = recommend_acs(df, budget, room_size, brand, num_recommendations=n)

        if not recs.empty:
            print("\n✅ Top AC Recommendations:\n")
            display(recs)
        else:
            print("😔 No results. Try relaxing your criteria.")

    except Exception as e:
        print("⚠️ Error:", e)

In [52]:
run_recommender()

🔷 Welcome to AC Recommender 🔷

Enter your maximum budget (₹): 50000
Enter your room size in sq.ft: 23
Enter preferred brand (optional, press Enter to skip): 
How many ACs would you like to see? (default 5): 4

✅ Top AC Recommendations:



Unnamed: 0,Name,Brand,Ratings,No_of_ratings,Discount,MSP,MRP,Final_Price,Power_Consumption_W,Room_Size_SqFt,Electricity_Consumption_per_SqFt,Relevance_Score
176,Voltas 1.5 Ton 3 Star Split Inverter AC with W...,Voltas,4.8,6,50.0,34890,69990.0,34995.0,1835.0,130.5,14.061303,0.966996
147,Panasonic 1 Ton 3 Star Split Inverter AC with ...,Panasonic,5.0,3,35.0,33477,51990.0,33793.5,915.0,130.5,7.011494,0.903879
19,Voltas 1.5 Ton 4 Star Split Inverter AC - White,Voltas,4.3,975,49.0,35302,69990.0,35694.9,1540.0,130.5,11.800766,0.894399
210,Voltas 1.5 Ton 5 Star Split Inverter AC - White,Voltas,4.6,7,42.0,39499,68990.0,40014.2,1455.0,130.5,11.149425,0.892419


In [57]:
import gradio as gr

# ---- Use your preprocessed df here ----
# For now, dummy copy to avoid errors
df = df.dropna(subset=['Name', 'Brand', 'Final_Price'])
df['Ratings'] = df['Ratings'].fillna(df['Ratings'].mean())
df['No_of_ratings'] = df['No_of_ratings'].fillna(0)
df['Room_Size_SqFt'] = df['Room_Size_SqFt'].fillna(df['Room_Size_SqFt'].median())
df['Power_Consumption_W'] = df['Power_Consumption_W'].fillna(df['Power_Consumption_W'].median())
df['Discount'] = df['Discount'].fillna(0)
df['Electricity_Consumption_per_SqFt'] = df['Power_Consumption_W'] / df['Room_Size_SqFt']


def recommend_acs(df, user_budget_max, user_room_size_sqft, user_brand_priority=None, num_recommendations=5):
    filtered_df = df.copy()

    if user_brand_priority:
        filtered_df = filtered_df[filtered_df['Brand'].str.contains(user_brand_priority, case=False, na=False)]

    filtered_df = filtered_df[
        (filtered_df['Final_Price'] <= user_budget_max) &
        (filtered_df['Room_Size_SqFt'] >= user_room_size_sqft)
    ]

    if filtered_df.empty:
        return pd.DataFrame()

    scaler = MinMaxScaler()
    filtered_df['Norm_Rating'] = scaler.fit_transform(filtered_df[['Ratings']])
    filtered_df['Norm_Discount'] = scaler.fit_transform(filtered_df[['Discount']])
    filtered_df['Norm_Efficiency'] = 1 - scaler.fit_transform(filtered_df[['Electricity_Consumption_per_SqFt']])

    filtered_df['Relevance_Score'] = (
        0.4 * filtered_df['Norm_Rating'] +
        0.3 * filtered_df['Norm_Discount'] +
        0.3 * filtered_df['Norm_Efficiency']
    )

    top_df = filtered_df.sort_values(by='Relevance_Score', ascending=False).head(num_recommendations)

    return top_df[[
        'Name', 'Brand', 'Ratings', 'Final_Price', 'Room_Size_SqFt', 'Relevance_Score'
    ]]


def recommend_acs_gradio(budget, room_size, brand, n_recommendations):
    brand = brand.strip() if brand else None
    results = recommend_acs(df, budget, room_size, brand, n_recommendations)

    if results.empty:
        return "😔 No matching ACs found."
    else:
        return results.reset_index(drop=True)


inputs = [
    gr.Number(label="Max Budget ₹", value=40000),
    gr.Number(label="Room Size (sq.ft)", value=120),
    gr.Textbox(label="Preferred Brand (optional)", placeholder="e.g. LG"),
    gr.Slider(minimum=1, maximum=10, step=1, value=5, label="No. of Recommendations")
]

outputs = gr.Dataframe(label="Top ACs")

demo = gr.Interface(
    fn=recommend_acs_gradio,
    inputs=inputs,
    outputs=outputs,
    title=" AC Recommender",
    description="Enter your budget, room size, and brand (optional) to get the best ACs!"
)

demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3388d2323f612a9ee3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [55]:
interface.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d38d670d65a5b3279c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


