In [4]:
# Cell 1: Import all required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Set up plotting style
plt.style.use('default')
%matplotlib inline

print("‚úÖ Libraries imported successfully!")

‚úÖ Libraries imported successfully!


In [5]:
# Cell 2: Load the dataset
try:
    df = pd.read_csv('../data/cars_ds_final.csv')
    print("‚úÖ Data loaded successfully!")
    print(f"Dataset shape: {df.shape}")
    print(f"Number of columns: {len(df.columns)}")
    
    # Display first few rows
    print("\nüìä First 5 rows:")
    display(df.head())
    
except FileNotFoundError:
    print("‚ùå File not found! Check the file path.")
    print("Current working directory files:")
    import os
    print(os.listdir('.'))

‚úÖ Data loaded successfully!
Dataset shape: (1276, 141)
Number of columns: 141

üìä First 5 rows:


Unnamed: 0.1,Unnamed: 0,Make,Model,Variant,Ex-Showroom_Price,Displacement,Cylinders,Valves_Per_Cylinder,Drivetrain,Cylinder_Configuration,...,Leather_Wrapped_Steering,Automatic_Headlamps,Engine_Type,ASR_/_Traction_Control,Cruise_Control,USB_Ports,Heads-Up_Display,Welcome_Lights,Battery,Electric_Range
0,0,Tata,Nano Genx,Xt,"Rs. 2,92,667",624 cc,2.0,2.0,RWD (Rear Wheel Drive),In-line,...,,,,,,,,,,
1,1,Tata,Nano Genx,Xe,"Rs. 2,36,447",624 cc,2.0,2.0,RWD (Rear Wheel Drive),In-line,...,,,,,,,,,,
2,2,Tata,Nano Genx,Emax Xm,"Rs. 2,96,661",624 cc,2.0,2.0,RWD (Rear Wheel Drive),In-line,...,,,,,,,,,,
3,3,Tata,Nano Genx,Xta,"Rs. 3,34,768",624 cc,2.0,2.0,RWD (Rear Wheel Drive),In-line,...,,,,,,,,,,
4,4,Tata,Nano Genx,Xm,"Rs. 2,72,223",624 cc,2.0,2.0,RWD (Rear Wheel Drive),In-line,...,,,,,,,,,,


In [6]:
# Cell 3: Get basic information about the dataset
print("üìã Dataset Information:")
print("=" * 50)
df.info()

print("\n\nüîç Missing Values Summary:")
print("=" * 50)
missing_data = df.isnull().sum()
missing_percent = (missing_data / len(df)) * 100
missing_summary = pd.DataFrame({
    'Missing Count': missing_data,
    'Missing %': missing_percent
})
# Show only columns with missing values
display(missing_summary[missing_summary['Missing Count'] > 0].sort_values('Missing %', ascending=False))

üìã Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1276 entries, 0 to 1275
Columns: 141 entries, Unnamed: 0 to Electric_Range
dtypes: float64(6), int64(1), object(134)
memory usage: 1.4+ MB


üîç Missing Values Summary:


Unnamed: 0,Missing Count,Missing %
Other_Specs,1266,99.216301
Recommended_Tyre_Pressure,1266,99.216301
Battery,1263,98.981191
Electric_Range,1259,98.667712
ARAI_Certified_Mileage_for_CNG,1249,97.884013
...,...,...
Body_Type,6,0.470219
Doors,4,0.313480
Torque,2,0.156740
Height,1,0.078370


In [7]:
# Cell 4: Quick Data Preview
print("üéØ Key Columns Preview:")
print("=" * 50)

key_columns = ['Make', 'Model', 'Variant', 'Ex-Showroom_Price', 'Fuel_Type', 'Body_Type', 'Displacement']
display(df[key_columns].head(10))

print(f"\nüè∑Ô∏è Unique Car Makes: {df['Make'].nunique()}")
print(f"üöó Unique Models: {df['Model'].nunique()}")
print(f"üîß Body Types: {df['Body_Type'].unique().tolist()}")
print(f"‚õΩ Fuel Types: {df['Fuel_Type'].unique().tolist()}")

üéØ Key Columns Preview:


Unnamed: 0,Make,Model,Variant,Ex-Showroom_Price,Fuel_Type,Body_Type,Displacement
0,Tata,Nano Genx,Xt,"Rs. 2,92,667",Petrol,Hatchback,624 cc
1,Tata,Nano Genx,Xe,"Rs. 2,36,447",Petrol,Hatchback,624 cc
2,Tata,Nano Genx,Emax Xm,"Rs. 2,96,661",CNG,Hatchback,624 cc
3,Tata,Nano Genx,Xta,"Rs. 3,34,768",Petrol,Hatchback,624 cc
4,Tata,Nano Genx,Xm,"Rs. 2,72,223",Petrol,Hatchback,624 cc
5,Tata,Nano Genx,Xma,"Rs. 3,14,815",Petrol,Hatchback,624 cc
6,Datsun,Redi-Go,D,"Rs. 2,79,650",Petrol,Hatchback,799 cc
7,Datsun,Redi-Go,T,"Rs. 3,51,832",Petrol,Hatchback,799 cc
8,Datsun,Redi-Go,A,"Rs. 3,33,419",Petrol,Hatchback,799 cc
9,Datsun,Redi-Go,S,"Rs. 3,62,000",Petrol,Hatchback,799 cc



üè∑Ô∏è Unique Car Makes: 39
üöó Unique Models: 263
üîß Body Types: ['Hatchback', 'MPV', 'MUV', 'SUV', 'Sedan', 'Crossover', nan, 'Coupe', 'Convertible', 'Sports, Hatchback', 'Sedan, Coupe', 'Sports', 'Crossover, SUV', 'SUV, Crossover', 'Sedan, Crossover', 'Sports, Convertible', 'Pick-up', 'Coupe, Convertible']
‚õΩ Fuel Types: ['Petrol', 'CNG', 'Diesel', 'CNG + Petrol', 'Hybrid', 'Electric']


In [8]:
# Cell 5: Clean the Price Column
print("üí∞ Cleaning Price Column...")

def clean_price(price_str):
    if pd.isna(price_str):
        return np.nan
    # Remove 'Rs. ' and commas, then convert to float
    cleaned = str(price_str).replace('Rs. ', '').replace(',', '')
    try:
        return float(cleaned)
    except:
        return np.nan

df['Price_Cleaned'] = df['Ex-Showroom_Price'].apply(clean_price)

print(f"‚úÖ Price cleaning complete!")
print(f"Price range: ‚Çπ{df['Price_Cleaned'].min():,.0f} - ‚Çπ{df['Price_Cleaned'].max():,.0f}")
print(f"Missing prices: {df['Price_Cleaned'].isnull().sum()}")

üí∞ Cleaning Price Column...
‚úÖ Price cleaning complete!
Price range: ‚Çπ236,447 - ‚Çπ212,155,397
Missing prices: 0


In [9]:
# Cell 6: Clean Mileage Column
print("‚õΩ Cleaning Mileage Column...")

def clean_mileage(mileage_str):
    if pd.isna(mileage_str) or mileage_str == '?':
        return np.nan
    # Extract numbers from strings like "?23.6 km/litre" or "23.6 km/litre"
    if isinstance(mileage_str, str):
        import re
        numbers = re.findall(r'\d+\.?\d*', str(mileage_str))
        if numbers:
            return float(numbers[0])
    return np.nan

df['Mileage_Cleaned'] = df['ARAI_Certified_Mileage'].apply(clean_mileage)

print(f"‚úÖ Mileage cleaning complete!")
print(f"Mileage range: {df['Mileage_Cleaned'].min()} - {df['Mileage_Cleaned'].max()} km/l")
print(f"Missing mileage: {df['Mileage_Cleaned'].isnull().sum()}")

‚õΩ Cleaning Mileage Column...
‚úÖ Mileage cleaning complete!
Mileage range: 3.4 - 1449.0 km/l
Missing mileage: 114


In [10]:
# Cell 7: Clean Displacement Column
print("üîß Cleaning Displacement Column...")

def clean_displacement(disp_str):
    if pd.isna(disp_str):
        return np.nan
    # Extract numbers from "624 cc"
    if isinstance(disp_str, str):
        import re
        numbers = re.findall(r'\d+', str(disp_str))
        if numbers:
            return int(numbers[0])
    return np.nan

df['Displacement_Cleaned'] = df['Displacement'].apply(clean_displacement)

print(f"‚úÖ Displacement cleaning complete!")
print(f"Displacement range: {df['Displacement_Cleaned'].min()} - {df['Displacement_Cleaned'].max()} cc")
print(f"Missing displacement: {df['Displacement_Cleaned'].isnull().sum()}")

üîß Cleaning Displacement Column...
‚úÖ Displacement cleaning complete!
Displacement range: 72.0 - 7993.0 cc
Missing displacement: 12


In [11]:
# Debug: Find the correct column names
print("üîç Searching for relevant columns...")

# Find columns related to AC
ac_columns = [col for col in df.columns if 'air' in col.lower() or 'ac' in col.lower() or 'condition' in col.lower()]
print("AC-related columns:", ac_columns)

# Find columns related to ABS
abs_columns = [col for col in df.columns if 'abs' in col.lower()]
print("ABS columns:", abs_columns)

# Find columns related to Power Steering
power_columns = [col for col in df.columns if 'power' in col.lower() and 'steering' in col.lower()]
print("Power Steering columns:", power_columns)

# Find columns related to Power Windows
windows_columns = [col for col in df.columns if 'window' in col.lower()]
print("Windows columns:", windows_columns)

# Find airbag columns
airbag_columns = [col for col in df.columns if 'airbag' in col.lower()]
print("Airbag columns:", airbag_columns)

üîç Searching for relevant columns...
AC-related columns: ['Displacement', 'Fuel_Tank_Capacity', 'Front_Track', 'Rear_Track', 'Tachometer', 'Seating_Capacity', 'Boot_Space', 'Third_Row_AC_Vents', 'Seat_Back_Pockets', 'Airbags', 'Number_of_Airbags', 'Second_Row_AC_Vents', 'ASR_/_Traction_Control', 'Displacement_Cleaned']
ABS columns: ['ABS_(Anti-lock_Braking_System)']
Power Steering columns: ['Power_Steering']
Windows columns: ['Power_Windows']
Airbag columns: ['Airbags', 'Number_of_Airbags']


In [12]:
# Cell 8: Create Binary Features for Key Amenities (CORRECTED VERSION)
print("üéõÔ∏è Creating Binary Feature Columns...")

# Convert Yes/No columns to 1/0
def yes_no_to_binary(value):
    if value == 'Yes':
        return 1
    else:
        return 0

# Important features for recommendations - using exact column names from your dataset
# Note: We check if columns exist before creating them to avoid duplicates

if 'ABS_(Anti-lock_Braking_System)' in df.columns and 'Has_ABS' not in df.columns:
    df['Has_ABS'] = df['ABS_(Anti-lock_Braking_System)'].apply(yes_no_to_binary)

if 'Power_Steering' in df.columns and 'Has_Power_Steering' not in df.columns:
    df['Has_Power_Steering'] = df['Power_Steering'].apply(yes_no_to_binary)

if 'Power_Windows' in df.columns and 'Has_Power_Windows' not in df.columns:
    df['Has_Power_Windows'] = df['Power_Windows'].apply(yes_no_to_binary)

# For Airbags - we'll use the Number_of_Airbags column
if 'Number_of_Airbags' in df.columns and 'Has_Airbags' not in df.columns:
    df['Has_Airbags'] = df['Number_of_Airbags'].apply(lambda x: 1 if pd.notna(x) and float(x) > 0 else 0)

# For AC - we need to find the right column. Let's check what ventilation/AC columns exist
ac_related = [col for col in df.columns if 'vent' in col.lower() or 'ac' in col.lower() or 'climate' in col.lower()]
print("Potential AC columns:", ac_related)

# Let's use a simple approach - check if car has any AC vents
if 'Third_Row_AC_Vents' in df.columns and 'Has_AC' not in df.columns:
    df['Has_AC'] = df['Third_Row_AC_Vents'].apply(lambda x: 1 if pd.notna(x) and x != 'Not Applicable' else 0)
elif 'Second_Row_AC_Vents' in df.columns and 'Has_AC' not in df.columns:
    df['Has_AC'] = df['Second_Row_AC_Vents'].apply(lambda x: 1 if pd.notna(x) and x != 'Not Applicable' else 0)
else:
    # If no specific AC column, assume all cars have basic AC
    df['Has_AC'] = 1

print("‚úÖ Binary features created:")
print(f"  - Cars with ABS: {df['Has_ABS'].sum()} ({df['Has_ABS'].mean()*100:.1f}%)")
print(f"  - Cars with Power Steering: {df['Has_Power_Steering'].sum()} ({df['Has_Power_Steering'].mean()*100:.1f}%)")
print(f"  - Cars with Power Windows: {df['Has_Power_Windows'].sum()} ({df['Has_Power_Windows'].mean()*100:.1f}%)")
print(f"  - Cars with Airbags: {df['Has_Airbags'].sum()} ({df['Has_Airbags'].mean()*100:.1f}%)")
print(f"  - Cars with AC: {df['Has_AC'].sum()} ({df['Has_AC'].mean()*100:.1f}%)")

üéõÔ∏è Creating Binary Feature Columns...
Potential AC columns: ['Displacement', 'Fuel_Tank_Capacity', 'Front_Track', 'Rear_Track', 'Tachometer', 'Seating_Capacity', 'Boot_Space', 'Third_Row_AC_Vents', 'Ventilation_System', 'Seat_Back_Pockets', 'Second_Row_AC_Vents', 'ASR_/_Traction_Control', 'Displacement_Cleaned']
‚úÖ Binary features created:
  - Cars with ABS: 1144 (89.7%)
  - Cars with Power Steering: 85 (6.7%)
  - Cars with Power Windows: 0 (0.0%)
  - Cars with Airbags: 1141 (89.4%)
  - Cars with AC: 123 (9.6%)


In [13]:
# Debug Cell: Check actual values in these columns
print("üîç Debugging Column Values:")
print("=" * 50)

debug_columns = ['Power_Steering', 'Power_Windows', 'Third_Row_AC_Vents', 'Second_Row_AC_Vents']

for col in debug_columns:
    if col in df.columns:
        print(f"\n{col}:")
        print(df[col].value_counts().head(10))  # Show top 10 values
    else:
        print(f"\n{col}: Column not found")

üîç Debugging Column Values:

Power_Steering:
Power_Steering
Electric Power                     924
Electro-Hydraulic                  137
Yes                                 85
Hydraulic Power                     72
Electric Power, Hydraulic Power      1
Name: count, dtype: int64

Power_Windows:
Power_Windows
All Windows           1035
Only Front Windows     144
Name: count, dtype: int64

Third_Row_AC_Vents:
Third_Row_AC_Vents
Not Applicable    787
Yes               123
Name: count, dtype: int64

Second_Row_AC_Vents:
Second_Row_AC_Vents
Yes    674
Name: count, dtype: int64


In [14]:
# Cell 8 CORRECTED: Create Binary Features (Based on Actual Data)
print("üéõÔ∏è Creating Binary Feature Columns (Corrected)...")

# Create binary features based on actual column values
binary_features = {}

# Power Steering - Consider any non-null value as having power steering
if 'Power_Steering' in df.columns:
    binary_features['Has_Power_Steering'] = df['Power_Steering'].apply(
        lambda x: 1 if pd.notna(x) and str(x).strip() != '' else 0
    )

# Power Windows - Consider any value as having power windows
if 'Power_Windows' in df.columns:
    binary_features['Has_Power_Windows'] = df['Power_Windows'].apply(
        lambda x: 1 if pd.notna(x) and str(x).strip() != '' else 0
    )

# ABS - Already working correctly (89.7%)
if 'ABS_(Anti-lock_Braking_System)' in df.columns:
    binary_features['Has_ABS'] = df['ABS_(Anti-lock_Braking_System)'].apply(
        lambda x: 1 if x == 'Yes' else 0
    )

# Airbags - Already working correctly (89.7%)
if 'Number_of_Airbags' in df.columns:
    binary_features['Has_Airbags'] = df['Number_of_Airbags'].apply(
        lambda x: 1 if pd.notna(x) and float(x) > 0 else 0
    )

# AC - Use Second_Row_AC_Vents as primary indicator (more common)
if 'Second_Row_AC_Vents' in df.columns:
    binary_features['Has_AC'] = df['Second_Row_AC_Vents'].apply(
        lambda x: 1 if pd.notna(x) and str(x).strip() != '' else 0
    )
elif 'Third_Row_AC_Vents' in df.columns:
    binary_features['Has_AC'] = df['Third_Row_AC_Vents'].apply(
        lambda x: 1 if pd.notna(x) and x != 'Not Applicable' else 0
    )
else:
    binary_features['Has_AC'] = 1  # Fallback

# Add all binary features to dataframe
for feature_name, feature_values in binary_features.items():
    df[feature_name] = feature_values

print("‚úÖ Corrected binary features created:")
for feature in ['Has_ABS', 'Has_Power_Steering', 'Has_Power_Windows', 'Has_Airbags', 'Has_AC']:
    if feature in df.columns:
        count = df[feature].sum()
        percent = (count / len(df)) * 100
        print(f"  - {feature}: {count} cars ({percent:.1f}%)")

üéõÔ∏è Creating Binary Feature Columns (Corrected)...
‚úÖ Corrected binary features created:
  - Has_ABS: 1144 cars (89.7%)
  - Has_Power_Steering: 1219 cars (95.5%)
  - Has_Power_Windows: 1179 cars (92.4%)
  - Has_Airbags: 1141 cars (89.4%)
  - Has_AC: 674 cars (52.8%)


In [15]:
# Cell 9: Verify Our Cleaned Data
print("üîç Verifying Cleaned Data")
print("=" * 50)

cleaned_columns = ['Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned', 
                  'Has_ABS', 'Has_Power_Steering', 'Has_Power_Windows', 
                  'Has_Airbags', 'Has_AC']

# Show summary of cleaned data
print("üìä Cleaned Data Summary:")
cleaned_summary = df[cleaned_columns].describe()
display(cleaned_summary)

print(f"\nüéØ Sample of 5 Recommended Cars (showing cleaned features):")
sample_display = df[['Make', 'Model', 'Price_Cleaned', 'Mileage_Cleaned', 
                    'Has_ABS', 'Has_AC', 'Has_Power_Steering']].head(5)
display(sample_display)

print(f"\n‚úÖ Data Cleaning Phase Complete!")
print(f"We now have {len(cleaned_columns)} clean features ready for our recommendation engine!")

üîç Verifying Cleaned Data
üìä Cleaned Data Summary:


Unnamed: 0,Price_Cleaned,Mileage_Cleaned,Displacement_Cleaned,Has_ABS,Has_Power_Steering,Has_Power_Windows,Has_Airbags,Has_AC
count,1276.0,1162.0,1264.0,1276.0,1276.0,1276.0,1276.0,1276.0
mean,4596538.0,19.902246,1858.804589,0.896552,0.955329,0.923981,0.894201,0.528213
std,12147350.0,42.839322,1063.152389,0.304663,0.206661,0.265132,0.307701,0.499399
min,236447.0,3.4,72.0,0.0,0.0,0.0,0.0,0.0
25%,743876.0,15.6,1198.0,1.0,1.0,1.0,1.0,0.0
50%,1060064.0,18.19,1497.0,1.0,1.0,1.0,1.0,1.0
75%,2979828.0,21.5,1998.0,1.0,1.0,1.0,1.0,1.0
max,212155400.0,1449.0,7993.0,1.0,1.0,1.0,1.0,1.0



üéØ Sample of 5 Recommended Cars (showing cleaned features):


Unnamed: 0,Make,Model,Price_Cleaned,Mileage_Cleaned,Has_ABS,Has_AC,Has_Power_Steering
0,Tata,Nano Genx,292667.0,23.6,0,0,1
1,Tata,Nano Genx,236447.0,23.6,0,0,0
2,Tata,Nano Genx,296661.0,,0,0,1
3,Tata,Nano Genx,334768.0,21.9,0,0,1
4,Tata,Nano Genx,272223.0,23.6,0,0,1



‚úÖ Data Cleaning Phase Complete!
We now have 8 clean features ready for our recommendation engine!


In [16]:
# Cell 10: Prepare Features for Recommendation Engine
print("ü§ñ Preparing Features for Recommendation Engine...")

# Select our cleaned features for the model
feature_columns = [
    'Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned',
    'Has_ABS', 'Has_Power_Steering', 'Has_Power_Windows', 
    'Has_Airbags', 'Has_AC'
]

# Create feature matrix
feature_df = df[feature_columns].copy()

# Fill missing values (mileage has 114 missing)
feature_df['Mileage_Cleaned'].fillna(feature_df['Mileage_Cleaned'].median(), inplace=True)
feature_df['Displacement_Cleaned'].fillna(feature_df['Displacement_Cleaned'].median(), inplace=True)

print(f"‚úÖ Feature matrix prepared with {len(feature_columns)} features")
print(f"Feature matrix shape: {feature_df.shape}")
print(f"Missing values in each feature:")
print(feature_df.isnull().sum())

ü§ñ Preparing Features for Recommendation Engine...
‚úÖ Feature matrix prepared with 8 features
Feature matrix shape: (1276, 8)
Missing values in each feature:
Price_Cleaned           0
Mileage_Cleaned         0
Displacement_Cleaned    0
Has_ABS                 0
Has_Power_Steering      0
Has_Power_Windows       0
Has_Airbags             0
Has_AC                  0
dtype: int64


In [17]:
# Cell 11: Build the K-Nearest Neighbors Model
print("üîß Building Recommendation Model...")

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Scale the features (very important for distance-based algorithms)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_df)

# Build the KNN model
knn_model = NearestNeighbors(n_neighbors=5, metric='cosine')
knn_model.fit(scaled_features)

print("‚úÖ K-Nearest Neighbors model trained!")
print(f"Model configured to find 5 most similar cars")
print(f"Using cosine similarity metric")

üîß Building Recommendation Model...
‚úÖ K-Nearest Neighbors model trained!
Model configured to find 5 most similar cars
Using cosine similarity metric


In [18]:
# Cell 12 FIXED: Create Recommendation Function (with NaN handling)
print("üéØ Creating Recommendation Function...")

def recommend_cars(user_preferences, n_recommendations=5, max_price=None):
    """
    Recommend cars based on user preferences
    
    Parameters:
    user_preferences: list of 8 values matching feature_columns order
    [Price, Mileage, Displacement, ABS, Power_Steering, Power_Windows, Airbags, AC]
    
    n_recommendations: number of cars to recommend
    max_price: optional price filter
    """
    
    # Apply price filter if specified
    if max_price:
        filtered_df = df[df['Price_Cleaned'] <= max_price].copy()
        
        if len(filtered_df) == 0:
            print("‚ùå No cars found within your budget. Try increasing your budget.")
            return None
        
        # Prepare filtered features - handle any potential NaN values
        filtered_features = filtered_df[feature_columns].copy()
        
        # Fill any remaining NaN values (safety check)
        for col in ['Mileage_Cleaned', 'Displacement_Cleaned']:
            filtered_features[col].fillna(filtered_features[col].median(), inplace=True)
        
        # Scale the filtered features
        filtered_features_scaled = scaler.transform(filtered_features)
            
        # Build model on filtered data
        knn_filtered = NearestNeighbors(n_neighbors=min(n_recommendations, len(filtered_df)), metric='cosine')
        knn_filtered.fit(filtered_features_scaled)
        
        user_vector = scaler.transform([user_preferences])
        distances, indices = knn_filtered.kneighbors(user_vector)
        
        recommendations = filtered_df.iloc[indices[0]].copy()
        
    else:
        # Use full dataset (already cleaned)
        user_vector = scaler.transform([user_preferences])
        distances, indices = knn_model.kneighbors(user_vector, n_neighbors=n_recommendations)
        
        recommendations = df.iloc[indices[0]].copy()
    
    # Add similarity score
    recommendations['Similarity_Score'] = 1 - distances[0]
    
    # Display results
    print(f"üéâ Found {len(recommendations)} recommendations:")
    display(recommendations[['Make', 'Model', 'Variant', 'Price_Cleaned', 'Mileage_Cleaned', 
                           'Fuel_Type', 'Body_Type', 'Similarity_Score']].sort_values('Similarity_Score', ascending=False))
    
    return recommendations

print("‚úÖ Recommendation function created!")
print("Ready to test the system! üöÄ")

üéØ Creating Recommendation Function...
‚úÖ Recommendation function created!
Ready to test the system! üöÄ


In [19]:
# Quick Debug: Check how many cars are in our budget range
print("üîç Quick Budget Analysis:")
budget_5L = df[df['Price_Cleaned'] <= 600000]
print(f"Cars under ‚Çπ6L: {len(budget_5L)}")

if len(budget_5L) > 0:
    print("Sample budget cars:")
    display(budget_5L[['Make', 'Model', 'Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned']].head())
    
    # Check for NaN values in budget cars
    print("\nüîé Checking for NaN values in budget cars:")
    nan_check = budget_5L[feature_columns].isnull().sum()
    print(nan_check[nan_check > 0])
else:
    print("No cars found under ‚Çπ6L - let's try a higher budget")

üîç Quick Budget Analysis:
Cars under ‚Çπ6L: 183
Sample budget cars:


Unnamed: 0,Make,Model,Price_Cleaned,Mileage_Cleaned,Displacement_Cleaned
0,Tata,Nano Genx,292667.0,23.6,624.0
1,Tata,Nano Genx,236447.0,23.6,624.0
2,Tata,Nano Genx,296661.0,,624.0
3,Tata,Nano Genx,334768.0,21.9,624.0
4,Tata,Nano Genx,272223.0,23.6,624.0



üîé Checking for NaN values in budget cars:
Mileage_Cleaned    20
dtype: int64


In [20]:
# Cell 13: Test the Recommendation System
print("üß™ Testing the Recommendation System...")
print("=" * 50)

# Test Case 1: Budget-conscious city driver
print("TEST 1: üèôÔ∏è Budget City Driver")
print("Preferences: Budget ‚Çπ5L, Good mileage, Basic features")
user_pref_1 = [
    500000,    # Budget: ‚Çπ5 lakh
    20,        # Good mileage: 20 km/l
    1200,      # Small engine: 1200cc
    1,         # Wants ABS
    1,         # Wants Power Steering  
    1,         # Wants Power Windows
    1,         # Wants Airbags
    1          # Wants AC
]

print("\nüîç Searching for recommendations...")
recommendations_1 = recommend_cars(user_pref_1, max_price=600000)

print("\n" + "="*50)

# Test Case 2: Performance enthusiast
print("\nTEST 2: üèéÔ∏è Performance Enthusiast")
print("Preferences: Higher budget, Powerful engine, Premium features")
user_pref_2 = [
    1500000,   # Budget: ‚Çπ15 lakh
    15,        # Willing to sacrifice mileage for power
    2000,      # Larger engine: 2000cc
    1,         # Wants ABS
    1,         # Wants Power Steering
    1,         # Wants Power Windows
    1,         # Wants Airbags
    1          # Wants AC
]

print("\nüîç Searching for recommendations...")
recommendations_2 = recommend_cars(user_pref_2, max_price=2000000)

üß™ Testing the Recommendation System...
TEST 1: üèôÔ∏è Budget City Driver
Preferences: Budget ‚Çπ5L, Good mileage, Basic features

üîç Searching for recommendations...
üéâ Found 5 recommendations:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type,Body_Type,Similarity_Score
1063,Hyundai,Grand I10 Nios,Magna 1.2 Vtvt,589610.0,20.7,Petrol,Hatchback,0.999899
46,Hyundai,Santro,Era Mt,429990.0,20.3,Petrol,Hatchback,0.997632
47,Hyundai,Santro,Magna Mt,503990.0,20.3,Petrol,Hatchback,0.997594
50,Hyundai,Santro,Sportz Mt,512990.0,20.3,Petrol,Hatchback,0.997588
49,Hyundai,Santro,Magna Amt,530990.0,20.3,Petrol,Hatchback,0.997575




TEST 2: üèéÔ∏è Performance Enthusiast
Preferences: Higher budget, Powerful engine, Premium features

üîç Searching for recommendations...
üéâ Found 5 recommendations:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type,Body_Type,Similarity_Score
1009,Hyundai,Elantra,S,1589000.0,14.6,Petrol,Sedan,0.999945
1010,Hyundai,Elantra,Sx,1849000.0,14.6,Petrol,Sedan,0.999667
1011,Hyundai,Elantra,Sx At,1949000.0,14.6,Petrol,Sedan,0.999472
1137,Tata,Harrier,Revotorq Xm,1471368.0,16.7,Diesel,SUV,0.99873
1139,Tata,Harrier,Revotorq Xz,1721368.0,16.7,Diesel,Hatchback,0.998634


In [21]:
# Cell 14: Test Family SUV Seeker
print("üß™ TEST 3: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV Seeker")
print("Preferences: SUV, 7-seater, Good safety, Spacious")

# Let's first check what body types and seating capacities we have
print("\nüîç Available Body Types:")
print(df['Body_Type'].value_counts())

print("\nüîç Available Seating Capacities:")
if 'Seating_Capacity' in df.columns:
    print(df['Seating_Capacity'].value_counts().head(10))

# Test preferences for family SUV
user_pref_3 = [
    1200000,   # Budget: ‚Çπ12 lakh
    18,        # Reasonable mileage
    1500,      # Medium engine size
    1,         # Must have ABS
    1,         # Must have Power Steering
    1,         # Must have Power Windows
    1,         # Must have Airbags
    1          # Must have AC
]

print(f"\nüîç Searching for family car recommendations...")
recommendations_3 = recommend_cars(user_pref_3, max_price=1500000)

# Let's also check if we can filter by body type manually
print("\nüîç Manual check: SUVs under ‚Çπ15L")
suvs_under_15L = df[
    (df['Body_Type'] == 'SUV') & 
    (df['Price_Cleaned'] <= 1500000)
][['Make', 'Model', 'Variant', 'Price_Cleaned', 'Mileage_Cleaned']]

print(f"Found {len(suvs_under_15L)} SUVs under ‚Çπ15L")
display(suvs_under_15L.head(10))

üß™ TEST 3: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV Seeker
Preferences: SUV, 7-seater, Good safety, Spacious

üîç Available Body Types:
Body_Type
SUV                    447
Sedan                  333
Hatchback              316
Coupe                   41
MUV                     39
MPV                     39
Convertible             20
Crossover               18
Sports                   3
Pick-up                  3
Sports, Convertible      2
Sedan, Coupe             2
Crossover, SUV           2
SUV, Crossover           2
Sports, Hatchback        1
Sedan, Crossover         1
Coupe, Convertible       1
Name: count, dtype: int64

üîç Available Seating Capacities:
Seating_Capacity
5.0     915
7.0     183
4.0      70
2.0      39
6.0      26
9.0      19
8.0      17
16.0      1
Name: count, dtype: int64

üîç Searching for family car recommendations...
üéâ Found 5 recommendations:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type,Body_Type,Similarity_Score
1274,Honda,City,V Cvt Petrol,1201000.0,17.8,Petrol,Sedan,0.999989
1229,Toyota,Yaris,G Cvt,1175000.0,17.8,Petrol,Sedan,0.999986
1268,Honda,City,Vx Cvt Petrol,1312000.0,18.0,Petrol,Sedan,0.999968
1231,Toyota,Yaris,V Cvt,1294000.0,17.8,Petrol,Sedan,0.999966
1266,Honda,City,V Mt Petrol,1065900.0,17.8,Petrol,Sedan,0.999951



üîç Manual check: SUVs under ‚Çπ15L
Found 222 SUVs under ‚Çπ15L


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned
81,Premier,Rio,Gx,506000.0,16.0
82,Premier,Rio,Ex,568623.0,16.0
83,Premier,Rio,Glx,670000.0,16.0
84,Premier,Rio,Dx,603000.0,16.0
85,Premier,Rio,Lx,739000.0,16.0
86,Premier,Rio,Crdi4,739000.0,23.7
440,Icml,Extreme,Ld Di Non Ac 9 Seater Bsiii,642478.0,11.12
441,Icml,Extreme,Ld Crdfi Non Ac 9 Seater Bsiv,589678.0,11.12
442,Icml,Extreme,Ld Di Ps Ac 9 Seater Bsiii,689761.0,11.12
443,Icml,Extreme,Ld Crdfi Ps Ac 9 Seater Bsiii,788177.0,11.12


In [22]:
# Cell 15: Enhance Model with Body Type
print("üöÄ Enhancing Model with Body Type...")

# First, let's clean and standardize Body Types
print("üîß Cleaning Body Types...")

def clean_body_type(body_type):
    if pd.isna(body_type):
        return 'Unknown'
    
    body_type = str(body_type).lower()
    
    # Map to main categories
    if 'suv' in body_type:
        return 'SUV'
    elif 'sedan' in body_type:
        return 'Sedan'
    elif 'hatchback' in body_type:
        return 'Hatchback'
    elif 'muv' in body_type or 'mpv' in body_type:
        return 'MUV'
    elif 'crossover' in body_type:
        return 'Crossover'
    elif 'coupe' in body_type:
        return 'Coupe'
    elif 'convertible' in body_type:
        return 'Convertible'
    elif 'sports' in body_type:
        return 'Sports'
    else:
        return body_type.title()

df['Body_Type_Cleaned'] = df['Body_Type'].apply(clean_body_type)

print("‚úÖ Body Types Cleaned:")
print(df['Body_Type_Cleaned'].value_counts())

üöÄ Enhancing Model with Body Type...
üîß Cleaning Body Types...
‚úÖ Body Types Cleaned:
Body_Type_Cleaned
SUV            451
Sedan          336
Hatchback      317
MUV             78
Coupe           42
Convertible     22
Crossover       18
Unknown          6
Sports           3
Pick-Up          3
Name: count, dtype: int64


In [23]:
# Cell 16: Add Body Type to Our Feature Set
print("üéØ Adding Body Type to Recommendation Features...")

# Convert Body Type to numerical using one-hot encoding
body_type_dummies = pd.get_dummies(df['Body_Type_Cleaned'], prefix='Body')

# Add to our main dataframe
df = pd.concat([df, body_type_dummies], axis=1)

# Update our feature columns to include body types
body_type_columns = [col for col in df.columns if col.startswith('Body_')]

# New enhanced feature set
enhanced_feature_columns = feature_columns + body_type_columns

print(f"‚úÖ Added {len(body_type_columns)} body type features:")
print(body_type_columns)
print(f"Total features now: {len(enhanced_feature_columns)}")

üéØ Adding Body Type to Recommendation Features...
‚úÖ Added 12 body type features:
['Body_Type', 'Body_Type_Cleaned', 'Body_Convertible', 'Body_Coupe', 'Body_Crossover', 'Body_Hatchback', 'Body_MUV', 'Body_Pick-Up', 'Body_SUV', 'Body_Sedan', 'Body_Sports', 'Body_Unknown']
Total features now: 20


In [24]:
# Cell 17: Rebuild Enhanced Recommendation Model
print("üîß Rebuilding Enhanced Model with Body Types...")

# Use only the one-hot encoded body type columns (remove the categorical ones)
clean_body_type_columns = [col for col in body_type_columns if col not in ['Body_Type', 'Body_Type_Cleaned']]

# Update our enhanced feature set
enhanced_feature_columns = feature_columns + clean_body_type_columns

print(f"‚úÖ Using {len(clean_body_type_columns)} clean body type features:")
print(clean_body_type_columns)
print(f"Total features now: {len(enhanced_feature_columns)}")

# Prepare enhanced features
enhanced_feature_df = df[enhanced_feature_columns].copy()

# Fill any missing values
for col in ['Mileage_Cleaned', 'Displacement_Cleaned']:
    enhanced_feature_df[col].fillna(enhanced_feature_df[col].median(), inplace=True)

# Scale enhanced features
enhanced_scaler = StandardScaler()
enhanced_scaled_features = enhanced_scaler.fit_transform(enhanced_feature_df)

# Build enhanced KNN model
enhanced_knn_model = NearestNeighbors(n_neighbors=5, metric='cosine')
enhanced_knn_model.fit(enhanced_scaled_features)

print("‚úÖ Enhanced model trained with body types!")

üîß Rebuilding Enhanced Model with Body Types...
‚úÖ Using 10 clean body type features:
['Body_Convertible', 'Body_Coupe', 'Body_Crossover', 'Body_Hatchback', 'Body_MUV', 'Body_Pick-Up', 'Body_SUV', 'Body_Sedan', 'Body_Sports', 'Body_Unknown']
Total features now: 18
‚úÖ Enhanced model trained with body types!


In [25]:
# Cell 18: Enhanced Recommendation Function with Body Type
print("üéØ Creating Enhanced Recommendation Function...")

def recommend_cars_enhanced(user_preferences, body_type_preference=None, n_recommendations=5, max_price=None):
    """
    Enhanced recommendation with body type preference
    
    user_preferences: list of 8 basic preferences + body type preferences will be added
    body_type_preference: 'SUV', 'Sedan', 'Hatchback', etc.
    """
    
    # Start with basic preferences
    full_preferences = user_preferences.copy()
    
    # Add body type preferences
    if body_type_preference:
        body_pref = [0] * len(clean_body_type_columns)  # Start with zeros
        target_col = f'Body_{body_type_preference}'
        if target_col in clean_body_type_columns:
            idx = clean_body_type_columns.index(target_col)
            body_pref[idx] = 1  # Set preferred body type to 1
        full_preferences.extend(body_pref)
    else:
        # No body type preference - set all to neutral
        full_preferences.extend([0] * len(clean_body_type_columns))
    
    # Apply price filter if specified
    if max_price:
        filtered_df = df[df['Price_Cleaned'] <= max_price].copy()
        
        if len(filtered_df) == 0:
            print("‚ùå No cars found within your budget. Try increasing your budget.")
            return None
        
        # Prepare filtered features
        filtered_features = filtered_df[enhanced_feature_columns].copy()
        
        # Fill any remaining NaN values
        for col in ['Mileage_Cleaned', 'Displacement_Cleaned']:
            filtered_features[col].fillna(filtered_features[col].median(), inplace=True)
        
        # Scale filtered features
        filtered_features_scaled = enhanced_scaler.transform(filtered_features)
            
        # Build model on filtered data
        knn_filtered = NearestNeighbors(n_neighbors=min(n_recommendations, len(filtered_df)), metric='cosine')
        knn_filtered.fit(filtered_features_scaled)
        
        user_vector = enhanced_scaler.transform([full_preferences])
        distances, indices = knn_filtered.kneighbors(user_vector)
        
        recommendations = filtered_df.iloc[indices[0]].copy()
        
    else:
        # Use full dataset
        user_vector = enhanced_scaler.transform([full_preferences])
        distances, indices = enhanced_knn_model.kneighbors(user_vector, n_neighbors=n_recommendations)
        
        recommendations = df.iloc[indices[0]].copy()
    
    # Add similarity score
    recommendations['Similarity_Score'] = 1 - distances[0]
    
    # Display results
    body_type_msg = f" with body type: {body_type_preference}" if body_type_preference else ""
    print(f"üéâ Found {len(recommendations)} recommendations{body_type_msg}:")
    display(recommendations[['Make', 'Model', 'Variant', 'Price_Cleaned', 'Mileage_Cleaned', 
                           'Fuel_Type', 'Body_Type_Cleaned', 'Similarity_Score']].sort_values('Similarity_Score', ascending=False))
    
    return recommendations

print("‚úÖ Enhanced recommendation function created!")

üéØ Creating Enhanced Recommendation Function...
‚úÖ Enhanced recommendation function created!


In [26]:
# Cell 19: Test Enhanced System with Body Types
print("üß™ Testing Enhanced System with Body Types...")
print("=" * 50)

# Test Family SUV Seeker - NOW WITH BODY TYPE PREFERENCE!
print("TEST: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV Seeker (Enhanced)")
print("Preferences: SUV, Good safety, Spacious")

user_pref_family = [
    1200000,   # Budget: ‚Çπ12 lakh
    18,        # Reasonable mileage
    1500,      # Medium engine size
    1,         # Must have ABS
    1,         # Must have Power Steering
    1,         # Must have Power Windows
    1,         # Must have Airbags
    1          # Must have AC
]

print(f"\nüîç Searching for SUV recommendations...")
recommendations_family = recommend_cars_enhanced(
    user_pref_family, 
    body_type_preference='SUV',  # NEW: Specify body type!
    max_price=1500000
)

print("\n" + "="*50)

# Compare with Sedan preference
print("\nTEST: üöó Sedan Lover")
print(f"\nüîç Searching for Sedan recommendations...")
recommendations_sedan = recommend_cars_enhanced(
    user_pref_family, 
    body_type_preference='Sedan',
    max_price=1500000
)

üß™ Testing Enhanced System with Body Types...
TEST: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV Seeker (Enhanced)
Preferences: SUV, Good safety, Spacious

üîç Searching for SUV recommendations...
üéâ Found 5 recommendations with body type: SUV:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type,Body_Type_Cleaned,Similarity_Score
971,Kia,Seltos,Htk Plus 1.5,1149000.0,16.8,Petrol,SUV,0.999892
972,Kia,Seltos,Htx 1.5,1309000.0,16.8,Petrol,SUV,0.999884
970,Kia,Seltos,Htk 1.5,1029000.0,16.8,Petrol,SUV,0.999871
969,Kia,Seltos,Hte 1.5,989000.0,16.8,Petrol,SUV,0.999859
973,Kia,Seltos,Htx Cvt 1.5,1409000.0,16.8,Petrol,SUV,0.999858




TEST: üöó Sedan Lover

üîç Searching for Sedan recommendations...
üéâ Found 5 recommendations with body type: Sedan:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type,Body_Type_Cleaned,Similarity_Score
1274,Honda,City,V Cvt Petrol,1201000.0,17.8,Petrol,Sedan,0.999997
1229,Toyota,Yaris,G Cvt,1175000.0,17.8,Petrol,Sedan,0.999996
1268,Honda,City,Vx Cvt Petrol,1312000.0,18.0,Petrol,Sedan,0.999991
1231,Toyota,Yaris,V Cvt,1294000.0,17.8,Petrol,Sedan,0.999991
1266,Honda,City,V Mt Petrol,1065900.0,17.8,Petrol,Sedan,0.999986


In [27]:
# Cell 20: Test Mixed and No Body Type Preferences
print("üß™ TEST: Mixed Preferences")
print("=" * 50)

# Test 1: No body type preference (should get mix)
print("TEST 1: üéØ No Body Type Preference")
print("Preferences: Let AI decide the best body type")

user_pref_general = [
    1000000,   # Budget: ‚Çπ10 lakh
    19,        # Good mileage
    1400,      # Medium engine
    1, 1, 1, 1, 1  # All features wanted
]

print(f"\nüîç Searching for best overall recommendations...")
recommendations_general = recommend_cars_enhanced(
    user_pref_general, 
    body_type_preference=None,  # No preference - let AI decide
    max_price=1200000
)

print("\n" + "="*50)

# Test 2: Hatchback enthusiast
print("\nTEST 2: üöô Hatchback Enthusiast")
print(f"\nüîç Searching for Hatchback recommendations...")
recommendations_hatchback = recommend_cars_enhanced(
    user_pref_general, 
    body_type_preference='Hatchback',
    max_price=1200000
)

üß™ TEST: Mixed Preferences
TEST 1: üéØ No Body Type Preference
Preferences: Let AI decide the best body type

üîç Searching for best overall recommendations...
üéâ Found 5 recommendations:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type,Body_Type_Cleaned,Similarity_Score
147,Volkswagen,Polo,Highline Plus 1.0 (P),776500.0,18.78,Petrol,Hatchback,0.41462
491,Hyundai,Venue,1.0 Turbo Gdi Mt S,826000.0,18.27,Petrol,SUV,0.414356
496,Hyundai,Venue,1.0 Turbo Gdi Dct S,940000.0,18.15,Petrol,SUV,0.413861
493,Hyundai,Venue,1.0 Turbo Gdi Mt Sx,959000.0,18.27,Petrol,SUV,0.413772
494,Hyundai,Venue,1.0 Turbo Gdi Mt Sx Dual Tone,974000.0,18.27,Petrol,SUV,0.413705




TEST 2: üöô Hatchback Enthusiast

üîç Searching for Hatchback recommendations...
üéâ Found 5 recommendations with body type: Hatchback:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type,Body_Type_Cleaned,Similarity_Score
550,Hyundai,I20 Active,1.4 Sx,993393.0,21.19,Diesel,Hatchback,0.999742
1117,Fiat,Abarth Punto,1.4 T-Jet,980970.0,16.3,Petrol,Hatchback,0.999535
128,Hyundai,Elite I20,Asta (O) Crdi,934003.0,22.54,Diesel,Hatchback,0.999327
126,Hyundai,Elite I20,Sportz Plus Crdi Dual Tone,876103.0,22.54,Diesel,Hatchback,0.999321
132,Hyundai,Elite I20,Sportz Plus Crdi,846103.0,22.54,Diesel,Hatchback,0.999316


In [28]:
# Cell 21: Clean Transmission Data
print("‚öôÔ∏è Cleaning Transmission Data...")

# First, let's see what transmission values we have
if 'Type' in df.columns:
    print("Current Transmission Types:")
    print(df['Type'].value_counts())
    
    # Clean and standardize transmission types
    def clean_transmission(trans_type):
        if pd.isna(trans_type):
            return 'Unknown'
        trans_str = str(trans_type).lower()
        
        if 'automatic' in trans_str or 'amt' in trans_str or 'cvt' in trans_str or 'dsg' in trans_str:
            return 'Automatic'
        elif 'manual' in trans_str:
            return 'Manual'
        else:
            return 'Unknown'
    
    df['Transmission_Cleaned'] = df['Type'].apply(clean_transmission)
    
    print("\n‚úÖ Cleaned Transmission Types:")
    print(df['Transmission_Cleaned'].value_counts())
else:
    print("‚ùå 'Type' column not found - checking alternative transmission columns")
    trans_columns = [col for col in df.columns if 'transmission' in col.lower() or 'gear' in col.lower()]
    print("Alternative transmission columns:", trans_columns)

‚öôÔ∏è Cleaning Transmission Data...
Current Transmission Types:
Type
Manual       725
Automatic    522
AMT           18
DCT            7
CVT            3
Name: count, dtype: int64

‚úÖ Cleaned Transmission Types:
Transmission_Cleaned
Manual       725
Automatic    543
Unknown        8
Name: count, dtype: int64


In [29]:
# Cell 22: Clean Fuel Type Data
print("‚õΩ Cleaning Fuel Type Data...")

# Clean and standardize fuel types
def clean_fuel_type(fuel_type):
    if pd.isna(fuel_type):
        return 'Unknown'
    fuel_str = str(fuel_type).lower()
    
    if 'electric' in fuel_str:
        return 'Electric'
    elif 'petrol' in fuel_str and 'cng' in fuel_str:
        return 'CNG_Petrol'
    elif 'petrol' in fuel_str:
        return 'Petrol'
    elif 'diesel' in fuel_str:
        return 'Diesel'
    elif 'cng' in fuel_str:
        return 'CNG'
    elif 'hybrid' in fuel_str:
        return 'Hybrid'
    else:
        return 'Unknown'

df['Fuel_Type_Cleaned'] = df['Fuel_Type'].apply(clean_fuel_type)

print("‚úÖ Cleaned Fuel Types:")
print(df['Fuel_Type_Cleaned'].value_counts())

‚õΩ Cleaning Fuel Type Data...
‚úÖ Cleaned Fuel Types:
Fuel_Type_Cleaned
Petrol        643
Diesel        582
CNG            16
Hybrid         15
Electric       14
CNG_Petrol      6
Name: count, dtype: int64


In [30]:
# Cell 23: Implement Smart Transmission Logic
print("ü§ñ Implementing Smart Transmission Logic...")

# Create smart transmission that auto-sets electric to automatic
def get_smart_transmission(fuel_type, transmission):
    """
    Smart logic: Electric cars are always automatic
    Other fuel types use the specified transmission
    """
    if fuel_type == 'Electric':
        return 'Automatic'  # Override for electric cars
    else:
        return transmission  # Use specified transmission for others

# Apply to our dataset for consistency
df['Smart_Transmission'] = df.apply(
    lambda x: get_smart_transmission(x['Fuel_Type_Cleaned'], x['Transmission_Cleaned']),
    axis=1
)

print("‚úÖ Smart Transmission Applied:")
print(df[['Fuel_Type_Cleaned', 'Smart_Transmission']].value_counts().head(10))

ü§ñ Implementing Smart Transmission Logic...
‚úÖ Smart Transmission Applied:
Fuel_Type_Cleaned  Smart_Transmission
Diesel             Manual                367
Petrol             Manual                333
                   Automatic             302
Diesel             Automatic             215
CNG                Manual                 16
Electric           Automatic              14
Hybrid             Automatic              12
Petrol             Unknown                 8
CNG_Petrol         Manual                  6
Hybrid             Manual                  3
Name: count, dtype: int64


In [31]:
# Cell 24: Add Transmission & Fuel Type to Feature Set
print("üéØ Adding Transmission & Fuel Type to Features...")

# Convert to binary features
transmission_dummies = pd.get_dummies(df['Smart_Transmission'], prefix='Transmission')
fuel_type_dummies = pd.get_dummies(df['Fuel_Type_Cleaned'], prefix='Fuel')

# Add to dataframe
df = pd.concat([df, transmission_dummies, fuel_type_dummies], axis=1)

# Get the new feature columns
transmission_columns = [col for col in df.columns if col.startswith('Transmission_')]
fuel_columns = [col for col in df.columns if col.startswith('Fuel_')]

print(f"‚úÖ Added {len(transmission_columns)} transmission features:")
print(transmission_columns)
print(f"‚úÖ Added {len(fuel_columns)} fuel type features:")
print(fuel_columns)

# Update enhanced feature set
enhanced_feature_columns_v2 = enhanced_feature_columns + transmission_columns + fuel_columns
print(f"üéØ Total features now: {len(enhanced_feature_columns_v2)}")

üéØ Adding Transmission & Fuel Type to Features...
‚úÖ Added 4 transmission features:
['Transmission_Cleaned', 'Transmission_Automatic', 'Transmission_Manual', 'Transmission_Unknown']
‚úÖ Added 11 fuel type features:
['Fuel_System', 'Fuel_Tank_Capacity', 'Fuel_Type', 'Fuel_Gauge', 'Fuel_Type_Cleaned', 'Fuel_CNG', 'Fuel_CNG_Petrol', 'Fuel_Diesel', 'Fuel_Electric', 'Fuel_Hybrid', 'Fuel_Petrol']
üéØ Total features now: 33


In [32]:
# Debug Cell: Check What Columns Actually Exist
print("üîç Debug: Checking Available Columns...")

# Check which columns from enhanced_feature_columns_v2 actually exist
existing_columns = []
missing_columns = []

for col in enhanced_feature_columns_v2:
    if col in df.columns:
        existing_columns.append(col)
    else:
        missing_columns.append(col)

print(f"‚úÖ Existing columns: {len(existing_columns)}")
print(f"‚ùå Missing columns: {len(missing_columns)}")
if missing_columns:
    print("Missing:", missing_columns)

# Let's see what transmission and fuel columns we actually have
print("\nüîç Available Transmission Columns:")
trans_cols = [col for col in df.columns if 'transmission' in col.lower() or 'trans' in col.lower()]
print(trans_cols)

print("\nüîç Available Fuel Columns:")
fuel_cols = [col for col in df.columns if 'fuel' in col.lower()]
print(fuel_cols)

print("\nüîç Available Basic Features:")
basic_cols = [col for col in df.columns if any(x in col.lower() for x in ['price', 'mileage', 'displacement', 'abs', 'power', 'airbag', 'ac'])]
print([col for col in basic_cols if 'cleaned' in col.lower() or 'has_' in col.lower()])

üîç Debug: Checking Available Columns...
‚úÖ Existing columns: 33
‚ùå Missing columns: 0

üîç Available Transmission Columns:
['Transmission_Cleaned', 'Smart_Transmission', 'Transmission_Automatic', 'Transmission_Manual', 'Transmission_Unknown']

üîç Available Fuel Columns:

üîç Available Basic Features:
['Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned', 'Has_ABS', 'Has_Power_Steering', 'Has_Power_Windows', 'Has_Airbags', 'Has_AC']


In [33]:
# Cell: Recreate ALL Features (Complete Version)
print("üîÑ Recreating ALL Features from Scratch...")

# 1. Recreate Basic Features
print("üîß Recreating Basic Features...")

# Clean Price (if not already done)
if 'Price_Cleaned' not in df.columns:
    def clean_price(price_str):
        if pd.isna(price_str):
            return np.nan
        cleaned = str(price_str).replace('Rs. ', '').replace(',', '')
        try:
            return float(cleaned)
        except:
            return np.nan
    df['Price_Cleaned'] = df['Ex-Showroom_Price'].apply(clean_price)

# Clean Mileage (if not already done)
if 'Mileage_Cleaned' not in df.columns:
    def clean_mileage(mileage_str):
        if pd.isna(mileage_str) or mileage_str == '?':
            return np.nan
        if isinstance(mileage_str, str):
            import re
            numbers = re.findall(r'\d+\.?\d*', str(mileage_str))
            if numbers:
                return float(numbers[0])
        return np.nan
    df['Mileage_Cleaned'] = df['ARAI_Certified_Mileage'].apply(clean_mileage)

# Clean Displacement (if not already done)
if 'Displacement_Cleaned' not in df.columns:
    def clean_displacement(disp_str):
        if pd.isna(disp_str):
            return np.nan
        if isinstance(disp_str, str):
            import re
            numbers = re.findall(r'\d+', str(disp_str))
            if numbers:
                return int(numbers[0])
        return np.nan
    df['Displacement_Cleaned'] = df['Displacement'].apply(clean_displacement)

# Recreate Binary Features
print("üîß Recreating Binary Features...")

def yes_no_to_binary(value):
    if pd.isna(value):
        return 0
    value_str = str(value).lower().strip()
    if value_str in ['yes', 'y', '1', 'true', 'electric power']:
        return 1
    else:
        return 0

# ABS
if 'Has_ABS' not in df.columns:
    df['Has_ABS'] = df['ABS_(Anti-lock_Braking_System)'].apply(yes_no_to_binary)

# Power Steering
if 'Has_Power_Steering' not in df.columns:
    df['Has_Power_Steering'] = df['Power_Steering'].apply(yes_no_to_binary)

# Power Windows
if 'Has_Power_Windows' not in df.columns:
    df['Has_Power_Windows'] = df['Power_Windows'].apply(yes_no_to_binary)

# Airbags
if 'Has_Airbags' not in df.columns:
    df['Has_Airbags'] = df['Number_of_Airbags'].apply(lambda x: 1 if pd.notna(x) and float(x) > 0 else 0)

# AC
if 'Has_AC' not in df.columns:
    # Use Second_Row_AC_Vents as proxy for AC
    if 'Second_Row_AC_Vents' in df.columns:
        df['Has_AC'] = df['Second_Row_AC_Vents'].apply(lambda x: 1 if pd.notna(x) and str(x).strip() != '' else 0)
    else:
        df['Has_AC'] = 1  # Fallback

# 2. Recreate Body Type Features
print("üîß Recreating Body Type Features...")

def clean_body_type(body_type):
    if pd.isna(body_type):
        return 'Unknown'
    body_type = str(body_type).lower()
    
    if 'suv' in body_type:
        return 'SUV'
    elif 'sedan' in body_type:
        return 'Sedan'
    elif 'hatchback' in body_type:
        return 'Hatchback'
    elif 'muv' in body_type or 'mpv' in body_type:
        return 'MUV'
    elif 'crossover' in body_type:
        return 'Crossover'
    elif 'coupe' in body_type:
        return 'Coupe'
    elif 'convertible' in body_type:
        return 'Convertible'
    elif 'sports' in body_type:
        return 'Sports'
    elif 'pick-up' in body_type or 'pick' in body_type:
        return 'Pick-Up'
    else:
        return 'Unknown'

df['Body_Type_Cleaned'] = df['Body_Type'].apply(clean_body_type)

# Create one-hot encoding for body types
body_type_dummies = pd.get_dummies(df['Body_Type_Cleaned'], prefix='Body')
df = pd.concat([df, body_type_dummies], axis=1)

# 3. Recreate Transmission Features
print("üîß Recreating Transmission Features...")

def clean_transmission(trans_type):
    if pd.isna(trans_type):
        return 'Unknown'
    trans_str = str(trans_type).lower()
    
    if 'automatic' in trans_str or 'amt' in trans_str or 'cvt' in trans_str or 'dsg' in trans_str:
        return 'Automatic'
    elif 'manual' in trans_str:
        return 'Manual'
    else:
        return 'Unknown'

df['Transmission_Cleaned'] = df['Type'].apply(clean_transmission)

# Create one-hot encoding for transmission
transmission_dummies = pd.get_dummies(df['Transmission_Cleaned'], prefix='Transmission')
df = pd.concat([df, transmission_dummies], axis=1)

# 4. Recreate Fuel Type Features
print("üîß Recreating Fuel Type Features...")

def clean_fuel_type(fuel_type):
    if pd.isna(fuel_type):
        return 'Unknown'
    fuel_str = str(fuel_type).lower()
    
    if 'electric' in fuel_str:
        return 'Electric'
    elif 'petrol' in fuel_str and 'cng' in fuel_str:
        return 'CNG_Petrol'
    elif 'petrol' in fuel_str:
        return 'Petrol'
    elif 'diesel' in fuel_str:
        return 'Diesel'
    elif 'cng' in fuel_str:
        return 'CNG'
    elif 'hybrid' in fuel_str:
        return 'Hybrid'
    else:
        return 'Unknown'

df['Fuel_Type_Cleaned'] = df['Fuel_Type'].apply(clean_fuel_type)

# Create one-hot encoding for fuel types
fuel_dummies = pd.get_dummies(df['Fuel_Type_Cleaned'], prefix='Fuel')
df = pd.concat([df, fuel_dummies], axis=1)

# 5. Apply Smart Transmission Logic
print("ü§ñ Applying Smart Transmission Logic...")

def get_smart_transmission(fuel_type, transmission):
    if fuel_type == 'Electric':
        return 'Automatic'
    else:
        return transmission

df['Smart_Transmission'] = df.apply(
    lambda x: get_smart_transmission(x['Fuel_Type_Cleaned'], x['Transmission_Cleaned']),
    axis=1
)

print("‚úÖ ALL features recreated successfully!")

# Verify all features exist
print("\nüîç Final Verification:")
basic_features = ['Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned', 
                  'Has_ABS', 'Has_Power_Steering', 'Has_Power_Windows', 
                  'Has_Airbags', 'Has_AC']
print(f"Basic features: {all(col in df.columns for col in basic_features)}")
print(f"Body type columns: {len([col for col in df.columns if col.startswith('Body_')])}")
print(f"Transmission columns: {len([col for col in df.columns if col.startswith('Transmission_')])}")
print(f"Fuel columns: {len([col for col in df.columns if col.startswith('Fuel_')])}")

üîÑ Recreating ALL Features from Scratch...
üîß Recreating Basic Features...
üîß Recreating Binary Features...
üîß Recreating Body Type Features...
üîß Recreating Transmission Features...
üîß Recreating Fuel Type Features...
ü§ñ Applying Smart Transmission Logic...
‚úÖ ALL features recreated successfully!

üîç Final Verification:
Basic features: True
Body type columns: 22
Transmission columns: 7
Fuel columns: 17


In [34]:
# Debug Cell: Quick Check for Data Types
print("üîç Quick Data Type Check...")

# Check a few key columns first
test_columns = ['Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned', 
                'Has_ABS', 'Body_SUV', 'Transmission_Automatic', 'Fuel_Petrol']

for col in test_columns:
    if col in df.columns:
        sample_val = df[col].iloc[0] if len(df) > 0 else 'N/A'
        print(f"  {col}: {type(sample_val)} - {sample_val}")
    else:
        print(f"  {col}: Column not found")

üîç Quick Data Type Check...
  Price_Cleaned: <class 'numpy.float64'> - 292667.0
  Mileage_Cleaned: <class 'numpy.float64'> - 23.6
  Displacement_Cleaned: <class 'numpy.float64'> - 624.0
  Has_ABS: <class 'numpy.int64'> - 0
  Body_SUV: <class 'pandas.core.series.Series'> - Body_SUV    False
Body_SUV    False
Name: 0, dtype: bool
  Transmission_Automatic: <class 'pandas.core.series.Series'> - Transmission_Automatic    False
Transmission_Automatic    False
Name: 0, dtype: bool
  Fuel_Petrol: <class 'pandas.core.series.Series'> - Fuel_Petrol    True
Fuel_Petrol    True
Name: 0, dtype: bool


In [35]:
# Fix Cell: Recreate One-Hot Encoded Columns Properly
print("üîß Fixing One-Hot Encoded Columns...")

# Remove existing problematic columns
cols_to_drop = [col for col in df.columns if col.startswith(('Body_', 'Transmission_', 'Fuel_')) 
                and col not in ['Body_Type', 'Body_Type_Cleaned', 'Transmission_Cleaned', 'Fuel_Type_Cleaned', 'Fuel_System', 'Fuel_Tank_Capacity', 'Fuel_Gauge']]
df = df.drop(columns=cols_to_drop)

print(f"‚úÖ Dropped {len(cols_to_drop)} problematic columns")

# Recreate one-hot encodings properly
print("üîß Recreating Body Type Encoding...")
body_type_dummies = pd.get_dummies(df['Body_Type_Cleaned'], prefix='Body')
df = pd.concat([df, body_type_dummies], axis=1)

print("üîß Recreating Transmission Encoding...")
transmission_dummies = pd.get_dummies(df['Transmission_Cleaned'], prefix='Transmission')
df = pd.concat([df, transmission_dummies], axis=1)

print("üîß Recreating Fuel Type Encoding...")
fuel_dummies = pd.get_dummies(df['Fuel_Type_Cleaned'], prefix='Fuel')
df = pd.concat([df, fuel_dummies], axis=1)

# Verify the fix
print("\nüîç Verification:")
test_cols = ['Body_SUV', 'Transmission_Automatic', 'Fuel_Petrol']
for col in test_cols:
    if col in df.columns:
        sample_val = df[col].iloc[0]
        print(f"  {col}: {type(sample_val)} - {sample_val}")
    else:
        print(f"  {col}: Column not found")

print("‚úÖ One-hot encodings fixed!")

üîß Fixing One-Hot Encoded Columns...
‚úÖ Dropped 39 problematic columns
üîß Recreating Body Type Encoding...
üîß Recreating Transmission Encoding...
üîß Recreating Fuel Type Encoding...

üîç Verification:
  Body_SUV: <class 'numpy.bool_'> - False
  Transmission_Automatic: <class 'numpy.bool_'> - False
  Fuel_Petrol: <class 'numpy.bool_'> - True
‚úÖ One-hot encodings fixed!


In [36]:
# Cell 25.5 SIMPLIFIED: Build Final Model
print("üîÑ Building Final Model...")

# Define feature set with verified columns
final_feature_columns = [
    'Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned',
    'Has_ABS', 'Has_Power_Steering', 'Has_Power_Windows', 
    'Has_Airbags', 'Has_AC'
]

# Add verified categorical features
body_cols = [col for col in df.columns if col.startswith('Body_') and col not in ['Body_Type', 'Body_Type_Cleaned']]
final_feature_columns.extend(body_cols)

trans_cols = [col for col in df.columns if col.startswith('Transmission_') and col != 'Transmission_Cleaned']
final_feature_columns.extend(trans_cols)

fuel_cols = [col for col in df.columns if col.startswith('Fuel_') and col not in ['Fuel_Type_Cleaned', 'Fuel_System', 'Fuel_Tank_Capacity', 'Fuel_Gauge']]
final_feature_columns.extend(fuel_cols)

print(f"‚úÖ Using {len(final_feature_columns)} features")

# Quick verification
print("üîç Sample values:")
for col in ['Price_Cleaned', 'Body_SUV', 'Transmission_Automatic', 'Fuel_Petrol']:
    if col in final_feature_columns:
        print(f"  {col}: {df[col].iloc[0]} (type: {type(df[col].iloc[0])})")

# Prepare features
final_feature_df = df[final_feature_columns].copy()

# Fill missing values
for col in ['Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned']:
    final_feature_df[col].fillna(final_feature_df[col].median(), inplace=True)

final_feature_df.fillna(0, inplace=True)

# Scale and build model
final_scaler = StandardScaler()
final_scaled_features = final_scaler.fit_transform(final_feature_df)

final_knn_model = NearestNeighbors(n_neighbors=5, metric='cosine')
final_knn_model.fit(final_scaled_features)

print("‚úÖ Final model built successfully!")

# Set global variables
clean_body_type_columns = body_cols
transmission_columns = trans_cols
fuel_columns = fuel_cols
enhanced_feature_columns_v2 = final_feature_columns

print(f"üéØ Feature breakdown:")
print(f"  - Basic features: 8")
print(f"  - Body types: {len(body_cols)}")
print(f"  - Transmission: {len(trans_cols)}")
print(f"  - Fuel types: {len(fuel_cols)}")
print(f"  - Total: {len(final_feature_columns)}")

üîÑ Building Final Model...
‚úÖ Using 27 features
üîç Sample values:
  Price_Cleaned: 292667.0 (type: <class 'numpy.float64'>)
  Body_SUV: False (type: <class 'numpy.bool_'>)
  Transmission_Automatic: False (type: <class 'numpy.bool_'>)
  Fuel_Petrol: True (type: <class 'numpy.bool_'>)
‚úÖ Final model built successfully!
üéØ Feature breakdown:
  - Basic features: 8
  - Body types: 10
  - Transmission: 3
  - Fuel types: 6
  - Total: 27


In [37]:
# Cell 25 UPDATED CORRECTED: Enhanced Recommendation Function with Smart Logic
print("üéØ Creating Smart Recommendation Function...")

def recommend_cars_smart(user_preferences, body_type_preference=None, 
                        fuel_type_preference=None, transmission_preference=None,
                        n_recommendations=5, max_price=None):
    """
    SMART recommendation with automatic transmission logic for electric cars
    
    Parameters:
    - user_preferences: list of 8 basic preferences
    - body_type_preference: 'SUV', 'Sedan', etc.
    - fuel_type_preference: 'Petrol', 'Diesel', 'Electric', etc.
    - transmission_preference: 'Manual', 'Automatic'
    """
    
    # Start with basic preferences
    full_preferences = user_preferences.copy()
    
    # SMART LOGIC: Electric cars = Automatic transmission
    if fuel_type_preference == 'Electric':
        transmission_preference = 'Automatic'  # Override
        print("üí° Smart Note: Electric cars automatically use automatic transmission")
    
    # Add body type preferences
    if body_type_preference:
        body_pref = [0] * len(clean_body_type_columns)
        target_col = f'Body_{body_type_preference}'
        if target_col in clean_body_type_columns:
            idx = clean_body_type_columns.index(target_col)
            body_pref[idx] = 1
        full_preferences.extend(body_pref)
    else:
        full_preferences.extend([0] * len(clean_body_type_columns))
    
    # Add transmission preferences
    if transmission_preference:
        trans_pref = [0] * len(transmission_columns)
        target_col = f'Transmission_{transmission_preference}'
        if target_col in transmission_columns:
            idx = transmission_columns.index(target_col)
            trans_pref[idx] = 1
        full_preferences.extend(trans_pref)
    else:
        full_preferences.extend([0] * len(transmission_columns))
    
    # Add fuel type preferences
    if fuel_type_preference:
        fuel_pref = [0] * len(fuel_columns)
        target_col = f'Fuel_{fuel_type_preference}'
        if target_col in fuel_columns:
            idx = fuel_columns.index(target_col)
            fuel_pref[idx] = 1
        full_preferences.extend(fuel_pref)
    else:
        full_preferences.extend([0] * len(fuel_columns))
    
    # Apply price filter if specified
    if max_price:
        filtered_df = df[df['Price_Cleaned'] <= max_price].copy()
        
        if len(filtered_df) == 0:
            print("‚ùå No cars found within your budget. Try increasing your budget.")
            return None
        
        # Prepare filtered features
        filtered_features = filtered_df[enhanced_feature_columns_v2].copy()
        
        # Fill any remaining NaN values
        for col in ['Mileage_Cleaned', 'Displacement_Cleaned']:
            filtered_features[col].fillna(filtered_features[col].median(), inplace=True)
        
        # Fill any other missing values with 0
        filtered_features.fillna(0, inplace=True)
        
        # Scale filtered features - USE CORRECT VARIABLE NAMES
        filtered_features_scaled = final_scaler.transform(filtered_features)
            
        # Build model on filtered data
        knn_filtered = NearestNeighbors(n_neighbors=min(n_recommendations, len(filtered_df)), metric='cosine')
        knn_filtered.fit(filtered_features_scaled)
        
        user_vector = final_scaler.transform([full_preferences])
        distances, indices = knn_filtered.kneighbors(user_vector)
        
        recommendations = filtered_df.iloc[indices[0]].copy()
        
    else:
        # Use full dataset - USE CORRECT VARIABLE NAMES
        user_vector = final_scaler.transform([full_preferences])
        distances, indices = final_knn_model.kneighbors(user_vector, n_neighbors=n_recommendations)
        
        recommendations = df.iloc[indices[0]].copy()
    
    # Add similarity score
    recommendations['Similarity_Score'] = 1 - distances[0]
    
    # Display results with all preferences
    pref_msg = []
    if body_type_preference:
        pref_msg.append(f"Body: {body_type_preference}")
    if fuel_type_preference:
        pref_msg.append(f"Fuel: {fuel_type_preference}")
    if transmission_preference:
        pref_msg.append(f"Transmission: {transmission_preference}")
    
    pref_str = " with " + ", ".join(pref_msg) if pref_msg else ""
    print(f"üéâ Found {len(recommendations)} recommendations{pref_str}:")
    
    # Show relevant columns including the smart features we added
    display_columns = ['Make', 'Model', 'Variant', 'Price_Cleaned', 'Mileage_Cleaned', 
                      'Fuel_Type_Cleaned', 'Smart_Transmission', 'Body_Type_Cleaned', 
                      'Similarity_Score']
    
    display(recommendations[display_columns].sort_values('Similarity_Score', ascending=False))
    
    return recommendations

print("‚úÖ Smart recommendation function created!")
print("Ready to test the enhanced system! üöÄ")

üéØ Creating Smart Recommendation Function...
‚úÖ Smart recommendation function created!
Ready to test the enhanced system! üöÄ


In [38]:
# Cell 26 UPDATED: Test Smart System with Electric Car Logic
print("üß™ Testing Smart System with Electric Car Logic...")
print("=" * 50)

# Test 1: Electric Car Seeker (should auto-set to automatic)
print("TEST 1: ‚ö° Electric Car Seeker")
print("User selects: Fuel=Electric, Transmission=Manual (should be overridden)")

user_pref_electric = [
    1500000,   # Budget: ‚Çπ15 lakh
    15,        # Range consideration
    0,         # Electric - no displacement
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for electric car recommendations...")
recommendations_electric = recommend_cars_smart(
    user_pref_electric,
    fuel_type_preference='Electric',
    transmission_preference='Manual',  # This should be overridden!
    max_price=2000000
)

print("\n" + "="*50)

# Test 2: Manual Petrol Enthusiast (should respect selection)
print("\nTEST 2: üèéÔ∏è Manual Petrol Enthusiast")
print("User selects: Fuel=Petrol, Transmission=Manual (should be respected)")

print(f"\nüîç Searching for manual petrol cars...")
recommendations_manual_petrol = recommend_cars_smart(
    user_pref_electric,
    fuel_type_preference='Petrol',
    transmission_preference='Manual',  # This should be respected
    max_price=1500000
)

print("\n" + "="*50)

# Test 3: Diesel Automatic Family Car
print("\nTEST 3: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Diesel Automatic Family Car")
print("User selects: Fuel=Diesel, Transmission=Automatic, Body=SUV")

user_pref_family = [
    1200000,   # Budget: ‚Çπ12 lakh
    18,        # Good mileage
    1500,      # Medium engine
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for diesel automatic SUV recommendations...")
recommendations_family = recommend_cars_smart(
    user_pref_family,
    body_type_preference='SUV',
    fuel_type_preference='Diesel',
    transmission_preference='Automatic',
    max_price=1500000
)

üß™ Testing Smart System with Electric Car Logic...
TEST 1: ‚ö° Electric Car Seeker
User selects: Fuel=Electric, Transmission=Manual (should be overridden)

üîç Searching for electric car recommendations...
üí° Smart Note: Electric cars automatically use automatic transmission
üéâ Found 5 recommendations with Fuel: Electric, Transmission: Automatic:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Similarity_Score
1006,Tata,Nexon Ev,Xm,1399000.0,,Electric,Automatic,SUV,0.949596
1007,Tata,Nexon Ev,Xz Plus,1499000.0,,Electric,Automatic,SUV,0.949595
1008,Tata,Nexon Ev,Xz Plus Lux,1599000.0,,Electric,Automatic,SUV,0.949594
618,Tata,Tigor Ev,Xt+,975868.0,,Electric,Automatic,Sedan,0.94598
617,Tata,Tigor Ev,Xm+,960868.0,,Electric,Automatic,Sedan,0.94598




TEST 2: üèéÔ∏è Manual Petrol Enthusiast
User selects: Fuel=Petrol, Transmission=Manual (should be respected)

üîç Searching for manual petrol cars...
üéâ Found 5 recommendations with Fuel: Petrol, Transmission: Manual:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Similarity_Score
491,Hyundai,Venue,1.0 Turbo Gdi Mt S,826000.0,18.27,Petrol,Manual,SUV,0.694654
493,Hyundai,Venue,1.0 Turbo Gdi Mt Sx,959000.0,18.27,Petrol,Manual,SUV,0.694617
494,Hyundai,Venue,1.0 Turbo Gdi Mt Sx Dual Tone,974000.0,18.27,Petrol,Manual,SUV,0.694612
495,Hyundai,Venue,1.0 Turbo Gdi Mt Sx(O),1065000.0,18.27,Petrol,Manual,SUV,0.69458
586,Mahindra,Xuv300,1.2 W6,915128.0,17.0,Petrol,Manual,SUV,0.667761




TEST 3: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Diesel Automatic Family Car
User selects: Fuel=Diesel, Transmission=Automatic, Body=SUV

üîç Searching for diesel automatic SUV recommendations...
üéâ Found 5 recommendations with Body: SUV, Fuel: Diesel, Transmission: Automatic:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Similarity_Score
593,Mahindra,Xuv300,1.5 W8 Amt,1149800.0,20.0,Diesel,Automatic,SUV,0.999866
594,Mahindra,Xuv300,1.5 W8 (O) Amt,1269131.0,20.0,Diesel,Automatic,SUV,0.999865
986,Nissan,Terrano,Xv D Premium 110 Ps Amt,1464900.0,19.01,Diesel,Automatic,SUV,0.999865
977,Kia,Seltos,Htk Plus At 1.5 Diesel,1354000.0,20.0,Diesel,Automatic,SUV,0.999856
1129,Hyundai,Creta,1.6 Crdi S At,1336033.0,17.01,Diesel,Automatic,SUV,0.999631


In [39]:
# Cell 27: Clean Seating Capacity Data
print("üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Cleaning Seating Capacity Data...")

# First, let's see what seating capacity data we have
if 'Seating_Capacity' in df.columns:
    print("Current Seating Capacity Values:")
    print(df['Seating_Capacity'].value_counts().head(10))
    
    # Clean seating capacity - extract numbers
    def clean_seating_capacity(seating_str):
        if pd.isna(seating_str):
            return np.nan
        # Extract numbers from strings like "5", "7 Seater", "5 seats"
        if isinstance(seating_str, str):
            import re
            numbers = re.findall(r'\d+', str(seating_str))
            if numbers:
                return int(numbers[0])
        elif isinstance(seating_str, (int, float)):
            return int(seating_str)
        return np.nan
    
    df['Seating_Capacity_Cleaned'] = df['Seating_Capacity'].apply(clean_seating_capacity)
    
    print("\n‚úÖ Cleaned Seating Capacity:")
    print(df['Seating_Capacity_Cleaned'].value_counts().sort_index())
    
    # Create binary features for common seating capacities
    df['Seats_5'] = df['Seating_Capacity_Cleaned'].apply(lambda x: 1 if x == 5 else 0)
    df['Seats_7'] = df['Seating_Capacity_Cleaned'].apply(lambda x: 1 if x == 7 else 0)
    df['Seats_8'] = df['Seating_Capacity_Cleaned'].apply(lambda x: 1 if x == 8 else 0)
    
    print("\nüéØ Seating Capacity Binary Features Created:")
    print(f"  5-seaters: {df['Seats_5'].sum()} cars")
    print(f"  7-seaters: {df['Seats_7'].sum()} cars") 
    print(f"  8-seaters: {df['Seats_8'].sum()} cars")
    
else:
    print("‚ùå 'Seating_Capacity' column not found")
    # Check for alternative seating columns
    seating_columns = [col for col in df.columns if 'seat' in col.lower() or 'capacity' in col.lower()]
    print("Alternative seating columns:", seating_columns)

üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Cleaning Seating Capacity Data...
Current Seating Capacity Values:
Seating_Capacity
5.0     915
7.0     183
4.0      70
2.0      39
6.0      26
9.0      19
8.0      17
16.0      1
Name: count, dtype: int64

‚úÖ Cleaned Seating Capacity:
Seating_Capacity_Cleaned
2.0      39
4.0      70
5.0     915
6.0      26
7.0     183
8.0      17
9.0      19
16.0      1
Name: count, dtype: int64

üéØ Seating Capacity Binary Features Created:
  5-seaters: 915 cars
  7-seaters: 183 cars
  8-seaters: 17 cars


In [40]:
# Cell 28: Rebuild Model with Seating Capacity
print("üîÑ Rebuilding Model with Seating Capacity...")

# Only use the binary seating columns (not Seats_Material which has string values)
seating_columns = ['Seats_5', 'Seats_7', 'Seats_8']  # Only use binary numeric columns
print(f"‚úÖ Adding {len(seating_columns)} seating capacity features: {seating_columns}")

# Update final feature set
final_feature_columns_v3 = final_feature_columns + seating_columns

print(f"üéØ Total features now: {len(final_feature_columns_v3)}")

# Prepare features with seating capacity
final_feature_df_v3 = df[final_feature_columns_v3].copy()

# Fill missing values
for col in ['Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned']:
    if col in final_feature_df_v3.columns:
        final_feature_df_v3[col].fillna(final_feature_df_v3[col].median(), inplace=True)

# Fill any other missing with 0
final_feature_df_v3.fillna(0, inplace=True)

# Ensure all data types are numeric
for col in final_feature_df_v3.columns:
    if final_feature_df_v3[col].dtype == 'object':
        # Convert object columns to numeric, coercing errors to NaN then fill with 0
        final_feature_df_v3[col] = pd.to_numeric(final_feature_df_v3[col], errors='coerce').fillna(0)
    elif final_feature_df_v3[col].dtype == 'bool':
        # Convert boolean to int
        final_feature_df_v3[col] = final_feature_df_v3[col].astype(int)

# Scale features
final_scaler_v3 = StandardScaler()
final_scaled_features_v3 = final_scaler_v3.fit_transform(final_feature_df_v3)

# Build new KNN model
final_knn_model_v3 = NearestNeighbors(n_neighbors=5, metric='cosine')
final_knn_model_v3.fit(final_scaled_features_v3)

print("‚úÖ Model rebuilt with seating capacity!")
print(f"Feature breakdown:")
print(f"  - Basic: 8")
print(f"  - Body: {len(clean_body_type_columns)}")
print(f"  - Transmission: {len(transmission_columns)}")
print(f"  - Fuel: {len(fuel_columns)}")
print(f"  - Seating: {len(seating_columns)}")
print(f"  - Total: {len(final_feature_columns_v3)}")

üîÑ Rebuilding Model with Seating Capacity...
‚úÖ Adding 3 seating capacity features: ['Seats_5', 'Seats_7', 'Seats_8']
üéØ Total features now: 30
‚úÖ Model rebuilt with seating capacity!
Feature breakdown:
  - Basic: 8
  - Body: 10
  - Transmission: 3
  - Fuel: 6
  - Seating: 3
  - Total: 30


In [41]:
# Add this cell BEFORE Cell 29 to define missing variables
print("üîß Defining Required Variables...")

# Define the missing variables that the function needs
clean_body_type_columns = [col for col in df.columns if col.startswith('Body_') and col not in ['Body_Type', 'Body_Type_Cleaned']]
transmission_columns = [col for col in df.columns if col.startswith('Transmission_') and col != 'Transmission_Cleaned']
fuel_columns = [col for col in df.columns if col.startswith('Fuel_') and col not in ['Fuel_Type_Cleaned', 'Fuel_System', 'Fuel_Tank_Capacity', 'Fuel_Gauge']]
seating_columns = ['Seats_5', 'Seats_7', 'Seats_8']

print(f"‚úÖ Defined {len(clean_body_type_columns)} body type columns")
print(f"‚úÖ Defined {len(transmission_columns)} transmission columns") 
print(f"‚úÖ Defined {len(fuel_columns)} fuel columns")
print(f"‚úÖ Defined {len(seating_columns)} seating columns")

üîß Defining Required Variables...
‚úÖ Defined 10 body type columns
‚úÖ Defined 3 transmission columns
‚úÖ Defined 6 fuel columns
‚úÖ Defined 3 seating columns


In [53]:
# Cell 29: Update Recommendation Function with Seating Capacity and Brand Selection
print("üéØ Updating Recommendation Function with Brand Selection and Smart Efficiency...")

def recommend_cars_complete(user_preferences, brand_preference=None, 
                          body_type_preference=None, 
                          fuel_type_preference=None, transmission_preference=None,
                          seating_preference=None, n_recommendations=5, max_price=None):
    """
    COMPLETE recommendation function with ALL features including BRAND selection and Smart Efficiency
    
    Parameters:
    - user_preferences: list of 8 basic preferences
    - brand_preference: 'Tata', 'Hyundai', 'Toyota', etc.
    - body_type_preference: 'SUV', 'Sedan', etc.
    - fuel_type_preference: 'Petrol', 'Diesel', 'Electric', etc.
    - transmission_preference: 'Manual', 'Automatic'
    - seating_preference: 5, 7, 8 (number of seats)
    """
    
    # Start with basic preferences
    full_preferences = user_preferences.copy()
    
    # SMART LOGIC: Electric cars = Automatic transmission
    if fuel_type_preference == 'Electric':
        transmission_preference = 'Automatic'
        print("üí° Smart Note: Electric cars automatically use automatic transmission")
    
    # Add body type preferences
    if body_type_preference:
        body_pref = [0] * len(clean_body_type_columns)
        target_col = f'Body_{body_type_preference}'
        if target_col in clean_body_type_columns:
            idx = clean_body_type_columns.index(target_col)
            body_pref[idx] = 1
        full_preferences.extend(body_pref)
    else:
        full_preferences.extend([0] * len(clean_body_type_columns))
    
    # Add transmission preferences
    if transmission_preference:
        trans_pref = [0] * len(transmission_columns)
        target_col = f'Transmission_{transmission_preference}'
        if target_col in transmission_columns:
            idx = transmission_columns.index(target_col)
            trans_pref[idx] = 1
        full_preferences.extend(trans_pref)
    else:
        full_preferences.extend([0] * len(transmission_columns))
    
    # Add fuel type preferences
    if fuel_type_preference:
        fuel_pref = [0] * len(fuel_columns)
        target_col = f'Fuel_{fuel_type_preference}'
        if target_col in fuel_columns:
            idx = fuel_columns.index(target_col)
            fuel_pref[idx] = 1
        full_preferences.extend(fuel_pref)
    else:
        full_preferences.extend([0] * len(fuel_columns))
    
    # Add seating capacity preferences
    if seating_preference:
        seating_pref = [0] * len(seating_columns)
        target_col = f'Seats_{seating_preference}'
        if target_col in seating_columns:
            idx = seating_columns.index(target_col)
            seating_pref[idx] = 1
        full_preferences.extend(seating_pref)
    else:
        full_preferences.extend([0] * len(seating_columns))
    
    # Apply BRAND filter first (if specified)
    filtered_df = df.copy()
    
    if brand_preference:
        # Case-insensitive brand filtering
        filtered_df = filtered_df[filtered_df['Make'].str.lower() == brand_preference.lower()]
        if len(filtered_df) == 0:
            print(f"‚ùå No cars found from brand '{brand_preference}'. Try a different brand.")
            return None
        print(f"üîç Filtered by brand: {brand_preference}")
    
    # Apply price filter if specified
    if max_price:
        filtered_df = filtered_df[filtered_df['Price_Cleaned'] <= max_price]
        
        if len(filtered_df) == 0:
            brand_msg = f" from brand '{brand_preference}'" if brand_preference else ""
            print(f"‚ùå No cars found within your budget{brand_msg}. Try increasing your budget.")
            return None
    
    # Prepare filtered features
    filtered_features = filtered_df[final_feature_columns_v3].copy()
    
    # Fill any remaining NaN values
    for col in ['Mileage_Cleaned', 'Displacement_Cleaned']:
        filtered_features[col].fillna(filtered_features[col].median(), inplace=True)
    
    # Fill any other missing values with 0
    filtered_features.fillna(0, inplace=True)
    
    # Scale filtered features
    filtered_features_scaled = final_scaler_v3.transform(filtered_features)
        
    # Build model on filtered data
    knn_filtered = NearestNeighbors(n_neighbors=min(n_recommendations, len(filtered_df)), metric='cosine')
    knn_filtered.fit(filtered_features_scaled)
    
    user_vector = final_scaler_v3.transform([full_preferences])
    distances, indices = knn_filtered.kneighbors(user_vector)
    
    recommendations = filtered_df.iloc[indices[0]].copy()
    
    # Add similarity score
    recommendations['Similarity_Score'] = 1 - distances[0]
    
    # Display results with all preferences (INCLUDING BRAND)
    pref_msg = []
    if brand_preference:
        pref_msg.append(f"Brand: {brand_preference}")
    if body_type_preference:
        pref_msg.append(f"Body: {body_type_preference}")
    if fuel_type_preference:
        pref_msg.append(f"Fuel: {fuel_type_preference}")
    if transmission_preference:
        pref_msg.append(f"Transmission: {transmission_preference}")
    if seating_preference:
        pref_msg.append(f"Seats: {seating_preference}")
    
    pref_str = " with " + ", ".join(pref_msg) if pref_msg else ""
    print(f"üéâ Found {len(recommendations)} recommendations{pref_str}:")
    
    # Show relevant columns including seating capacity and Smart_Efficiency
    display_columns = ['Make', 'Model', 'Variant', 'Price_Cleaned', 'Mileage_Cleaned', 
                      'Fuel_Type_Cleaned', 'Smart_Transmission', 'Body_Type_Cleaned', 
                      'Seating_Capacity_Cleaned', 'Similarity_Score']
    
    display(recommendations[display_columns].sort_values('Similarity_Score', ascending=False))
    
    return recommendations

print("‚úÖ Complete recommendation function with BRAND selection and Smart Efficiency created!")
print("Ready to test the full system with brand selection! üöó")

üéØ Updating Recommendation Function with Brand Selection and Smart Efficiency...
‚úÖ Complete recommendation function with BRAND selection and Smart Efficiency created!
Ready to test the full system with brand selection! üöó


In [54]:
# Cell 30: Test Complete System with Brand Selection
print("üß™ Testing Complete System with Brand Selection...")
print("=" * 50)

# Test 1: Family Car with Brand Preference
print("TEST 1: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family Car with Brand & 7 Seats")
print("Preferences: Brand=Tata, SUV, Diesel, Automatic, 7 Seats")

user_pref_family = [
    1500000,   # Budget: ‚Çπ15 lakh
    18,        # Good mileage
    2000,      # Medium engine
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for Tata family SUV with 7 seats...")
recommendations_family = recommend_cars_complete(
    user_pref_family,
    brand_preference='Tata',           # BRAND SELECTION
    body_type_preference='SUV',
    fuel_type_preference='Diesel', 
    transmission_preference='Automatic',
    seating_preference=7,
    max_price=2000000
)

print("\n" + "="*50)

# Test 2: Compact City Car with Brand Preference
print("\nTEST 2: üèôÔ∏è Compact City Car with Brand")
print("Preferences: Brand=Hyundai, Hatchback, Petrol, Manual, 5 Seats")

user_pref_city = [
    800000,    # Budget: ‚Çπ8 lakh
    20,        # Good mileage
    1200,      # Small engine
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for Hyundai compact city car...")
recommendations_city = recommend_cars_complete(
    user_pref_city,
    brand_preference='Hyundai',        # BRAND SELECTION
    body_type_preference='Hatchback',
    fuel_type_preference='Petrol',
    transmission_preference='Manual',
    seating_preference=5,
    max_price=1000000
)

print("\n" + "="*50)

# Test 3: Electric Car with Brand Preference (Test Smart Logic)
print("\nTEST 3: ‚ö° Electric Car with Brand Preference")
print("Preferences: Brand=Tata, Electric, SUV (should auto-set to Automatic)")

user_pref_electric = [
    1500000,   # Budget: ‚Çπ15 lakh
    15,        # Range consideration
    0,         # Electric - no displacement
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for Tata electric SUV...")
recommendations_electric = recommend_cars_complete(
    user_pref_electric,
    brand_preference='Tata',           # BRAND SELECTION
    body_type_preference='SUV',
    fuel_type_preference='Electric',
    transmission_preference='Manual',  # Should be overridden to Automatic!
    max_price=2000000
)

üß™ Testing Complete System with Brand Selection...
TEST 1: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family Car with Brand & 7 Seats
Preferences: Brand=Tata, SUV, Diesel, Automatic, 7 Seats

üîç Searching for Tata family SUV with 7 seats...
üîç Filtered by brand: Tata
üéâ Found 5 recommendations with Brand: Tata, Body: SUV, Fuel: Diesel, Transmission: Automatic, Seats: 7:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
696,Tata,Hexa,Xma 4X2,1655587.0,17.6,Diesel,Automatic,SUV,7.0,0.999176
694,Tata,Hexa,Xe 4X2,1371864.0,17.6,Diesel,Manual,SUV,7.0,0.757908
695,Tata,Hexa,Xm 4X2,1531377.0,17.6,Diesel,Manual,SUV,7.0,0.757867
690,Tata,Hexa,Xm Plus 4X2,1639374.0,17.6,Diesel,Manual,SUV,7.0,0.757835
993,Tata,Safari Storme,2.2 Vx 4X2 Varicor 400,1479574.0,14.1,Diesel,Manual,SUV,7.0,0.75772




TEST 2: üèôÔ∏è Compact City Car with Brand
Preferences: Brand=Hyundai, Hatchback, Petrol, Manual, 5 Seats

üîç Searching for Hyundai compact city car...
üîç Filtered by brand: Hyundai
üéâ Found 5 recommendations with Brand: Hyundai, Body: Hatchback, Fuel: Petrol, Transmission: Manual, Seats: 5:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
917,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz Dual Tone,640537.0,19.77,Petrol,Manual,Hatchback,5.0,0.999989
914,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz,620637.0,19.77,Petrol,Manual,Hatchback,5.0,0.999987
1068,Hyundai,Grand I10 Nios,Asta 1.2 Vtvt,718950.0,20.7,Petrol,Manual,Hatchback,5.0,0.999982
1067,Hyundai,Grand I10 Nios,Sportz Dual Tone 1.2 Vtvt,673350.0,20.7,Petrol,Manual,Hatchback,5.0,0.999979
1065,Hyundai,Grand I10 Nios,Sportz 1.2 Vtvt,643350.0,20.7,Petrol,Manual,Hatchback,5.0,0.999976




TEST 3: ‚ö° Electric Car with Brand Preference
Preferences: Brand=Tata, Electric, SUV (should auto-set to Automatic)

üîç Searching for Tata electric SUV...
üí° Smart Note: Electric cars automatically use automatic transmission
üîç Filtered by brand: Tata
üéâ Found 5 recommendations with Brand: Tata, Body: SUV, Fuel: Electric, Transmission: Automatic:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1006,Tata,Nexon Ev,Xm,1399000.0,,Electric,Automatic,SUV,5.0,0.950041
1007,Tata,Nexon Ev,Xz Plus,1499000.0,,Electric,Automatic,SUV,5.0,0.950041
1008,Tata,Nexon Ev,Xz Plus Lux,1599000.0,,Electric,Automatic,SUV,5.0,0.95004
618,Tata,Tigor Ev,Xt+,975868.0,,Electric,Automatic,Sedan,5.0,0.903831
617,Tata,Tigor Ev,Xm+,960868.0,,Electric,Automatic,Sedan,5.0,0.903831


In [46]:
# Cell 33: Add Smart Mileage/Range Logic (FIXED for string values)
print("‚ö° Adding Smart Mileage/Range Logic...")

def extract_number_from_string(value):
    """Extract numeric value from strings like '110 km/full charge'"""
    if pd.isna(value) or value is None:
        return np.nan
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        import re
        # Find numbers in the string (including decimals)
        numbers = re.findall(r'\d+\.?\d*', str(value))
        if numbers:
            return float(numbers[0])
    return np.nan

def get_smart_efficiency(fuel_type, mileage_value, electric_range_value=None):
    """
    Smart logic: Return appropriate efficiency metric based on fuel type
    - Electric: Range in km
    - Others: Mileage in km/l
    """
    if fuel_type == 'Electric':
        # For electric cars, use electric range if available
        range_val = extract_number_from_string(electric_range_value)
        if not pd.isna(range_val):
            return range_val  # Range in km
        else:
            # Fallback to mileage column if range not available
            mileage_val = extract_number_from_string(mileage_value)
            return mileage_val if not pd.isna(mileage_val) else np.nan
    else:
        # For fuel cars, use mileage in km/l
        mileage_val = extract_number_from_string(mileage_value)
        return mileage_val if not pd.isna(mileage_val) else np.nan

# Apply smart efficiency logic
df['Smart_Efficiency'] = df.apply(
    lambda x: get_smart_efficiency(
        x['Fuel_Type_Cleaned'], 
        x['Mileage_Cleaned'],
        x.get('Electric_Range')  # Use electric range column if it exists
    ), 
    axis=1
)

print("‚úÖ Smart efficiency logic applied!")

# Safe printing with error handling
try:
    electric_eff = df[df['Fuel_Type_Cleaned'] == 'Electric']['Smart_Efficiency']
    if not electric_eff.empty:
        electric_min = electric_eff.min()
        electric_max = electric_eff.max()
        print(f"Electric cars efficiency range: {electric_min:.0f} - {electric_max:.0f} km")
    else:
        print("No electric cars found in dataset")
except Exception as e:
    print(f"‚ö†Ô∏è Could not calculate electric car efficiency range: {e}")

try:
    petrol_eff = df[df['Fuel_Type_Cleaned'] == 'Petrol']['Smart_Efficiency']
    if not petrol_eff.empty:
        petrol_min = petrol_eff.min()
        petrol_max = petrol_eff.max()
        print(f"Petrol cars efficiency range: {petrol_min:.1f} - {petrol_max:.1f} km/l")
    else:
        print("No petrol cars found in dataset")
except Exception as e:
    print(f"‚ö†Ô∏è Could not calculate petrol car efficiency range: {e}")

# Check data types and sample values
print(f"\nüîç Smart_Efficiency data type: {df['Smart_Efficiency'].dtype}")
print(f"Sample values:")
print(df[df['Fuel_Type_Cleaned'] == 'Electric'][['Make', 'Model', 'Smart_Efficiency']].head(3))

‚ö° Adding Smart Mileage/Range Logic...
‚úÖ Smart efficiency logic applied!
Electric cars efficiency range: 110 - 462 km
Petrol cars efficiency range: 4.0 - 1449.0 km/l

üîç Smart_Efficiency data type: float64
Sample values:
         Make     Model  Smart_Efficiency
615  Mahindra  E2O Plus             110.0
616  Mahindra  E2O Plus             110.0
617      Tata  Tigor Ev             213.0


In [57]:
# Cell 34: Update Recommendation with Brand Selection & Smart Efficiency
print("üîÑ Updating Recommendation with Brand Selection & Smart Efficiency...")

def recommend_cars_complete_v2(user_preferences, brand_preference=None,  
                              body_type_preference=None, 
                              fuel_type_preference=None, transmission_preference=None,
                              seating_preference=None, n_recommendations=5, max_price=None):
    """
    UPDATED VERSION: Includes BRAND SELECTION + Smart_Efficiency display
    """
    
    # SMART LOGIC: Electric cars = Automatic transmission
    if fuel_type_preference == 'Electric':
        transmission_preference = 'Automatic'
        print("üí° Smart Note: Electric cars automatically use automatic transmission")
    
    # Start with basic preferences
    full_preferences = user_preferences.copy()
    
    # Add body type preferences
    if body_type_preference:
        body_pref = [0] * len(clean_body_type_columns)
        target_col = f'Body_{body_type_preference}'
        if target_col in clean_body_type_columns:
            idx = clean_body_type_columns.index(target_col)
            body_pref[idx] = 1
        full_preferences.extend(body_pref)
    else:
        full_preferences.extend([0] * len(clean_body_type_columns))
    
    # Add transmission preferences
    if transmission_preference:
        trans_pref = [0] * len(transmission_columns)
        target_col = f'Transmission_{transmission_preference}'
        if target_col in transmission_columns:
            idx = transmission_columns.index(target_col)
            trans_pref[idx] = 1
        full_preferences.extend(trans_pref)
    else:
        full_preferences.extend([0] * len(transmission_columns))
    
    # Add fuel type preferences
    if fuel_type_preference:
        fuel_pref = [0] * len(fuel_columns)
        target_col = f'Fuel_{fuel_type_preference}'
        if target_col in fuel_columns:
            idx = fuel_columns.index(target_col)
            fuel_pref[idx] = 1
        full_preferences.extend(fuel_pref)
    else:
        full_preferences.extend([0] * len(fuel_columns))
    
    # Add seating capacity preferences
    if seating_preference:
        seating_pref = [0] * len(seating_columns)
        target_col = f'Seats_{seating_preference}'
        if target_col in seating_columns:
            idx = seating_columns.index(target_col)
            seating_pref[idx] = 1
        full_preferences.extend(seating_pref)
    else:
        full_preferences.extend([0] * len(seating_columns))
    
    # Apply BRAND filter first (if specified)
    filtered_df = df.copy()
    
    if brand_preference:
        # Case-insensitive brand filtering
        filtered_df = filtered_df[filtered_df['Make'].str.lower() == brand_preference.lower()]
        if len(filtered_df) == 0:
            print(f"‚ùå No cars found from brand '{brand_preference}'. Try a different brand.")
            return None
        print(f"üîç Filtered by brand: {brand_preference}")
    
    # Apply price filter if specified
    if max_price:
        filtered_df = filtered_df[filtered_df['Price_Cleaned'] <= max_price]
        
        if len(filtered_df) == 0:
            brand_msg = f" from brand '{brand_preference}'" if brand_preference else ""
            print(f"‚ùå No cars found within your budget{brand_msg}. Try increasing your budget.")
            return None
    
    # Prepare filtered features
    filtered_features = filtered_df[final_feature_columns_v3].copy()
    
    # Fill any remaining NaN values
    for col in ['Mileage_Cleaned', 'Displacement_Cleaned']:
        filtered_features[col].fillna(filtered_features[col].median(), inplace=True)
    
    # Fill any other missing values with 0
    filtered_features.fillna(0, inplace=True)
    
    # Scale filtered features
    filtered_features_scaled = final_scaler_v3.transform(filtered_features)
        
    # Build model on filtered data
    knn_filtered = NearestNeighbors(n_neighbors=min(n_recommendations, len(filtered_df)), metric='cosine')
    knn_filtered.fit(filtered_features_scaled)
    
    user_vector = final_scaler_v3.transform([full_preferences])
    distances, indices = knn_filtered.kneighbors(user_vector)
    
    recommendations = filtered_df.iloc[indices[0]].copy()
    
    # Add similarity score
    recommendations['Similarity_Score'] = 1 - distances[0]
    
    # Display results with all preferences (INCLUDING BRAND)
    pref_msg = []
    if brand_preference:
        pref_msg.append(f"Brand: {brand_preference}")
    if body_type_preference:
        pref_msg.append(f"Body: {body_type_preference}")
    if fuel_type_preference:
        pref_msg.append(f"Fuel: {fuel_type_preference}")
    if transmission_preference:
        pref_msg.append(f"Transmission: {transmission_preference}")
    if seating_preference:
        pref_msg.append(f"Seats: {seating_preference}")
    
    pref_str = " with " + ", ".join(pref_msg) if pref_msg else ""
    print(f"üéâ Found {len(recommendations)} recommendations{pref_str}:")

    # SAFE DISPLAY: Check which columns actually exist
    base_columns = ['Make', 'Model', 'Variant', 'Price_Cleaned', 'Similarity_Score']
    optional_columns = [
        'Smart_Efficiency', 'Mileage_Cleaned', 'Fuel_Type_Cleaned', 
        'Smart_Transmission', 'Body_Type_Cleaned', 'Seating_Capacity_Cleaned'
    ]
    
    # Only include columns that exist in the dataframe
    available_columns = base_columns + [col for col in optional_columns if col in recommendations.columns]
    
    # Show which columns we're displaying
    print(f"üìä Displaying columns: {', '.join(available_columns)}")
    
    display(recommendations[available_columns].sort_values('Similarity_Score', ascending=False))
    
    return recommendations

print("‚úÖ Updated recommendation function with BRAND SELECTION & smart efficiency display!")

üîÑ Updating Recommendation with Brand Selection & Smart Efficiency...
‚úÖ Updated recommendation function with BRAND SELECTION & smart efficiency display!


In [58]:
# Cell 35: Test Brand Selection with Smart Efficiency
print("üß™ Testing Brand Selection with Smart Efficiency...")
print("=" * 50)

# Test: Electric Car with Brand + Smart Efficiency
print("TEST: ‚ö° Electric Car with Brand Selection & Smart Efficiency")

user_pref_electric = [
    1500000,   # Budget: ‚Çπ15 lakh
    200,       # Range preference
    0,         # Electric - no displacement
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for Tata electric cars...")
recommendations_electric_smart = recommend_cars_complete_v2(
    user_pref_electric,
    brand_preference='Tata',  # ‚Üê BRAND SELECTION
    fuel_type_preference='Electric',
    max_price=2000000
)

print("\n" + "=" * 50)

# Test: Regular Car with Brand + Smart Efficiency
print("\nTEST: ‚õΩ Regular Car with Brand Selection & Smart Efficiency")

user_pref_regular = [
    1000000,   # Budget: ‚Çπ10 lakh
    20,        # Mileage preference
    1500,      # Engine size
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for Hyundai regular cars...")
recommendations_regular_smart = recommend_cars_complete_v2(
    user_pref_regular,
    brand_preference='Hyundai',  # ‚Üê BRAND SELECTION
    body_type_preference='Sedan',
    fuel_type_preference='Petrol',
    transmission_preference='Manual',
    seating_preference=5,
    max_price=1200000
)

üß™ Testing Brand Selection with Smart Efficiency...
TEST: ‚ö° Electric Car with Brand Selection & Smart Efficiency

üîç Searching for Tata electric cars...
üí° Smart Note: Electric cars automatically use automatic transmission
üîç Filtered by brand: Tata
üéâ Found 5 recommendations with Brand: Tata, Fuel: Electric, Transmission: Automatic:
üìä Displaying columns: Make, Model, Variant, Price_Cleaned, Similarity_Score, Mileage_Cleaned, Fuel_Type_Cleaned, Smart_Transmission, Body_Type_Cleaned, Seating_Capacity_Cleaned


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Similarity_Score,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned
1006,Tata,Nexon Ev,Xm,1399000.0,0.854004,,Electric,Automatic,SUV,5.0
1007,Tata,Nexon Ev,Xz Plus,1499000.0,0.854003,,Electric,Automatic,SUV,5.0
1008,Tata,Nexon Ev,Xz Plus Lux,1599000.0,0.854002,,Electric,Automatic,SUV,5.0
618,Tata,Tigor Ev,Xt+,975868.0,0.850795,,Electric,Automatic,Sedan,5.0
617,Tata,Tigor Ev,Xm+,960868.0,0.850795,,Electric,Automatic,Sedan,5.0




TEST: ‚õΩ Regular Car with Brand Selection & Smart Efficiency

üîç Searching for Hyundai regular cars...
üîç Filtered by brand: Hyundai
üéâ Found 5 recommendations with Brand: Hyundai, Body: Sedan, Fuel: Petrol, Transmission: Manual, Seats: 5:
üìä Displaying columns: Make, Model, Variant, Price_Cleaned, Similarity_Score, Mileage_Cleaned, Fuel_Type_Cleaned, Smart_Transmission, Body_Type_Cleaned, Seating_Capacity_Cleaned


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Similarity_Score,Mileage_Cleaned,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned
578,Hyundai,Verna,1.4 Vtvt Ex,933182.0,0.99946,19.1,Petrol,Manual,Sedan,5.0
572,Hyundai,Verna,1.6 Vtvt Sx,999900.0,0.999429,17.7,Petrol,Manual,Sedan,5.0
575,Hyundai,Verna,1.6 Vtvt Sx (O),1172999.0,0.999419,17.7,Petrol,Manual,Sedan,5.0
580,Hyundai,Verna,1.6 Vtvt Sx (O) Anniversary Edition,1178894.0,0.999418,17.7,Petrol,Manual,Sedan,5.0
1074,Hyundai,Xcent,Sx 1.2 (O),782346.0,0.995717,,Petrol,Manual,Sedan,5.0


In [49]:
# Cell 36: Final Enhancements and Summary
print("üöÄ Adding Final Enhancements...")

def get_car_recommendation_summary():
    """Display summary of available options in the dataset"""
    print("üìä CAR DATABASE SUMMARY")
    print("=" * 50)
    print(f"Total Cars: {len(df)}")
    print(f"Makes: {df['Make'].nunique()}")
    print(f"Models: {df['Model'].nunique()}")
    print(f"Price Range: ‚Çπ{df['Price_Cleaned'].min():,.0f} - ‚Çπ{df['Price_Cleaned'].max():,.0f}")
    print(f"Body Types: {df['Body_Type_Cleaned'].value_counts().to_dict()}")
    print(f"Fuel Types: {df['Fuel_Type_Cleaned'].value_counts().to_dict()}")
    print(f"Seating Options: {df['Seating_Capacity_Cleaned'].value_counts().sort_index().to_dict()}")

# Display summary
get_car_recommendation_summary()

print("\nüéØ QUICK RECOMMENDATION PROFILES:")
print("1. üèôÔ∏è City Commuter: Hatchback, Petrol, 5 seats, <‚Çπ8L")
print("2. üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV: SUV, Diesel, 7 seats, <‚Çπ20L") 
print("3. ‚ö° Eco Warrior: Electric, Automatic, 5 seats, <‚Çπ25L")
print("4. üèéÔ∏è Performance: Sedan, Petrol, Manual, 5 seats, <‚Çπ15L")
print("5. üöê Large Family: MUV, Diesel, 8 seats, <‚Çπ25L")

print("\n‚úÖ CAR RECOMMENDATION SYSTEM - COMPLETE!")
print("=" * 50)
print("üéâ Your smart car recommendation system is fully operational!")
print("üìã Features included:")
print("   ‚úì Price & Budget filtering")
print("   ‚úì Body type preferences (SUV, Sedan, Hatchback, etc.)")
print("   ‚úì Fuel type selection (Petrol, Diesel, Electric, etc.)")
print("   ‚úì Transmission type (Manual/Automatic)")
print("   ‚úì Seating capacity (5, 7, 8 seats)")
print("   ‚úì Smart logic (Electric = Automatic)")
print("   ‚úì Smart efficiency (Range for electric, Mileage for fuel cars)")
print("   ‚úì 30 feature dimensions for accurate matching")
print("   ‚úì Similarity scoring for best matches")

üöÄ Adding Final Enhancements...
üìä CAR DATABASE SUMMARY
Total Cars: 1276
Makes: 39
Models: 263
Price Range: ‚Çπ236,447 - ‚Çπ212,155,397
Body Types: {'SUV': 451, 'Sedan': 336, 'Hatchback': 317, 'MUV': 78, 'Coupe': 42, 'Convertible': 22, 'Crossover': 18, 'Unknown': 6, 'Sports': 3, 'Pick-Up': 3}
Fuel Types: {'Petrol': 643, 'Diesel': 582, 'CNG': 16, 'Hybrid': 15, 'Electric': 14, 'CNG_Petrol': 6}
Seating Options: {2.0: 39, 4.0: 70, 5.0: 915, 6.0: 26, 7.0: 183, 8.0: 17, 9.0: 19, 16.0: 1}

üéØ QUICK RECOMMENDATION PROFILES:
1. üèôÔ∏è City Commuter: Hatchback, Petrol, 5 seats, <‚Çπ8L
2. üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV: SUV, Diesel, 7 seats, <‚Çπ20L
3. ‚ö° Eco Warrior: Electric, Automatic, 5 seats, <‚Çπ25L
4. üèéÔ∏è Performance: Sedan, Petrol, Manual, 5 seats, <‚Çπ15L
5. üöê Large Family: MUV, Diesel, 8 seats, <‚Çπ25L

‚úÖ CAR RECOMMENDATION SYSTEM - COMPLETE!
üéâ Your smart car recommendation system is fully operational!
üìã Features included:
   ‚úì Price & Budget filtering
   

In [50]:
# Cell 37: Create Easy-to-Use Wrapper Functions (using the NEW smart features)
print("üéõÔ∏è Creating User-Friendly Interface...")

def recommend_family_car(budget, seating=7, fuel_type='Diesel'):
    """Easy function for family car seekers"""
    user_pref = [
        budget, 18, 2000, 1, 1, 1, 1, 1  # Standard family preferences
    ]
    return recommend_cars_complete_v2(  # Using the UPDATED function
        user_pref,
        body_type_preference='SUV',
        fuel_type_preference=fuel_type,
        transmission_preference='Automatic',
        seating_preference=seating,
        max_price=budget
    )

def recommend_city_car(budget, fuel_type='Petrol'):
    """Easy function for city car seekers"""
    user_pref = [
        budget, 20, 1200, 1, 1, 1, 1, 1  # Standard city preferences
    ]
    return recommend_cars_complete_v2(  # Using the UPDATED function
        user_pref,
        body_type_preference='Hatchback',
        fuel_type_preference=fuel_type,
        transmission_preference='Manual',
        seating_preference=5,
        max_price=budget
    )

def recommend_electric_car(budget):
    """Easy function for electric car seekers"""
    user_pref = [
        budget, 200, 0, 1, 1, 1, 1, 1  # Electric car preferences (200km range)
    ]
    return recommend_cars_complete_v2(  # Using the UPDATED function
        user_pref,
        fuel_type_preference='Electric',
        seating_preference=5,
        max_price=budget
    )

def recommend_performance_car(budget):
    """Easy function for performance car seekers"""
    user_pref = [
        budget, 15, 2000, 1, 1, 1, 1, 1  # Performance preferences
    ]
    return recommend_cars_complete_v2(
        user_pref,
        body_type_preference='Sedan',
        fuel_type_preference='Petrol', 
        transmission_preference='Manual',
        seating_preference=5,
        max_price=budget
    )

print("‚úÖ User-friendly functions created!")
print("Try: recommend_family_car(1500000)")
print("Try: recommend_city_car(800000)") 
print("Try: recommend_electric_car(2000000)")
print("Try: recommend_performance_car(1200000)")

üéõÔ∏è Creating User-Friendly Interface...
‚úÖ User-friendly functions created!
Try: recommend_family_car(1500000)
Try: recommend_city_car(800000)
Try: recommend_electric_car(2000000)
Try: recommend_performance_car(1200000)


In [51]:
# Cell 38: Test the Easy-to-Use Functions
print("üß™ Testing Easy-to-Use Functions...")
print("=" * 50)

# Test 1: Simple family car search
print("TEST 1: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Simple Family Car Search")
print("Using: recommend_family_car(1500000)")
family_cars = recommend_family_car(1500000)

print("\n" + "=" * 50)

# Test 2: Simple electric car search
print("\nTEST 2: ‚ö° Simple Electric Car Search")
print("Using: recommend_electric_car(2000000)")
electric_cars = recommend_electric_car(2000000)

print("\n" + "=" * 50)

# Test 3: Performance car search
print("\nTEST 3: üèéÔ∏è Performance Car Search")
print("Using: recommend_performance_car(1200000)")
performance_cars = recommend_performance_car(1200000)

üß™ Testing Easy-to-Use Functions...
TEST 1: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Simple Family Car Search
Using: recommend_family_car(1500000)
üéâ Found 5 recommendations with Body: SUV, Fuel: Diesel, Transmission: Automatic, Seats: 7:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1195,Mahindra,Nuvosport,N8 Amt,1048603.0,16.21,Diesel,Automatic,SUV,7.0,0.879541
1194,Mahindra,Nuvosport,N6 Amt,972359.0,16.21,Diesel,Automatic,SUV,7.0,0.87954
624,Honda,Brv,V Cvt Petrol,1285900.0,16.0,Petrol,Automatic,SUV,7.0,0.761353
694,Tata,Hexa,Xe 4X2,1371864.0,17.6,Diesel,Manual,SUV,7.0,0.757908
686,Mahindra,Xuv500,W3,1230924.0,16.0,Diesel,Manual,SUV,7.0,0.757905




TEST 2: ‚ö° Simple Electric Car Search
Using: recommend_electric_car(2000000)
üí° Smart Note: Electric cars automatically use automatic transmission
üéâ Found 5 recommendations with Fuel: Electric, Transmission: Automatic, Seats: 5:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1008,Tata,Nexon Ev,Xz Plus Lux,1599000.0,300.0,Electric,Automatic,SUV,5.0,0.870198
1007,Tata,Nexon Ev,Xz Plus,1499000.0,312.0,Electric,Automatic,SUV,5.0,0.870196
1006,Tata,Nexon Ev,Xm,1399000.0,312.0,Electric,Automatic,SUV,5.0,0.870194
618,Tata,Tigor Ev,Xt+,975868.0,213.0,Electric,Automatic,Sedan,5.0,0.866886
617,Tata,Tigor Ev,Xm+,960868.0,213.0,Electric,Automatic,Sedan,5.0,0.866885




TEST 3: üèéÔ∏è Performance Car Search
Using: recommend_performance_car(1200000)
üéâ Found 5 recommendations with Body: Sedan, Fuel: Petrol, Transmission: Manual, Seats: 5:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
665,Skoda,Monte Carlo,Monte Carlo 1.6 Mpi Mt,1139599.0,15.41,Petrol,Manual,Sedan,5.0,0.992109
1111,Skoda,Rapid,1.6 Mpi Style,1115599.0,15.41,Petrol,Manual,Sedan,5.0,0.992109
1103,Skoda,Rapid,1.6 Mpi Ambition,998599.0,15.41,Petrol,Manual,Sedan,5.0,0.9921
1113,Skoda,Rapid,Onyx Mt Petrol,975599.0,15.41,Petrol,Manual,Sedan,5.0,0.992097
1105,Skoda,Rapid,1.6 Mpi Active,881916.0,15.41,Petrol,Manual,Sedan,5.0,0.99208


In [52]:
# Cell 39: Add Brand Preference Features
print("üè∑Ô∏è Adding Brand Preference Features...")

# Get top brands by count
top_brands = df['Make'].value_counts().head(15).index.tolist()
print(f"‚úÖ Top {len(top_brands)} brands identified: {top_brands}")

# Create binary features for top brands
for brand in top_brands:
    brand_col_name = f'Brand_{brand.replace(" ", "_")}'
    df[brand_col_name] = (df['Make'] == brand).astype(int)

# Get the new brand columns
brand_columns = [col for col in df.columns if col.startswith('Brand_')]
print(f"‚úÖ Created {len(brand_columns)} brand binary features")

# Update final feature set with brand preferences
final_feature_columns_v4 = final_feature_columns_v3 + brand_columns
print(f"üéØ Total features now: {len(final_feature_columns_v4)}")

üè∑Ô∏è Adding Brand Preference Features...
‚úÖ Top 15 brands identified: ['Maruti Suzuki', 'Hyundai', 'Mahindra', 'Tata', 'Toyota', 'Honda', 'Ford', 'Skoda', 'Bmw', 'Renault', 'Volkswagen', 'Audi', 'Nissan', 'Jeep', 'Land Rover Rover']
‚úÖ Created 15 brand binary features
üéØ Total features now: 45


In [53]:
# Cell 40: Rebuild Model with Brand Features
print("üîÑ Rebuilding Model with Brand Preferences...")

# Prepare features with brand preferences
final_feature_df_v4 = df[final_feature_columns_v4].copy()

# Fill missing values
for col in ['Price_Cleaned', 'Mileage_Cleaned', 'Displacement_Cleaned']:
    if col in final_feature_df_v4.columns:
        final_feature_df_v4[col].fillna(final_feature_df_v4[col].median(), inplace=True)

final_feature_df_v4.fillna(0, inplace=True)

# Scale features
final_scaler_v4 = StandardScaler()
final_scaled_features_v4 = final_scaler_v4.fit_transform(final_feature_df_v4)

# Build new KNN model
final_knn_model_v4 = NearestNeighbors(n_neighbors=5, metric='cosine')
final_knn_model_v4.fit(final_scaled_features_v4)

print("‚úÖ Model rebuilt with brand preferences!")
print(f"Feature breakdown:")
print(f"  - Basic: 8")
print(f"  - Body: {len(clean_body_type_columns)}")
print(f"  - Transmission: {len(transmission_columns)}")
print(f"  - Fuel: {len(fuel_columns)}")
print(f"  - Seating: {len(seating_columns)}")
print(f"  - Brands: {len(brand_columns)}")
print(f"  - Total: {len(final_feature_columns_v4)}")

üîÑ Rebuilding Model with Brand Preferences...
‚úÖ Model rebuilt with brand preferences!
Feature breakdown:
  - Basic: 8
  - Body: 10
  - Transmission: 3
  - Fuel: 6
  - Seating: 3
  - Brands: 15
  - Total: 45


In [63]:
# Cell 41 (Updated): Update Recommendation Function with Brand Preferences and Exclude Brands
print("üéØ Updating Recommendation Function with Brand Preferences and Exclude Brands...")

def recommend_cars_with_brands(user_preferences, body_type_preference=None, 
                              fuel_type_preference=None, transmission_preference=None,
                              seating_preference=None, brand_preferences=None,
                              exclude_brands=None, n_recommendations=5, max_price=None):
    """
    UPDATED VERSION: Includes brand preferences and exclude brands
    - brand_preferences: list of brand names e.g., ['Tata', 'Hyundai', 'Maruti Suzuki']
    - exclude_brands: list of brand names to exclude e.g., ['BMW', 'Audi']
    """
    
    # SMART LOGIC: Electric cars = Automatic transmission
    if fuel_type_preference == 'Electric':
        transmission_preference = 'Automatic'
        print("üí° Smart Note: Electric cars automatically use automatic transmission")
    
    # Start with the entire dataset
    base_df = df.copy()
    
    # If exclude_brands is provided, remove those brands
    if exclude_brands:
        if isinstance(exclude_brands, str):
            exclude_brands = [exclude_brands]
        base_df = base_df[~base_df['Make'].isin(exclude_brands)].copy()
        print(f"üö´ Excluded brands: {exclude_brands}")
    
    # If both brand_preferences and exclude_brands are provided, remove any excluded brands from brand_preferences
    if brand_preferences and exclude_brands:
        brand_preferences = [brand for brand in brand_preferences if brand not in exclude_brands]
    
    # Start with basic preferences
    full_preferences = user_preferences.copy()
    
    # Add body type preferences
    if body_type_preference:
        body_pref = [0] * len(clean_body_type_columns)
        target_col = f'Body_{body_type_preference}'
        if target_col in clean_body_type_columns:
            idx = clean_body_type_columns.index(target_col)
            body_pref[idx] = 1
        full_preferences.extend(body_pref)
    else:
        full_preferences.extend([0] * len(clean_body_type_columns))
    
    # Add transmission preferences
    if transmission_preference:
        trans_pref = [0] * len(transmission_columns)
        target_col = f'Transmission_{transmission_preference}'
        if target_col in transmission_columns:
            idx = transmission_columns.index(target_col)
            trans_pref[idx] = 1
        full_preferences.extend(trans_pref)
    else:
        full_preferences.extend([0] * len(transmission_columns))
    
    # Add fuel type preferences
    if fuel_type_preference:
        fuel_pref = [0] * len(fuel_columns)
        target_col = f'Fuel_{fuel_type_preference}'
        if target_col in fuel_columns:
            idx = fuel_columns.index(target_col)
            fuel_pref[idx] = 1
        full_preferences.extend(fuel_pref)
    else:
        full_preferences.extend([0] * len(fuel_columns))
    
    # Add seating capacity preferences
    if seating_preference:
        seating_pref = [0] * len(seating_columns)
        target_col = f'Seats_{seating_preference}'
        if target_col in seating_columns:
            idx = seating_columns.index(target_col)
            seating_pref[idx] = 1
        full_preferences.extend(seating_pref)
    else:
        full_preferences.extend([0] * len(seating_columns))
    
    # ADD BRAND PREFERENCES (NEW FEATURE)
    if brand_preferences:
        brand_pref = [0] * len(brand_columns)
        for preferred_brand in brand_preferences:
            target_col = f'Brand_{preferred_brand.replace(" ", "_")}'
            if target_col in brand_columns:
                idx = brand_columns.index(target_col)
                brand_pref[idx] = 1
            else:
                print(f"‚ö†Ô∏è Brand '{preferred_brand}' not found in top brands. Available: {[b.replace('Brand_', '').replace('_', ' ') for b in brand_columns]}")
        full_preferences.extend(brand_pref)
    else:
        full_preferences.extend([0] * len(brand_columns))
    
    # If we have to filter (by price or exclude_brands), then we build a new KNN model on the filtered data
    if max_price or exclude_brands:
        # Use base_df which might have been filtered by exclude_brands
        if max_price:
            filtered_df = base_df[base_df['Price_Cleaned'] <= max_price].copy()
        else:
            filtered_df = base_df.copy()

        if len(filtered_df) == 0:
            print("‚ùå No cars found within your criteria. Try relaxing your budget or exclude filters.")
            return None

        # Prepare filtered features
        filtered_features = filtered_df[final_feature_columns_v4].copy()

        # Fill any remaining NaN values
        for col in ['Mileage_Cleaned', 'Displacement_Cleaned']:
            filtered_features[col].fillna(filtered_features[col].median(), inplace=True)

        filtered_features.fillna(0, inplace=True)

        # Scale filtered features
        filtered_features_scaled = final_scaler_v4.transform(filtered_features)

        # Build model on filtered data
        knn_filtered = NearestNeighbors(n_neighbors=min(n_recommendations, len(filtered_df)), metric='cosine')
        knn_filtered.fit(filtered_features_scaled)

        user_vector = final_scaler_v4.transform([full_preferences])
        distances, indices = knn_filtered.kneighbors(user_vector)

        recommendations = filtered_df.iloc[indices[0]].copy()
        
    else:
        # Use the global model (no price filter, no exclude_brands)
        user_vector = final_scaler_v4.transform([full_preferences])
        distances, indices = final_knn_model_v4.kneighbors(user_vector, n_neighbors=n_recommendations)

        recommendations = base_df.iloc[indices[0]].copy()
    
    # Add similarity score
    recommendations['Similarity_Score'] = 1 - distances[0]
    
    # Display results with all preferences
    pref_msg = []
    if body_type_preference:
        pref_msg.append(f"Body: {body_type_preference}")
    if fuel_type_preference:
        pref_msg.append(f"Fuel: {fuel_type_preference}")
    if transmission_preference:
        pref_msg.append(f"Transmission: {transmission_preference}")
    if seating_preference:
        pref_msg.append(f"Seats: {seating_preference}")
    if brand_preferences:
        pref_msg.append(f"Brands: {', '.join(brand_preferences)}")
    
    pref_str = " with " + ", ".join(pref_msg) if pref_msg else ""
    print(f"üéâ Found {len(recommendations)} recommendations{pref_str}:")
    
    # Show relevant columns including brands
    display_columns = ['Make', 'Model', 'Variant', 'Price_Cleaned', 'Smart_Efficiency', 
                      'Fuel_Type_Cleaned', 'Smart_Transmission', 'Body_Type_Cleaned', 
                      'Seating_Capacity_Cleaned', 'Similarity_Score']
    
    display(recommendations[display_columns].sort_values('Similarity_Score', ascending=False))
    
    return recommendations

print("‚úÖ Brand preference and exclude brands recommendation function created!")

üéØ Updating Recommendation Function with Brand Preferences and Exclude Brands...
‚úÖ Brand preference and exclude brands recommendation function created!


In [64]:
# Cell 42: Test Brand Preferences Feature
print("üß™ Testing Brand Preferences Feature...")
print("=" * 50)

# Test 1: Specific Brand Preference
print("TEST 1: üéØ Tata & Mahindra Only")
print("Preferences: SUV, Diesel, 7 seats, Brands: Tata, Mahindra")

user_pref_brand = [
    1500000,   # Budget: ‚Çπ15 lakh
    18,        # Good mileage
    2000,      # Medium engine
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for Tata & Mahindra SUVs...")
recommendations_brand_specific = recommend_cars_with_brands(
    user_pref_brand,
    body_type_preference='SUV',
    fuel_type_preference='Diesel',
    transmission_preference='Automatic',
    seating_preference=7,
    brand_preferences=['Tata', 'Mahindra'],  # NEW: Brand preference!
    max_price=2000000
)

print("\n" + "=" * 50)

# Test 2: Multiple Brand Preferences
print("\nTEST 2: üöó Hyundai & Maruti City Cars")
print("Preferences: Hatchback, Petrol, 5 seats, Brands: Hyundai, Maruti Suzuki")

user_pref_city_brand = [
    800000,    # Budget: ‚Çπ8 lakh
    20,        # Good mileage
    1200,      # Small engine
    1, 1, 1, 1, 1  # All features
]

print(f"\nüîç Searching for Hyundai & Maruti city cars...")
recommendations_city_brand = recommend_cars_with_brands(
    user_pref_city_brand,
    body_type_preference='Hatchback',
    fuel_type_preference='Petrol',
    transmission_preference='Manual',
    seating_preference=5,
    brand_preferences=['Hyundai', 'Maruti Suzuki'],  # NEW: Brand preference!
    max_price=1000000
)

üß™ Testing Brand Preferences Feature...
TEST 1: üéØ Tata & Mahindra Only
Preferences: SUV, Diesel, 7 seats, Brands: Tata, Mahindra

üîç Searching for Tata & Mahindra SUVs...
üéâ Found 5 recommendations with Body: SUV, Fuel: Diesel, Transmission: Automatic, Seats: 7, Brands: Tata, Mahindra:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
696,Tata,Hexa,Xma 4X2,1655587.0,17.6,Diesel,Automatic,SUV,7.0,0.837437
677,Mahindra,Xuv500,W7 At,1539488.0,16.0,Diesel,Automatic,SUV,7.0,0.806367
668,Mahindra,Xuv500,W7 At,1539488.0,16.0,Diesel,Automatic,SUV,7.0,0.806367
670,Mahindra,Xuv500,W9 At,1710118.0,16.0,Diesel,Automatic,SUV,7.0,0.806359
679,Mahindra,Xuv500,W9 At,1710118.0,16.0,Diesel,Automatic,SUV,7.0,0.806359




TEST 2: üöó Hyundai & Maruti City Cars
Preferences: Hatchback, Petrol, 5 seats, Brands: Hyundai, Maruti Suzuki

üîç Searching for Hyundai & Maruti city cars...
üéâ Found 5 recommendations with Body: Hatchback, Fuel: Petrol, Transmission: Manual, Seats: 5, Brands: Hyundai, Maruti Suzuki:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
914,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz,620637.0,19.77,Petrol,Manual,Hatchback,5.0,0.799517
917,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz Dual Tone,640537.0,19.77,Petrol,Manual,Hatchback,5.0,0.799517
1063,Hyundai,Grand I10 Nios,Magna 1.2 Vtvt,589610.0,20.7,Petrol,Manual,Hatchback,5.0,0.799513
1065,Hyundai,Grand I10 Nios,Sportz 1.2 Vtvt,643350.0,20.7,Petrol,Manual,Hatchback,5.0,0.799512
1067,Hyundai,Grand I10 Nios,Sportz Dual Tone 1.2 Vtvt,673350.0,20.7,Petrol,Manual,Hatchback,5.0,0.799511


In [65]:
# Cell 43 (Updated): Update Easy-to-Use Functions with Brand Support and Exclude Brands
print("üîÑ Updating Easy-to-Use Functions with Brand Support and Exclude Brands...")

def recommend_family_car_v2(budget, seating=7, fuel_type='Diesel', brands=None, exclude_brands=None):
    """Easy function for family car seekers with brand preference and exclude brands"""
    user_pref = [
        budget, 18, 2000, 1, 1, 1, 1, 1  # Standard family preferences
    ]
    return recommend_cars_with_brands(
        user_pref,
        body_type_preference='SUV',
        fuel_type_preference=fuel_type,
        transmission_preference='Automatic',
        seating_preference=seating,
        brand_preferences=brands,  # NEW: Brand parameter
        exclude_brands=exclude_brands,  # NEW: Exclude brands
        max_price=budget
    )

def recommend_city_car_v2(budget, fuel_type='Petrol', brands=None, exclude_brands=None):
    """Easy function for city car seekers with brand preference and exclude brands"""
    user_pref = [
        budget, 20, 1200, 1, 1, 1, 1, 1  # Standard city preferences
    ]
    return recommend_cars_with_brands(
        user_pref,
        body_type_preference='Hatchback',
        fuel_type_preference=fuel_type,
        transmission_preference='Manual',
        seating_preference=5,
        brand_preferences=brands,  # NEW: Brand parameter
        exclude_brands=exclude_brands,  # NEW: Exclude brands
        max_price=budget
    )

print("‚úÖ Updated user-friendly functions with brand and exclude brands support!")
print("Try: recommend_family_car_v2(1500000, brands=['Tata', 'Mahindra'], exclude_brands=['BMW'])")
print("Try: recommend_city_car_v2(800000, brands=['Hyundai', 'Maruti Suzuki'], exclude_brands=['Audi'])")

üîÑ Updating Easy-to-Use Functions with Brand Support and Exclude Brands...
‚úÖ Updated user-friendly functions with brand and exclude brands support!
Try: recommend_family_car_v2(1500000, brands=['Tata', 'Mahindra'], exclude_brands=['BMW'])
Try: recommend_city_car_v2(800000, brands=['Hyundai', 'Maruti Suzuki'], exclude_brands=['Audi'])


In [57]:
# Cell 44: Test the Updated Brand-Aware Functions
print("üß™ Testing Brand-Aware Easy Functions...")
print("=" * 50)

# Test 1: Family Car with Specific Brands
print("TEST 1: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV from Tata & Mahindra Only")
print("Using: recommend_family_car_v2(1500000, brands=['Tata', 'Mahindra'])")

family_tata_mahindra = recommend_family_car_v2(
    1500000, 
    brands=['Tata', 'Mahindra']  # Only these two brands
)

print("\n" + "=" * 50)

# Test 2: City Car with Preferred Brands
print("\nTEST 2: üèôÔ∏è City Car from Hyundai & Maruti Only")
print("Using: recommend_city_car_v2(800000, brands=['Hyundai', 'Maruti Suzuki'])")

city_hyundai_maruti = recommend_city_car_v2(
    800000,
    brands=['Hyundai', 'Maruti Suzuki']  # Only these two brands
)

print("\n" + "=" * 50)

# Test 3: Compare with No Brand Preference
print("\nTEST 3: üîÑ Comparison - Same Search Without Brand Preference")
print("Using: recommend_family_car_v2(1500000) - No brand specified")

family_all_brands = recommend_family_car_v2(1500000)  # No brand preference

print("\n" + "=" * 50)

# Test 4: Single Brand Preference
print("\nTEST 4: üéØ Single Brand - Only Hyundai")
print("Using: recommend_city_car_v2(800000, brands=['Hyundai'])")

city_only_hyundai = recommend_city_car_v2(
    800000,
    brands=['Hyundai']  # Only one brand
)

üß™ Testing Brand-Aware Easy Functions...
TEST 1: üë®‚Äçüë©‚Äçüëß‚Äçüë¶ Family SUV from Tata & Mahindra Only
Using: recommend_family_car_v2(1500000, brands=['Tata', 'Mahindra'])
üéâ Found 5 recommendations with Body: SUV, Fuel: Diesel, Transmission: Automatic, Seats: 7, Brands: Tata, Mahindra:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1195,Mahindra,Nuvosport,N8 Amt,1048603.0,16.21,Diesel,Automatic,SUV,7.0,0.74345
1194,Mahindra,Nuvosport,N6 Amt,972359.0,16.21,Diesel,Automatic,SUV,7.0,0.743449
694,Tata,Hexa,Xe 4X2,1371864.0,17.6,Diesel,Manual,SUV,7.0,0.716013
993,Tata,Safari Storme,2.2 Vx 4X2 Varicor 400,1479574.0,14.1,Diesel,Manual,SUV,7.0,0.715909
686,Mahindra,Xuv500,W3,1230924.0,16.0,Diesel,Manual,SUV,7.0,0.68036




TEST 2: üèôÔ∏è City Car from Hyundai & Maruti Only
Using: recommend_city_car_v2(800000, brands=['Hyundai', 'Maruti Suzuki'])
üéâ Found 5 recommendations with Body: Hatchback, Fuel: Petrol, Transmission: Manual, Seats: 5, Brands: Hyundai, Maruti Suzuki:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
914,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz,620637.0,19.77,Petrol,Manual,Hatchback,5.0,0.799517
917,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz Dual Tone,640537.0,19.77,Petrol,Manual,Hatchback,5.0,0.799517
1063,Hyundai,Grand I10 Nios,Magna 1.2 Vtvt,589610.0,20.7,Petrol,Manual,Hatchback,5.0,0.799513
1065,Hyundai,Grand I10 Nios,Sportz 1.2 Vtvt,643350.0,20.7,Petrol,Manual,Hatchback,5.0,0.799512
1067,Hyundai,Grand I10 Nios,Sportz Dual Tone 1.2 Vtvt,673350.0,20.7,Petrol,Manual,Hatchback,5.0,0.799511




TEST 3: üîÑ Comparison - Same Search Without Brand Preference
Using: recommend_family_car_v2(1500000) - No brand specified
üéâ Found 5 recommendations with Body: SUV, Fuel: Diesel, Transmission: Automatic, Seats: 7:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1194,Mahindra,Nuvosport,N6 Amt,972359.0,16.21,Diesel,Automatic,SUV,7.0,0.669708
1195,Mahindra,Nuvosport,N8 Amt,1048603.0,16.21,Diesel,Automatic,SUV,7.0,0.669681
686,Mahindra,Xuv500,W3,1230924.0,16.0,Diesel,Manual,SUV,7.0,0.56567
977,Kia,Seltos,Htk Plus At 1.5 Diesel,1354000.0,20.0,Diesel,Automatic,SUV,5.0,0.556934
694,Tata,Hexa,Xe 4X2,1371864.0,17.6,Diesel,Manual,SUV,7.0,0.545836




TEST 4: üéØ Single Brand - Only Hyundai
Using: recommend_city_car_v2(800000, brands=['Hyundai'])
üéâ Found 5 recommendations with Body: Hatchback, Fuel: Petrol, Transmission: Manual, Seats: 5, Brands: Hyundai:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
917,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz Dual Tone,640537.0,19.77,Petrol,Manual,Hatchback,5.0,0.999995
914,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz,620637.0,19.77,Petrol,Manual,Hatchback,5.0,0.999993
1068,Hyundai,Grand I10 Nios,Asta 1.2 Vtvt,718950.0,20.7,Petrol,Manual,Hatchback,5.0,0.999991
1067,Hyundai,Grand I10 Nios,Sportz Dual Tone 1.2 Vtvt,673350.0,20.7,Petrol,Manual,Hatchback,5.0,0.999989
1065,Hyundai,Grand I10 Nios,Sportz 1.2 Vtvt,643350.0,20.7,Petrol,Manual,Hatchback,5.0,0.999988


In [58]:
# Cell 45: Advanced Brand Preference Tests
print("üß™ Advanced Brand Preference Tests...")
print("=" * 50)

# Test 5: Luxury Brands Only
print("TEST 5: üíé Luxury Cars from German Brands")
print("Preferences: Sedan, Automatic, Brands: BMW, Mercedes-Benz, Audi")

user_pref_luxury = [
    5000000,   # Higher budget for luxury
    12,        # Lower mileage expectation
    2000,      # Medium engine
    1, 1, 1, 1, 1  # All features
]

luxury_cars = recommend_cars_with_brands(
    user_pref_luxury,
    body_type_preference='Sedan',
    fuel_type_preference='Petrol',
    transmission_preference='Automatic',
    seating_preference=5,
    brand_preferences=['BMW', 'Mercedes-Benz', 'Audi'],  # Luxury brands
    max_price=8000000
)

print("\n" + "=" * 50)

# Test 6: Electric Cars from Specific Brands
print("\nTEST 6: ‚ö° Electric Cars from Tata & Hyundai Only")

user_pref_electric_brand = [
    2000000,   # Budget for electric
    300,       # Range preference
    0,         # Electric - no displacement
    1, 1, 1, 1, 1  # All features
]

electric_brand_specific = recommend_cars_with_brands(
    user_pref_electric_brand,
    fuel_type_preference='Electric',
    brand_preferences=['Tata', 'Hyundai'],  # Specific electric car brands
    max_price=2500000
)

üß™ Advanced Brand Preference Tests...
TEST 5: üíé Luxury Cars from German Brands
Preferences: Sedan, Automatic, Brands: BMW, Mercedes-Benz, Audi
‚ö†Ô∏è Brand 'BMW' not found in top brands. Available: ['Maruti Suzuki', 'Hyundai', 'Mahindra', 'Tata', 'Toyota', 'Honda', 'Ford', 'Skoda', 'Bmw', 'Renault', 'Volkswagen', 'Audi', 'Nissan', 'Jeep', 'Land Rover Rover']
‚ö†Ô∏è Brand 'Mercedes-Benz' not found in top brands. Available: ['Maruti Suzuki', 'Hyundai', 'Mahindra', 'Tata', 'Toyota', 'Honda', 'Ford', 'Skoda', 'Bmw', 'Renault', 'Volkswagen', 'Audi', 'Nissan', 'Jeep', 'Land Rover Rover']
üéâ Found 5 recommendations with Body: Sedan, Fuel: Petrol, Transmission: Automatic, Seats: 5, Brands: BMW, Mercedes-Benz, Audi:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
286,Audi,A6,Premium Plus 45 Tfsi,5442200.0,14.11,Petrol,Automatic,Sedan,5.0,0.999959
287,Audi,A6,Technology 45 Tfsi,5942200.0,14.11,Petrol,Automatic,Sedan,5.0,0.999913
251,Audi,A3,35 Tfsi Technology,3120750.0,19.2,Petrol,Automatic,Sedan,5.0,0.996276
250,Audi,A3,35 Tfsi Premium Plus,2920750.0,19.2,Petrol,Automatic,Sedan,5.0,0.996224
294,Audi,S5,Sportback,7243000.0,13.57,Petrol,Automatic,Sedan,5.0,0.991182




TEST 6: ‚ö° Electric Cars from Tata & Hyundai Only
üí° Smart Note: Electric cars automatically use automatic transmission
üéâ Found 5 recommendations with Fuel: Electric, Transmission: Automatic, Brands: Tata, Hyundai:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1205,Hyundai,Kona Electric,Premium,2371858.0,462.0,Electric,Automatic,SUV,4.0,0.766875
1008,Tata,Nexon Ev,Xz Plus Lux,1599000.0,300.0,Electric,Automatic,SUV,5.0,0.759771
1007,Tata,Nexon Ev,Xz Plus,1499000.0,312.0,Electric,Automatic,SUV,5.0,0.75977
1006,Tata,Nexon Ev,Xm,1399000.0,312.0,Electric,Automatic,SUV,5.0,0.759768
618,Tata,Tigor Ev,Xt+,975868.0,213.0,Electric,Automatic,Sedan,5.0,0.757224


In [59]:
# Cell 46: Display Available Brands for User Reference
print("üè∑Ô∏è Available Car Brands in Dataset")
print("=" * 50)

# Show all available brands with car counts
brand_summary = df['Make'].value_counts().head(20)  # Top 20 brands
print("Top Brands Available:")
for brand, count in brand_summary.items():
    print(f"  {brand}: {count} cars")

print(f"\nüí° Tip: You can use any of these brands in the 'brands' parameter!")
print("Example: brands=['Tata', 'Mahindra', 'Hyundai', 'Maruti Suzuki']")

# Show brand categories
print(f"\nüéØ Brand Categories:")
indian_brands = ['Tata', 'Mahindra', 'Maruti Suzuki']
japanese_brands = ['Honda', 'Toyota', 'Nissan', 'Mitsubishi']
korean_brands = ['Hyundai', 'Kia']
european_brands = ['Volkswagen', 'Skoda', 'Renault', 'BMW', 'Mercedes-Benz', 'Audi', 'Jaguar', 'Land Rover', 'Volvo', 'Fiat']
american_brands = ['Ford', 'Jeep']

print(f"  Indian: {indian_brands}")
print(f"  Japanese: {japanese_brands}")
print(f"  Korean: {korean_brands}")
print(f"  European: {european_brands}")
print(f"  American: {american_brands}")

üè∑Ô∏è Available Car Brands in Dataset
Top Brands Available:
  Maruti Suzuki: 149 cars
  Hyundai: 130 cars
  Mahindra: 119 cars
  Tata: 100 cars
  Toyota: 82 cars
  Honda: 64 cars
  Ford: 43 cars
  Skoda: 43 cars
  Bmw: 37 cars
  Renault: 36 cars
  Volkswagen: 34 cars
  Audi: 31 cars
  Nissan: 29 cars
  Jeep: 28 cars
  Land Rover Rover: 27 cars
  Fiat: 23 cars
  Jaguar: 22 cars
  Kia: 21 cars
  Volvo: 18 cars
  Datsun: 15 cars

üí° Tip: You can use any of these brands in the 'brands' parameter!
Example: brands=['Tata', 'Mahindra', 'Hyundai', 'Maruti Suzuki']

üéØ Brand Categories:
  Indian: ['Tata', 'Mahindra', 'Maruti Suzuki']
  Japanese: ['Honda', 'Toyota', 'Nissan', 'Mitsubishi']
  Korean: ['Hyundai', 'Kia']
  European: ['Volkswagen', 'Skoda', 'Renault', 'BMW', 'Mercedes-Benz', 'Audi', 'Jaguar', 'Land Rover', 'Volvo', 'Fiat']
  American: ['Ford', 'Jeep']


In [60]:
# Cell 47: Comprehensive Brand Preference Demonstration
print("üéØ Comprehensive Brand Preference Demonstration")
print("=" * 50)

# Test 1: Indian Brands Only
print("TEST 1: üáÆüá≥ Indian Brands Only - Family SUV")
print("Preferences: SUV, Diesel, 7 seats, Indian brands only")

indian_family_cars = recommend_family_car_v2(
    1500000,
    seating=7,
    fuel_type='Diesel', 
    brands=['Tata', 'Mahindra', 'Maruti Suzuki']  # Indian brands only
)

print("\n" + "=" * 50)

# Test 2: Japanese Reliability Focus
print("\nTEST 2: üáØüáµ Japanese Brands - Reliable Sedans")
print("Preferences: Sedan, Petrol, Automatic, Japanese brands")

user_pref_japanese = [
    1200000,   # Budget
    18,        # Good mileage
    1500,      # Medium engine
    1, 1, 1, 1, 1  # All features
]

japanese_sedans = recommend_cars_with_brands(
    user_pref_japanese,
    body_type_preference='Sedan',
    fuel_type_preference='Petrol',
    transmission_preference='Automatic',
    seating_preference=5,
    brand_preferences=['Toyota', 'Honda', 'Nissan'],  # Japanese reliability
    max_price=1500000
)

print("\n" + "=" * 50)

# Test 3: European Luxury
print("\nTEST 3: üá™üá∫ European Luxury - Premium SUVs")
print("Preferences: SUV, Diesel, Automatic, European luxury brands")

user_pref_european = [
    5000000,   # Luxury budget
    15,        # Lower mileage expectation
    2000,      # Larger engine
    1, 1, 1, 1, 1  # All premium features
]

european_luxury = recommend_cars_with_brands(
    user_pref_european,
    body_type_preference='SUV',
    fuel_type_preference='Diesel',
    transmission_preference='Automatic',
    seating_preference=5,
    brand_preferences=['BMW', 'Mercedes-Benz', 'Audi', 'Volvo', 'Land Rover'],
    max_price=8000000
)

print("\n" + "=" * 50)

# Test 4: Mixed Brand Strategy
print("\nTEST 4: üåà Mixed Brands - Best of All Worlds")
print("Preferences: Hatchback, Petrol, Manual, Top brands from each region")

mixed_brands_cars = recommend_city_car_v2(
    900000,
    fuel_type='Petrol',
    brands=[
        'Maruti Suzuki',  # Indian (value)
        'Hyundai',        # Korean (features)
        'Honda',          # Japanese (reliability)
        'Volkswagen'      # European (build quality)
    ]
)

üéØ Comprehensive Brand Preference Demonstration
TEST 1: üáÆüá≥ Indian Brands Only - Family SUV
Preferences: SUV, Diesel, 7 seats, Indian brands only
üéâ Found 5 recommendations with Body: SUV, Fuel: Diesel, Transmission: Automatic, Seats: 7, Brands: Tata, Mahindra, Maruti Suzuki:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1194,Mahindra,Nuvosport,N6 Amt,972359.0,16.21,Diesel,Automatic,SUV,7.0,0.651006
1195,Mahindra,Nuvosport,N8 Amt,1048603.0,16.21,Diesel,Automatic,SUV,7.0,0.651005
694,Tata,Hexa,Xe 4X2,1371864.0,17.6,Diesel,Manual,SUV,7.0,0.626168
993,Tata,Safari Storme,2.2 Vx 4X2 Varicor 400,1479574.0,14.1,Diesel,Manual,SUV,7.0,0.626079
686,Mahindra,Xuv500,W3,1230924.0,16.0,Diesel,Manual,SUV,7.0,0.592305




TEST 2: üáØüáµ Japanese Brands - Reliable Sedans
Preferences: Sedan, Petrol, Automatic, Japanese brands
üéâ Found 5 recommendations with Body: Sedan, Fuel: Petrol, Transmission: Automatic, Seats: 5, Brands: Toyota, Honda, Nissan:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
927,Nissan,Sunny,Xv Cvt,993997.0,17.97,Petrol,Automatic,Sedan,5.0,0.755755
923,Nissan,Sunny,Xl Petrol,836461.0,16.95,Petrol,Manual,Sedan,5.0,0.695514
922,Nissan,Sunny,Xe Petrol,707025.0,16.95,Petrol,Manual,Sedan,5.0,0.666302
1080,Nissan,Micra,Xl (O) Cvt,662880.0,19.34,Petrol,Automatic,Hatchback,5.0,0.650661
1081,Nissan,Micra,Xv Cvt,781686.0,19.34,Petrol,Automatic,Hatchback,5.0,0.650659




TEST 3: üá™üá∫ European Luxury - Premium SUVs
Preferences: SUV, Diesel, Automatic, European luxury brands
‚ö†Ô∏è Brand 'BMW' not found in top brands. Available: ['Maruti Suzuki', 'Hyundai', 'Mahindra', 'Tata', 'Toyota', 'Honda', 'Ford', 'Skoda', 'Bmw', 'Renault', 'Volkswagen', 'Audi', 'Nissan', 'Jeep', 'Land Rover Rover']
‚ö†Ô∏è Brand 'Mercedes-Benz' not found in top brands. Available: ['Maruti Suzuki', 'Hyundai', 'Mahindra', 'Tata', 'Toyota', 'Honda', 'Ford', 'Skoda', 'Bmw', 'Renault', 'Volkswagen', 'Audi', 'Nissan', 'Jeep', 'Land Rover Rover']
‚ö†Ô∏è Brand 'Volvo' not found in top brands. Available: ['Maruti Suzuki', 'Hyundai', 'Mahindra', 'Tata', 'Toyota', 'Honda', 'Ford', 'Skoda', 'Bmw', 'Renault', 'Volkswagen', 'Audi', 'Nissan', 'Jeep', 'Land Rover Rover']
‚ö†Ô∏è Brand 'Land Rover' not found in top brands. Available: ['Maruti Suzuki', 'Hyundai', 'Mahindra', 'Tata', 'Toyota', 'Honda', 'Ford', 'Skoda', 'Bmw', 'Renault', 'Volkswagen', 'Audi', 'Nissan', 'Jeep', 'Land Rover Rover']

Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
785,Audi,Q5,35 Tdi Premium Plus,5021200.0,16.9,Diesel,Automatic,SUV,5.0,0.999969
786,Audi,Q5,35 Tdi Technology,5621200.0,15.73,Diesel,Automatic,SUV,5.0,0.999962
265,Audi,Q3,35 Tdi Quattro Technology,4361000.0,15.73,Diesel,Automatic,SUV,5.0,0.99996
264,Audi,Q3,35 Tdi Quattro Premium Plus,3992200.0,15.73,Diesel,Automatic,SUV,5.0,0.999919
787,Audi,Q5,45 Tfsi Premium Plus,5021200.0,12.44,Petrol,Automatic,SUV,5.0,0.919968




TEST 4: üåà Mixed Brands - Best of All Worlds
Preferences: Hatchback, Petrol, Manual, Top brands from each region
üéâ Found 5 recommendations with Body: Hatchback, Fuel: Petrol, Transmission: Manual, Seats: 5, Brands: Maruti Suzuki, Hyundai, Honda, Volkswagen:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
147,Volkswagen,Polo,Highline Plus 1.0 (P),776500.0,18.78,Petrol,Manual,Hatchback,5.0,0.703963
146,Volkswagen,Polo,Comfortline 1.0 (P),676500.0,18.78,Petrol,Manual,Hatchback,5.0,0.671985
149,Volkswagen,Polo,Comfortline 1.5 (D),851500.0,20.14,Diesel,Manual,Hatchback,5.0,0.60711
145,Volkswagen,Polo,Trendline 1.0L (P),582000.0,18.78,Petrol,Manual,Hatchback,5.0,0.60224
179,Volkswagen,Ameo,Trendline 1.0L,594000.0,19.0,Petrol,Manual,Sedan,5.0,0.587414


In [61]:
# Cell 48: Brand-Based User Profiles
print("üë§ Brand-Based User Profiles")
print("=" * 50)

def get_user_profile_recommendations(profile_type, budget):
    """Get car recommendations based on user profile types"""
    
    profiles = {
        'value_seeker': {
            'name': 'üí∞ Value Seeker',
            'brands': ['Maruti Suzuki', 'Tata', 'Hyundai'],
            'description': 'Looking for best value, low maintenance costs'
        },
        'premium_lover': {
            'name': 'üíé Premium Lover', 
            'brands': ['BMW', 'Mercedes-Benz', 'Audi'],
            'description': 'Wants luxury features and premium brand image'
        },
        'reliability_focused': {
            'name': 'üîß Reliability Focused',
            'brands': ['Toyota', 'Honda', 'Hyundai'],
            'description': 'Prioritizes reliability and low maintenance'
        },
        'adventure_seeker': {
            'name': 'üèîÔ∏è Adventure Seeker',
            'brands': ['Mahindra', 'Toyota', 'Jeep'],
            'description': 'Wants rugged, off-road capable vehicles'
        },
        'eco_conscious': {
            'name': 'üå± Eco Conscious',
            'brands': ['Tata', 'Hyundai', 'MG'],
            'description': 'Interested in electric and hybrid vehicles'
        }
    }
    
    if profile_type not in profiles:
        print(f"‚ùå Profile '{profile_type}' not found. Available: {list(profiles.keys())}")
        return None
    
    profile = profiles[profile_type]
    print(f"üë§ {profile['name']}")
    print(f"üìù {profile['description']}")
    print(f"üè∑Ô∏è Preferred brands: {', '.join(profile['brands'])}")
    
    user_pref = [budget, 18, 1500, 1, 1, 1, 1, 1]
    
    return recommend_cars_with_brands(
        user_pref,
        brand_preferences=profile['brands'],
        max_price=budget
    )

# Test different user profiles
print("TESTING DIFFERENT USER PROFILES:")
print("\n" + "-" * 30)

# Test Value Seeker
print("1. Value Seeker Profile:")
value_cars = get_user_profile_recommendations('value_seeker', 800000)

print("\n" + "-" * 30)

# Test Reliability Focused
print("2. Reliability Focused Profile:")
reliable_cars = get_user_profile_recommendations('reliability_focused', 1200000)

print("\n" + "-" * 30)

# Test Adventure Seeker
print("3. Adventure Seeker Profile:")
adventure_cars = get_user_profile_recommendations('adventure_seeker', 1500000)

üë§ Brand-Based User Profiles
TESTING DIFFERENT USER PROFILES:

------------------------------
1. Value Seeker Profile:
üë§ üí∞ Value Seeker
üìù Looking for best value, low maintenance costs
üè∑Ô∏è Preferred brands: Maruti Suzuki, Tata, Hyundai
üéâ Found 5 recommendations with Brands: Maruti Suzuki, Tata, Hyundai:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
432,Tata,Altroz,Xt Petrol,684000.0,,Petrol,Manual,Hatchback,5.0,0.365672
434,Tata,Altroz,Xz (O) Petrol,769000.0,,Petrol,Manual,Hatchback,5.0,0.365633
455,Tata,Tigor,Revotron Xma,660000.0,23.0,Petrol,Automatic,Sedan,5.0,0.311508
456,Tata,Tigor,Revotron Xza Plus,749000.0,23.0,Petrol,Automatic,Sedan,5.0,0.311462
59,Tata,Tiago,Revotron Xza,620000.0,24.0,Petrol,Automatic,Hatchback,5.0,0.310678



------------------------------
2. Reliability Focused Profile:
üë§ üîß Reliability Focused
üìù Prioritizes reliability and low maintenance
üè∑Ô∏è Preferred brands: Toyota, Honda, Hyundai
üéâ Found 5 recommendations with Brands: Toyota, Honda, Hyundai:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
625,Honda,Brv,S Diesel,1187900.0,21.9,Diesel,Manual,SUV,7.0,0.463123
621,Honda,Brv,S Petrol,1052900.0,15.4,Petrol,Manual,SUV,7.0,0.461653
622,Honda,Brv,V Petrol,1167900.0,15.4,Petrol,Manual,SUV,7.0,0.461627
1269,Honda,City,Sv Mt Diesel,1111000.0,25.6,Diesel,Manual,Sedan,5.0,0.458169
1270,Honda,City,V Mt Diesel,1191000.0,25.6,Diesel,Manual,Sedan,5.0,0.458151



------------------------------
3. Adventure Seeker Profile:
üë§ üèîÔ∏è Adventure Seeker
üìù Wants rugged, off-road capable vehicles
üè∑Ô∏è Preferred brands: Mahindra, Toyota, Jeep
üéâ Found 5 recommendations with Brands: Mahindra, Toyota, Jeep:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1237,Toyota,Yaris,G (O) Cvt,1083000.0,17.1,Petrol,Automatic,Sedan,5.0,0.311288
1229,Toyota,Yaris,G Cvt,1175000.0,17.8,Petrol,Automatic,Sedan,5.0,0.311267
1231,Toyota,Yaris,V Cvt,1294000.0,17.8,Petrol,Automatic,Sedan,5.0,0.311244
1238,Toyota,Yaris,V (O) Cvt,1317000.0,17.1,Petrol,Automatic,Sedan,5.0,0.311238
1228,Toyota,Yaris,Vx Cvt,1418000.0,17.8,Petrol,Automatic,Sedan,5.0,0.311219


In [62]:
# Cell 49: Final System Summary with Brand Features
print("üöÄ FINAL SYSTEM SUMMARY - BRAND-AWARE CAR RECOMMENDATION")
print("=" * 60)

print("üéØ NOW AVAILABLE: Brand Preference Features")
print("\nüìã COMPLETE FEATURE SET:")
print("   ‚úì Price & Budget filtering")
print("   ‚úì Body type preferences (SUV, Sedan, Hatchback, etc.)")
print("   ‚úì Fuel type selection (Petrol, Diesel, Electric, etc.)")
print("   ‚úì Transmission type (Manual/Automatic)")
print("   ‚úì Seating capacity (5, 7, 8 seats)")
print("   ‚úì Smart logic (Electric = Automatic)")
print("   ‚úì Smart efficiency (Range for electric, Mileage for fuel cars)")
print("   ‚úì üÜï BRAND PREFERENCES (20+ brands supported)")
print("   ‚úì 50+ feature dimensions for accurate matching")
print("   ‚úì Similarity scoring for best matches")

print(f"\nüè∑Ô∏è SUPPORTED BRANDS: {len(brand_columns)} brands")
print("   Indian: Tata, Mahindra, Maruti Suzuki")
print("   Japanese: Toyota, Honda, Nissan")  
print("   Korean: Hyundai, Kia")
print("   European: BMW, Mercedes-Benz, Audi, Volkswagen, Skoda, Renault, Volvo")
print("   American: Ford, Jeep")
print("   Luxury: Jaguar, Land Rover, etc.")

print("\nüéõÔ∏è EASY-TO-USE FUNCTIONS:")
print("   recommend_family_car_v2(1500000, brands=['Tata', 'Mahindra'])")
print("   recommend_city_car_v2(800000, brands=['Hyundai', 'Maruti Suzuki'])")
print("   recommend_cars_with_brands(..., brand_preferences=['Toyota', 'Honda'])")

print("\nüë§ USER PROFILES:")
print("   Value Seeker, Premium Lover, Reliability Focused")
print("   Adventure Seeker, Eco Conscious")

print("\n‚úÖ SYSTEM STATUS: FULLY OPERATIONAL WITH BRAND INTELLIGENCE! üéâ")

üöÄ FINAL SYSTEM SUMMARY - BRAND-AWARE CAR RECOMMENDATION
üéØ NOW AVAILABLE: Brand Preference Features

üìã COMPLETE FEATURE SET:
   ‚úì Price & Budget filtering
   ‚úì Body type preferences (SUV, Sedan, Hatchback, etc.)
   ‚úì Fuel type selection (Petrol, Diesel, Electric, etc.)
   ‚úì Transmission type (Manual/Automatic)
   ‚úì Seating capacity (5, 7, 8 seats)
   ‚úì Smart logic (Electric = Automatic)
   ‚úì Smart efficiency (Range for electric, Mileage for fuel cars)
   ‚úì üÜï BRAND PREFERENCES (20+ brands supported)
   ‚úì 50+ feature dimensions for accurate matching
   ‚úì Similarity scoring for best matches

üè∑Ô∏è SUPPORTED BRANDS: 15 brands
   Indian: Tata, Mahindra, Maruti Suzuki
   Japanese: Toyota, Honda, Nissan
   Korean: Hyundai, Kia
   European: BMW, Mercedes-Benz, Audi, Volkswagen, Skoda, Renault, Volvo
   American: Ford, Jeep
   Luxury: Jaguar, Land Rover, etc.

üéõÔ∏è EASY-TO-USE FUNCTIONS:
   recommend_family_car_v2(1500000, brands=['Tata', 'Mahindra'])
   reco

In [66]:
# Cell 50: Test Exclude Brands Feature
print("üß™ Testing Exclude Brands Feature...")
print("=" * 50)

# Test 1: Exclude Luxury Brands
print("TEST 1: üö´ Exclude Luxury Brands - Family SUV")
print("Preferences: SUV, Diesel, 7 seats, Exclude: BMW, Audi, Mercedes-Benz")

family_no_luxury = recommend_family_car_v2(
    1500000,
    seating=7,
    fuel_type='Diesel',
    exclude_brands=['BMW', 'Audi', 'Mercedes-Benz']  # Exclude luxury brands
)

print("\n" + "=" * 50)

# Test 2: Exclude Specific Brands with Brand Preferences
print("\nTEST 2: üéØ Brand Preference with Exclude - City Car")
print("Preferences: Hatchback, Petrol, 5 seats, Brands: Hyundai, Maruti Suzuki, Exclude: Tata")

city_specific_exclude = recommend_city_car_v2(
    800000,
    fuel_type='Petrol',
    brands=['Hyundai', 'Maruti Suzuki'],
    exclude_brands=['Tata']  # Exclude Tata
)

print("\n" + "=" * 50)

# Test 3: Only Exclude (No Brand Preference)
print("\nTEST 3: üö´ Only Exclude - No Brand Preference")
print("Preferences: Sedan, Petrol, Automatic, Exclude: Ford, Nissan")

user_pref_exclude_only = [
    1000000,   # Budget
    18,        # Good mileage
    1500,      # Medium engine
    1, 1, 1, 1, 1  # All features
]

sedan_exclude = recommend_cars_with_brands(
    user_pref_exclude_only,
    body_type_preference='Sedan',
    fuel_type_preference='Petrol',
    transmission_preference='Automatic',
    seating_preference=5,
    exclude_brands=['Ford', 'Nissan']  # Only exclude, no brand preference
)

üß™ Testing Exclude Brands Feature...
TEST 1: üö´ Exclude Luxury Brands - Family SUV
Preferences: SUV, Diesel, 7 seats, Exclude: BMW, Audi, Mercedes-Benz
üö´ Excluded brands: ['BMW', 'Audi', 'Mercedes-Benz']
üéâ Found 5 recommendations with Body: SUV, Fuel: Diesel, Transmission: Automatic, Seats: 7:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1194,Mahindra,Nuvosport,N6 Amt,972359.0,16.21,Diesel,Automatic,SUV,7.0,0.669708
1195,Mahindra,Nuvosport,N8 Amt,1048603.0,16.21,Diesel,Automatic,SUV,7.0,0.669681
686,Mahindra,Xuv500,W3,1230924.0,16.0,Diesel,Manual,SUV,7.0,0.56567
977,Kia,Seltos,Htk Plus At 1.5 Diesel,1354000.0,20.0,Diesel,Automatic,SUV,5.0,0.556934
694,Tata,Hexa,Xe 4X2,1371864.0,17.6,Diesel,Manual,SUV,7.0,0.545836




TEST 2: üéØ Brand Preference with Exclude - City Car
Preferences: Hatchback, Petrol, 5 seats, Brands: Hyundai, Maruti Suzuki, Exclude: Tata
üö´ Excluded brands: ['Tata']
üéâ Found 5 recommendations with Body: Hatchback, Fuel: Petrol, Transmission: Manual, Seats: 5, Brands: Hyundai, Maruti Suzuki:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
914,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz,620637.0,19.77,Petrol,Manual,Hatchback,5.0,0.799517
917,Hyundai,Grand I10,1.2 Kappa Vtvt Sportz Dual Tone,640537.0,19.77,Petrol,Manual,Hatchback,5.0,0.799517
1063,Hyundai,Grand I10 Nios,Magna 1.2 Vtvt,589610.0,20.7,Petrol,Manual,Hatchback,5.0,0.799513
1065,Hyundai,Grand I10 Nios,Sportz 1.2 Vtvt,643350.0,20.7,Petrol,Manual,Hatchback,5.0,0.799512
1067,Hyundai,Grand I10 Nios,Sportz Dual Tone 1.2 Vtvt,673350.0,20.7,Petrol,Manual,Hatchback,5.0,0.799511




TEST 3: üö´ Only Exclude - No Brand Preference
Preferences: Sedan, Petrol, Automatic, Exclude: Ford, Nissan
üö´ Excluded brands: ['Ford', 'Nissan']
üéâ Found 5 recommendations with Body: Sedan, Fuel: Petrol, Transmission: Automatic, Seats: 5:


Unnamed: 0,Make,Model,Variant,Price_Cleaned,Smart_Efficiency,Fuel_Type_Cleaned,Smart_Transmission,Body_Type_Cleaned,Seating_Capacity_Cleaned,Similarity_Score
1037,,Mercedes-Benz C-Class,C 200 Prime,4090000.0,12.06,Petrol,Automatic,Sedan,5.0,0.996155
1038,,Mercedes-Benz C-Class,C 200 Progressive,4654000.0,12.06,Petrol,Automatic,Sedan,5.0,0.994999
256,,Mercedes-Benz Cla-Class,200 Sport,3599000.0,15.04,Petrol,Automatic,Sedan,5.0,0.988037
276,Jaguar,Xe,S Petrol,4498000.0,11.0,Petrol,Automatic,Sedan,5.0,0.98505
279,Jaguar,Xe,Se Petrol,4633000.0,11.0,Petrol,Automatic,Sedan,5.0,0.984756


In [67]:
import joblib
import os

# Save model in the main project folder
model_path = os.path.join(os.getcwd(), 'car_recommender_model.joblib')
joblib.dump({
    'model': final_knn_model_v4,
    'scaler': final_scaler_v4, 
    'features': final_feature_columns_v4,
    'df': df
}, model_path)

print(f"‚úÖ Model saved at: {model_path}")

‚úÖ Model saved at: c:\Users\malle\OneDrive\Desktop\sriya\car recommendation project\notebooks\car_recommender_model.joblib
