In [41]:
# napravio sam novo conda okruzenje (TensorFlow okruzenje tf311)

# conda env list // da vidimo koja sve okruzenja imamo
# conda activate tf311 // da aktiviramo koje hocemo

import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

import tensorflow as tf
from tensorflow.keras import layers, Model

# ============================================================
# 1. UČITAVANJE I ČIŠĆENJE PODATAKA
# ============================================================

df = pd.read_csv("./car-details-v3.csv")

# Zadržavamo sve bitne kolone
df = df[
    [
        "name",
        "year",
        "selling_price",
        "km_driven",
        "fuel",
        "seller_type",
        "transmission",
        "owner",
        "mileage",
        "engine",
        "max_power",
        "torque",
        "seats",
    ]
]

# Parsiranje numeričkih vrednosti iz string kolona
# mileage: "18.9 kmpl" ili "23.4 km/kg" -> 18.9, 23.4
df["mileage"] = (
    df["mileage"]
    .astype(str)
    .str.extract(r"(\d+\.?\d*)")[0]
    .astype(float)
)

# engine: "1248 CC" -> 1248
df["engine"] = (
    df["engine"]
    .astype(str)
    .str.extract(r"(\d+\.?\d*)")[0]
    .astype(float)
)

# max_power: "74 bhp" -> 74
df["max_power"] = (
    df["max_power"]
    .astype(str)
    .str.extract(r"(\d+\.?\d*)")[0]
    .astype(float)
)

# torque: "190Nm@ 2000rpm" -> 190
df["torque"] = (
    df["torque"]
    .astype(str)
    .str.extract(r"(\d+\.?\d*)")[0]
    .astype(float)
)

# seats je već numerički u datasetu (obično float -> int), ali ćemo ga tretirati kao float
df["seats"] = df["seats"].astype(float)

# Uklanjamo redove sa nedostajućim vrednostima nakon parsiranja
df.dropna(inplace=True)

# ============================================================
# 2. NORMALIZACIJA NUMERIČKIH I ENKODOVANJE KATEGORIJSKIH
# ============================================================

NUM_COLS = [
    "year",
    "selling_price",
    "km_driven",
    "mileage",
    "engine",
    "max_power",
    "torque",
    "seats",
]

CAT_COLS = ["fuel", "seller_type", "transmission", "owner"]

# Skaliranje numeričkih kolona u [0,1]
scaler = MinMaxScaler()
df[NUM_COLS] = scaler.fit_transform(df[NUM_COLS])

# Pretvaranje kategorija u category tip
for col in CAT_COLS:
    df[col] = df[col].astype("category")

fuel_cat = df["fuel"].cat.categories
seller_cat = df["seller_type"].cat.categories
trans_cat = df["transmission"].cat.categories
owner_cat = df["owner"].cat.categories

# Mapiranje u ID-jeve
df["fuel"] = df["fuel"].cat.codes
df["seller_type"] = df["seller_type"].cat.codes
df["transmission"] = df["transmission"].cat.codes
df["owner"] = df["owner"].cat.codes
df["is_suv"] = df["name"].str.contains(
    "Scorpio|Bolero|Fortuner|Safari|Sumo|Innova|Jeep|4X4|4WD",
    case=False
).astype(int)
df["is_sport_model"] = df["name"].str.contains(
    "GTI|GT TSI|TSI|TFSI|vRS|RS\\b|iVTEC|VTEC|Type R|Sports|1.6S|Abarth|Turbo|N Line|Cooper S|ST Line|AMG|M\\b|M3|M4|M5",
    case=False
).astype(int)

# Brojevi kategorija za embedding
num_fuel = len(fuel_cat)
num_seller = len(seller_cat)
num_trans = len(trans_cat)
num_owner = len(owner_cat)

print("Kategorije fuel:", list(fuel_cat))
print("Kategorije seller_type:", list(seller_cat))
print("Kategorije transmission:", list(trans_cat))
print("Kategorije owner:", list(owner_cat))

# Helper: kodovi za često korišćene vrednosti
def get_code(categories, name, default=0):
    return int(np.where(categories == name)[0][0]) if name in categories else default

petrol_code = get_code(fuel_cat, "Petrol", default=0)
diesel_code = get_code(fuel_cat, "Diesel", default=0)

individual_code = 0
dealer_code = 0
for i, c in enumerate(seller_cat):
    if "Individual" in c:
        individual_code = i
    if "Dealer" in c:
        dealer_code = i

manual_code = get_code(trans_cat, "Manual", default=0)
auto_code = get_code(trans_cat, "Automatic", default=manual_code)

first_owner_code = 0
for i, c in enumerate(owner_cat):
    if "First Owner" in c:
        first_owner_code = i
        break

# Konačan skup feature-a za automobile
FEATURES = NUM_COLS + CAT_COLS + ["is_suv", "is_sport_model"]
df_items = df[FEATURES].copy()

# ============================================================
# 3. GENERISANJE REALNIH KORISNIČKIH SEGMENTA
# ============================================================

def generate_segment_users(n_per_segment=50):
    users = []

    for _ in range(n_per_segment):
        # Segment 1: Budget Buyer
        users.append(
            {
                # NUM_COLS
                "year": np.random.uniform(0.3, 0.6),
                "selling_price": np.random.uniform(0.1, 0.4),
                "km_driven": np.random.uniform(0.3, 0.7),
                "mileage": np.random.uniform(0.6, 1.0),
                "engine": np.random.uniform(0.2, 0.5),
                "max_power": np.random.uniform(0.2, 0.5),
                "torque": np.random.uniform(0.2, 0.5),
                "seats": np.random.uniform(0.3, 0.7),
                # CAT_COLS
                "fuel": petrol_code,
                "seller_type": individual_code,
                "transmission": manual_code,
                "owner": first_owner_code,
                # segment
                "segment": 1,
            }
        )

        # Segment 2: Diesel Commuter
        users.append(
            {
                "year": np.random.uniform(0.5, 0.8),
                "selling_price": np.random.uniform(0.3, 0.6),
                "km_driven": np.random.uniform(0.2, 0.5),
                "mileage": np.random.uniform(0.5, 0.9),
                "engine": np.random.uniform(0.4, 0.7),
                "max_power": np.random.uniform(0.3, 0.6),
                "torque": np.random.uniform(0.4, 0.8),
                "seats": np.random.uniform(0.4, 0.7),
                "fuel": diesel_code,
                "seller_type": dealer_code,
                "transmission": manual_code,
                "owner": first_owner_code,
                "segment": 2,
            }
        )

        # Segment 3: Family Buyer
        users.append(
            {
                "year": np.random.uniform(0.6, 0.9),
                "selling_price": np.random.uniform(0.4, 0.7),
                "km_driven": np.random.uniform(0.1, 0.4),
                "mileage": np.random.uniform(0.4, 0.8),
                "engine": np.random.uniform(0.4, 0.7),
                "max_power": np.random.uniform(0.4, 0.7),
                "torque": np.random.uniform(0.4, 0.7),
                "seats": np.random.uniform(0.6, 1.0),
                "fuel": np.random.choice([petrol_code, diesel_code]),
                "seller_type": dealer_code,
                "transmission": auto_code,
                "owner": first_owner_code,
                "segment": 3,
            }
        )

        # Segment 4: Sport Enthusiast (benzinski, jači, manja km)
        users.append(
            {
                "year": np.random.uniform(0.5, 0.9),
                "selling_price": np.random.uniform(0.6, 0.9),
                "km_driven": np.random.uniform(0.05, 0.3),
                "mileage": np.random.uniform(0.3, 0.7),
                "engine": np.random.uniform(0.5, 0.9),
                "max_power": np.random.uniform(0.6, 1.0),
                "torque": np.random.uniform(0.5, 0.9),
                "seats": np.random.uniform(0.3, 0.7),
                "fuel": petrol_code,
                "seller_type": np.random.choice([individual_code, dealer_code]),
                "transmission": manual_code,
                "owner": first_owner_code,
                "segment": 4,
            }
        )

        # Segment 5: Off-road Utility (SUV/jeep, često dizel)
        users.append(
            {
                "year": np.random.uniform(0.3, 0.7),
                "selling_price": np.random.uniform(0.4, 0.7),
                "km_driven": np.random.uniform(0.3, 0.7),
                "mileage": np.random.uniform(0.3, 0.7),
                "engine": np.random.uniform(0.6, 1.0),
                "max_power": np.random.uniform(0.5, 0.9),
                "torque": np.random.uniform(0.6, 1.0),
                "seats": np.random.uniform(0.6, 1.0),
                "fuel": diesel_code,
                "seller_type": np.random.choice([individual_code, dealer_code]),
                "transmission": manual_code,
                "owner": first_owner_code,
                "segment": 5,
            }
        )

        # Segment 6: Premium Urban Buyer
        users.append(
            {
                "year": np.random.uniform(0.75, 1.0),
                "selling_price": np.random.uniform(0.6, 1.0),
                "km_driven": np.random.uniform(0.0, 0.25),
                "mileage": np.random.uniform(0.3, 0.7),
                "engine": np.random.uniform(0.5, 0.9),
                "max_power": np.random.uniform(0.5, 0.9),
                "torque": np.random.uniform(0.4, 0.8),
                "seats": np.random.uniform(0.4, 0.8),
                "fuel": petrol_code,
                "seller_type": dealer_code,
                "transmission": auto_code,
                "owner": first_owner_code,
                "segment": 6,
            }
        )

    return pd.DataFrame(users)


users = generate_segment_users(50)
print(users.head())

Kategorije fuel: ['CNG', 'Diesel', 'LPG', 'Petrol']
Kategorije seller_type: ['Dealer', 'Individual', 'Trustmark Dealer']
Kategorije transmission: ['Automatic', 'Manual']
Kategorije owner: ['First Owner', 'Fourth & Above Owner', 'Second Owner', 'Test Drive Car', 'Third Owner']
       year  selling_price  km_driven   mileage    engine  max_power  \
0  0.514219       0.318515   0.699675  0.948275  0.350867   0.203439   
1  0.603451       0.504609   0.401035  0.890555  0.645450   0.340482   
2  0.770780       0.527326   0.364379  0.559442  0.431824   0.659170   
3  0.869189       0.625763   0.253439  0.652043  0.580395   0.886279   
4  0.421823       0.643717   0.631862  0.412055  0.909228   0.518595   

     torque     seats  fuel  seller_type  transmission  owner  segment  
0  0.388376  0.482743     3            1             1      0        1  
1  0.740887  0.469268     1            2             1      0        2  
2  0.422539  0.722871     1            2             0      0        3 

In [46]:
# ============================================================
# 4. USER–ITEM SCORE FUNKCIJA I GENERISANJE PAROVA
# ============================================================

def score_all_items_for_user(user_row, cars_df):

    diff = np.abs(cars_df[NUM_COLS].values - user_row[NUM_COLS].values)
    num_score = (1 - diff).sum(axis=1)

    # Dinamičke težine prema korisniku
    # ------------------------------------------------------

    # Kad je sport user → max power je KRALJ
    if user_row["max_power"] > 0.70:
        w_power = 12.0
        w_engine = 8.0
        w_torque = 4.0
        w_seats = 3.0
        w_fuel = 4.0
        w_trans = 5.0
        w_seller = 1.0
        w_owner = 2.0
    else:
        # default ne-sport weights
        w_power = 4.0
        w_engine = 3.0
        w_torque = 2.0
        w_seats = 1.0
        w_fuel = 2.0
        w_trans = 2.0
        w_seller = 1.0
        w_owner = 1.0

    # ------------------------------------------------------
    # Penalizacija SUV i OFFROAD vozila za sport korisnike
    # ------------------------------------------------------

    suv_penalty = cars_df["is_suv"].values * -8.0

    # ------------------------------------------------------
    # Kategorijski match
    # ------------------------------------------------------
    fuel_match   = (cars_df["fuel"].values == user_row["fuel"]) * w_fuel
    trans_match  = (cars_df["transmission"].values == user_row["transmission"]) * w_trans
    owner_match  = (cars_df["owner"].values == user_row["owner"]) * w_owner
    seller_match = (cars_df["seller_type"].values == user_row["seller_type"]) * w_seller

    # ------------------------------------------------------
    # NUMERIČKE sportske težine
    # ------------------------------------------------------
    sport_bonus = (
        cars_df["max_power"].values * w_power +
        cars_df["engine"].values * w_engine +
        cars_df["torque"].values * w_torque +
        (1 - cars_df["seats"].values) * w_seats   # manje sedišta = sportski
    )

    sport_model_bonus = cars_df["is_sport_model"].values * 20.0

    size_penalty = (
    (cars_df["seats"] > 0.6).astype(int) * -10.0   # 5+ seats nije sportski
)

    compact_bonus = (cars_df["seats"] <= 0.5).astype(int) * 10.0
    
    return (
    num_score +
    fuel_match + trans_match + owner_match + seller_match +
    sport_bonus +
    suv_penalty +
    sport_model_bonus +
    size_penalty +
    compact_bonus
)

def generate_training_pairs_fast(users_df, cars_df, n_pos=10, n_neg=10):
    X_user = []
    X_item = []
    y = []

    car_values = cars_df.values  # radi brže

    for _, user in users_df.iterrows():
        scores = score_all_items_for_user(user, cars_df)

        # najbolji automobili
        pos_idx = np.argsort(scores)[-n_pos:]
        # najgori
        neg_idx = np.argsort(scores)[:n_neg]

        # dodavanje pozitivnih
        for idx in pos_idx:
            X_user.append(user.drop("segment").values)
            X_item.append(car_values[idx])
            y.append(1.0)

        # dodavanje negativnih
        for idx in neg_idx:
            X_user.append(user.drop("segment").values)
            X_item.append(car_values[idx])
            y.append(0.0)

    return (
        np.array(X_user, dtype="float32"),
        np.array(X_item, dtype="float32"),
        np.array(y, dtype="float32"),
    )

X_user, X_item, y = generate_training_pairs_fast(users, df_items, n_pos=8, n_neg=8)

print("X_user shape:", X_user.shape)
print("X_item shape:", X_item.shape)
print("y shape:", y.shape)

X_user shape: (4800, 12)
X_item shape: (4800, 14)
y shape: (4800,)


In [47]:
# ============================================================
# 5. TWO–TOWER MODEL (sa svim feature-ima)
# ============================================================

embedding_dim = 32
num_numeric = len(NUM_COLS)  # 8

# USER tower
user_numeric_in = layers.Input(shape=(num_numeric,), name="user_num")
user_fuel_in = layers.Input(shape=(), dtype="int32", name="user_fuel")
user_seller_in = layers.Input(shape=(), dtype="int32", name="user_seller")
user_trans_in = layers.Input(shape=(), dtype="int32", name="user_trans")
user_owner_in = layers.Input(shape=(), dtype="int32", name="user_owner")

uf_emb = layers.Embedding(num_fuel, 16)(user_fuel_in)
us_emb = layers.Embedding(num_seller, 16)(user_seller_in)
ut_emb = layers.Embedding(num_trans, 16)(user_trans_in)
uo_emb = layers.Embedding(num_owner, 16)(user_owner_in)

u_concat = layers.Concatenate()(
    [
        user_numeric_in,
        layers.Flatten()(uf_emb),
        layers.Flatten()(us_emb),
        layers.Flatten()(ut_emb),
        layers.Flatten()(uo_emb),
    ]
)

u_hidden = layers.Dense(128, activation="relu")(u_concat)
u_hidden = layers.Dropout(0.2)(u_hidden)
u_hidden = layers.Dense(64, activation="relu")(u_hidden)
u_vec = layers.Dense(embedding_dim)(u_hidden)

user_tower = Model(
    inputs=[
        user_numeric_in,
        user_fuel_in,
        user_seller_in,
        user_trans_in,
        user_owner_in,
    ],
    outputs=u_vec,
)

# ITEM tower
item_numeric_in = layers.Input(shape=(num_numeric,), name="item_num")
item_fuel_in = layers.Input(shape=(), dtype="int32", name="item_fuel")
item_seller_in = layers.Input(shape=(), dtype="int32", name="item_seller")
item_trans_in = layers.Input(shape=(), dtype="int32", name="item_trans")
item_owner_in = layers.Input(shape=(), dtype="int32", name="item_owner")

if_emb = layers.Embedding(num_fuel, 16)(item_fuel_in)
is_emb = layers.Embedding(num_seller, 16)(item_seller_in)
it_emb = layers.Embedding(num_trans, 16)(item_trans_in)
io_emb = layers.Embedding(num_owner, 16)(item_owner_in)

i_concat = layers.Concatenate()(
    [
        item_numeric_in,
        layers.Flatten()(if_emb),
        layers.Flatten()(is_emb),
        layers.Flatten()(it_emb),
        layers.Flatten()(io_emb),
    ]
)

i_hidden = layers.Dense(128, activation="relu")(i_concat)
i_hidden = layers.Dropout(0.2)(i_hidden)
i_hidden = layers.Dense(64, activation="relu")(i_hidden)
i_vec = layers.Dense(embedding_dim)(i_hidden)

item_tower = Model(
    inputs=[
        item_numeric_in,
        item_fuel_in,
        item_seller_in,
        item_trans_in,
        item_owner_in,
    ],
    outputs=i_vec,
)

# Dot product score
dot_score = layers.Dot(axes=1)([u_vec, i_vec])

model = Model(
    inputs=[
        user_numeric_in,
        user_fuel_in,
        user_seller_in,
        user_trans_in,
        user_owner_in,
        item_numeric_in,
        item_fuel_in,
        item_seller_in,
        item_trans_in,
        item_owner_in,
    ],
    outputs=dot_score,
)

model.compile(optimizer="adam", loss="binary_crossentropy")
model.summary()

In [53]:
# ============================================================
# 6. PRIPREMA FEATURE-A ZA KERAS (split_features)
# ============================================================

def split_features(data: np.ndarray):
    """
    data: (N, len(FEATURES)) gde su prvo NUM_COLS, pa CAT_COLS
    """
    num = data[:, :num_numeric]
    fuel = data[:, num_numeric + 0].astype("int32")
    seller = data[:, num_numeric + 1].astype("int32")
    trans = data[:, num_numeric + 2].astype("int32")
    owner = data[:, num_numeric + 3].astype("int32")
    return num, fuel, seller, trans, owner


u_num, u_f, u_s, u_t, u_o = split_features(X_user)
i_num, i_f, i_s, i_t, i_o = split_features(X_item)

# ============================================================
# 7. TRENING
# ============================================================

history = model.fit(
    [u_num, u_f, u_s, u_t, u_o, i_num, i_f, i_s, i_t, i_o],
    y,
    epochs=10,
    batch_size=64,
    verbose=1,
)

# ============================================================
# 8. PRETPRIPREMA: EMBEDDING ZA SVE AUTOMOBILE
# ============================================================

item_array = df_items.values.astype("float32")
item_num, item_f, item_s, item_t, item_o = split_features(item_array)

item_embeddings = item_tower.predict(
    [item_num, item_f, item_s, item_t, item_o], verbose=0
)

# ============================================================
# 9. FUNKCIJA ZA PREPORUKE ZA BILO KOG KORISNIKA
# ============================================================

def recommend_for_user(user_pref: dict, top_n=10):
    """
    user_pref: dict sa ključevima NUM_COLS + CAT_COLS (vec u [0,1] / kodovi)
    npr:
    {
      "year": 0.8,
      "selling_price": 0.5,
      "km_driven": 0.2,
      "mileage": 0.6,
      "engine": 0.5,
      "max_power": 0.6,
      "torque": 0.6,
      "seats": 0.5,
      "fuel": petrol_code,
      "seller_type": dealer_code,
      "transmission": auto_code,
      "owner": first_owner_code
    }
    """
    values = [user_pref[col] for col in NUM_COLS + CAT_COLS]
    arr = np.array(values, dtype="float32").reshape(1, -1)

    u_num_new, u_f_new, u_s_new, u_t_new, u_o_new = split_features(arr)

    u_emb = user_tower.predict(
        [u_num_new, u_f_new, u_s_new, u_t_new, u_o_new], verbose=0
    )

    scores = cosine_similarity(u_emb, item_embeddings)[0]
    top_idx = np.argsort(scores)[::-1][:top_n]

    return df.iloc[top_idx][["name", "year", "selling_price", "km_driven", "fuel", "transmission", "owner"]]

Epoch 1/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 6.7557e-07
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 1.1921e-07
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 1.1921e-07
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 1.1921e-07
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 1.1921e-07
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 1.6235e-05
Epoch 7/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 2.4267e-04
Epoch 8/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 1.6384e-05
Epoch 9/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 1.1921e-07
Epoch 10/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [None]:
luxury_user = {
    "year": 0.85,            # želi veoma nov auto (godište 2015–2020+ u skaliranju)
    "selling_price": 0.85,   # visok budžet – premium segment
    "km_driven": 0.10,       # niska kilometraža – luksuzni kupci to zahtevaju
    "mileage": 0.45,         # umerena potrošnja, nije prioritet
    "engine": 0.75,          # veliki, glatki motori (V6, 2.0T, 3.0T)
    "max_power": 0.75,       # snažan ali ne ekstreman motor (premium, ne sportski)
    "torque": 0.75,          # glatka isporuka snage – luksuzno ponašanje
    "seats": 0.75,           # 5 sedišta – premium limuzine / SUV
    "fuel": petrol_code,     # premium modeli često benzin (može i diesel)
    "seller_type": dealer_code,   # premium kupci kupuju od dilerâ
    "transmission": auto_code,    # automatik je MUST u luksuz klasi
    "owner": first_owner_code     # želi top stanje i servisnu istoriju
}

recommendations = recommend_for_user(luxury_user, top_n=10)
print(recommendations)

In [54]:
sport_user = {
    "year": 0.55,           # ne mora biti najnovije, ali ne staro
    "selling_price": 0.75,  # voli skuplje i jače automobile
    "km_driven": 0.15,      # niža km (sportski auto, očuvan)
    "mileage": 0.35,        # sportski auti imaju nižu potrošnju efikasnost
    "engine": 0.80,         # veliki motor
    "max_power": 0.85,      # visoka snaga – ključno za sport segment
    "torque": 0.85,         # visok obrtni moment
    "seats": 0.40,          # 2–4 sedišta tipično
    "fuel": petrol_code,    # sportski = benzin
    "seller_type": dealer_code,  # sportski modeli često kod dilerâ
    "transmission": manual_code, # entuzijasti preferiraju manual
    "owner": first_owner_code    # bolje stanje vozila
}

recommendations = recommend_for_user(sport_user, top_n=10)
print(recommendations)

                                  name      year  selling_price  km_driven  \
170      Volvo XC90 T8 Excellence BSIV  0.884615       1.000000   0.012709   
4766  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011438   
1071  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011928   
4101  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011928   
6258  BMW 6 Series GT 630d Luxury Line  0.923077       0.581745   0.012709   
7596  BMW 6 Series GT 630d Luxury Line  0.923077       0.581745   0.012709   
4753  BMW 6 Series GT 630d Luxury Line  0.923077       0.548646   0.009320   
1057                  BMW X6 xDrive30d  0.730769       0.373119   0.023724   
4083                  BMW X6 xDrive30d  0.730769       0.373119   0.023724   
7713   Mercedes-Benz S-Class S 350 CDI  0.884615       0.598796   0.015675   

      fuel  transmission  owner  
170      3             0      0  
4766     1             0      0  
1071     1             0      0  
4101 

In [51]:
family_user = {
    "year": 0.80,
    "selling_price": 0.50,
    "km_driven": 0.10,
    "mileage": 0.70,
    "engine": 0.45,
    "max_power": 0.40,
    "torque": 0.45,
    "seats": 0.90,      # 5–7 seats
    "fuel": petrol_code,
    "seller_type": dealer_code,
    "transmission": auto_code,
    "owner": first_owner_code,
}

recommendations = recommend_for_user(family_user, top_n=10)
print(recommendations)

                                  name      year  selling_price  km_driven  \
170      Volvo XC90 T8 Excellence BSIV  0.884615       1.000000   0.012709   
4101  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011928   
1071  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011928   
4766  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011438   
7713   Mercedes-Benz S-Class S 350 CDI  0.884615       0.598796   0.015675   
5258   Mercedes-Benz S-Class S 350 CDI  0.884615       0.598796   0.015675   
136    Mercedes-Benz S-Class S 350 CDI  0.884615       0.598796   0.015675   
7596  BMW 6 Series GT 630d Luxury Line  0.923077       0.581745   0.012709   
6258  BMW 6 Series GT 630d Luxury Line  0.923077       0.581745   0.012709   
4753  BMW 6 Series GT 630d Luxury Line  0.923077       0.548646   0.009320   

      fuel  transmission  owner  
170      3             0      0  
4101     1             0      0  
1071     1             0      0  
4766 

In [52]:
budget_user = {
    "year": 0.35,
    "selling_price": 0.20,
    "km_driven": 0.60,
    "mileage": 0.75,
    "engine": 0.30,
    "max_power": 0.25,
    "torque": 0.30,
    "seats": 0.60,
    "fuel": petrol_code,
    "seller_type": individual_code,
    "transmission": manual_code,
    "owner": first_owner_code,
}

recommendations = recommend_for_user(budget_user, top_n=10)
print(recommendations)

                                  name      year  selling_price  km_driven  \
170      Volvo XC90 T8 Excellence BSIV  0.884615       1.000000   0.012709   
4766  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011438   
1071  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011928   
4101  BMW 6 Series GT 630d Luxury Line  0.923077       0.598796   0.011928   
7596  BMW 6 Series GT 630d Luxury Line  0.923077       0.581745   0.012709   
6258  BMW 6 Series GT 630d Luxury Line  0.923077       0.581745   0.012709   
4753  BMW 6 Series GT 630d Luxury Line  0.923077       0.548646   0.009320   
5258   Mercedes-Benz S-Class S 350 CDI  0.884615       0.598796   0.015675   
136    Mercedes-Benz S-Class S 350 CDI  0.884615       0.598796   0.015675   
7713   Mercedes-Benz S-Class S 350 CDI  0.884615       0.598796   0.015675   

      fuel  transmission  owner  
170      3             0      0  
4766     1             0      0  
1071     1             0      0  
4101 

In [11]:
offroad_user = {
    "year": 0.55,
    "selling_price": 0.65,
    "km_driven": 0.30,
    "mileage": 0.45,
    "engine": 0.85,
    "max_power": 0.70,
    "torque": 0.90,
    "seats": 0.85,
    "fuel": diesel_code,
    "seller_type": dealer_code,
    "transmission": manual_code,
    "owner": first_owner_code,
}

recommendations = recommend_for_user(offroad_user, top_n=10)
print(recommendations)

                                 name      year  selling_price  km_driven  \
7703  Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
1564  Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
134   Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
1860  Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
5248  Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
3239  Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
5608       Toyota Fortuner 3.0 Diesel  0.615385       0.117352   0.042364   
4707           Toyota Fortuner 4x4 MT  0.807692       0.197593   0.008473   
5124           Toyota Fortuner 4x4 MT  0.807692       0.197593   0.008473   
5647  Toyota Fortuner 2.8 4WD MT BSIV  0.923077       0.317954   0.021182   

      fuel  transmission  owner  
7703     3             0      0  
1564     3             0      0  
134      3             0      0  
1860     3      

In [12]:
premium_user = {
    "year": 0.95,
    "selling_price": 0.85,
    "km_driven": 0.05,
    "mileage": 0.55,
    "engine": 0.60,
    "max_power": 0.55,
    "torque": 0.55,
    "seats": 0.55,
    "fuel": petrol_code,
    "seller_type": dealer_code,
    "transmission": auto_code,
    "owner": first_owner_code,
}

recommendations = recommend_for_user(premium_user, top_n=10)
print(recommendations)

                                  name      year  selling_price  km_driven  \
170      Volvo XC90 T8 Excellence BSIV  0.884615       1.000000   0.012709   
1564   Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
7703   Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
1860   Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
134    Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
5248   Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
3239   Jeep Wrangler 2016-2019 3.6 4X4  0.884615       0.408225   0.007202   
7516    Volvo XC60 Inscription D5 BSIV  0.923077       0.548646   0.021182   
2938             BMW X7 xDrive 30d DPE  1.000000       0.719158   0.002118   
6258  BMW 6 Series GT 630d Luxury Line  0.923077       0.581745   0.012709   

      fuel  transmission  owner  
170      3             0      0  
1564     3             0      0  
7703     3             0      0  
1860 