In [56]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

import tensorflow as tf
from tensorflow.keras import layers, Model

np.random.seed(42)
tf.random.set_seed(42)

df = pd.read_csv("./car-details-v3.csv")

df = df[
    [
        "name",
        "year",
        "selling_price",
        "km_driven",
        "fuel",
        "seller_type",
        "transmission",
        "owner",
        "mileage",
        "engine",
        "max_power",
        "torque",
        "seats",
    ]
]

# Parsiranje numerickih vrednosti iz string kolona
df["mileage"] = df["mileage"].astype(str).str.extract(r"(\d+\.?\d*)")[0].astype(float)
df["engine"] = df["engine"].astype(str).str.extract(r"(\d+\.?\d*)")[0].astype(float)
df["max_power"] = df["max_power"].astype(str).str.extract(r"(\d+\.?\d*)")[0].astype(float)
df["torque"] = df["torque"].astype(str).str.extract(r"(\d+\.?\d*)")[0].astype(float)
df["seats"] = df["seats"].astype(float)

df["body_coupe"] = df["name"].str.contains(
    "Coupe|Sports|GT|Roadster|Convertible|Cabrio|TT|Z4|S2000|Mustang",
    case=False
).astype(int)

df["body_sedan"] = df["name"].str.contains(
    "Sedan|Dzire|City|Verna|Civic|Corolla|Passat|Octavia|Jetta|C-Class|S-Class",
    case=False
).astype(int)

df["body_suv"] = df["name"].str.contains(
    "Scorpio|Bolero|Fortuner|Safari|Innova|Jeep|XUV|Endeavour|Creta|Harrier|Hector",
    case=False
).astype(int)

df["is_premium_brand"] = df["name"].str.contains(
    "Mercedes|BMW|Audi|Lexus|Jaguar|Volvo|Porsche|Land Rover",
    case=False
).astype(int)

df.dropna(inplace=True)

In [57]:
# Normalizacija i kategorije
NUM_COLS = [
    "year",
    "selling_price",
    "km_driven",
    "mileage",
    "engine",
    "max_power",
    "torque",
    "seats",
]

CAT_COLS = ["fuel", "seller_type", "transmission", "owner"]

scaler = MinMaxScaler()
df[NUM_COLS] = scaler.fit_transform(df[NUM_COLS])

for col in CAT_COLS:
    df[col] = df[col].astype("category")

fuel_cat = df["fuel"].cat.categories
seller_cat = df["seller_type"].cat.categories
trans_cat = df["transmission"].cat.categories
owner_cat = df["owner"].cat.categories

df["fuel"] = df["fuel"].cat.codes
df["seller_type"] = df["seller_type"].cat.codes
df["transmission"] = df["transmission"].cat.codes
df["owner"] = df["owner"].cat.codes

df["is_suv"] = df["name"].str.contains(
    "Scorpio|Bolero|Fortuner|Safari|Sumo|Innova|Jeep|4X4|4WD|Endeavour",
    case=False,
).astype(int)

df["is_sport_model"] = df["name"].str.contains(
    "GTI|GT TSI|TSI|TFSI|vRS|RS\\b|iVTEC|VTEC|Type R|Sports|1.6S|Abarth|Turbo|N Line|"
    "Cooper S|ST Line|AMG|M\\b|M3|M4|M5",
    case=False,
).astype(int)

BIN_COLS = ["is_suv", "is_sport_model"]

num_fuel = len(fuel_cat)
num_seller = len(seller_cat)
num_trans = len(trans_cat)
num_owner = len(owner_cat)

print("fuel:", list(fuel_cat))
print("seller_type:", list(seller_cat))
print("transmission:", list(trans_cat))
print("owner:", list(owner_cat))


def get_code(categories, name, default=0):
    return int(np.where(categories == name)[0][0]) if name in categories else default


petrol_code = get_code(fuel_cat, "Petrol", default=0)
diesel_code = get_code(fuel_cat, "Diesel", default=0)

individual_code = 0
dealer_code = 0
for i, c in enumerate(seller_cat):
    if "Individual" in c:
        individual_code = i
    if "Dealer" in c:
        dealer_code = i

manual_code = get_code(trans_cat, "Manual", default=0)
auto_code = get_code(trans_cat, "Automatic", default=manual_code)

first_owner_code = 0
for i, c in enumerate(owner_cat):
    if "First Owner" in c:
        first_owner_code = i
        break

df_items = df[NUM_COLS + CAT_COLS + BIN_COLS].copy()


fuel: ['CNG', 'Diesel', 'LPG', 'Petrol']
seller_type: ['Dealer', 'Individual', 'Trustmark Dealer']
transmission: ['Automatic', 'Manual']
owner: ['First Owner', 'Fourth & Above Owner', 'Second Owner', 'Test Drive Car', 'Third Owner']


In [58]:
def generate_segment_users(n_per_segment=40):
    """
    Generise sinteticke korisnike sa poljem 'segment' (1..6).
    Ti korisnici se koriste za generisanje trening parova.
    """
    users = []

    for _ in range(n_per_segment):
        # 1) Budget Buyer
        users.append(
            {
                "year": np.random.uniform(0.25, 0.55),
                "selling_price": np.random.uniform(0.1, 0.35),
                "km_driven": np.random.uniform(0.3, 0.8),
                "mileage": np.random.uniform(0.6, 1.0),
                "engine": np.random.uniform(0.2, 0.5),
                "max_power": np.random.uniform(0.2, 0.5),
                "torque": np.random.uniform(0.2, 0.5),
                "seats": np.random.uniform(0.3, 0.7),
                "fuel": petrol_code,
                "seller_type": individual_code,
                "transmission": manual_code,
                "owner": first_owner_code,
                "segment": 1,
            }
        )

        # 2) Diesel Commuter
        users.append(
            {
                "year": np.random.uniform(0.45, 0.8),
                "selling_price": np.random.uniform(0.3, 0.6),
                "km_driven": np.random.uniform(0.2, 0.6),
                "mileage": np.random.uniform(0.5, 0.9),
                "engine": np.random.uniform(0.4, 0.7),
                "max_power": np.random.uniform(0.3, 0.6),
                "torque": np.random.uniform(0.4, 0.8),
                "seats": np.random.uniform(0.4, 0.7),
                "fuel": diesel_code,
                "seller_type": dealer_code,
                "transmission": manual_code,
                "owner": first_owner_code,
                "segment": 2,
            }
        )

        # 3) Family Buyer
        users.append(
            {
                "year": np.random.uniform(0.6, 0.9),
                "selling_price": np.random.uniform(0.4, 0.7),
                "km_driven": np.random.uniform(0.1, 0.4),
                "mileage": np.random.uniform(0.4, 0.8),
                "engine": np.random.uniform(0.4, 0.7),
                "max_power": np.random.uniform(0.4, 0.7),
                "torque": np.random.uniform(0.4, 0.7),
                "seats": np.random.uniform(0.6, 1.0),
                "fuel": np.random.choice([petrol_code, diesel_code]),
                "seller_type": dealer_code,
                "transmission": auto_code,
                "owner": first_owner_code,
                "segment": 3,
            }
        )

        # 4) Sport Enthusiast
        users.append(
            {
                "year": np.random.uniform(0.5, 0.9),
                "selling_price": np.random.uniform(0.6, 0.9),
                "km_driven": np.random.uniform(0.05, 0.3),
                "mileage": np.random.uniform(0.3, 0.7),
                "engine": np.random.uniform(0.6, 0.95),
                "max_power": np.random.uniform(0.7, 1.0),
                "torque": np.random.uniform(0.6, 1.0),
                "seats": np.random.uniform(0.1, 0.3),
                "fuel": petrol_code,
                "seller_type": np.random.choice([individual_code, dealer_code]),
                "transmission": manual_code,
                "owner": first_owner_code,
                "segment": 4,
            }
        )

        # 5) Off-road Utility
        users.append(
            {
                "year": np.random.uniform(0.3, 0.8),
                "selling_price": np.random.uniform(0.4, 0.8),
                "km_driven": np.random.uniform(0.3, 0.8),
                "mileage": np.random.uniform(0.3, 0.7),
                "engine": np.random.uniform(0.6, 1.0),
                "max_power": np.random.uniform(0.6, 0.9),
                "torque": np.random.uniform(0.7, 1.0),
                "seats": np.random.uniform(0.6, 1.0),
                "fuel": diesel_code,
                "seller_type": np.random.choice([individual_code, dealer_code]),
                "transmission": manual_code,
                "owner": first_owner_code,
                "segment": 5,
            }
        )

        # 6) Premium / Luxury Urban
        users.append(
            {
                "year": np.random.uniform(0.8, 1.0),
                "selling_price": np.random.uniform(0.7, 1.0),
                "km_driven": np.random.uniform(0.0, 0.3),
                "mileage": np.random.uniform(0.3, 0.7),
                "engine": np.random.uniform(0.6, 0.9),
                "max_power": np.random.uniform(0.6, 0.9),
                "torque": np.random.uniform(0.5, 0.85),
                "seats": np.random.uniform(0.5, 0.9),
                "fuel": petrol_code,
                "seller_type": dealer_code,
                "transmission": auto_code,
                "owner": first_owner_code,
                "segment": 6,
            }
        )

    return pd.DataFrame(users)


users_df = generate_segment_users(40)
print("Users shape:", users_df.shape)

Users shape: (240, 13)


In [59]:
def score_items_for_segment(user_row, cars_df: pd.DataFrame):
    """
    Segment-based scoring funkcija
    """

    seg = int(user_row["segment"])

    cars = cars_df
    n = len(cars)

    user_num = user_row[NUM_COLS].values.astype("float32")
    car_num = cars[NUM_COLS].values.astype("float32")
    diff = np.abs(car_num - user_num)
    base_sim = 1.0 - diff
    base_sim = np.clip(base_sim, 0.0, 1.0)
    base_score = base_sim.sum(axis=1)

    fuel = cars["fuel"].values
    seller = cars["seller_type"].values
    trans = cars["transmission"].values
    owner = cars["owner"].values
    is_suv = cars["is_suv"].values
    is_sport = cars["is_sport_model"].values

    year = cars["year"].values
    price = cars["selling_price"].values
    km = cars["km_driven"].values
    mileage = cars["mileage"].values
    engine = cars["engine"].values
    power = cars["max_power"].values
    torque = cars["torque"].values
    seats = cars["seats"].values

    score = np.zeros(n, dtype="float32")

    # BUDGET
    if seg == 1:
        score += 2.5 * (1 - np.abs(price - user_row["selling_price"]))
        score += 1.0 * (1 - np.abs(year - user_row["year"]))
        score += 1.5 * (1 - np.abs(km - user_row["km_driven"]))
        score += 2.0 * (1 - np.abs(mileage - user_row["mileage"]))
        score += 1.0 * (fuel == user_row["fuel"])
        score += 1.0 * (seller == user_row["seller_type"])
        score += 0.5 * (owner == user_row["owner"])
        score += base_score

    # DIESEL COMMUTER
    elif seg == 2:
        score += 3.0 * (fuel == diesel_code)
        score += 2.0 * (1 - np.abs(mileage - user_row["mileage"]))
        score += 1.5 * (1 - np.abs(km - user_row["km_driven"]))
        score += 1.5 * (1 - np.abs(price - user_row["selling_price"]))
        score -= 1.5 * is_suv
        score += 1.0 * (trans == manual_code)
        score += base_score

    # FAMILY BUYER
    elif seg == 3:
        score += 2.0 * (seats >= 0.6).astype("float32")   # 5+ sedišta
        score += 1.5 * (1 - np.abs(km - user_row["km_driven"]))
        score += 1.5 * (1 - np.abs(year - user_row["year"]))
        score += 1.0 * (trans == auto_code)
        score += 1.0 * (seller == dealer_code)
        score += 0.5 * is_suv  # SUV-ovi blagi plus
        score += base_score

    # SPORT ENTHUSIAST
    elif seg == 4:
        score += 4.0 * power
        score += 3.0 * engine
        score += 2.0 * torque
        score += 2.0 * is_sport
        score -= 3.0 * is_suv
        score += 2.0 * (seats <= 0.5).astype("float32")
        score += 1.5 * (fuel == petrol_code)
        score += 1.0 * (trans == manual_code)
        score += 10 * df["body_coupe"]
        score -= 8 * df["body_suv"]
        score -= 5 * df["body_sedan"]
        score += 15 * df["is_sport_model"]
        score += base_score

    # OFF-ROAD
    elif seg == 5:
        score += 4.0 * is_suv
        score += 3.0 * torque
        score += 2.0 * engine
        score += 1.5 * (fuel == diesel_code)
        score += 1.0 * (seats >= 0.6).astype("float32")
        score += 1.0 * (trans == manual_code)
        score += base_score

    # LUXURY / PREMIUM
    elif seg == 6:
        score += 3.0 * (1 - np.abs(price - user_row["selling_price"]))
        score += 2.5 * (1 - np.abs(year - user_row["year"]))
        score += 2.0 * (trans == auto_code)
        score += 1.5 * (seller == dealer_code)
        score += 2.0 * (seats >= 0.6).astype("float32")
        score += 1.5 * (fuel == petrol_code)
        score += 1.0 * power
        score += 8 * df["body_sedan"]
        score += 10 * df["is_premium_brand"]
        score -= 6 * df["body_suv"] 
        score -= 20 * (df["is_premium_brand"] == 0)
        score += base_score

    else:
        score += base_score

    return score

In [60]:
def generate_training_pairs_fast(users_df, cars_df, n_pos=15, n_neg=15):
    u_num_list = []
    u_fuel_list = []
    u_seller_list = []
    u_trans_list = []
    u_owner_list = []

    i_num_list = []
    i_fuel_list = []
    i_seller_list = []
    i_trans_list = []
    i_owner_list = []

    y_list = []

    for _, user in users_df.iterrows():
        scores = score_items_for_segment(user, cars_df)
        idx_sorted = np.argsort(scores)
        pos_idx = idx_sorted[-n_pos:]
        neg_idx = idx_sorted[:n_neg]

        def add_pairs(indices, label):
            for idx in indices:
                car = cars_df.iloc[idx]

                u_num_list.append(user[NUM_COLS].values.astype("float32"))
                u_fuel_list.append(int(user["fuel"]))
                u_seller_list.append(int(user["seller_type"]))
                u_trans_list.append(int(user["transmission"]))
                u_owner_list.append(int(user["owner"]))

                i_num_list.append(car[NUM_COLS].values.astype("float32"))
                i_fuel_list.append(int(car["fuel"]))
                i_seller_list.append(int(car["seller_type"]))
                i_trans_list.append(int(car["transmission"]))
                i_owner_list.append(int(car["owner"]))

                y_list.append(float(label))

        add_pairs(pos_idx, 1.0)
        add_pairs(neg_idx, 0.0)

    u_num = np.stack(u_num_list).astype("float32")
    u_fuel = np.array(u_fuel_list, dtype="int32")
    u_seller = np.array(u_seller_list, dtype="int32")
    u_trans = np.array(u_trans_list, dtype="int32")
    u_owner = np.array(u_owner_list, dtype="int32")

    i_num = np.stack(i_num_list).astype("float32")
    i_fuel = np.array(i_fuel_list, dtype="int32")
    i_seller = np.array(i_seller_list, dtype="int32")
    i_trans = np.array(i_trans_list, dtype="int32")
    i_owner = np.array(i_owner_list, dtype="int32")

    y = np.array(y_list, dtype="float32")

    return (
        u_num,
        u_fuel,
        u_seller,
        u_trans,
        u_owner,
        i_num,
        i_fuel,
        i_seller,
        i_trans,
        i_owner,
        y,
    )


(
    u_num,
    u_fuel,
    u_seller,
    u_trans,
    u_owner,
    i_num,
    i_fuel,
    i_seller,
    i_trans,
    i_owner,
    y,
) = generate_training_pairs_fast(users_df, df_items, n_pos=15, n_neg=15)

print("u_num:", u_num.shape)
print("i_num:", i_num.shape)
print("y:", y.shape)


u_num: (7200, 8)
i_num: (7200, 8)
y: (7200,)


In [61]:
# Two-tower model
embedding_dim = 32
num_numeric = len(NUM_COLS)

# USER tower
user_numeric_in = layers.Input(shape=(num_numeric,), name="user_num")
user_fuel_in = layers.Input(shape=(), dtype="int32", name="user_fuel")
user_seller_in = layers.Input(shape=(), dtype="int32", name="user_seller")
user_trans_in = layers.Input(shape=(), dtype="int32", name="user_trans")
user_owner_in = layers.Input(shape=(), dtype="int32", name="user_owner")

uf_emb = layers.Embedding(num_fuel, 8)(user_fuel_in)
us_emb = layers.Embedding(num_seller, 8)(user_seller_in)
ut_emb = layers.Embedding(num_trans, 8)(user_trans_in)
uo_emb = layers.Embedding(num_owner, 8)(user_owner_in)

u_concat = layers.Concatenate()(
    [
        user_numeric_in,
        layers.Flatten()(uf_emb),
        layers.Flatten()(us_emb),
        layers.Flatten()(ut_emb),
        layers.Flatten()(uo_emb),
    ]
)

u_hidden = layers.Dense(128, activation="relu")(u_concat)
u_hidden = layers.Dropout(0.2)(u_hidden)
u_hidden = layers.Dense(64, activation="relu")(u_hidden)
u_vec = layers.Dense(embedding_dim)(u_hidden)

user_tower = Model(
    inputs=[user_numeric_in, user_fuel_in, user_seller_in, user_trans_in, user_owner_in],
    outputs=u_vec,
)

# ITEM tower
item_numeric_in = layers.Input(shape=(num_numeric,), name="item_num")
item_fuel_in = layers.Input(shape=(), dtype="int32", name="item_fuel")
item_seller_in = layers.Input(shape=(), dtype="int32", name="item_seller")
item_trans_in = layers.Input(shape=(), dtype="int32", name="item_trans")
item_owner_in = layers.Input(shape=(), dtype="int32", name="item_owner")

if_emb = layers.Embedding(num_fuel, 8)(item_fuel_in)
is_emb = layers.Embedding(num_seller, 8)(item_seller_in)
it_emb = layers.Embedding(num_trans, 8)(item_trans_in)
io_emb = layers.Embedding(num_owner, 8)(item_owner_in)

i_concat = layers.Concatenate()(
    [
        item_numeric_in,
        layers.Flatten()(if_emb),
        layers.Flatten()(is_emb),
        layers.Flatten()(it_emb),
        layers.Flatten()(io_emb),
    ]
)

i_hidden = layers.Dense(128, activation="relu")(i_concat)
i_hidden = layers.Dropout(0.2)(i_hidden)
i_hidden = layers.Dense(64, activation="relu")(i_hidden)
i_vec = layers.Dense(embedding_dim)(i_hidden)

item_tower = Model(
    inputs=[item_numeric_in, item_fuel_in, item_seller_in, item_trans_in, item_owner_in],
    outputs=i_vec,
)

dot_score = layers.Dot(axes=1)([u_vec, i_vec])

model = Model(
    inputs=[
        user_numeric_in,
        user_fuel_in,
        user_seller_in,
        user_trans_in,
        user_owner_in,
        item_numeric_in,
        item_fuel_in,
        item_seller_in,
        item_trans_in,
        item_owner_in,
    ],
    outputs=dot_score,
)

model.compile(optimizer="adam", loss="binary_crossentropy")
model.summary()

In [62]:
# Trening
history = model.fit(
    [u_num, u_fuel, u_seller, u_trans, u_owner, i_num, i_fuel, i_seller, i_trans, i_owner],
    y,
    epochs=10,
    batch_size=64,
    verbose=1,
)

def build_item_inputs_from_df(cars_df: pd.DataFrame):
    num = cars_df[NUM_COLS].values.astype("float32")
    fuel = cars_df["fuel"].values.astype("int32")
    seller = cars_df["seller_type"].values.astype("int32")
    trans = cars_df["transmission"].values.astype("int32")
    owner = cars_df["owner"].values.astype("int32")
    return num, fuel, seller, trans, owner


item_num_all, item_fuel_all, item_seller_all, item_trans_all, item_owner_all = build_item_inputs_from_df(df_items)
item_embeddings = item_tower.predict(
    [item_num_all, item_fuel_all, item_seller_all, item_trans_all, item_owner_all],
    verbose=0,
)

Epoch 1/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - loss: 0.5851
Epoch 2/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.9555
Epoch 3/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 0.3135
Epoch 4/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 1.1175
Epoch 5/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 0.8868
Epoch 6/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 0.2411
Epoch 7/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0620
Epoch 8/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0430
Epoch 9/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0366
Epoch 10/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms

In [63]:
def recommend_for_user(user_pref: dict, top_n=10):
    user_num = np.array([[user_pref[c] for c in NUM_COLS]], dtype="float32")
    user_fuel = np.array([user_pref["fuel"]], dtype="int32")
    user_seller = np.array([user_pref["seller_type"]], dtype="int32")
    user_trans = np.array([user_pref["transmission"]], dtype="int32")
    user_owner = np.array([user_pref["owner"]], dtype="int32")

    u_emb = user_tower.predict(
        [user_num, user_fuel, user_seller, user_trans, user_owner],
        verbose=0,
    )

    scores = cosine_similarity(u_emb, item_embeddings)[0]
    sorted_idx = np.argsort(scores)[::-1]

    seen_names = set()
    selected_idx = []

    for idx in sorted_idx:
        name = df.iloc[idx]["name"]
        if name not in seen_names:
            seen_names.add(name)
            selected_idx.append(idx)
        if len(selected_idx) == top_n:
            break

    return df.iloc[selected_idx][
        ["name", "year", "selling_price", "km_driven", "fuel", "transmission", "owner"]
    ]

In [66]:
sport_user = {
    "year": 0.6,
    "selling_price": 0.8,
    "km_driven": 0.2,
    "mileage": 0.4,
    "engine": 1,
    "max_power": 1,
    "torque": 0.85,
    "seats": 0.1,
    "fuel": petrol_code,
    "seller_type": dealer_code,
    "transmission": manual_code,
    "owner": first_owner_code,
}

print("sport user:")
print(recommend_for_user(sport_user, top_n=10))

sport user:
                                              name      year  selling_price  \
1338  Ford Ecosport 1.0 Ecoboost Titanium Optional  0.730769       0.040020   
170                  Volvo XC90 T8 Excellence BSIV  0.884615       1.000000   
1377                           Honda City i-VTEC S  0.615385       0.032096   
6289                           Honda City 1.5 V MT  0.730769       0.044634   
5399                          Hyundai Verna SX Opt  0.807692       0.051154   
6543                     Ford Figo Aspire Titanium  0.807692       0.047142   
93                Volkswagen Vento Petrol Highline  0.692308       0.050652   
4948                        Hyundai i20 Sportz 1.2  0.769231       0.043129   
6141                              Maruti Swift VXI  0.730769       0.034203   
591                                Toyota Etios VX  0.692308       0.034604   

      km_driven  fuel  transmission  owner  
1338   0.020801     3             1      2  
170    0.012709     3       

In [49]:
family_user = {
    "year": 0.75,
    "selling_price": 0.6,
    "km_driven": 0.25,
    "mileage": 0.6,
    "engine": 0.6,
    "max_power": 0.6,
    "torque": 0.6,
    "seats": 0.8,
    "fuel": diesel_code,
    "seller_type": dealer_code,
    "transmission": auto_code,
    "owner": first_owner_code,
}

print("\nfamily user:")
print(recommend_for_user(family_user, top_n=10))


family user:
                                             name      year  selling_price  \
1105       Toyota Innova 2.5 VX (Diesel) 7 Seater  0.730769       0.072217   
3169                                 Tata Sumo EX  0.846154       0.041625   
3575           Toyota Innova 2.5 EV PS 8 STR BSIV  0.653846       0.047142   
5519          Toyota Innova 2.5 V Diesel 8-seater  0.692308       0.088365   
2838        Toyota Innova 2.5 G (Diesel) 8 Seater  0.769231       0.065196   
1382                Mahindra Bolero 2011-2019 SLE  0.769231       0.054163   
4376                            Tata Sumo Gold EX  0.923077       0.067202   
5587                      Tata Sumo Gold CX BSIII  0.884615       0.054163   
48    Toyota Innova 2.5 G (Diesel) 7 Seater BS IV  0.730769       0.067202   
5124                       Toyota Fortuner 4x4 MT  0.807692       0.197593   

      km_driven  fuel  transmission  owner  
1105   0.033607     1             1      2  
3169   0.026689     1             1  

In [50]:
budget_user = {
    "year": 0.4,
    "selling_price": 0.2,
    "km_driven": 0.5,
    "mileage": 0.8,
    "engine": 0.4,
    "max_power": 0.4,
    "torque": 0.4,
    "seats": 0.5,
    "fuel": petrol_code,
    "seller_type": individual_code,
    "transmission": manual_code,
    "owner": first_owner_code,
}

print("\nbudget user:")
print(recommend_for_user(budget_user, top_n=10))


budget user:
                                    name      year  selling_price  km_driven  \
3978              Honda Mobilio V i VTEC  0.807692       0.047142   0.014827   
7815              Honda Mobilio S i VTEC  0.884615       0.057172   0.029655   
1249              Honda BR-V i-VTEC E MT  0.884615       0.056670   0.050837   
7061               Honda BRV i-VTEC V MT  0.846154       0.069709   0.059310   
3674  Chevrolet Enjoy Petrol LS 7 Seater  0.769231       0.028285   0.033891   
2324            Maruti Ertiga VXI Petrol  0.923077       0.067202   0.002626   
1348              Maruti Ertiga BSIV VXI  0.884615       0.067202   0.096167   
6333                   Maruti Ertiga ZXI  0.807692       0.054163   0.033891   
2710              Maruti Ertiga BSIV LXI  0.884615       0.057172   0.014827   
4448                   Maruti Ertiga VXI  0.807692       0.057172   0.038128   

      fuel  transmission  owner  
3978     3             1      0  
7815     3             1      0  
124

In [51]:
offroad_user = {
    "year": 0.6,
    "selling_price": 0.6,
    "km_driven": 0.5,
    "mileage": 0.5,
    "engine": 0.9,
    "max_power": 0.8,
    "torque": 0.9,
    "seats": 0.8,
    "fuel": diesel_code,
    "seller_type": dealer_code,
    "transmission": manual_code,
    "owner": first_owner_code,
}

print("\noffroad user:")
print(recommend_for_user(offroad_user, top_n=10))


offroad user:
                                         name      year  selling_price  \
1143   Toyota Innova 2.5 VX (Diesel) 7 Seater  0.730769       0.072217   
3643               Toyota Fortuner 3.0 Diesel  0.653846       0.087262   
5950                             Tata Sumo EX  0.846154       0.041625   
1039               Toyota Fortuner 4x2 Manual  0.769231       0.135406   
4900               Ford Endeavour 2.5L 4X2 MT  0.384615       0.037111   
233             Mahindra Ssangyong Rexton RX5  0.730769       0.087262   
5124                   Toyota Fortuner 4x4 MT  0.807692       0.197593   
3987  Toyota Fortuner 2.5 4x2 MT TRD Sportivo  0.807692       0.197593   
1382            Mahindra Bolero 2011-2019 SLE  0.769231       0.054163   
7892   Toyota Innova Crysta 2.4 VX MT 8S BSIV  0.846154       0.166499   

      km_driven  fuel  transmission  owner  
1143   0.033607     1             1      2  
3643   0.050837     1             1      0  
5950   0.026689     1             1

In [52]:
diesel_commuter_user = {
    "year": 0.7,
    "selling_price": 0.5,
    "km_driven": 0.4,
    "mileage": 0.8,
    "engine": 0.6,
    "max_power": 0.5,
    "torque": 0.7,
    "seats": 0.6,
    "fuel": diesel_code,
    "seller_type": dealer_code,
    "transmission": manual_code,
    "owner": first_owner_code,
}

print("\ndiesel commuter user:")
print(recommend_for_user(diesel_commuter_user, top_n=10))


diesel commuter user:
                                               name      year  selling_price  \
4132                     Mahindra Xylo E4 ABS BS IV  0.692308       0.029087   
2782               Mahindra Xylo E8 ABS Airbag BSIV  0.692308       0.067202   
2043                     Ford Endeavour 2.5L 4X2 MT  0.692308       0.052257   
6624                  Mahindra Scorpio VLX 2WD BSIV  0.692308       0.072217   
5027           Mahindra Scorpio VLX 2WD AIRBAG BSIV  0.692308       0.050351   
2815          Mahindra Scorpio VLX 2WD AIRBAG BSIII  0.692308       0.049649   
6410                 Mahindra Scorpio VLX 2WD BSIII  0.692308       0.043129   
6747           Mahindra Scorpio VLX 4WD AIRBAG BSIV  0.653846       0.049649   
2769                          Mahindra Xylo D4 BSIV  0.692308       0.028084   
5723  Mahindra Scorpio 2006-2009 LX 2.6 Turbo 9 Str  0.538462       0.042126   

      km_driven  fuel  transmission  owner  
4132   0.050837     1             1      0  
2782  