In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Print the kernel's current working directory (IPython shell escape).
!pwd

/home/marcnaweb/code/marcnaweb/car_recommendation_engine/notebooks


# Creating the search data frame

In [3]:
# Load the car price CSV; its first column is used as the row index.
# The path is relative to the notebook's directory (notebooks/, a sibling of
# raw_data/) so the notebook is not tied to one user's home directory.
price_df = pd.read_csv('../raw_data/car_prices_w_prices_scaled.csv', index_col=0)

In [4]:
# Load the car features CSV (default integer index).
# The path is relative to the notebook's directory (notebooks/, a sibling of
# raw_data/) so the notebook is not tied to one user's home directory.
features_df = pd.read_csv('../raw_data/scaled_cleaned.csv')

In [5]:
# Left join: keep every price row and attach the feature columns that share
# its car_code.
merged_df = pd.merge(price_df, features_df, how="left", on="car_code")

In [6]:
# Keep one row per distinct (manufacturer, model, model year, car code)
# combination; the row labels of merged_df are preserved.
search_df = merged_df.loc[
    :, ["car_manufacturer", "car_model", "car_model_year", "car_code"]
].drop_duplicates()
search_df

Unnamed: 0,car_manufacturer,car_model,car_model_year,car_code
0,Volkswagen,Polo 1.6,2003,371
1,Ford,Fiesta Supercharger 1.0,2003,372
2,Audi,S3 1.8 Turbo Quattro,2003,376
3,Alfa Romeo,156 2.5 V6,2003,377
4,Volkswagen,Parati Tour 2.0,2003,379
...,...,...,...,...
84126,Mitsubishi,Eclipse GS 2.0 Turbo,1992,333
84127,Ford,Royale Ghia 2.0i,1992,334
84128,Fiat,Uno Mille 1.0,1992,336
84129,Chevrolet,Chevette Junior 1.0,1992,432


## Creating the car code finder

In [7]:
# Build a human-readable label of the form "<manufacturer> - <model> - <year>".
search_df["car_model_text"] = (
    search_df["car_manufacturer"]
    .add(" - ")
    .add(search_df["car_model"])
    .add(" - ")
    .add(search_df["car_model_year"].map(str))
)

In [8]:
# Display the frame to check the newly added car_model_text column.
search_df

Unnamed: 0,car_manufacturer,car_model,car_model_year,car_code,car_model_text
0,Volkswagen,Polo 1.6,2003,371,Volkswagen - Polo 1.6 - 2003
1,Ford,Fiesta Supercharger 1.0,2003,372,Ford - Fiesta Supercharger 1.0 - 2003
2,Audi,S3 1.8 Turbo Quattro,2003,376,Audi - S3 1.8 Turbo Quattro - 2003
3,Alfa Romeo,156 2.5 V6,2003,377,Alfa Romeo - 156 2.5 V6 - 2003
4,Volkswagen,Parati Tour 2.0,2003,379,Volkswagen - Parati Tour 2.0 - 2003
...,...,...,...,...,...
84126,Mitsubishi,Eclipse GS 2.0 Turbo,1992,333,Mitsubishi - Eclipse GS 2.0 Turbo - 1992
84127,Ford,Royale Ghia 2.0i,1992,334,Ford - Royale Ghia 2.0i - 1992
84128,Fiat,Uno Mille 1.0,1992,336,Fiat - Uno Mille 1.0 - 1992
84129,Chevrolet,Chevette Junior 1.0,1992,432,Chevrolet - Chevette Junior 1.0 - 1992


In [9]:
def list_cars(car_model_text, df=None, regex=False):
    """Return the car labels that contain ``car_model_text``.

    Parameters
    ----------
    car_model_text : str
        Text to look for inside the ``car_model_text`` column (case-sensitive).
    df : pandas.DataFrame, optional
        Frame to search; it must have a ``car_model_text`` column. Defaults to
        the notebook-level ``search_df``.
    regex : bool, default False
        When False the text is matched literally, so characters such as ``.``
        or ``(`` in a model name ("1.6", "(Mille)") mean themselves. Pass True
        to treat the text as a regular expression.

    Returns
    -------
    pandas.DataFrame
        One-column frame (``car_model_text``) holding the matching rows, with
        the original row labels.
    """
    frame = search_df if df is None else df
    labels = frame["car_model_text"]
    # A boolean mask replaces the f-string ``query``: interpolating the search
    # text into a query expression breaks as soon as it contains a double
    # quote. ``na=False`` treats missing labels as non-matching.
    is_match = labels.str.contains(car_model_text, regex=regex, na=False)
    return frame.loc[is_match, ["car_model_text"]]

In [10]:
# Example lookup by partial label (the query keeps its trailing space).
list_cars(car_model_text="307 Passion ")

Unnamed: 0,car_model_text
224,Peugeot - 307 Passion 1.6 - 2003
13503,Peugeot - 307 Passion 1.6 - 2002


In [11]:
def get_car_code(car_model_text, df=None):
    """Return the ``car_code`` of the first car whose label matches.

    An exact match on the ``car_model_text`` column is preferred, so a full
    label always resolves to its own row. If there is no exact match, the
    first row whose label contains the text (literal, case-sensitive) is used.

    Parameters
    ----------
    car_model_text : str
        Full or partial car label, e.g. "Peugeot - 307 Passion 1.6 - 2003".
    df : pandas.DataFrame, optional
        Frame to search; it must have ``car_model_text`` and ``car_code``
        columns. Defaults to the notebook-level ``search_df``.

    Returns
    -------
    The ``car_code`` value of the first matching row.

    Raises
    ------
    IndexError
        If no label matches. The exception type is the same one the previous
        implementation raised, now with a descriptive message.
    """
    frame = search_df if df is None else df
    labels = frame["car_model_text"]

    is_match = labels == car_model_text
    if not is_match.any():
        # Literal substring fallback: the text is not interpreted as a regex
        # (so "1.6" cannot match "1x6") and is never spliced into a query
        # string. Missing labels count as non-matching.
        is_match = labels.str.contains(car_model_text, regex=False, na=False)

    codes = frame.loc[is_match, "car_code"]
    if codes.empty:
        raise IndexError(f"no car matches {car_model_text!r}")
    return codes.to_list()[0]

In [12]:
# Example: resolve a full label to its car_code.
get_car_code(car_model_text="Peugeot - 307 Passion 1.6 - 2003")

13581

In [13]:
# Save the lookup table to the current working directory; the row index is
# written as the first column.
search_df.to_csv(path_or_buf="search_df_csv.csv", index=True)