In [59]:
import pandas as pd
from datetime import datetime
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl
from sklearn.metrics import mean_squared_error
from math import sqrt

In [66]:
DATA_FILE = 'data/model_vectors.csv'
df = pd.read_csv(DATA_FILE)
# df = df.dropna()
df

Unnamed: 0.1,Unnamed: 0,Body,Brand,Model,Price,Year,HP,Torque,Braking,RoadHolding,FuelEconomy
0,0,Sedan,Audi,A4,51800.0,2017.0,3.128217,3.730637,5.0,4.019608,0.0
1,1,Sedan,BMW,330i,45900.0,2017.0,3.069724,3.407917,3.023114,2.476134,0.0
2,2,Sedan,Infinity,Q50,39900.0,2018.0,,,,,2.774941
3,3,Sedan,Lexus,IS 300,41050.0,2017.0,3.917876,3.816695,2.435227,2.54902,2.519606
4,4,Sedan,BMW,440i,56850.0,2018.0,4.122602,4.956971,3.327251,3.039216,2.738464
5,5,Sedan,Mercedes,C300,44700.0,2018.0,2.967361,3.730637,4.543796,5.0,2.738464
6,6,SUV,Volvo,XC90,59150.0,2018.0,,,3.175182,2.54902,2.738464
7,7,SUV,Lexus,RX 350,56500.0,2017.0,,,,,2.519606
8,8,SUV,BMW,X5,69950.0,2016.0,,,3.479319,2.386635,0.0
9,9,SUV,Audi,Q7,74750.0,2017.0,3.128217,3.730637,2.304301,2.386635,2.519606


In [67]:
df.dtypes

Unnamed: 0       int64
Body            object
Brand           object
Model           object
Price          float64
Year           float64
HP             float64
Torque         float64
Braking        float64
RoadHolding    float64
FuelEconomy    float64
dtype: object

In [47]:
user_vector = [3.2,4.2,2.0,3.3]

def calc_rmse(row, user_vec):
    car_vector = [row.HP, row.Torque, row.Braking, row.RoadHolding]
    return sqrt(mean_squared_error(user_vec, car_vector))

df['rmse_score'] = df.apply(lambda x: calc_rmse(x,user_vector), axis=1)
df.sort_values(by=['rmse_score'])

Unnamed: 0.1,Unnamed: 0,Body,Brand,Model,Price,Year,HP,Torque,Braking,RoadHolding,FuelEconomy,rmse_score
9,9,SUV,Audi,Q7,74750,2017.0,3.128217,3.730637,2.304301,2.386635,2.519606,0.536724
3,3,Sedan,Lexus,IS 300,41050,2017.0,3.917876,3.816695,2.435227,2.54902,2.519606,0.594908
15,15,Sedan,Toyota,Camry,35845,2018.0,3.844759,3.601549,3.175182,3.284314,2.811417,0.734023
1,1,Sedan,BMW,330i,45900,2017.0,3.069724,3.407917,3.023114,2.476134,0.0,0.769722
4,4,Sedan,BMW,440i,56850,2018.0,4.122602,4.956971,3.327251,3.039216,2.738464,0.901915
11,11,SUV,Range Rover,Velar,62000,2018.0,5.0,5.0,2.449143,2.476134,2.300834,1.090927
14,14,Sedan,Mazda,3,15900,2018.0,2.200536,2.143188,3.479319,2.794118,3.103228,1.385073
5,5,Sedan,Mercedes,C300,44700,2018.0,2.967361,3.730637,4.543796,5.0,2.738464,1.552041
0,0,Sedan,Audi,A4,51800,2017.0,3.128217,3.730637,5.0,4.019608,0.0,1.560712
13,13,Sedan,Honda,Civic,19990,2018.0,2.152698,2.05051,4.239659,4.754902,3.322086,1.792342


In [203]:
def generate_user_vec(driving_log, web_map):
    # Import library and generate user vectors from model
#     fuel_score = web_map.get('fuel_economy', 1)
    fuel_economy = 1.0
    fuel_pref = web_map.get('fuel_economy', 1)
    if fuel_pref == 2:
        fuel_economy = 3
    elif fuel_pref == 3:
        fuel_economy = 5.0
    return [3.0,3.2,3.2,3.3, fuel_economy]

def norm_fuel(row, web_map):
#     return 1.0
    fuel_economy = 1.0
    fuel_pref = web_map.get('fuel_economy', 1)
    if fuel_pref == 2:
        if row.FuelEconomy >= 3:
            fuel_economy = 3
        else:
            fuel_economy = row.FuelEconomy
    elif fuel_pref == 3:
        fuel_economy = row.FuelEconomy
    return fuel_economy

# if fuel economy > 2.5 = 2.5


def calc_rmse(row, user_vec):
    car_vector = [row.HP, row.Torque, row.Braking, row.RoadHolding, row.FuelEconomyNorm]
    return sqrt(mean_squared_error(user_vec, car_vector))

def filter_cars(row, web_map=dict()):
    cond1 = row.Price > web_map.get('price_min', 0)
    cond2 = row.Price < web_map.get('price_max', 100000)
    cond3 = True
    fuel_pref = web_map.get('fuel_economy', 1)
    if fuel_pref == 2 and row.FuelEconomy < 1:
        cond3 = False
    elif fuel_pref == 3 and row.FuelEconomy < 2:
        cond3 = False
    return cond1 & cond2 & cond3

def find_car(models_file, driving_log, web_inputs):
    user_vector = generate_user_vec(None, web_inputs)
    df = pd.read_csv(models_file).dropna()
    # Apply filtering
    df = df[df.apply(lambda x: filter_cars(x, web_inputs), axis=1)]
#     df[df.apply(lambda x: x['b'] > x['c'], axis=1)]
    df['FuelEconomyNorm'] = df.apply(lambda x: norm_fuel(x, web_inputs), axis=1)
    df['rmse_score'] = df.apply(lambda x: calc_rmse(x,user_vector), axis=1)
    df['percent_match'] = df.apply(lambda x: (5 - x.rmse_score) * 20, axis=1)
    rmse_norm_factor = df.rmse_score.min() / 2
    df['percent_match_norm'] = df.apply(lambda x: (5 - x.rmse_score + rmse_norm_factor) * 20, axis=1)
    return df.sort_values(by=['rmse_score'])

In [202]:
web_mapping = {
    'price_min': 10000,
    'price_max': 45000,
    'fuel_economy': 2, # 1,2,3
}
find_car(DATA_FILE, None, web_mapping)

Unnamed: 0.1,Unnamed: 0,Body,Brand,Model,Price,Year,HP,Torque,Braking,RoadHolding,FuelEconomy,FuelEconomyNorm,rmse_score,percent_match,percent_match_norm
15,15,Sedan,Toyota,Camry,35845.0,2018.0,3.844759,3.601549,3.175182,3.284314,2.811417,2.811417,0.426916,91.461687,95.730843
14,14,Sedan,Mazda,3,15900.0,2018.0,2.200536,2.143188,3.479319,2.794118,3.103228,3.0,0.646518,87.069632,91.338788
3,3,Sedan,Lexus,IS 300,41050.0,2017.0,3.917876,3.816695,2.435227,2.54902,2.519606,2.519606,0.721448,85.571041,89.840197
5,5,Sedan,Mercedes,C300,44700.0,2018.0,2.967361,3.730637,4.543796,5.0,2.738464,2.738464,1.004672,79.906562,84.175718
13,13,Sedan,Honda,Civic,19990.0,2018.0,2.152698,2.05051,4.239659,4.754902,3.322086,3.0,1.023414,79.531723,83.800879


In [199]:
find_car(DATA_FILE, None, None)

AttributeError: 'NoneType' object has no attribute 'get'