In [37]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Imports
import os
import pickle
import pandas as pd
from datetime import datetime
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score
import flModel
%matplotlib inline

In [38]:
DATA_FILE = 'data/model_vectors.csv'
df = pd.read_csv(DATA_FILE)
# df = df.dropna()
df

Unnamed: 0.1,Unnamed: 0,Body,Brand,Model,Price,Year,HP,Torque,Braking,RoadHolding,FuelEconomy
0,3,Sedan,Audi,A4,51800,2017,3.099128,3.733226,2.783251,4.074627,2.346729
1,4,Sedan,BMW,330i,45900,2017,3.039725,3.411165,2.392113,2.494184,2.346729
2,5,Sedan,Infinity,Q50,39900,2018,2.456814,3.411165,5.0,3.380597,2.172897
3,6,Sedan,Lexus,IS 300,41050,2017,3.901058,3.819109,2.283381,2.686567,1.91215
4,7,Sedan,BMW,440i,56850,2018,4.108966,4.957059,2.420934,3.149254,2.172897
5,8,Sedan,Mercedes,C300,44700,2018,2.935771,3.733226,2.635468,5.0,2.172897
6,11,SUV,Volvo,XC90,59150,2018,4.049564,4.205582,2.406437,2.686567,1.999066
7,12,SUV,Lexus,RX 350,56500,2017,3.737702,3.604402,4.359606,2.373982,1.91215
8,13,SUV,BMW,X5,69950,2016,3.811955,4.312936,2.435606,2.404033,2.085982
9,14,SUV,Audi,Q7,74750,2017,3.099128,3.733226,2.160618,2.404033,1.91215


In [39]:
df.dtypes

Unnamed: 0       int64
Body            object
Brand           object
Model           object
Price            int64
Year             int64
HP             float64
Torque         float64
Braking        float64
RoadHolding    float64
FuelEconomy    float64
dtype: object

In [40]:
user_vector = [3.2,4.2,2.0,3.3]

def calc_rmse(row, user_vec):
    car_vector = [row.HP, row.Torque, row.Braking, row.RoadHolding]
    return sqrt(mean_squared_error(user_vec, car_vector))

df['rmse_score'] = df.apply(lambda x: calc_rmse(x,user_vector), axis=1)
df.sort_values(by=['rmse_score'])

ValueError: ("Input contains NaN, infinity or a value too large for dtype('float64').", 'occurred at index 30')

In [41]:
def generate_user_vec(driving_log, web_map, m):
    run_df = m.format_df([driving_log])

    num_of_brakes_average, num_of_brakes_stepped = m.count_braking_steps(run_df)

    average_acc = run_df[run_df['acc'] > 0]['acc'].mean()
    average_rpm = run_df['rpm'].mean()
    average_vel = run_df['speed'].mean()
    average_throttle = run_df['throttle'].mean()
    average_lateral = run_df['lateral_velocity'].apply(abs).mean()


    run_df['deacc'] = -1 * run_df['acc'] 
    run_df = run_df.query('deacc > 1')

    average_deacc = run_df['deacc'].mean()

    brake_score = m.compute_brake(average_deacc, num_of_brakes_stepped)
    hp_score = m.compute_hp(average_acc, average_rpm)
    torque_score = m.compute_torque(average_vel, average_throttle)
    lateral_score = m.calculate_score(average_lateral, m.average_lateral_overall, m.max_lateral)
    
    
    
    fuel_economy = 1.0
    fuel_pref = web_map.get('fuel_economy', 1)
    if fuel_pref == 2:
        fuel_economy = 3
    elif fuel_pref == 3:
        fuel_economy = 5.0
    return [hp_score, torque_score, brake_score, lateral_score, fuel_economy]

def norm_fuel(row, web_map):
#     return 1.0
    fuel_economy = 1.0
    fuel_pref = web_map.get('fuel_economy', 1)
    if fuel_pref == 2:
        if row.FuelEconomy >= 3:
            fuel_economy = 3
        else:
            fuel_economy = row.FuelEconomy
    elif fuel_pref == 3:
        fuel_economy = row.FuelEconomy
    return fuel_economy

# if fuel economy > 2.5 = 2.5


def calc_rmse(row, user_vec):
    car_vector = [row.HP, row.Torque, row.Braking, row.RoadHolding, row.FuelEconomyNorm]
    return sqrt(mean_squared_error(user_vec, car_vector))

def filter_cars(row, web_map=dict()):
    cond1 = row.Price > web_map.get('price_min', 0)
    cond2 = row.Price < web_map.get('price_max', 100000)
    cond3 = True
    fuel_pref = web_map.get('fuel_economy', 1)
    if fuel_pref == 2 and row.FuelEconomy < 1:
        cond3 = False
    elif fuel_pref == 3 and row.FuelEconomy < 2:
        cond3 = False
    return cond1 & cond2 & cond3

def find_car(models_file, driving_log, web_inputs, fuzzy_model):
    user_vector = generate_user_vec(driving_log, web_inputs, fuzzy_model)
    df = pd.read_csv(models_file).dropna()
    # Apply filtering
    df = df[df.apply(lambda x: filter_cars(x, web_inputs), axis=1)]
#     df[df.apply(lambda x: x['b'] > x['c'], axis=1)]
    df['FuelEconomyNorm'] = df.apply(lambda x: norm_fuel(x, web_inputs), axis=1)
    df['rmse_score'] = df.apply(lambda x: calc_rmse(x,user_vector), axis=1)
    df['percent_match'] = df.apply(lambda x: (5 - x.rmse_score) * 20, axis=1)
    rmse_norm_factor = df.rmse_score.min() / 2
    df['percent_match_norm'] = df.apply(lambda x: (5 - x.rmse_score + rmse_norm_factor) * 20, axis=1)
    return df.sort_values(by=['rmse_score'])

In [42]:
web_mapping = {
    'price_min': 10000,
    'price_max': 45000,
    'fuel_economy': 1, # 1,2,3
}

with open('models/fuzzy_models.p', 'rb') as handle:
    fuzzy_models = pickle.load(handle)
    
model = find_car(DATA_FILE, 'hwd_data2/run_40.csv', web_mapping, fuzzy_models)
list_of_cars = model.T.to_dict().values()

In [43]:
find_car(DATA_FILE, None, None)

TypeError: find_car() missing 1 required positional argument: 'fuzzy_model'

In [44]:
def return_rmse(price, fuel_economy, data_file_name):
    
    web_mapping = {
        'price_min': 0,
        'price_max': 45000,
        'fuel_economy': 1, # 1,2,3
    }

    with open('models/fuzzy_models.p', 'rb') as handle:
        fuzzy_models = pickle.load(handle)

    model = find_car(DATA_FILE, 'hwd_data2/run_40.csv', web_mapping, fuzzy_models)
    list_of_cars = model.T.to_dict().values()
    
    return list_of_cars