In [162]:
import pandas as pd
from datetime import datetime
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl

DATA_FILE = 'data/list_of_cars.csv'

def preprocess(data_file):
    """Load the car list CSV and convert the measurement columns to floats.

    The file is read without a header row and the columns are named
    explicitly. For 'HP', 'Torque', 'Braking' and 'Road Holding' only a
    fixed-width prefix of each cell's text is parsed as a number (3
    characters, or 4 for 'Road Holding'); anything after that prefix is
    discarded.

    Cells that are missing, or whose prefix is not a number, become NaN
    instead of aborting the whole load.

    Args:
        data_file: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        A DataFrame with one row per CSV line; the four measurement columns
        are float64 and every other column is left as read.
    """
    column_names = [
        'Brand', 'Body', 'Model', 'Year', 'Price', 'HP', 'Torque', 'Acc',
        'Braking', 'Road Holding', 'Fuel Economy',
    ]
    # Number of leading characters of the raw cell that hold the number.
    numeric_prefix_widths = {
        'HP': 3,
        'Torque': 3,
        'Braking': 3,
        'Road Holding': 4,
    }

    df = pd.read_csv(data_file, header=None, names=column_names)

    for column, width in numeric_prefix_widths.items():
        # Bind width as a default so each lambda uses its own column's width.
        prefixes = df[column].apply(
            lambda cell, width=width: str(cell)[:width].strip()
        )
        # errors='coerce' maps unparseable prefixes (and 'nan') to NaN.
        # The explicit astype keeps the dtype float even when every value
        # is a whole number. No dropna here: assigning back to the column
        # realigns on the index, so dropped rows would reappear as NaN.
        df[column] = pd.to_numeric(prefixes, errors='coerce').astype(float)

    return df

def calculate_score(row, average_score, max_score, attribute):
    """Map one attribute of a car onto a 0-5 scale relative to the data set.

    A value at the average scores 2.5 and a value at (or beyond) the best
    observed value scores 5. For every attribute except 'Braking' higher is
    better; for 'Braking' lower is better, so ``max_score`` is expected to
    be the smallest value in the data set.

    Args:
        row: Mapping-like object indexed by attribute name.
        average_score: Mean of the attribute.
        max_score: Best value of the attribute (largest, or smallest for
            'Braking').
        attribute: Name of the attribute to score.

    Returns:
        The score; the int 5 for the best value, otherwise a float. A NaN
        attribute value yields NaN.
    """
    value = row[attribute]

    if attribute == 'Braking':
        # Lower is better: the comparisons are mirrored.
        if value <= max_score:
            return 5
        if value < average_score:
            # Better than average: interpolate between 2.5 and 5.
            fraction = (max_score - value) / (max_score - average_score)
            return 5 - 2.5 * fraction
        # Average or worse: shrink 2.5 by how far the value exceeds the mean.
        return (average_score / value) * 2.5

    if value >= max_score:
        return 5
    if value > average_score:
        # Better than average: interpolate between 2.5 and 5.
        fraction = (value - average_score) / (max_score - average_score)
        return 2.5 + 2.5 * fraction
    # Average or worse: scale 2.5 by the share of the mean reached.
    return value / average_score * 2.5

# Load the cleaned car table.
data = preprocess(DATA_FILE)

# Reference points for scoring: the mean and the best value per attribute.
avg_HP, max_HP = data['HP'].mean(), data['HP'].max()
avg_torque, max_torque = data['Torque'].mean(), data['Torque'].max()
# Braking is lower-is-better, so the "max" reference is the column minimum.
avg_braking_distance, max_braking_distance = data['Braking'].mean(), data['Braking'].min()
avg_skid_pad, max_skid_pad = data['Road Holding'].mean(), data['Road Holding'].max()

# Descriptive columns copied unchanged into the output frame.
d = {column: data[column] for column in ('Brand', 'Body', 'Year', 'Price', 'Model')}

hp_score_list, torque_score_list = [], []
braking_score_list, turning_score_list = [], []

# (attribute, average, best value, list receiving the per-car scores)
scoring_plan = (
    ('HP', avg_HP, max_HP, hp_score_list),
    ('Torque', avg_torque, max_torque, torque_score_list),
    ('Braking', avg_braking_distance, max_braking_distance, braking_score_list),
    ('Road Holding', avg_skid_pad, max_skid_pad, turning_score_list),
)

# Single pass over the cars, scoring every attribute of each one in turn.
for _, car in data.iterrows():
    for attribute, average, best, scores in scoring_plan:
        scores.append(calculate_score(car, average, best, attribute))

df = pd.DataFrame(data=d)

# Replace the raw measurements with their scores, in plan order.
for attribute, _, _, scores in scoring_plan:
    df[attribute] = scores

print(df[['Braking', 'Brand', 'Model']])

df.to_csv('data/model_vectors.csv')


        

     Braking                    Brand        Model
0   5.000000                     Audi           A4
1   3.023114                      BMW         330i
2        NaN                 Infinity          Q50
3   2.435227                    Lexus       IS 300
4   3.327251                      BMW         440i
5   4.543796                 Mercedes         C300
6   3.175182                    Volvo         XC90
7        NaN                    Lexus       RX 350
8   3.479319                      BMW           X5
9   2.304301                     Audi           Q7
10       NaN            Mercedes Benz      GLC 300
11  2.449143              Range Rover        Velar
12       NaN         Cheaper <$30,000          NaN
13  4.239659                    Honda        Civic
14  3.479319                    Mazda            3
15  3.175182                   Toyota        Camry
16       NaN                     Kia        Forte 
17  2.477457   Hyundai Elantra 15,999          NaN
18  3.479319       Chevy Cruze 