In [None]:
import pandas as pd
import numpy as np
import psycopg2
import sqlalchemy
from sqlalchemy.sql import text
import psql_auth

In [None]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# Load database credentials through the project-local psql_auth helper.
sql_auth = psql_auth.secrets()
# NOTE(review): presumably a SQLAlchemy connection URL derived from the same
# credentials — confirm in psql_auth.
alchemy_auth = psql_auth.alchemy(sql_auth)

# Open a raw psycopg2 connection. Each pair maps a psycopg2 keyword argument
# to the credential key that supplies it (note 'password' <- 'pass').
conn = psycopg2.connect(
    **{
        kwarg: sql_auth[key]
        for kwarg, key in (
            ('host', 'host'),
            ('database', 'database'),
            ('port', 'port'),
            ('user', 'user'),
            ('password', 'pass'),
        )
    }
)

In [None]:
# Build the SQLAlchemy engine from the value returned by psql_auth.alchemy().
# NOTE(review): presumably a PostgreSQL connection URL — confirm in psql_auth.
url = alchemy_auth
engine = sqlalchemy.create_engine(url)

In [None]:
# Select the specification columns from car_models joined to car_name and
# car_make, and load the result into a DataFrame.
sql = '''SELECT make, name, edition, min_year, max_year, hp, torque, body, door, cylinder, displacement, aspiration, transmission, drive
    FROM car_models AS M JOIN car_name AS N ON M.name_id = N.id JOIN car_make as MA ON N.make_id= MA.id;'''
with engine.connect() as connn:
    query = connn.execute(text(sql))
    # Read column names and rows while the connection is still open; the
    # result object is tied to the connection and must not be consumed after
    # the `with` block has released it.
    column_names = list(query.keys())
    rows = query.fetchall()
# Name the columns explicitly rather than relying on pandas inferring them
# from the row objects, so later cells can always index by column name.
df = pd.DataFrame(rows, columns=column_names)

In [None]:
# Any max_year that is not below 9999 is overwritten with 2023 (hard-coded
# stand-in for "current year"); all other values are kept.
df['max_year'] = df['max_year'].where(df['max_year'] < 9999, 2023)

# Derive 0/1 indicator columns from the categorical columns.
# Aspiration
df['turbo'] = df.aspiration.eq('turbo').astype('int64')
df['super'] = df.aspiration.eq('supercharged').astype('int64')
# Body
df['two_door'] = df.door.eq(2).astype('int64')
# Transmission: 'both' sets the manual and the automatic flag.
df['man'] = df.transmission.isin(['man', 'both']).astype('int64')
df['auto'] = df.transmission.isin(['auto', 'both']).astype('int64')
# Drivetrain: '4wd' is folded into the awd flag.
df['fwd'] = df.drive.eq('fwd').astype('int64')
df['rwd'] = df.drive.eq('rwd').astype('int64')
df['awd'] = df.drive.isin(['awd', '4wd']).astype('int64')

In [None]:
# Column order matters: mydist reads the first two columns (hp, torque) as
# performance values and every remaining column as a feature flag.
feature_columns = [
    'hp', 'torque',
    'turbo', 'super', 'two_door', 'man', 'auto', 'fwd', 'rwd', 'awd',
]
X = df.loc[:, feature_columns].to_numpy()

# Scaling is currently disabled; X keeps the raw column values.
# scaler= StandardScaler()
# X = scaler.fit_transform(X)

In [None]:
def mydist(x, y):
    """Distance between two car vectors: performance gap scaled by feature similarity.

    The first two entries of each vector are treated as performance values
    and compared with the Euclidean distance. All remaining entries are
    treated as feature flags and condensed into a similarity score
    ``(n - sum((x_feats - y_feats) ** 2)) / n``, which is 1 when the flags
    are identical and 0 when all ``n`` 0/1 flags differ. The performance
    distance is divided by that similarity, so vectors with fewer matching
    flags end up farther apart.

    Parameters
    ----------
    x, y : 1-D numpy arrays of equal length.

    Returns
    -------
    float
        ``perf / similarity``. If the similarity is zero or negative (no
        flags match, or inputs lie outside the 0/1 range) ``np.inf`` is
        returned instead of dividing, which avoids ``nan`` (0/0), a
        divide-by-zero warning, or a negative distance. If the vectors
        carry no feature entries at all, the plain performance distance is
        returned.
    """
    x_perf, x_feats = x[:2], x[2:]
    y_perf, y_feats = y[:2], y[2:]

    perf = np.sqrt(np.sum((x_perf - y_perf) ** 2))

    n_feats = len(x_feats)
    if n_feats == 0:
        # Nothing to scale by; fall back to the performance distance alone.
        return perf

    feats = (n_feats - np.sum((x_feats - y_feats) ** 2)) / n_feats
    if feats <= 0:
        # No usable similarity: treat the pair as maximally distant.
        return np.inf

    return perf / feats
    

In [None]:
# mydist divides a Euclidean distance by a similarity score, so it does not
# satisfy the triangle inequality. Tree-based indexes (ball_tree / kd_tree)
# rely on that property to prune branches and can silently miss true
# neighbours with such a function; brute force compares the query against
# every row and is correct for any callable.
model_knn = NearestNeighbors(n_neighbors=5, algorithm='brute', metric=mydist)
model_knn.fit(X)

In [None]:
# Specification of the car to find neighbours for.
# Performance values (same columns as df['hp'] / df['torque']).
HP, TORQUE = 241, 273
# 0/1 flags, using the same encoding as the derived columns of df.
TURBO, SUPER = 1, 0
TWO_DOOR = 0
MAN, AUTO = 1, 1
FWD, RWD, AWD = 1, 0, 0

In [None]:
# Single query row, laid out in the same column order that was used to build X.
query_point = [[HP, TORQUE, TURBO, SUPER, TWO_DOOR, MAN, AUTO, FWD, RWD, AWD]]
# Only the neighbour indices are needed; take those for the one query row.
similar_cars = model_knn.kneighbors(query_point, return_distance=False)[0]

In [None]:
# Display the neighbour indices returned by kneighbors (row positions in X / df).
similar_cars

In [None]:
# Print the full df row of every neighbour, each followed by two blank lines.
for row_pos in similar_cars:
    print(df.iloc[row_pos], end='\n\n\n')

In [None]:
# Display the row at position 2 of df.
# NOTE(review): hard-coded position; looks like an ad-hoc spot check — confirm it is still needed.
df.iloc[2]

In [None]:
# Release both database resources opened by this notebook: the raw psycopg2
# connection and the connection pool held by the SQLAlchemy engine.
conn.close()
engine.dispose()