# Recommender System

In [1]:
# import libraries

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

In [2]:
df = pd.read_csv('data/df_model.csv')
df.head()

Unnamed: 0,price,year,manufacturer,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
0,15000.0,2013.0,ford,excellent,6 cylinders,gas,128000.0,clean,automatic,rwd,full-size,truck,black,al
1,4500.0,1992.0,jeep,excellent,6 cylinders,gas,192000.0,clean,automatic,4wd,full-size,sedan,white,al
2,14000.0,2012.0,honda,excellent,6 cylinders,gas,95000.0,clean,automatic,fwd,full-size,mini-van,silver,al
3,15000.0,2017.0,dodge,excellent,8 cylinders,gas,90000.0,rebuilt,automatic,rwd,mid-size,sedan,grey,al
4,3000.0,2004.0,chrysler,good,6 cylinders,gas,176144.0,clean,automatic,fwd,mid-size,mini-van,silver,al


In [3]:
df.isnull().sum()

price           0
year            0
manufacturer    0
condition       0
cylinders       0
fuel            0
odometer        0
title_status    0
transmission    0
drive           0
size            0
type            0
paint_color     0
state           0
dtype: int64

In [None]:
df['']

### Search by brand base on given features

In [26]:
def brand_recommender(manufacturer,price_range,n=6):
    data = df.loc[(df['manufacturer']==manufacturer)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
    
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status',
                'transmission','paint_color']].sample(n, ignore_index=True, random_state=42)
    return car_recommend.sort_values(by ='price', ignore_index=True)

In [27]:
brand_recommender('toyota',(10000, 20000),7)

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
0,10995.0,toyota,SUV,2008.0,excellent,gas,clean,automatic,white
1,12987.0,toyota,sedan,2012.0,excellent,gas,clean,automatic,white
2,14998.0,toyota,SUV,2012.0,excellent,gas,clean,automatic,white
3,14999.0,toyota,SUV,2013.0,excellent,gas,clean,automatic,white
4,16999.0,toyota,truck,2010.0,good,gas,clean,manual,silver
5,17995.0,toyota,sedan,2014.0,like new,gas,clean,automatic,red
6,19500.0,toyota,SUV,2013.0,excellent,gas,clean,automatic,white


In [15]:
df[df['manufacturer']=='aston-martin']

Unnamed: 0,price,year,manufacturer,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
128973,1947.0,2017.0,aston-martin,excellent,12 cylinders,gas,10157.0,clean,automatic,rwd,full-size,coupe,white,wa
149586,40000.0,2005.0,aston-martin,like new,12 cylinders,gas,11000.0,clean,automatic,rwd,full-size,coupe,white,ca
162543,32900.0,1997.0,aston-martin,excellent,6 cylinders,gas,34575.0,clean,automatic,rwd,mid-size,convertible,black,fl
164205,42995.0,2006.0,aston-martin,like new,12 cylinders,gas,39648.0,clean,automatic,rwd,full-size,convertible,silver,fl
166731,42500.0,2008.0,aston-martin,excellent,6 cylinders,gas,35146.0,clean,automatic,rwd,full-size,convertible,black,fl
228611,37000.0,2005.0,aston-martin,good,6 cylinders,gas,31000.0,clean,automatic,4wd,full-size,SUV,white,sc
252215,179888.0,2018.0,aston-martin,like new,12 cylinders,gas,7800.0,clean,automatic,4wd,full-size,truck,white,ca
252668,70000.0,2014.0,aston-martin,excellent,8 cylinders,gas,22800.0,clean,automatic,rwd,full-size,truck,white,co
252796,52000.0,2010.0,aston-martin,like new,12 cylinders,gas,29970.0,clean,automatic,rwd,full-size,sedan,grey,ct
253035,68950.0,2011.0,aston-martin,new,8 cylinders,gas,8222.0,clean,automatic,rwd,compact,coupe,blue,fl


### Search by type base on given features

In [42]:
data = df.loc[(df['year']==2010) & (df['manufacturer']=='acura')
                 & ((df['price']>=5000) & (df['price']<=10000))]
data.reset_index(level =0, inplace = True)
data

Unnamed: 0,index,price,year,manufacturer,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
0,853,6877.0,2010.0,acura,excellent,4 cylinders,gas,159000.0,rebuilt,automatic,fwd,full-size,sedan,black,al
1,3226,9999.0,2010.0,acura,excellent,6 cylinders,gas,204934.0,clean,automatic,4wd,full-size,SUV,grey,az
2,5205,8400.0,2010.0,acura,good,4 cylinders,gas,145531.0,clean,automatic,fwd,full-size,wagon,custom,ar
3,11525,9995.0,2010.0,acura,excellent,6 cylinders,gas,161080.0,clean,automatic,fwd,full-size,sedan,black,ca
4,12018,9995.0,2010.0,acura,excellent,4 cylinders,gas,153043.0,clean,automatic,fwd,full-size,sedan,white,ca
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,128403,9995.0,2010.0,acura,excellent,4 cylinders,gas,96937.0,clean,automatic,fwd,full-size,sedan,white,va
59,128824,9000.0,2010.0,acura,excellent,6 cylinders,gas,81320.0,clean,automatic,4wd,full-size,coupe,black,va
60,131672,7000.0,2010.0,acura,good,4 cylinders,gas,139720.0,rebuilt,automatic,4wd,full-size,sedan,silver,wa
61,135260,9999.0,2010.0,acura,excellent,4 cylinders,gas,154730.0,clean,automatic,fwd,full-size,sedan,white,wi


In [39]:
def type_recommender(manufacturer,year,car_type,price_range):
    data = df.loc[(df['year']==year) & (df['manufacturer']==manufacturer)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
    
    indices = pd.Series(data.index, index=data['type'])
    
    # Converting the car manufacturer into vectors unigrams and bigrams
    # cvec = CountVectorizer(token_pattern=None)
    # cvec_matrix = cvec.fit_transform(data['type'])
    
    
    # Apply Cosine Similarity
    cs = cosine_similarity(cvec_matrix)
    
    # Sort by manufacturer and get the top 6 car with highest similiraty score
    score = sorted(list(enumerate(cs[indices[car_type]])), reverse=True)[0:6]

    # car indicies
    
    car_indices = [i[0] for i in score]
   
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status',
                'transmission','paint_color']].iloc[car_indices]
    return car_recommend

In [40]:
type_recommender('acura',2007,'sedan',(5000, 10000))

TypeError: first argument must be string or compiled pattern

### Search by price base on given features

In [68]:
def price_recommender(price_range):
    data = df.loc[(df['price']>=price_range[0]) & (df['price']<=price_range[1])]
    data.reset_index(level =0, inplace = True)
   
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status',
                'transmission','paint_color']].iloc[:6,:]
    return car_recommend

### Search with all features

In [67]:
def all_recommender(manufacturer,year,car_type,price_range):
    data = df.loc[(df['year']==year) & (df['type']==car_type) &(df['manufacturer']==manufacturer)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
   
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status'
                ,'transmission','paint_color']].iloc[:6,:]
    return car_recommend

In [69]:
price_recommender((5000,10000))

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
0,9500.0,chrysler,mini-van,2003.0,excellent,gas,clean,automatic,blue
1,6000.0,mercedes-benz,sedan,2007.0,good,diesel,clean,automatic,blue
2,6995.0,volkswagen,sedan,2011.0,excellent,gas,clean,automatic,black
3,5900.0,ford,truck,1998.0,excellent,gas,clean,automatic,red
4,6500.0,ford,sedan,2002.0,good,gas,clean,automatic,white
5,6900.0,honda,sedan,2009.0,fair,gas,clean,automatic,white


In [70]:
all_recommender('acura',2007,'sedan',(5000, 10000))

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
0,5500.0,acura,sedan,2007.0,excellent,gas,rebuilt,automatic,white
1,8450.0,acura,sedan,2007.0,good,gas,clean,automatic,grey
2,6500.0,acura,sedan,2007.0,good,gas,clean,automatic,silver
3,5900.0,acura,sedan,2007.0,excellent,gas,clean,automatic,blue
4,7000.0,acura,sedan,2007.0,excellent,gas,clean,automatic,grey
5,7500.0,acura,sedan,2007.0,excellent,gas,clean,automatic,black


In [49]:
# start new recommender

In [3]:
manufacturer_data = df[['manufacturer','price']]

In [4]:
manufacturer_data = manufacturer_data.loc[manufacturer_data['price']<10000]

In [5]:
manufacturer_data.head()

Unnamed: 0,manufacturer,price
1,jeep,4500.0
4,chrysler,3000.0
5,subaru,2100.0
7,chrysler,9500.0
8,mercedes-benz,6000.0


In [5]:
ohe = OneHotEncoder(drop='first')

In [6]:
ohe.fit(manufacturer_data[['manufacturer']])

In [7]:
ohe_data = ohe.transform(manufacturer_data[['manufacturer']])

In [8]:
ohe_df = pd.DataFrame(ohe_data.todense(), columns=ohe.get_feature_names_out())
ohe_df.head()

Unnamed: 0,manufacturer_alfa-romeo,manufacturer_aston-martin,manufacturer_audi,manufacturer_bmw,manufacturer_buick,manufacturer_cadillac,manufacturer_chevrolet,manufacturer_chrysler,manufacturer_datsun,manufacturer_dodge,...,manufacturer_nissan,manufacturer_pontiac,manufacturer_porsche,manufacturer_ram,manufacturer_saturn,manufacturer_subaru,manufacturer_tesla,manufacturer_toyota,manufacturer_volkswagen,manufacturer_volvo
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
df_trans = pd.merge(manufacturer_data[['price']], 
                    ohe_df, left_index=True, right_index=True)
df_trans.head(3)

Unnamed: 0,price,manufacturer_alfa-romeo,manufacturer_aston-martin,manufacturer_audi,manufacturer_bmw,manufacturer_buick,manufacturer_cadillac,manufacturer_chevrolet,manufacturer_chrysler,manufacturer_datsun,...,manufacturer_nissan,manufacturer_pontiac,manufacturer_porsche,manufacturer_ram,manufacturer_saturn,manufacturer_subaru,manufacturer_tesla,manufacturer_toyota,manufacturer_volkswagen,manufacturer_volvo
1,4500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
df_trans.shape

(70248, 41)

In [11]:
sparse_pivot = sparse.csr_matrix(df_trans)

In [None]:
recommeder = cosine_similarity(sparse_pivot, dense_output=False)