# Recommender System

In [24]:
# import libraries

import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

In [3]:
df = pd.read_csv('data/df_model.csv')
df.head()

Unnamed: 0,price,year,manufacturer,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
0,15000.0,2013.0,ford,excellent,6 cylinders,gas,128000.0,clean,automatic,rwd,full-size,truck,black,al
1,4500.0,1992.0,jeep,excellent,6 cylinders,gas,192000.0,clean,automatic,4wd,full-size,sedan,white,al
2,14000.0,2012.0,honda,excellent,6 cylinders,gas,95000.0,clean,automatic,fwd,full-size,mini-van,silver,al
3,15000.0,2017.0,dodge,excellent,8 cylinders,gas,90000.0,rebuilt,automatic,rwd,mid-size,sedan,grey,al
4,3000.0,2004.0,chrysler,good,6 cylinders,gas,176144.0,clean,automatic,fwd,mid-size,mini-van,silver,al


In [3]:
df.isnull().sum()

price           0
year            0
manufacturer    0
condition       0
cylinders       0
fuel            0
odometer        0
title_status    0
transmission    0
drive           0
size            0
type            0
paint_color     0
state           0
dtype: int64

In [14]:
# pivot = pd.pivot_table(df, index='manufacturer', columns=df.index, values='price')

# pivot.head()

In [5]:
# sparse_pivot = sparse.csr_matrix(pivot.fillna(0))

In [6]:
# recommeder = cosine_similarity(sparse_pivot)

In [15]:
# recommender_df = pd.DataFrame(recommeder, columns=pivot.index, index=pivot.index)

# recommender_df.head()

In [8]:
# def manufacturer_recommender(manufacturer):
#     return 1- recommender_df[manufacturer].sort_values()[1:6]

In [16]:
# manufacturer_recommender('acura')

### Search by brand base on given features

In [71]:
def brand_recommender(manufacturer,year,car_type,price_range):
    data = df.loc[(df['year']==year) & (df['type']==car_type)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
    
    indices = pd.Series(data.index, index=data['manufacturer'])
    
    # Converting the car manufacturer into vectors unigrams and bigrams
    cvec = CountVectorizer(ngram_range=(1, 2), stop_words='english')
    cvec_matrix = cvec.fit_transform(data['manufacturer'])
    
    # Apply Cosine Similarity
    cs = cosine_similarity(cvec_matrix)
    
    # Sort by manufacturer and get the top 6 car with highest similiraty score
    score = sorted(list(enumerate(cs[indices[manufacturer]])), reverse=True)[0:6]

    # car indicies
    car_indices = [i[0] for i in score]
   
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status',
                'transmission','paint_color']].iloc[car_indices]
    return car_recommend

In [72]:
brand_recommender('acura',2007,'sedan',(5000, 10000))

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
57,8900.0,lincoln,sedan,2007.0,excellent,gas,clean,automatic,silver
56,5999.0,chevrolet,sedan,2007.0,good,gas,clean,automatic,grey
55,7000.0,ford,sedan,2007.0,like new,gas,clean,automatic,silver
54,5000.0,mercedes-benz,sedan,2007.0,excellent,gas,clean,automatic,black
53,6900.0,honda,sedan,2007.0,excellent,gas,clean,automatic,silver
52,9488.0,ford,sedan,2007.0,excellent,gas,clean,automatic,white


### Search by type base on given features

In [63]:
def type_recommender(manufacturer,year,car_type,price_range):
    data = df.loc[(df['year']==year) & (df['manufacturer']==manufacturer)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
    
    indices = pd.Series(data.index, index=data['type'])
    
    # Converting the car manufacturer into vectors unigrams and bigrams
    cvec = CountVectorizer(ngram_range=(1, 2), stop_words='english')
    cvec_matrix = cvec.fit_transform(data['type'])
    
    # Apply Cosine Similarity
    cs = cosine_similarity(cvec_matrix)
    
    # Sort by manufacturer and get the top 6 car with highest similiraty score
    score = sorted(list(enumerate(cs[indices[car_type]])), reverse=True)[0:6]

    # car indicies
    car_indices = [i[0] for i in score]
   
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status',
                'transmission','paint_color']].iloc[car_indices]
    return car_recommend

In [65]:
type_recommender('acura',2007,'sedan',(5000, 10000))

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
57,6499.0,acura,sedan,2007.0,excellent,gas,clean,automatic,grey
56,5750.0,acura,sedan,2007.0,excellent,gas,clean,automatic,silver
55,8000.0,acura,sedan,2007.0,good,gas,clean,automatic,grey
54,5500.0,acura,sedan,2007.0,good,gas,clean,automatic,grey
53,6995.0,acura,SUV,2007.0,excellent,gas,clean,automatic,red
52,7995.0,acura,SUV,2007.0,good,gas,clean,automatic,silver


### Search by price base on given features

In [68]:
def price_recommender(price_range):
    data = df.loc[(df['price']>=price_range[0]) & (df['price']<=price_range[1])]
    data.reset_index(level =0, inplace = True)
   
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status',
                'transmission','paint_color']].iloc[:6,:]
    return car_recommend

### Search with all features

In [67]:
def all_recommender(manufacturer,year,car_type,price_range):
    data = df.loc[(df['year']==year) & (df['type']==car_type) &(df['manufacturer']==manufacturer)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
   
    # Top 6 car recommendations
    car_recommend = data[['price','manufacturer','type','year','condition','fuel','title_status'
                ,'transmission','paint_color']].iloc[:6,:]
    return car_recommend

In [69]:
price_recommender((5000,10000))

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
0,9500.0,chrysler,mini-van,2003.0,excellent,gas,clean,automatic,blue
1,6000.0,mercedes-benz,sedan,2007.0,good,diesel,clean,automatic,blue
2,6995.0,volkswagen,sedan,2011.0,excellent,gas,clean,automatic,black
3,5900.0,ford,truck,1998.0,excellent,gas,clean,automatic,red
4,6500.0,ford,sedan,2002.0,good,gas,clean,automatic,white
5,6900.0,honda,sedan,2009.0,fair,gas,clean,automatic,white


In [70]:
all_recommender('acura',2007,'sedan',(5000, 10000))

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
0,5500.0,acura,sedan,2007.0,excellent,gas,rebuilt,automatic,white
1,8450.0,acura,sedan,2007.0,good,gas,clean,automatic,grey
2,6500.0,acura,sedan,2007.0,good,gas,clean,automatic,silver
3,5900.0,acura,sedan,2007.0,excellent,gas,clean,automatic,blue
4,7000.0,acura,sedan,2007.0,excellent,gas,clean,automatic,grey
5,7500.0,acura,sedan,2007.0,excellent,gas,clean,automatic,black
