# Recommender System

In [1]:
# import libraries

import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
df = pd.read_csv('data/df_model.csv')
df.head()

Unnamed: 0,price,year,manufacturer,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
0,15000.0,2013.0,ford,excellent,6 cylinders,gas,128000.0,clean,automatic,rwd,full-size,truck,black,al
1,4500.0,1992.0,jeep,excellent,6 cylinders,gas,192000.0,clean,automatic,4wd,full-size,sedan,white,al
2,14000.0,2012.0,honda,excellent,6 cylinders,gas,95000.0,clean,automatic,fwd,full-size,mini-van,silver,al
3,15000.0,2017.0,dodge,excellent,8 cylinders,gas,90000.0,rebuilt,automatic,rwd,mid-size,sedan,grey,al
4,3000.0,2004.0,chrysler,good,6 cylinders,gas,176144.0,clean,automatic,fwd,mid-size,mini-van,silver,al


In [3]:
df.isnull().sum()

price           0
year            0
manufacturer    0
condition       0
cylinders       0
fuel            0
odometer        0
title_status    0
transmission    0
drive           0
size            0
type            0
paint_color     0
state           0
dtype: int64

In [4]:
pivot = pd.pivot_table(df, index='manufacturer', columns=df.index, values='price')

pivot.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,266755,266756,266757,266758,266759,266760,266761,266762,266763,266764
manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
acura,,,,,,,,,,,...,,,,,,,,,,
alfa-romeo,,,,,,,,,,,...,,,,,,,,,,
aston-martin,,,,,,,,,,,...,,,,,,,,,,
audi,,,,,,,,,,,...,,,,,,,,,,
bmw,,,,,,,,,,,...,,,,,,,,,,


In [5]:
sparse_pivot = sparse.csr_matrix(pivot.fillna(0))

In [6]:
recommeder = cosine_similarity(sparse_pivot)

In [7]:
recommender_df = pd.DataFrame(recommeder, columns=pivot.index, index=pivot.index)

recommender_df.head()

manufacturer,acura,alfa-romeo,aston-martin,audi,bmw,buick,cadillac,chevrolet,chrysler,datsun,...,pontiac,porsche,ram,rover,saturn,subaru,tesla,toyota,volkswagen,volvo
manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
acura,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
alfa-romeo,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aston-martin,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
audi,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
bmw,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
def manufacturer_recommender(manufacturer):
    return 1- recommender_df[manufacturer].sort_values()[1:6]

In [9]:
manufacturer_recommender('acura')

manufacturer
lexus            1.0
lincoln          1.0
mazda            1.0
mercedes-benz    1.0
mercury          1.0
Name: acura, dtype: float64

In [22]:
def recommender(manufacturer,year,car_type,price_range):
    data = df.loc[(df['year']==year) & (df['type']==car_type)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
    
    indices = pd.Series(data.index, index=data['manufacturer'])
    
     #Converting the car manufacturer country into vectors and used unigram
    tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 1), min_df = 1, stop_words='english')
    tfidf_matrix = tf.fit_transform(data['manufacturer'])
    
    # Calculating the similarity measures based on Cosine Similarity
    sg = cosine_similarity(tfidf_matrix)
    
    # Get the index corresponding to original_manufacturer
    idx = indices[manufacturer]
    # Get the pairwsie similarity scores 
    sig = list(enumerate(sg[idx]))
    # Sort the cars
    sig = sorted(sig, reverse=True)
    # Scores of the 6 most similar cars 
    sig = sig[0:6]
    # car indicies
    car_indices = [i[0] for i in sig]
   
    # Top 6 car recommendations
    rec = data[['price','manufacturer','type','year','condition','fuel','title_status'
                ,'transmission','paint_color']].iloc[car_indices]
    return rec

In [31]:
recommender('acura',2007,'sedan',(5000, 10000))

Unnamed: 0,price,manufacturer,type,year,condition,fuel,title_status,transmission,paint_color
0,5500.0,acura,sedan,2007.0,excellent,gas,rebuilt,automatic,white
1,8450.0,acura,sedan,2007.0,good,gas,clean,automatic,grey
2,6500.0,acura,sedan,2007.0,good,gas,clean,automatic,silver
3,5900.0,acura,sedan,2007.0,excellent,gas,clean,automatic,blue
4,7000.0,acura,sedan,2007.0,excellent,gas,clean,automatic,grey
5,7500.0,acura,sedan,2007.0,excellent,gas,clean,automatic,black


In [30]:
def recommender(manufacturer,year,car_type,price_range):
    data = df.loc[(df['year']==year) & (df['type']==car_type) &(df['manufacturer']==manufacturer)
                 & ((df['price']>=price_range[0]) & (df['price']<=price_range[1]))]
    data.reset_index(level =0, inplace = True)
   
    # Top 6 car recommendations
    rec = data[['price','manufacturer','type','year','condition','fuel','title_status'
                ,'transmission','paint_color']].iloc[:6,:]
    return rec

In [41]:
def price_recommender(price_range):
    data = df.loc[(df['price']>=price_range[0]) & (df['price']<=price_range[1])]
    data.reset_index(level =0, inplace = True)
   
    # Top 6 car recommendations
    print("Result")
    print("manufacturer" data[['manufacturer']].iloc[:6,:])
    print(data[['year']].iloc[:6,:])
    #rec = data[['price','manufacturer','type','year','condition','fuel','title_status'
                #,'transmission','paint_color']].iloc[:6,:]
    #return rec

In [42]:
price_recommender((5000,10000))

    manufacturer
0       chrysler
1  mercedes-benz
2     volkswagen
3           ford
4           ford
5          honda
     year
0  2003.0
1  2007.0
2  2011.0
3  1998.0
4  2002.0
5  2009.0


In [7]:
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 1), min_df = 1, stop_words='english')
#tfidf_matrix = tf.fit_transform(df['manufacturer'])

In [8]:
tf

In [28]:
print(tfidf_matrix)

  (0, 15)	1.0
  (1, 22)	1.0
  (2, 18)	1.0
  (3, 12)	1.0
  (4, 9)	1.0
  (5, 41)	1.0
  (6, 18)	1.0
  (7, 45)	1.0
  (8, 9)	1.0
  (9, 4)	0.7071067811865476
  (9, 29)	0.7071067811865476
  (10, 22)	1.0
  (11, 5)	1.0
  (12, 34)	1.0
  (13, 43)	1.0
  (14, 5)	1.0
  (15, 44)	1.0
  (16, 15)	1.0
  (17, 18)	1.0
  (18, 28)	1.0
  (19, 28)	1.0
  (20, 28)	1.0
  (21, 25)	1.0
  (22, 25)	1.0
  (23, 43)	1.0
  :	:
  (266742, 37)	1.0
  (266743, 8)	1.0
  (266744, 8)	1.0
  (266745, 16)	1.0
  (266746, 16)	1.0
  (266747, 16)	1.0
  (266748, 16)	1.0
  (266749, 8)	1.0
  (266750, 12)	1.0
  (266751, 15)	1.0
  (266752, 16)	1.0
  (266753, 8)	1.0
  (266754, 15)	1.0
  (266755, 4)	0.7071067811865476
  (266755, 29)	0.7071067811865476
  (266756, 37)	1.0
  (266757, 15)	1.0
  (266758, 15)	1.0
  (266759, 15)	1.0
  (266760, 15)	1.0
  (266761, 4)	0.7071067811865476
  (266761, 29)	0.7071067811865476
  (266762, 15)	1.0
  (266763, 16)	1.0
  (266764, 7)	1.0
