# Vehicle Rental System | content Based Recommendation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
vehicle = pd.read_csv('Data/CarRentalDataV1.csv', sep=",")

In [3]:
vehicle.head(5)

Unnamed: 0,fuelType,rating,renterTripsTaken,reviewCount,location.city,location.country,location.latitude,location.longitude,location.state,owner.id,rate.daily,vehicle.make,vehicle.model,vehicle.type,vehicle.year,airportcity
0,ELECTRIC,5.0,13.0,12.0,Seattle,US,47.449107,-122.308841,WA,12847615.0,135.0,Tesla,Model X,suv,2019.0,Albuquerque
1,ELECTRIC,5.0,2.0,1.0,Tijeras,US,35.11106,-106.276551,NM,15621242.0,190.0,Tesla,Model X,suv,2018.0,Albuquerque
2,HYBRID,4.92,28.0,24.0,Albuquerque,US,35.127163,-106.566681,NM,10199256.0,35.0,Toyota,Prius,car,2012.0,Albuquerque
3,GASOLINE,5.0,21.0,20.0,Albuquerque,US,35.149726,-106.711425,NM,9365496.0,75.0,Ford,Mustang,car,2018.0,Albuquerque
4,GASOLINE,5.0,3.0,1.0,Albuquerque,US,35.208659,-106.601008,NM,3553565.0,47.0,Chrysler,Sebring,car,2010.0,Albuquerque


In [4]:
vehicle.shape

(5851, 16)

In [5]:
vehicle = vehicle[['fuelType', 'location.city', 'location.state', 'owner.id','vehicle.make', 'vehicle.model', 'vehicle.type','airportcity']]

In [6]:
vehicle.rename(columns={
    "fuelType": "Fuel",
    "location.city": "city",
    "location.state": "state",
    "owner.id": "ownerid",
    "vehicle.make": "Brand",
    "vehicle.model": "model",
    "vehicle.type": "type",
    "airportcity":"airport"
}, inplace=True)

In [7]:
vehicle.head(2)

Unnamed: 0,Fuel,city,state,ownerid,Brand,model,type,airport
0,ELECTRIC,Seattle,WA,12847615.0,Tesla,Model X,suv,Albuquerque
1,ELECTRIC,Tijeras,NM,15621242.0,Tesla,Model X,suv,Albuquerque


In [8]:
vehicle.isnull().sum()

Fuel       75
city        0
state       0
ownerid     0
Brand       0
model       0
type        0
airport     0
dtype: int64

In [9]:
vehicle.dropna(inplace=True)

In [10]:
vehicle.isnull().sum()

Fuel       0
city       0
state      0
ownerid    0
Brand      0
model      0
type       0
airport    0
dtype: int64

In [11]:
vehicle.duplicated().sum()

554

In [12]:
vehicle['Fuel'] = vehicle['Fuel'].apply(lambda x:x.split())
vehicle['state'] = vehicle['state'].apply(lambda x:x.split())
vehicle['model'] = vehicle['model'].apply(lambda x:x.split())
vehicle['type'] = vehicle['type'].apply(lambda x:x.split())
vehicle['airport'] = vehicle['airport'].apply(lambda x:x.split())
vehicle.head()

Unnamed: 0,Fuel,city,state,ownerid,Brand,model,type,airport
0,[ELECTRIC],Seattle,[WA],12847615.0,Tesla,"[Model, X]",[suv],[Albuquerque]
1,[ELECTRIC],Tijeras,[NM],15621242.0,Tesla,"[Model, X]",[suv],[Albuquerque]
2,[HYBRID],Albuquerque,[NM],10199256.0,Toyota,[Prius],[car],[Albuquerque]
3,[GASOLINE],Albuquerque,[NM],9365496.0,Ford,[Mustang],[car],[Albuquerque]
4,[GASOLINE],Albuquerque,[NM],3553565.0,Chrysler,[Sebring],[car],[Albuquerque]


In [13]:
vehicle['tags'] = vehicle['Fuel']+vehicle['state']+vehicle['model']+vehicle['type']+vehicle['airport']

In [14]:
vehicle.iloc[0]['tags']

['ELECTRIC', 'WA', 'Model', 'X', 'suv', 'Albuquerque']

In [15]:
new_df = vehicle[['city','ownerid','Brand','tags']]

In [16]:
new_df.head(5)

Unnamed: 0,city,ownerid,Brand,tags
0,Seattle,12847615.0,Tesla,"[ELECTRIC, WA, Model, X, suv, Albuquerque]"
1,Tijeras,15621242.0,Tesla,"[ELECTRIC, NM, Model, X, suv, Albuquerque]"
2,Albuquerque,10199256.0,Toyota,"[HYBRID, NM, Prius, car, Albuquerque]"
3,Albuquerque,9365496.0,Ford,"[GASOLINE, NM, Mustang, car, Albuquerque]"
4,Albuquerque,3553565.0,Chrysler,"[GASOLINE, NM, Sebring, car, Albuquerque]"


In [17]:
new_df['tags'] = new_df['tags'].apply(lambda x:" ".join(x))
new_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x:" ".join(x))


Unnamed: 0,city,ownerid,Brand,tags
0,Seattle,12847615.0,Tesla,ELECTRIC WA Model X suv Albuquerque
1,Tijeras,15621242.0,Tesla,ELECTRIC NM Model X suv Albuquerque
2,Albuquerque,10199256.0,Toyota,HYBRID NM Prius car Albuquerque
3,Albuquerque,9365496.0,Ford,GASOLINE NM Mustang car Albuquerque
4,Albuquerque,3553565.0,Chrysler,GASOLINE NM Sebring car Albuquerque


In [18]:
new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())
new_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())


Unnamed: 0,city,ownerid,Brand,tags
0,Seattle,12847615.0,Tesla,electric wa model x suv albuquerque
1,Tijeras,15621242.0,Tesla,electric nm model x suv albuquerque
2,Albuquerque,10199256.0,Toyota,hybrid nm prius car albuquerque
3,Albuquerque,9365496.0,Ford,gasoline nm mustang car albuquerque
4,Albuquerque,3553565.0,Chrysler,gasoline nm sebring car albuquerque


In [19]:
new_df.shape

(5776, 4)

In [20]:
import nltk
from nltk.stem import PorterStemmer  

In [21]:
ps = PorterStemmer()

In [22]:
def stems(text):
    l = []
    for i in text.split():
        l.append(ps.stem(i))
        
    return " ".join(l) 

In [23]:
new_df.iloc[0]['tags']

'electric wa model x suv albuquerque'

In [24]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000)

In [25]:
vector = cv.fit_transform(new_df['tags']).toarray()

In [26]:
vector

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [27]:
vector.shape

(5776, 613)

In [28]:
from sklearn.metrics.pairwise import cosine_similarity

similary = cosine_similarity(vector)

In [29]:
similary

array([[1. , 0.8, 0.2, ..., 0. , 0. , 0. ],
       [0.8, 1. , 0.4, ..., 0. , 0. , 0. ],
       [0.2, 0.4, 1. , ..., 0.2, 0. , 0.2],
       ...,
       [0. , 0. , 0.2, ..., 1. , 0.6, 1. ],
       [0. , 0. , 0. , ..., 0.6, 1. , 0.6],
       [0. , 0. , 0.2, ..., 1. , 0.6, 1. ]])

In [30]:
similary.shape

(5776, 5776)

In [31]:
new_df[new_df['city'] == 'Phoenix'].index[0]

3716

In [32]:
def recommend(vehicle):
    index = new_df[new_df['city'] == vehicle].index[0]
    distances = sorted(list(enumerate(similary[index])), reverse=True, key=lambda x: x[1])
    recommended_owners = []
    recommended_models = []
    for i in distances[1:6]:
        recommended_models.append(new_df.iloc[i[0]].Brand)
        recommended_owners.append(new_df.iloc[i[0]].ownerid)
    print("Recommended owners for city {}: {}".format(vehicle, recommended_owners))
    print("Recommended vehicle models for city {}: {}".format(vehicle, recommended_models))
    print("Recommended vehicles for city {}: {}".format(vehicle, ', '.join(['{}={}'.format(owner, model) for owner, model in zip(recommended_owners, recommended_models)])))

recommend('Phoenix')

Recommended owners for city Phoenix: [3410675.0, 1143331.0, 3410675.0, 950349.0, 4009047.0]
Recommended vehicle models for city Phoenix: ['Nissan', 'Nissan', 'Nissan', 'Mazda', 'Chevrolet']
Recommended vehicles for city Phoenix: 3410675.0=Nissan, 1143331.0=Nissan, 3410675.0=Nissan, 950349.0=Mazda, 4009047.0=Chevrolet


In [33]:
import pickle
pickle.dump(new_df, open('artificats/vehicle_list.pkl', 'wb'))
pickle.dump(similary, open('artificats/similarity.pkl', 'wb'))