In [None]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

## Dependencies

In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pymongo
from sklearn.preprocessing import LabelEncoder, StandardScaler, MultiLabelBinarizer
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from config import mongo_conn

## Load Perfume Data

In [2]:
# Initialize PyMongo to work with MongoDBs
client = pymongo.MongoClient(mongo_conn)
db = client.perfume_db
perfume_df = pd.DataFrame(list(db.perfume_data.find()))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,middle notes,base notes,longevity,sillage,gender_vote,price value
0,603be3a941af6364a33646a1,Versace Pour Homme,Versace,https://fimgs.net/mdimg/perfume/375x500.2318.jpg,for men,4.29,8242,"{'citrus': 100, 'aromatic': 69.834, 'fresh spi...",Versace Pour Homme by Versace is a Aromatic Fo...,"[Lemon, Neroli, Bergamot, Rose de Mai]","[Hyacinth, Cedar, Clary Sage, Geranium]","[Tonka Bean, Musk, Amber]","{'very weak': 208, 'weak': 373, 'moderate': 19...","{'intimate': 595, 'moderate': 2339, 'strong': ...","{'female': 3, 'more female': 0, 'unisex': 40, ...","{'way overpriced': 3, 'overpriced': 28, 'ok': ..."
1,603be3a941af6364a33646a2,Chloe Eau de Parfum,Chloé,https://fimgs.net/mdimg/perfume/375x500.1733.jpg,for women,3.95,13349,"{'floral': 100, 'rose': 89.3694, 'fresh': 69.7...",Chloe Eau de Parfum by Chloé is a Floral fragr...,"[Peony, Litchi, Freesia]","[Rose, Lily-of-the-Valley, Magnolia]","[Virginia Cedar, Amber]","{'very weak': 141, 'weak': 195, 'moderate': 91...","{'intimate': 401, 'moderate': 1592, 'strong': ...","{'female': 335, 'more female': 27, 'unisex': 1...","{'way overpriced': 25, 'overpriced': 73, 'ok':..."
2,603be3a941af6364a33646a3,1 Million,Paco Rabanne,https://fimgs.net/mdimg/perfume/375x500.3747.jpg,for men,3.7,11537,"{'warm spicy': 100, 'cinnamon': 84.2521, 'citr...",1 Million by Paco Rabanne is a Woody Spicy fra...,"[Blood Mandarin, Grapefruit, Mint]","[Cinnamon, Rose, Spicy Notes]","[Amber, Leather, Woody Notes, Indian Patchouli]","{'very weak': 110, 'weak': 134, 'moderate': 55...","{'intimate': 192, 'moderate': 670, 'strong': 1...","{'female': 7, 'more female': 4, 'unisex': 79, ...","{'way overpriced': 11, 'overpriced': 50, 'ok':..."
3,603be3a941af6364a33646a4,The One for Men,Dolce&Gabbana,https://fimgs.net/mdimg/perfume/375x500.2056.jpg,for men,4.27,8750,"{'warm spicy': 100, 'amber': 95.4969, 'tobacco...",The One for Men by Dolce&Gabbana is a Woody Sp...,"[Grapefruit, Coriander, Basil]","[Ginger, Cardamom, Orange Blossom]","[Amber, Tobacco, Cedar]","{'very weak': 307, 'weak': 591, 'moderate': 17...","{'intimate': 1311, 'moderate': 1683, 'strong':...","{'female': 2, 'more female': 1, 'unisex': 23, ...","{'way overpriced': 6, 'overpriced': 49, 'ok': ..."
4,603be3a941af6364a33646a5,Jubilation XXV Man,Amouage,https://fimgs.net/mdimg/perfume/375x500.2366.jpg,for men,4.46,3792,"{'amber': 100, 'sweet': 79.1774, 'warm spicy':...",Jubilation XXV Man by Amouage is a Oriental Fo...,"[Blackberry, Olibanum, Orange, Labdanum, Coria...","[Honey, Guaiac Wood, Cinnamon, Bay Leaf, Clove...","[Opoponax, Agarwood (Oud), Myrrh, Patchouli, C...","{'very weak': 84, 'weak': 90, 'moderate': 344,...","{'intimate': 233, 'moderate': 694, 'strong': 8...","{'female': 1, 'more female': 0, 'unisex': 28, ...","{'way overpriced': 27, 'overpriced': 51, 'ok':..."


## Preprocess Data

### Weighted Rating

In [3]:
# Find the weighted rating taking into consideration both mean rating and number of reviews
# Weighted Rating (WR) = (v ÷ (v+m)) × R + (m ÷ (v+m)) × C  where
# v is the number of votes 
# m is the minimum number of votes required (the prerequisite) 
# R is the mean rating
# C is the mean rating of all the perfumes in the dataset

C = perfume_df["rating"].mean()

# value of m is the number of votes garnered by the 80th percentile perfume
m = perfume_df['number_votes'].quantile(0.80)

# Function to compute the weighted rating for each perfume
def weighted_rating(x, m=m, C=C):
    v = x['number_votes']
    R = x['rating']
    
    # Compute the weighted score
    if v == 0:
        return 0
    else:
        return (v/(v+m) * R) + (m/(m+v) * C)


In [4]:
# Compute the weighted_rating using the weighted_rating function defined above
perfume_df["weighted_rating"] = perfume_df.apply(weighted_rating, axis=1)
perfume_df[["name", "rating", "number_votes", "weighted_rating"]].sort_values(by=['weighted_rating'], ascending=False)

Unnamed: 0,name,rating,number_votes,weighted_rating
45,La Nuit de l'Homme,4.44,12256,4.333332
89,Dior Homme Intense 2011,4.49,8172,4.331336
60,Aventus,4.42,10486,4.308202
91,A*Men Pure Havane,4.46,5665,4.279901
48,A*Men Pure Malt,4.46,5513,4.277386
...,...,...,...,...
34,Aura Mugler,3.63,5717,3.852857
105,Body,3.61,6296,3.831007
2,1 Million,3.70,11537,3.823913
68,Womanity,3.60,8844,3.785176


### MultiLabelBinarizer

In [5]:
# convert Notes list to multiple features columns
mlb = MultiLabelBinarizer()

# Top Notes
X_top_notes = mlb.fit_transform(perfume_df["top notes"])
column_names = ["top_note_" + note for note in mlb.classes_]
perfume_df = perfume_df.join(pd.DataFrame(X_top_notes, columns=column_names))

# Middle Notes
X_middle_notes = mlb.fit_transform(perfume_df["middle notes"])
column_names = ["middle_note_" + note for note in mlb.classes_]
perfume_df = perfume_df.join(pd.DataFrame(X_middle_notes, columns=column_names))

# Base notes
X_base_notes = mlb.fit_transform(perfume_df["base notes"])
column_names = ["base_note_" + note for note in mlb.classes_]
perfume_df = perfume_df.join(pd.DataFrame(X_base_notes, columns=column_names))

perfume_df.columns

Index(['_id', 'name', 'company', 'image', 'for_gender', 'rating',
       'number_votes', 'main accords', 'description', 'top notes',
       ...
       'base_note_Vetiver', 'base_note_Virginia Cedar', 'base_note_White Musk',
       'base_note_Woodsy Notes', 'base_note_Woody Notes', 'base_note_iris',
       'base_note_oak moss', 'base_note_resins', 'base_note_vetyver',
       'base_note_white honey'],
      dtype='object', length=433)

### DictVectorizer

In [6]:
# convert Accords dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_accords = dv.fit_transform(perfume_df["main accords"])
perfume_df = perfume_df.join(pd.DataFrame(X_accords, columns=dv.feature_names_))
perfume_df.head()


Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,tropical,tuberose,vanilla,violet,vodka,warm spicy,whiskey,white floral,woody,yellow floral
0,603be3a941af6364a33646a1,Versace Pour Homme,Versace,https://fimgs.net/mdimg/perfume/375x500.2318.jpg,for men,4.29,8242,"{'citrus': 100, 'aromatic': 69.834, 'fresh spi...",Versace Pour Homme by Versace is a Aromatic Fo...,"[Lemon, Neroli, Bergamot, Rose de Mai]",...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,603be3a941af6364a33646a2,Chloe Eau de Parfum,Chloé,https://fimgs.net/mdimg/perfume/375x500.1733.jpg,for women,3.95,13349,"{'floral': 100, 'rose': 89.3694, 'fresh': 69.7...",Chloe Eau de Parfum by Chloé is a Floral fragr...,"[Peony, Litchi, Freesia]",...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,603be3a941af6364a33646a3,1 Million,Paco Rabanne,https://fimgs.net/mdimg/perfume/375x500.3747.jpg,for men,3.7,11537,"{'warm spicy': 100, 'cinnamon': 84.2521, 'citr...",1 Million by Paco Rabanne is a Woody Spicy fra...,"[Blood Mandarin, Grapefruit, Mint]",...,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,60.7131,0.0
3,603be3a941af6364a33646a4,The One for Men,Dolce&Gabbana,https://fimgs.net/mdimg/perfume/375x500.2056.jpg,for men,4.27,8750,"{'warm spicy': 100, 'amber': 95.4969, 'tobacco...",The One for Men by Dolce&Gabbana is a Woody Sp...,"[Grapefruit, Coriander, Basil]",...,0.0,0.0,0.0,0.0,0.0,100.0,0.0,53.043,54.2538,0.0
4,603be3a941af6364a33646a5,Jubilation XXV Man,Amouage,https://fimgs.net/mdimg/perfume/375x500.2366.jpg,for men,4.46,3792,"{'amber': 100, 'sweet': 79.1774, 'warm spicy':...",Jubilation XXV Man by Amouage is a Oriental Fo...,"[Blackberry, Olibanum, Orange, Labdanum, Coria...",...,0.0,0.0,0.0,0.0,0.0,66.3056,0.0,0.0,65.2735,0.0


In [7]:
# convert longevity dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_longevity = dv.fit_transform(perfume_df["longevity"])
column_names = ["longevity_" + note for note in dv.feature_names_]
perfume_df = perfume_df.join(pd.DataFrame(X_longevity, columns=column_names))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,warm spicy,whiskey,white floral,woody,yellow floral,longevity_eternal,longevity_long lasting,longevity_moderate,longevity_very weak,longevity_weak
0,603be3a941af6364a33646a1,Versace Pour Homme,Versace,https://fimgs.net/mdimg/perfume/375x500.2318.jpg,for men,4.29,8242,"{'citrus': 100, 'aromatic': 69.834, 'fresh spi...",Versace Pour Homme by Versace is a Aromatic Fo...,"[Lemon, Neroli, Bergamot, Rose de Mai]",...,0.0,0.0,0.0,0.0,0.0,242.0,1009.0,1907.0,208.0,373.0
1,603be3a941af6364a33646a2,Chloe Eau de Parfum,Chloé,https://fimgs.net/mdimg/perfume/375x500.1733.jpg,for women,3.95,13349,"{'floral': 100, 'rose': 89.3694, 'fresh': 69.7...",Chloe Eau de Parfum by Chloé is a Floral fragr...,"[Peony, Litchi, Freesia]",...,0.0,0.0,0.0,0.0,0.0,746.0,1480.0,916.0,141.0,195.0
2,603be3a941af6364a33646a3,1 Million,Paco Rabanne,https://fimgs.net/mdimg/perfume/375x500.3747.jpg,for men,3.7,11537,"{'warm spicy': 100, 'cinnamon': 84.2521, 'citr...",1 Million by Paco Rabanne is a Woody Spicy fra...,"[Blood Mandarin, Grapefruit, Mint]",...,100.0,0.0,0.0,60.7131,0.0,1659.0,1495.0,551.0,110.0,134.0
3,603be3a941af6364a33646a4,The One for Men,Dolce&Gabbana,https://fimgs.net/mdimg/perfume/375x500.2056.jpg,for men,4.27,8750,"{'warm spicy': 100, 'amber': 95.4969, 'tobacco...",The One for Men by Dolce&Gabbana is a Woody Sp...,"[Grapefruit, Coriander, Basil]",...,100.0,0.0,53.043,54.2538,0.0,154.0,588.0,1755.0,307.0,591.0
4,603be3a941af6364a33646a5,Jubilation XXV Man,Amouage,https://fimgs.net/mdimg/perfume/375x500.2366.jpg,for men,4.46,3792,"{'amber': 100, 'sweet': 79.1774, 'warm spicy':...",Jubilation XXV Man by Amouage is a Oriental Fo...,"[Blackberry, Olibanum, Orange, Labdanum, Coria...",...,66.3056,0.0,0.0,65.2735,0.0,489.0,773.0,344.0,84.0,90.0


In [8]:
# convert sillage dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_sillage = dv.fit_transform(perfume_df["sillage"])
column_names = ["sillage_" + note for note in dv.feature_names_]
perfume_df = perfume_df.join(pd.DataFrame(X_sillage, columns=column_names))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,yellow floral,longevity_eternal,longevity_long lasting,longevity_moderate,longevity_very weak,longevity_weak,sillage_enormous,sillage_intimate,sillage_moderate,sillage_strong
0,603be3a941af6364a33646a1,Versace Pour Homme,Versace,https://fimgs.net/mdimg/perfume/375x500.2318.jpg,for men,4.29,8242,"{'citrus': 100, 'aromatic': 69.834, 'fresh spi...",Versace Pour Homme by Versace is a Aromatic Fo...,"[Lemon, Neroli, Bergamot, Rose de Mai]",...,0.0,242.0,1009.0,1907.0,208.0,373.0,360.0,595.0,2339.0,788.0
1,603be3a941af6364a33646a2,Chloe Eau de Parfum,Chloé,https://fimgs.net/mdimg/perfume/375x500.1733.jpg,for women,3.95,13349,"{'floral': 100, 'rose': 89.3694, 'fresh': 69.7...",Chloe Eau de Parfum by Chloé is a Floral fragr...,"[Peony, Litchi, Freesia]",...,0.0,746.0,1480.0,916.0,141.0,195.0,666.0,401.0,1592.0,1254.0
2,603be3a941af6364a33646a3,1 Million,Paco Rabanne,https://fimgs.net/mdimg/perfume/375x500.3747.jpg,for men,3.7,11537,"{'warm spicy': 100, 'cinnamon': 84.2521, 'citr...",1 Million by Paco Rabanne is a Woody Spicy fra...,"[Blood Mandarin, Grapefruit, Mint]",...,0.0,1659.0,1495.0,551.0,110.0,134.0,1673.0,192.0,670.0,1624.0
3,603be3a941af6364a33646a4,The One for Men,Dolce&Gabbana,https://fimgs.net/mdimg/perfume/375x500.2056.jpg,for men,4.27,8750,"{'warm spicy': 100, 'amber': 95.4969, 'tobacco...",The One for Men by Dolce&Gabbana is a Woody Sp...,"[Grapefruit, Coriander, Basil]",...,0.0,154.0,588.0,1755.0,307.0,591.0,276.0,1311.0,1683.0,421.0
4,603be3a941af6364a33646a5,Jubilation XXV Man,Amouage,https://fimgs.net/mdimg/perfume/375x500.2366.jpg,for men,4.46,3792,"{'amber': 100, 'sweet': 79.1774, 'warm spicy':...",Jubilation XXV Man by Amouage is a Oriental Fo...,"[Blackberry, Olibanum, Orange, Labdanum, Coria...",...,0.0,489.0,773.0,344.0,84.0,90.0,412.0,233.0,694.0,842.0


In [9]:
# convert gender vote dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_gender_vote = dv.fit_transform(perfume_df["gender_vote"])
perfume_df = perfume_df.join(pd.DataFrame(X_gender_vote, columns=dv.feature_names_))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,longevity_weak,sillage_enormous,sillage_intimate,sillage_moderate,sillage_strong,female,male,more female,more male,unisex
0,603be3a941af6364a33646a1,Versace Pour Homme,Versace,https://fimgs.net/mdimg/perfume/375x500.2318.jpg,for men,4.29,8242,"{'citrus': 100, 'aromatic': 69.834, 'fresh spi...",Versace Pour Homme by Versace is a Aromatic Fo...,"[Lemon, Neroli, Bergamot, Rose de Mai]",...,373.0,360.0,595.0,2339.0,788.0,3.0,233.0,0.0,149.0,40.0
1,603be3a941af6364a33646a2,Chloe Eau de Parfum,Chloé,https://fimgs.net/mdimg/perfume/375x500.1733.jpg,for women,3.95,13349,"{'floral': 100, 'rose': 89.3694, 'fresh': 69.7...",Chloe Eau de Parfum by Chloé is a Floral fragr...,"[Peony, Litchi, Freesia]",...,195.0,666.0,401.0,1592.0,1254.0,335.0,2.0,27.0,0.0,11.0
2,603be3a941af6364a33646a3,1 Million,Paco Rabanne,https://fimgs.net/mdimg/perfume/375x500.3747.jpg,for men,3.7,11537,"{'warm spicy': 100, 'cinnamon': 84.2521, 'citr...",1 Million by Paco Rabanne is a Woody Spicy fra...,"[Blood Mandarin, Grapefruit, Mint]",...,134.0,1673.0,192.0,670.0,1624.0,7.0,155.0,4.0,148.0,79.0
3,603be3a941af6364a33646a4,The One for Men,Dolce&Gabbana,https://fimgs.net/mdimg/perfume/375x500.2056.jpg,for men,4.27,8750,"{'warm spicy': 100, 'amber': 95.4969, 'tobacco...",The One for Men by Dolce&Gabbana is a Woody Sp...,"[Grapefruit, Coriander, Basil]",...,591.0,276.0,1311.0,1683.0,421.0,2.0,150.0,1.0,68.0,23.0
4,603be3a941af6364a33646a5,Jubilation XXV Man,Amouage,https://fimgs.net/mdimg/perfume/375x500.2366.jpg,for men,4.46,3792,"{'amber': 100, 'sweet': 79.1774, 'warm spicy':...",Jubilation XXV Man by Amouage is a Oriental Fo...,"[Blackberry, Olibanum, Orange, Labdanum, Coria...",...,90.0,412.0,233.0,694.0,842.0,1.0,114.0,0.0,86.0,28.0


In [10]:
# convert price value dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_price_value = dv.fit_transform(perfume_df["price value"])
perfume_df = perfume_df.join(pd.DataFrame(X_price_value, columns=dv.feature_names_))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,female,male,more female,more male,unisex,good value,great value,ok,overpriced,way overpriced
0,603be3a941af6364a33646a1,Versace Pour Homme,Versace,https://fimgs.net/mdimg/perfume/375x500.2318.jpg,for men,4.29,8242,"{'citrus': 100, 'aromatic': 69.834, 'fresh spi...",Versace Pour Homme by Versace is a Aromatic Fo...,"[Lemon, Neroli, Bergamot, Rose de Mai]",...,3.0,233.0,0.0,149.0,40.0,149.0,68.0,147.0,28.0,3.0
1,603be3a941af6364a33646a2,Chloe Eau de Parfum,Chloé,https://fimgs.net/mdimg/perfume/375x500.1733.jpg,for women,3.95,13349,"{'floral': 100, 'rose': 89.3694, 'fresh': 69.7...",Chloe Eau de Parfum by Chloé is a Floral fragr...,"[Peony, Litchi, Freesia]",...,335.0,2.0,27.0,0.0,11.0,56.0,7.0,178.0,73.0,25.0
2,603be3a941af6364a33646a3,1 Million,Paco Rabanne,https://fimgs.net/mdimg/perfume/375x500.3747.jpg,for men,3.7,11537,"{'warm spicy': 100, 'cinnamon': 84.2521, 'citr...",1 Million by Paco Rabanne is a Woody Spicy fra...,"[Blood Mandarin, Grapefruit, Mint]",...,7.0,155.0,4.0,148.0,79.0,74.0,15.0,185.0,50.0,11.0
3,603be3a941af6364a33646a4,The One for Men,Dolce&Gabbana,https://fimgs.net/mdimg/perfume/375x500.2056.jpg,for men,4.27,8750,"{'warm spicy': 100, 'amber': 95.4969, 'tobacco...",The One for Men by Dolce&Gabbana is a Woody Sp...,"[Grapefruit, Coriander, Basil]",...,2.0,150.0,1.0,68.0,23.0,37.0,10.0,114.0,49.0,6.0
4,603be3a941af6364a33646a5,Jubilation XXV Man,Amouage,https://fimgs.net/mdimg/perfume/375x500.2366.jpg,for men,4.46,3792,"{'amber': 100, 'sweet': 79.1774, 'warm spicy':...",Jubilation XXV Man by Amouage is a Oriental Fo...,"[Blackberry, Olibanum, Orange, Labdanum, Coria...",...,1.0,114.0,0.0,86.0,28.0,42.0,15.0,71.0,51.0,27.0


### X (data) and y (target)

In [11]:
# drop unnecessary columns
X = perfume_df.drop(["_id", "name", "company", "image", "for_gender", "rating", "number_votes", "main accords", "description", "top notes", "middle notes", "base notes", "longevity", "sillage", "gender_vote", "price value"], axis=1)
y = perfume_df["weighted_rating"].values.reshape(-1, 1)
print(X.shape, y.shape)
X.head()

(134, 491) (134, 1)


Unnamed: 0,weighted_rating,top_note_African Orange flower,top_note_Agarwood (Oud),top_note_Aldehydes,top_note_Amalfi Lemon,top_note_Amber,top_note_Angelica,top_note_Anise,top_note_Apple,top_note_Apricot,...,female,male,more female,more male,unisex,good value,great value,ok,overpriced,way overpriced
0,4.211076,0,0,0,0,0,0,0,0,0,...,3.0,233.0,0.0,149.0,40.0,149.0,68.0,147.0,28.0,3.0
1,3.990176,0,0,0,0,0,0,0,0,0,...,335.0,2.0,27.0,0.0,11.0,56.0,7.0,178.0,73.0,25.0
2,3.823913,0,0,0,0,0,0,0,0,0,...,7.0,155.0,4.0,148.0,79.0,74.0,15.0,185.0,50.0,11.0
3,4.201523,0,0,0,0,0,0,0,0,0,...,2.0,150.0,1.0,68.0,23.0,37.0,10.0,114.0,49.0,6.0
4,4.243093,0,0,0,0,0,0,0,0,0,...,1.0,114.0,0.0,86.0,28.0,42.0,15.0,71.0,51.0,27.0


## Train and Test data

In [12]:
# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [13]:
# Create a StandardScater model and fit it to the training data

X_scaler = StandardScaler().fit(X_train)
y_scaler = StandardScaler().fit(y_train)

In [14]:
# Transform the training and testing data using the X_scaler and y_scaler models

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

## Linear Regression Model

In [15]:
# Create a LinearRegression model and fit it to the scaled training data

model = LinearRegression()
model.fit(X_train_scaled, y_train_scaled)


LinearRegression()

### Mean Squared Error and R2

In [16]:
# Used X_test_scaled, y_test_scaled, and model.predict(X_test_scaled) to calculate MSE and R2
predictions = model.predict(X_test_scaled)
MSE = mean_squared_error(y_test_scaled, predictions)
r2 = model.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.5623053393137699, R2: 0.5296405970123749


### Lasso Model

In [17]:
lasso = Lasso(alpha=.01).fit(X_train_scaled, y_train_scaled)

predictions = lasso.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = lasso.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.00012759096472804107, R2: 0.999893272203196


### Ridge Model

In [18]:
ridge = Ridge(alpha=.01).fit(X_train_scaled, y_train_scaled)

predictions = ridge.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = ridge.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.5617973017220967, R2: 0.530065562314334


### ElasticNet model

In [19]:
elasticnet = ElasticNet(alpha=.01).fit(X_train_scaled, y_train_scaled)

predictions = elasticnet.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = elasticnet.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.00012632456100397402, R2: 0.9998943315296116


##### Save the Model

In [None]:
import joblib
final_model = lasso
filename = 'perfume_model.sav'
joblib.dump(final_model, filename)