In [31]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade



## Dependencies

In [32]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pymongo
from sklearn.preprocessing import LabelEncoder, StandardScaler, MultiLabelBinarizer
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from config import mongo_conn

## Load Perfume Data

In [33]:
# Initialize PyMongo to work with MongoDBs
client = pymongo.MongoClient(mongo_conn)
db = client.perfume_db
perfume_df = pd.DataFrame(list(db.perfume_data.find()))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,middle notes,base notes,longevity,sillage,gender_vote,price value
0,603a92b3f6b1c8369675d852,Angels' Share,By Kilian,https://fimgs.net/mdimg/perfume/375x500.62615.jpg,for women and men,4.31,682,"{'woody': 100, 'sweet': 92.6987, 'warm spicy':...",Angels' Share by By Kilian is a Oriental Vanil...,[Cognac],"[Cinnamon, Tonka Bean, Oak]","[Praline, Vanilla, Sandalwood]","{'very weak': 21, 'weak': 17, 'moderate': 107,...","{'intimate': 40, 'moderate': 187, 'strong': 15...","{'female': 40, 'more female': 39, 'unisex': 22...","{'way overpriced': 64, 'overpriced': 143, 'ok'..."
1,603a92b3f6b1c8369675d853,My Way,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.62036.jpg,for women,3.57,1471,"{'white floral': 100, 'citrus': 60.4322, 'tube...",My Way by Giorgio Armani is a Floral fragrance...,"[Orange Blossom, Bergamot]","[Tuberose, Indian Jasmine]","[White Musk, Madagascar Vanilla, Virginian Cedar]","{'very weak': 49, 'weak': 84, 'moderate': 200,...","{'intimate': 127, 'moderate': 322, 'strong': 2...","{'female': 349, 'more female': 21, 'unisex': 4...","{'way overpriced': 38, 'overpriced': 121, 'ok'..."
2,603a92b3f6b1c8369675d854,Libre Intense,Yves Saint Laurent,https://fimgs.net/mdimg/perfume/375x500.62318.jpg,for women,4.02,858,"{'vanilla': 100, 'aromatic': 71.4216, 'sweet':...",Libre Intense by Yves Saint Laurent is a Orien...,"[Lavender, Mandarin Orange, Bergamot]","[Lavender, Tunisian Orange Blossom, Jasmine Sa...","[Madagascar Vanilla, Tonka Bean, Ambergris, Ve...","{'very weak': 24, 'weak': 12, 'moderate': 103,...","{'intimate': 39, 'moderate': 155, 'strong': 23...","{'female': 162, 'more female': 91, 'unisex': 7...","{'way overpriced': 11, 'overpriced': 59, 'ok':..."
3,603a92b3f6b1c8369675d855,Dior Homme 2020,Christian Dior,https://fimgs.net/mdimg/perfume/375x500.58714.jpg,for men,3.42,1402,"{'woody': 100, 'musky': 72.7229, 'amber': 53.4...",Dior Homme 2020 by Christian Dior is a Woody f...,"[Bergamot, Pink Pepper, elemi]","[Cashmere Wood, Atlas Cedar, Patchouli]","[Iso E Super, Haitian Vetiver, White Musk]","{'very weak': 125, 'weak': 83, 'moderate': 174...","{'intimate': 214, 'moderate': 370, 'strong': 1...","{'female': 3, 'more female': 1, 'unisex': 17, ...","{'way overpriced': 31, 'overpriced': 59, 'ok':..."
4,603a92b3f6b1c8369675d856,Acqua di Giò Profondo,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.59532.jpg,for men,4.03,869,"{'aromatic': 100, 'marine': 93.2493, 'citrus':...",Acqua di Giò Profondo by Giorgio Armani is a A...,"[Sea Notes, Aquozone, Bergamot, Green Mandarin]","[Rosemary, Cypress, Lavender, Mastic or Lentis...","[Mineral notes, Musk, Patchouli, Amber]","{'very weak': 59, 'weak': 66, 'moderate': 188,...","{'intimate': 115, 'moderate': 333, 'strong': 1...","{'female': 3, 'more female': 0, 'unisex': 5, '...","{'way overpriced': 32, 'overpriced': 84, 'ok':..."


## Preprocess Data

### Weighted Rating

In [34]:
# Find the weighted rating taking into consideration both mean rating and number of reviews
# Weighted Rating (WR) = (v ÷ (v+m)) × R + (m ÷ (v+m)) × C  where
# v is the number of votes 
# m is the minimum number of votes required (the prerequisite) 
# R is the mean rating
# C is the mean rating of all the perfumes in the dataset

C = perfume_df["rating"].mean()

# value of m is the number of votes garnered by the 80th percentile perfume
# m = perfume_df['votes'].quantile(0.80)
m = 56

# Function to compute the weighted rating for each perfume
def weighted_rating(x, m=m, C=C):
    v = x['number_votes']
    R = x['rating']
    # Compute the weighted score
    return (v/(v+m) * R) + (m/(m+v) * C)

In [35]:
# Compute the weighted_rating using the weighted_rating function defined above
perfume_df["weighted_rating"] = perfume_df.apply(weighted_rating, axis=1)
perfume_df["weighted_rating"].head()


0    4.285415
1    3.585256
2    4.017917
3    3.441739
4    4.027336
Name: weighted_rating, dtype: float64

In [36]:
perfume_df[["name", "rating", "number_votes", "weighted_rating"]].sort_values(by=['weighted_rating'], ascending=False)

Unnamed: 0,name,rating,number_votes,weighted_rating
9,L'Homme Idéal Extrême,4.38,434,4.334971
0,Angels' Share,4.31,682,4.285415
5,Le Male Le Parfum,4.26,690,4.239432
6,Beau De Jour Eau de Parfum,4.25,729,4.231167
8,L'Interdit Eau de Parfum Intense,4.08,596,4.071926
4,Acqua di Giò Profondo,4.03,869,4.027336
2,Libre Intense,4.02,858,4.017917
1,My Way,3.57,1471,3.585256
7,Angel Nova,3.54,814,3.568708
3,Dior Homme 2020,3.42,1402,3.441739


### MultiLabelBinarizer

In [37]:
# convert Notes list to multiple features columns
mlb = MultiLabelBinarizer()

# Top Notes
X_top_notes = mlb.fit_transform(perfume_df["top notes"])
column_names = ["top_note_" + note for note in mlb.classes_]
perfume_df = perfume_df.join(pd.DataFrame(X_top_notes, columns=column_names))

# Middle Notes
X_middle_notes = mlb.fit_transform(perfume_df["middle notes"])
column_names = ["middle_note_" + note for note in mlb.classes_]
perfume_df = perfume_df.join(pd.DataFrame(X_middle_notes, columns=column_names))

# Base notes
X_base_notes = mlb.fit_transform(perfume_df["base notes"])
column_names = ["base_note_" + note for note in mlb.classes_]
perfume_df = perfume_df.join(pd.DataFrame(X_base_notes, columns=column_names))

perfume_df.columns

Index(['_id', 'name', 'company', 'image', 'for_gender', 'rating',
       'number_votes', 'main accords', 'description', 'top notes',
       'middle notes', 'base notes', 'longevity', 'sillage', 'gender_vote',
       'price value', 'weighted_rating', 'top_note_Almond',
       'top_note_Aquozone', 'top_note_Bergamot', 'top_note_Black Pepper',
       'top_note_Cardamom', 'top_note_Cognac', 'top_note_Green Mandarin',
       'top_note_Lavender', 'top_note_Lavender Extract', 'top_note_Litchi',
       'top_note_Mandarin Orange', 'top_note_Orange Blossom',
       'top_note_Pink Pepper', 'top_note_Raspberry', 'top_note_Sea Notes',
       'top_note_elemi', 'middle_note_Atlas Cedar', 'middle_note_Basil',
       'middle_note_Cashmere Wood', 'middle_note_Cinnamon',
       'middle_note_Cypress', 'middle_note_Damask Rose',
       'middle_note_Geranium', 'middle_note_Heliotrope',
       'middle_note_Indian Jasmine', 'middle_note_Jasmine Sambac',
       'middle_note_Lavender', 'middle_note_Mastic or Le

### DictVectorizer

In [38]:
# convert Accords dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_accords = dv.fit_transform(perfume_df["main accords"])
perfume_df = perfume_df.join(pd.DataFrame(X_accords, columns=dv.feature_names_))
perfume_df.head()


Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,rose,salty,sweet,tobacco,tropical,tuberose,vanilla,warm spicy,white floral,woody
0,603a92b3f6b1c8369675d852,Angels' Share,By Kilian,https://fimgs.net/mdimg/perfume/375x500.62615.jpg,for women and men,4.31,682,"{'woody': 100, 'sweet': 92.6987, 'warm spicy':...",Angels' Share by By Kilian is a Oriental Vanil...,[Cognac],...,0.0,0.0,92.6987,0.0,0.0,0.0,78.3058,89.4303,0.0,100.0
1,603a92b3f6b1c8369675d853,My Way,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.62036.jpg,for women,3.57,1471,"{'white floral': 100, 'citrus': 60.4322, 'tube...",My Way by Giorgio Armani is a Floral fragrance...,"[Orange Blossom, Bergamot]",...,0.0,0.0,0.0,0.0,0.0,59.0888,0.0,0.0,100.0,0.0
2,603a92b3f6b1c8369675d854,Libre Intense,Yves Saint Laurent,https://fimgs.net/mdimg/perfume/375x500.62318.jpg,for women,4.02,858,"{'vanilla': 100, 'aromatic': 71.4216, 'sweet':...",Libre Intense by Yves Saint Laurent is a Orien...,"[Lavender, Mandarin Orange, Bergamot]",...,0.0,0.0,69.9134,0.0,0.0,0.0,100.0,0.0,69.338,0.0
3,603a92b3f6b1c8369675d855,Dior Homme 2020,Christian Dior,https://fimgs.net/mdimg/perfume/375x500.58714.jpg,for men,3.42,1402,"{'woody': 100, 'musky': 72.7229, 'amber': 53.4...",Dior Homme 2020 by Christian Dior is a Woody f...,"[Bergamot, Pink Pepper, elemi]",...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
4,603a92b3f6b1c8369675d856,Acqua di Giò Profondo,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.59532.jpg,for men,4.03,869,"{'aromatic': 100, 'marine': 93.2493, 'citrus':...",Acqua di Giò Profondo by Giorgio Armani is a A...,"[Sea Notes, Aquozone, Bergamot, Green Mandarin]",...,0.0,44.902,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.7451


In [39]:
# convert longevity dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_longevity = dv.fit_transform(perfume_df["longevity"])
column_names = ["longevity_" + note for note in dv.feature_names_]
perfume_df = perfume_df.join(pd.DataFrame(X_longevity, columns=column_names))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,tuberose,vanilla,warm spicy,white floral,woody,longevity_eternal,longevity_long lasting,longevity_moderate,longevity_very weak,longevity_weak
0,603a92b3f6b1c8369675d852,Angels' Share,By Kilian,https://fimgs.net/mdimg/perfume/375x500.62615.jpg,for women and men,4.31,682,"{'woody': 100, 'sweet': 92.6987, 'warm spicy':...",Angels' Share by By Kilian is a Oriental Vanil...,[Cognac],...,0.0,78.3058,89.4303,0.0,100.0,78.0,184.0,107.0,21.0,17.0
1,603a92b3f6b1c8369675d853,My Way,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.62036.jpg,for women,3.57,1471,"{'white floral': 100, 'citrus': 60.4322, 'tube...",My Way by Giorgio Armani is a Floral fragrance...,"[Orange Blossom, Bergamot]",...,59.0888,0.0,0.0,100.0,0.0,89.0,216.0,200.0,49.0,84.0
2,603a92b3f6b1c8369675d854,Libre Intense,Yves Saint Laurent,https://fimgs.net/mdimg/perfume/375x500.62318.jpg,for women,4.02,858,"{'vanilla': 100, 'aromatic': 71.4216, 'sweet':...",Libre Intense by Yves Saint Laurent is a Orien...,"[Lavender, Mandarin Orange, Bergamot]",...,0.0,100.0,0.0,69.338,0.0,84.0,212.0,103.0,24.0,12.0
3,603a92b3f6b1c8369675d855,Dior Homme 2020,Christian Dior,https://fimgs.net/mdimg/perfume/375x500.58714.jpg,for men,3.42,1402,"{'woody': 100, 'musky': 72.7229, 'amber': 53.4...",Dior Homme 2020 by Christian Dior is a Woody f...,"[Bergamot, Pink Pepper, elemi]",...,0.0,0.0,0.0,0.0,100.0,77.0,288.0,174.0,125.0,83.0
4,603a92b3f6b1c8369675d856,Acqua di Giò Profondo,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.59532.jpg,for men,4.03,869,"{'aromatic': 100, 'marine': 93.2493, 'citrus':...",Acqua di Giò Profondo by Giorgio Armani is a A...,"[Sea Notes, Aquozone, Bergamot, Green Mandarin]",...,0.0,0.0,0.0,0.0,67.7451,35.0,221.0,188.0,59.0,66.0


In [40]:
# convert sillage dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_sillage = dv.fit_transform(perfume_df["sillage"])
column_names = ["sillage_" + note for note in dv.feature_names_]
perfume_df = perfume_df.join(pd.DataFrame(X_sillage, columns=column_names))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,woody,longevity_eternal,longevity_long lasting,longevity_moderate,longevity_very weak,longevity_weak,sillage_enormous,sillage_intimate,sillage_moderate,sillage_strong
0,603a92b3f6b1c8369675d852,Angels' Share,By Kilian,https://fimgs.net/mdimg/perfume/375x500.62615.jpg,for women and men,4.31,682,"{'woody': 100, 'sweet': 92.6987, 'warm spicy':...",Angels' Share by By Kilian is a Oriental Vanil...,[Cognac],...,100.0,78.0,184.0,107.0,21.0,17.0,79.0,40.0,187.0,157.0
1,603a92b3f6b1c8369675d853,My Way,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.62036.jpg,for women,3.57,1471,"{'white floral': 100, 'citrus': 60.4322, 'tube...",My Way by Giorgio Armani is a Floral fragrance...,"[Orange Blossom, Bergamot]",...,0.0,89.0,216.0,200.0,49.0,84.0,221.0,127.0,322.0,251.0
2,603a92b3f6b1c8369675d854,Libre Intense,Yves Saint Laurent,https://fimgs.net/mdimg/perfume/375x500.62318.jpg,for women,4.02,858,"{'vanilla': 100, 'aromatic': 71.4216, 'sweet':...",Libre Intense by Yves Saint Laurent is a Orien...,"[Lavender, Mandarin Orange, Bergamot]",...,0.0,84.0,212.0,103.0,24.0,12.0,131.0,39.0,155.0,237.0
3,603a92b3f6b1c8369675d855,Dior Homme 2020,Christian Dior,https://fimgs.net/mdimg/perfume/375x500.58714.jpg,for men,3.42,1402,"{'woody': 100, 'musky': 72.7229, 'amber': 53.4...",Dior Homme 2020 by Christian Dior is a Woody f...,"[Bergamot, Pink Pepper, elemi]",...,100.0,77.0,288.0,174.0,125.0,83.0,151.0,214.0,370.0,189.0
4,603a92b3f6b1c8369675d856,Acqua di Giò Profondo,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.59532.jpg,for men,4.03,869,"{'aromatic': 100, 'marine': 93.2493, 'citrus':...",Acqua di Giò Profondo by Giorgio Armani is a A...,"[Sea Notes, Aquozone, Bergamot, Green Mandarin]",...,67.7451,35.0,221.0,188.0,59.0,66.0,106.0,115.0,333.0,183.0


In [41]:
# convert gender vote dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_gender_vote = dv.fit_transform(perfume_df["gender_vote"])
perfume_df = perfume_df.join(pd.DataFrame(X_gender_vote, columns=dv.feature_names_))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,longevity_weak,sillage_enormous,sillage_intimate,sillage_moderate,sillage_strong,female,male,more female,more male,unisex
0,603a92b3f6b1c8369675d852,Angels' Share,By Kilian,https://fimgs.net/mdimg/perfume/375x500.62615.jpg,for women and men,4.31,682,"{'woody': 100, 'sweet': 92.6987, 'warm spicy':...",Angels' Share by By Kilian is a Oriental Vanil...,[Cognac],...,17.0,79.0,40.0,187.0,157.0,40.0,20.0,39.0,51.0,226.0
1,603a92b3f6b1c8369675d853,My Way,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.62036.jpg,for women,3.57,1471,"{'white floral': 100, 'citrus': 60.4322, 'tube...",My Way by Giorgio Armani is a Floral fragrance...,"[Orange Blossom, Bergamot]",...,84.0,221.0,127.0,322.0,251.0,349.0,0.0,21.0,0.0,4.0
2,603a92b3f6b1c8369675d854,Libre Intense,Yves Saint Laurent,https://fimgs.net/mdimg/perfume/375x500.62318.jpg,for women,4.02,858,"{'vanilla': 100, 'aromatic': 71.4216, 'sweet':...",Libre Intense by Yves Saint Laurent is a Orien...,"[Lavender, Mandarin Orange, Bergamot]",...,12.0,131.0,39.0,155.0,237.0,162.0,2.0,91.0,8.0,70.0
3,603a92b3f6b1c8369675d855,Dior Homme 2020,Christian Dior,https://fimgs.net/mdimg/perfume/375x500.58714.jpg,for men,3.42,1402,"{'woody': 100, 'musky': 72.7229, 'amber': 53.4...",Dior Homme 2020 by Christian Dior is a Woody f...,"[Bergamot, Pink Pepper, elemi]",...,83.0,151.0,214.0,370.0,189.0,3.0,199.0,1.0,56.0,17.0
4,603a92b3f6b1c8369675d856,Acqua di Giò Profondo,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.59532.jpg,for men,4.03,869,"{'aromatic': 100, 'marine': 93.2493, 'citrus':...",Acqua di Giò Profondo by Giorgio Armani is a A...,"[Sea Notes, Aquozone, Bergamot, Green Mandarin]",...,66.0,106.0,115.0,333.0,183.0,3.0,151.0,0.0,71.0,5.0


In [42]:
# convert price value dict to multiple features columns
dv = DictVectorizer(sparse=False)
X_price_value = dv.fit_transform(perfume_df["price value"])
perfume_df = perfume_df.join(pd.DataFrame(X_price_value, columns=dv.feature_names_))
perfume_df.head()

Unnamed: 0,_id,name,company,image,for_gender,rating,number_votes,main accords,description,top notes,...,female,male,more female,more male,unisex,good value,great value,ok,overpriced,way overpriced
0,603a92b3f6b1c8369675d852,Angels' Share,By Kilian,https://fimgs.net/mdimg/perfume/375x500.62615.jpg,for women and men,4.31,682,"{'woody': 100, 'sweet': 92.6987, 'warm spicy':...",Angels' Share by By Kilian is a Oriental Vanil...,[Cognac],...,40.0,20.0,39.0,51.0,226.0,27.0,11.0,98.0,143.0,64.0
1,603a92b3f6b1c8369675d853,My Way,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.62036.jpg,for women,3.57,1471,"{'white floral': 100, 'citrus': 60.4322, 'tube...",My Way by Giorgio Armani is a Floral fragrance...,"[Orange Blossom, Bergamot]",...,349.0,0.0,21.0,0.0,4.0,26.0,8.0,125.0,121.0,38.0
2,603a92b3f6b1c8369675d854,Libre Intense,Yves Saint Laurent,https://fimgs.net/mdimg/perfume/375x500.62318.jpg,for women,4.02,858,"{'vanilla': 100, 'aromatic': 71.4216, 'sweet':...",Libre Intense by Yves Saint Laurent is a Orien...,"[Lavender, Mandarin Orange, Bergamot]",...,162.0,2.0,91.0,8.0,70.0,35.0,6.0,136.0,59.0,11.0
3,603a92b3f6b1c8369675d855,Dior Homme 2020,Christian Dior,https://fimgs.net/mdimg/perfume/375x500.58714.jpg,for men,3.42,1402,"{'woody': 100, 'musky': 72.7229, 'amber': 53.4...",Dior Homme 2020 by Christian Dior is a Woody f...,"[Bergamot, Pink Pepper, elemi]",...,3.0,199.0,1.0,56.0,17.0,33.0,3.0,134.0,59.0,31.0
4,603a92b3f6b1c8369675d856,Acqua di Giò Profondo,Giorgio Armani,https://fimgs.net/mdimg/perfume/375x500.59532.jpg,for men,4.03,869,"{'aromatic': 100, 'marine': 93.2493, 'citrus':...",Acqua di Giò Profondo by Giorgio Armani is a A...,"[Sea Notes, Aquozone, Bergamot, Green Mandarin]",...,3.0,151.0,0.0,71.0,5.0,21.0,4.0,80.0,84.0,32.0


### X (data) and y (target)

In [43]:
# drop unnecessary columns
X = perfume_df.drop(["_id", "name", "company", "image", "for_gender", "rating", "number_votes", "main accords", "description", "top notes", "middle notes", "base notes", "longevity", "sillage", "gender_vote", "price value"], axis=1)
y = perfume_df["weighted_rating"].values.reshape(-1, 1)
print(X.shape, y.shape)
X.head()

(10, 118) (10, 1)


Unnamed: 0,weighted_rating,top_note_Almond,top_note_Aquozone,top_note_Bergamot,top_note_Black Pepper,top_note_Cardamom,top_note_Cognac,top_note_Green Mandarin,top_note_Lavender,top_note_Lavender Extract,...,female,male,more female,more male,unisex,good value,great value,ok,overpriced,way overpriced
0,4.285415,0,0,0,0,0,1,0,0,0,...,40.0,20.0,39.0,51.0,226.0,27.0,11.0,98.0,143.0,64.0
1,3.585256,0,0,1,0,0,0,0,0,0,...,349.0,0.0,21.0,0.0,4.0,26.0,8.0,125.0,121.0,38.0
2,4.017917,0,0,1,0,0,0,0,1,0,...,162.0,2.0,91.0,8.0,70.0,35.0,6.0,136.0,59.0,11.0
3,3.441739,0,0,1,0,0,0,0,0,0,...,3.0,199.0,1.0,56.0,17.0,33.0,3.0,134.0,59.0,31.0
4,4.027336,0,1,1,0,0,0,1,0,0,...,3.0,151.0,0.0,71.0,5.0,21.0,4.0,80.0,84.0,32.0


## Train and Test data

In [45]:
# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [46]:
# Create a StandardScater model and fit it to the training data

X_scaler = StandardScaler().fit(X_train)
y_scaler = StandardScaler().fit(y_train)

In [47]:
# Transform the training and testing data using the X_scaler and y_scaler models

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

## Linear Regression Model

In [48]:
# Create a LinearRegression model and fit it to the scaled training data

model = LinearRegression()
model.fit(X_train_scaled, y_train_scaled)


LinearRegression()

### Mean Squared Error and R2

In [49]:
# Used X_test_scaled, y_test_scaled, and model.predict(X_test_scaled) to calculate MSE and R2
predictions = model.predict(X_test_scaled)
MSE = mean_squared_error(y_test_scaled, predictions)
r2 = model.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.07222256482888613, R2: 0.061843849945278584


### Lasso Model

In [50]:
lasso = Lasso(alpha=.01).fit(X_train_scaled, y_train_scaled)

predictions = lasso.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = lasso.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.10856127546104748, R2: -0.4101884704990635


  positive)


### Ridge Model

In [51]:
ridge = Ridge(alpha=.01).fit(X_train_scaled, y_train_scaled)

predictions = ridge.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = ridge.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

MSE: 0.07222253405527303, R2: 0.06184424968813551


### ElasticNet model

In [None]:
elasticnet = ElasticNet(alpha=.01).fit(X_train_scaled, y_train_scaled)

predictions = elasticnet.predict(X_test_scaled)

MSE = mean_squared_error(y_test_scaled, predictions)
r2 = elasticnet.score(X_test_scaled, y_test_scaled)

print(f"MSE: {MSE}, R2: {r2}")

### Save the Model