In [1]:
import streamlit as st
import pandas as pd
import pickle
import numpy as np
import joblib
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from surprise import SVDpp, SVD
from surprise import accuracy
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import GridSearchCV, cross_validate, train_test_split

from PIL import Image

In [63]:
content_matrix = pd.read_csv("data/cleaned_data_exports/scraped_feature_df_3.csv")
sim_df = pd.read_csv("data/cleaned_data_exports/similarity_matrix.csv")
final_user_df = pd.read_csv("data/cleaned_data_exports/user_df_model.csv")
surprise_df = pd.read_csv("data/cleaned_data_exports/surprise_df.csv")

In [64]:
content_matrix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 330 entries, 0 to 329
Columns: 103 entries, Unnamed: 0 to total_lifts
dtypes: float64(20), int64(70), object(13)
memory usage: 265.7+ KB


In [65]:
content_matrix['dec_mean_2_guests'].fillna(0).astype(int)

0      284
1      191
2      256
3      256
4      304
      ... 
325    232
326    128
327    177
328    180
329    160
Name: dec_mean_2_guests, Length: 330, dtype: int64

In [66]:
#resetting index
sim_df = sim_df.set_index('ski_resort')
content_matrix = content_matrix.set_index('ski_resort')

In [67]:
#dropping unnamed column
df_list = [content_matrix, final_user_df, surprise_df]

for x in df_list:
    x.drop(columns="Unnamed: 0", inplace=True)

In [68]:
#dropping duplicates
surprise_df = surprise_df.drop_duplicates()

In [69]:
#setting scale
reader = Reader(rating_scale=(1, 5))

#loading final dataset
data_full = Dataset.load_from_df(surprise_df[['user_name', 'ski_resort', 'rating']], reader)

#making trainset
full_trainset = data_full.build_full_trainset()

In [70]:
#saving new dataframe with only user information
user_df = surprise_df.reset_index()
user_df.set_index('user_name', inplace = True)
user_df.drop(columns = ['rating', 'index'], inplace =True)
user_df.head()

Unnamed: 0_level_0,ski_resort
user_name,Unnamed: 1_level_1
anon_1,Winter Park
anon_1,Arapahoe Basin
anon_1,Steamboat
anon_1,Copper Mountain
anon_2,Solitude Mountain


In [77]:
content_matrix.columns.to_list()

['address',
 'city',
 'state',
 'zipcode',
 'summit',
 'drop',
 'base',
 'gondolas_and_trams',
 'fast_eight',
 'high_speed_sixes',
 'quad_chairs',
 'triple_chairs',
 'double_chairs',
 'surface_lifts',
 'total_runs',
 'longest_run',
 'skiable_terrain',
 'snow_making',
 'daysOpenLastYear',
 'averageSnowfall',
 'projectedOpening',
 'projectedClosing',
 'nov_snow',
 'dec_snow',
 'jan_snow',
 'feb_snow',
 'mar_snow',
 'apr_snow',
 'childrenWeekdayPrice',
 'childrenWeekendPrice',
 'teenagerWeekdayPrice',
 'teenagerWeekendPrice',
 'adultWeekdayPrice',
 'adultWeekendPrice',
 'seniorWeekdayPrice',
 'seniorWeekendPrice',
 'childrenPrice_season',
 'teenagerPrice_season',
 'adultPrice_season',
 'Url',
 'beginner_runs',
 'intermediate_runs',
 'advanced_runs',
 'expert_runs',
 'night_skiing',
 'epic',
 'mountain_collective',
 'ikon',
 'indy',
 'dec_mean_2_guests',
 'dec_min_2_guests',
 'dec_max_2_guests',
 'jan_mean_2_guests',
 'jan_min_2_guests',
 'jan_max_2_guests',
 'feb_mean_2_guests',
 'feb_min

In [72]:
def load_model(model_filename):
    print (">> Loading dump")
    from surprise import dump
    import os
    file_name = os.path.expanduser(model_filename)
    _, loaded_model = dump.load(file_name)
    return loaded_model

def hybrid_model_content():
    # User inputs
    user = str(input('Name: '))
    n_recs = int(input('How many resort recommendations do you want? '))
    mountain_name = str(input("What's your favorite ski resort? "))
    travel_date = str(input('What month would you like to travel? '))
    mtn_pass = str(input('Are you using a multi-resort pass?  '))
    
    loaded_model = load_model('model.pickle')
    
    # Content-based model
    y = sim_df.loc[[mountain_name]]
    cos_sim = cosine_similarity(sim_df, y)
    cos_sim_df = pd.DataFrame(data=cos_sim, index=sim_df.index)
    cos_sim_df.sort_values(by=0, ascending=False, inplace=True)
    cos_sim_df = cos_sim_df.reset_index()

    #making list for column names
    rec_list = []
    
    #grabbing rows from content_matrix for final output
    for x in cos_sim_df['ski_resort']:
        rec_df = content_matrix.loc[[x]]  
        rec_list.append(rec_df)  #

    rec_df = pd.concat(rec_list)

    #Concatenate all the dataframes in rec_list into a single dataframe
    concat_df = rec_df[["city", "state", "summit", "drop", "base","adultWeekdayPrice", "adultWeekendPrice",
                           "beginner_runs", "intermediate_runs", "advanced_runs", "expert_runs",
                        "ikon", "epic", "mountain_collective"]]
    concat_df = concat_df.reset_index()

    #filtering based on month to return airbnb prices and turning into dataframe
    travel_date = travel_date.lower()

    month = ["december", "january", "february", "march", "april", "may"]
    month_abv = ["dec", "jan", "feb", "mar", "apr", "may"]

    selected_columns = []
    for x, y in zip(month_abv, month):
        if travel_date == y:
            selected_columns = [x + "_mean_4_guests", x + "_mean_2_guests"]

    result = rec_df[selected_columns]
    result = result.reset_index()                        
    content_recommendations = pd.merge(concat_df, result, on="ski_resort")
    
    #adding mountain fil
    if mtn_pass == "Ikon":
        content_recommendations = content_recommendations.loc[content_recommendations['ikon'] == 1]
    elif mtn_pass == "Epic":
        content_recommendations = content_recommendations.loc[content_recommendations['epic'] == 1]
    elif mtn_pass == "Mountain_collective":
        content_recommendations = content_recommendations.loc[content_recommendations['mountain_collective'] == 1]
    elif mtn_pass == "No":
        pass
    
    content_recommendations = content_recommendations[content_recommendations.ski_resort != mountain_name].head(20)

    # Collaborative model
    have_rated = list(user_df.loc[user, 'ski_resort'])
    not_rated = final_user_df.copy()
    not_rated = not_rated.loc[~not_rated['ski_resort'].isin(have_rated)]
    not_rated = not_rated.drop_duplicates(subset=['ski_resort'])
    not_rated.reset_index(inplace=True)
    not_rated['predicted_rating'] = not_rated['ski_resort'].apply(lambda x: loaded_model.predict(user, x).est)
    not_rated.sort_values(by='predicted_rating', ascending=False, inplace=True)
    collaborative_recommendations = not_rated[['ski_resort', 'predicted_rating']]

    # Combine content-based and collaborative recommendations
    combined_recommendations = pd.merge(content_recommendations, collaborative_recommendations, on='ski_resort', how='left')
    combined_recommendations = combined_recommendations.drop_duplicates(subset=['ski_resort'])
    combined_recommendations.sort_values(by='predicted_rating', ascending=False, inplace=True)
    combined_recommendations.drop(columns=['ikon', 'mountain_collective', 'epic'], inplace=True)
    return combined_recommendations.head(n_recs)

In [73]:
recommendations = hybrid_model_content()

Name: Stephanie Ciaccia
How many resort recommendations do you want? 5
What's your favorite ski resort? Park City Mountain
What month would you like to travel? December
Are you using a multi-resort pass?  Ikon
>> Loading dump


In [5]:
map_df = content_matrix[['ski_resort', 'latitude', 'longitude']]

In [74]:
recommendations

Unnamed: 0,ski_resort,city,state,summit,drop,base,adultWeekdayPrice,adultWeekendPrice,beginner_runs,intermediate_runs,advanced_runs,expert_runs,dec_mean_4_guests,dec_mean_2_guests,predicted_rating
5,Big Sky,Big Sky,Montana,11166,4350,7500,152.0,194.0,15,25,42,18.0,314,208,4.483927
0,Steamboat,Steamboat Springs,Colorado,10568,3668,6900,177.0,192.0,12,43,40,5.0,246,189,4.27356
16,Copper Mountain,Copper Mountain,Colorado,12313,2738,9712,129.0,179.0,25,24,34,17.0,274,218,4.16593
8,Snowbasin,Huntsville,Utah,9350,2900,6450,149.0,169.0,9,33,52,6.0,240,215,4.163306
15,Eldora Mountain,Nederland,Colorado,10600,1400,9200,99.0,99.0,18,48,18,16.0,287,212,4.09858


In [39]:
rec_list = recommendations['ski_resort'].to_list()
rec_map_df = content_matrix[content_matrix.index.isin(rec_list)]
rec_map_df = rec_map_df.reset_index()

In [40]:
rec_map_df

Unnamed: 0,ski_resort,address,city,state,zipcode,summit,drop,base,gondolas_and_trams,fast_eight,...,full_address,latitude,longitude,airport_1,distance_1,airport_2,distance_2,airport_3,distance_3,total_lifts
0,Snowbasin,P.O. Box 460,Huntsville,Utah,84317,9350,2900,6450,3.0,2.0,...,"P.O. Box 460, Huntsville, Utah",41.260774,-111.769938,Ogden-Hinckley,21.502312,Brigham City,40.568973,Ruth,977.506101,12.0
1,Copper Mountain,P.O. Box 3001,Copper Mountain,Colorado,80443,12313,2738,9712,1.0,4.0,...,"P.O. Box 3001, Copper Mountain, Colorado",39.501419,-106.151627,Lake County,34.332678,Eagle County Regional,67.510635,Big Sandy,1013.771656,24.0
2,Steamboat,2305 Mt. Werner Circle,Steamboat Springs,Colorado,80487,10568,3668,6900,2.0,6.0,...,"2305 Mt. Werner Circle, Steamboat Springs, Col...",40.457098,-106.804472,Steamboat Springs,8.403405,Yampa Valley,35.054858,Wayne County,2094.926203,21.0
3,Big Sky,P.O. Box 160001,Big Sky,Montana,59716-0001,11166,4350,7500,1.0,5.0,...,"P.O. Box 160001, Big Sky, Montana",45.261781,-111.308024,Ennis - Big Sky,26.679464,Gardiner,49.928602,Gillespie County,1988.97272,35.0


In [44]:
rec_map_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 96 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   ski_resort            4 non-null      object 
 1   address               4 non-null      object 
 2   city                  4 non-null      object 
 3   state                 4 non-null      object 
 4   zipcode               4 non-null      object 
 5   summit                4 non-null      int64  
 6   drop                  4 non-null      int64  
 7   base                  4 non-null      int64  
 8   gondolas_and_trams    4 non-null      float64
 9   fast_eight            4 non-null      float64
 10  high_speed_sixes      4 non-null      float64
 11  quad_chairs           4 non-null      float64
 12  triple_chairs         4 non-null      float64
 13  double_chairs         4 non-null      float64
 14  surface_lifts         4 non-null      float64
 15  total_runs            4 non

In [13]:
fig = px.scatter_mapbox(content_matrix.head(), lat='latitude', lon='longitude', hover_name="ski_resort",
                        hover_data=["summit", "base", "drop"], color="ski_resort")

# Update the map layout
fig.update_layout(mapbox_style='open-street-map')
fig.update_layout(margin={'r': 0, 't': 0, 'l': 0, 'b': 0})

# Display the map
fig.show()

In [43]:
image = Image.open('images/icons/cable-car.png')
image(image, caption='cable-car', width=300)

TypeError: 'PngImageFile' object is not callable