In [57]:
import streamlit as st
import pandas as pd
import pickle
import numpy as np
import joblib
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from surprise import SVDpp, SVD
from surprise import accuracy
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import GridSearchCV, cross_validate, train_test_split

from PIL import Image

In [124]:
content_matrix = pd.read_csv("data/cleaned_data_exports/scraped_feature_df_2.csv")
sim_df = pd.read_csv("data/cleaned_data_exports/similarity_matrix.csv")
final_user_df = pd.read_csv("data/cleaned_data_exports/user_df_model.csv")
surprise_df = pd.read_csv("data/cleaned_data_exports/surprise_df.csv")

In [133]:
content_matrix

Unnamed: 0_level_0,address,city,state,zipcode,summit,drop,base,gondolas_and_trams,fast_eight,high_speed_sixes,...,total_lifts,full_address_y,latitude_y,longitude_y,airport_1,distance_1,airport_2,distance_2,airport_3,distance_3
ski_resort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Palisades Tahoe,PO Box 2007,Olympic Valley,California,96146,9050,2850,6200,3.0,6.0,4.0,...,36.0,"PO Box 2007, Olympic Valley, California",39.196980,-120.235705,Truckee-Tahoe,15.992749,Minden-Tahoe,47.213899,Greenville Muni,3357.306610
Mammoth Mountain,P.O. Box 24,Mammoth Mountain Lakes,California,93546,11053,3100,7953,3.0,9.0,2.0,...,25.0,"P.O. Box 24, Mammoth Lakes, California",37.648546,-118.972079,Mammoth Yosemite,12.136117,Bryant,71.790347,Grand Rapids-Itasca County,2330.068385
Donner Ski Ranch,P.O. Box 66,Norden,California,95724,8012,750,7031,0.0,0.0,0.0,...,8.0,"P.O. Box 66, Norden, California",39.317356,-120.354182,Truckee-Tahoe,18.464839,Reno/Tahoe International,54.237886,Mapleton Municipal,2085.874328
Sugar Bowl,P.O. Box 5,Norden,California,95724,8383,1500,6883,1.0,5.0,0.0,...,12.0,"P.O. Box 5, Norden, California",39.317356,-120.354182,Truckee-Tahoe,18.464839,Reno/Tahoe International,54.237886,Mapleton Municipal,2085.874328
Kirkwood,PO Box 1,Kirkwood,California,95646,9800,2000,7800,0.0,2.0,0.0,...,13.0,"PO Box 1, Kirkwood, California",38.702308,-120.072244,Lake Tahoe,22.320342,Minden-Tahoe,43.275852,Nogales International,1165.403603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Oak Mountain,141 Novosel Way,Speculator,New York,12164,2400,650,1750,0.0,0.0,0.0,...,4.0,"141 Novosel Way, Speculator, New York",43.517884,-74.361618,Piseco Muni,14.487038,Saratoga Cty,65.825037,Hillsboro Municipal,1814.724642
Mt. Pleasant,23301 Plank Rd,Venango,Pennsylvania,16403,1540,340,1200,0.0,0.0,0.0,...,2.0,"23301 Plank Rd , Venango, Pennsylvania",41.795526,-80.097363,Port Meadville,21.167046,Erie Intl,32.517680,Nondalton,5284.952567
Hunt Hollow,7532 County Road 36,Naples,New York,14512,2030,825,1000,0.0,0.0,0.0,...,3.0,"7532 County Road 36, Naples, New York",42.643014,-77.469117,Dansville Muni,21.514044,Hornell Muni,33.855765,Perry-Warsaw,48.883513
Powder Ridge Connecticut,99 Powder Hill Road,Middlefield,Connecticut,06455,720,550,170,0.0,0.0,0.0,...,6.0,"99 Powder Hill Road, ddlefield, Connecticut",41.501600,-72.736408,Meriden-Markham Municipal,7.790552,Chester,23.247699,Piedmont Triad International,865.684349


In [125]:
#resetting index
sim_df = sim_df.set_index('ski_resort')
content_matrix = content_matrix.set_index('ski_resort')

In [126]:
#dropping unnamed column
df_list = [content_matrix, final_user_df, surprise_df]

for x in df_list:
    x.drop(columns="Unnamed: 0", inplace=True)

In [127]:
#dropping duplicates
surprise_df = surprise_df.drop_duplicates()

In [128]:
#setting scale
reader = Reader(rating_scale=(1, 5))

#loading final dataset
data_full = Dataset.load_from_df(surprise_df[['user_name', 'ski_resort', 'rating']], reader)

#making trainset
full_trainset = data_full.build_full_trainset()

In [129]:
#saving new dataframe with only user information
user_df = surprise_df.reset_index()
user_df.set_index('user_name', inplace = True)
user_df.drop(columns = ['rating', 'index'], inplace =True)
user_df.head()

Unnamed: 0_level_0,ski_resort
user_name,Unnamed: 1_level_1
anon_1,Winter Park
anon_1,Arapahoe Basin
anon_1,Steamboat
anon_1,Copper Mountain
anon_2,Solitude Mountain


In [130]:
content_matrix

Unnamed: 0_level_0,address,city,state,zipcode,summit,drop,base,gondolas_and_trams,fast_eight,high_speed_sixes,...,total_lifts,full_address_y,latitude_y,longitude_y,airport_1,distance_1,airport_2,distance_2,airport_3,distance_3
ski_resort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Palisades Tahoe,PO Box 2007,Olympic Valley,California,96146,9050,2850,6200,3.0,6.0,4.0,...,36.0,"PO Box 2007, Olympic Valley, California",39.196980,-120.235705,Truckee-Tahoe,15.992749,Minden-Tahoe,47.213899,Greenville Muni,3357.306610
Mammoth Mountain,P.O. Box 24,Mammoth Mountain Lakes,California,93546,11053,3100,7953,3.0,9.0,2.0,...,25.0,"P.O. Box 24, Mammoth Lakes, California",37.648546,-118.972079,Mammoth Yosemite,12.136117,Bryant,71.790347,Grand Rapids-Itasca County,2330.068385
Donner Ski Ranch,P.O. Box 66,Norden,California,95724,8012,750,7031,0.0,0.0,0.0,...,8.0,"P.O. Box 66, Norden, California",39.317356,-120.354182,Truckee-Tahoe,18.464839,Reno/Tahoe International,54.237886,Mapleton Municipal,2085.874328
Sugar Bowl,P.O. Box 5,Norden,California,95724,8383,1500,6883,1.0,5.0,0.0,...,12.0,"P.O. Box 5, Norden, California",39.317356,-120.354182,Truckee-Tahoe,18.464839,Reno/Tahoe International,54.237886,Mapleton Municipal,2085.874328
Kirkwood,PO Box 1,Kirkwood,California,95646,9800,2000,7800,0.0,2.0,0.0,...,13.0,"PO Box 1, Kirkwood, California",38.702308,-120.072244,Lake Tahoe,22.320342,Minden-Tahoe,43.275852,Nogales International,1165.403603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Oak Mountain,141 Novosel Way,Speculator,New York,12164,2400,650,1750,0.0,0.0,0.0,...,4.0,"141 Novosel Way, Speculator, New York",43.517884,-74.361618,Piseco Muni,14.487038,Saratoga Cty,65.825037,Hillsboro Municipal,1814.724642
Mt. Pleasant,23301 Plank Rd,Venango,Pennsylvania,16403,1540,340,1200,0.0,0.0,0.0,...,2.0,"23301 Plank Rd , Venango, Pennsylvania",41.795526,-80.097363,Port Meadville,21.167046,Erie Intl,32.517680,Nondalton,5284.952567
Hunt Hollow,7532 County Road 36,Naples,New York,14512,2030,825,1000,0.0,0.0,0.0,...,3.0,"7532 County Road 36, Naples, New York",42.643014,-77.469117,Dansville Muni,21.514044,Hornell Muni,33.855765,Perry-Warsaw,48.883513
Powder Ridge Connecticut,99 Powder Hill Road,Middlefield,Connecticut,06455,720,550,170,0.0,0.0,0.0,...,6.0,"99 Powder Hill Road, ddlefield, Connecticut",41.501600,-72.736408,Meriden-Markham Municipal,7.790552,Chester,23.247699,Piedmont Triad International,865.684349


In [136]:
def load_model(model_filename):
    print (">> Loading dump")
    from surprise import dump
    import os
    file_name = os.path.expanduser(model_filename)
    _, loaded_model = dump.load(file_name)
    return loaded_model

def hybrid_model_content():
    # User inputs
    user = str(input('Name: '))
    n_recs = int(input('How many resort recommendations do you want? '))
    mountain_name = str(input("What's your favorite ski resort? "))
    travel_date = str(input('What month would you like to travel? '))
    mtn_pass = str(input('Are you using a multi-resort pass?  '))
    
    loaded_model = load_model('model.pickle')
    
    # Content-based model
    y = sim_df.loc[[mountain_name]]
    cos_sim = cosine_similarity(sim_df, y)
    cos_sim_df = pd.DataFrame(data=cos_sim, index=sim_df.index)
    cos_sim_df.sort_values(by=0, ascending=False, inplace=True)
    cos_sim_df = cos_sim_df.reset_index()

    #making list for column names
    rec_list = []
    
    #grabbing rows from content_matrix for final output
    for x in cos_sim_df['ski_resort']:
        rec_df = content_matrix.loc[[x]]  
        rec_list.append(rec_df)  #

    rec_df = pd.concat(rec_list)

    #Concatenate all the dataframes in rec_list into a single dataframe
    concat_df = rec_df[["city", "state", "summit", "drop", "base","adultWeekdayPrice", "adultWeekendPrice",
                           "beginner_runs", "intermediate_runs", "advanced_runs", "expert_runs",
                        "ikon", "epic", "mountain_collective", "latitude_y", "longitude_y"]]
    concat_df = concat_df.reset_index()

    #filtering based on month to return airbnb prices and turning into dataframe
    travel_date = travel_date.lower()

    month = ["december", "january", "february", "march", "april", "may"]
    month_abv = ["dec", "jan", "feb", "mar", "apr", "may"]

    selected_columns = []
    for x, y in zip(month_abv, month):
        if travel_date == y:
            selected_columns = [x + "_mean_4_guests", x + "_mean_2_guests"]

    result = rec_df[selected_columns]
    result = result.reset_index()                        
    content_recommendations = pd.merge(concat_df, result, on="ski_resort")
    
    #adding mountain fil
    if mtn_pass == "Ikon":
        content_recommendations = content_recommendations.loc[content_recommendations['ikon'] == 1]
    elif mtn_pass == "Epic":
        content_recommendations = content_recommendations.loc[content_recommendations['epic'] == 1]
    elif mtn_pass == "Mountain_collective":
        content_recommendations = content_recommendations.loc[content_recommendations['mountain_collective'] == 1]
    elif mtn_pass == "No":
        pass
    
    content_recommendations = content_recommendations[content_recommendations.ski_resort != mountain_name].head(20)

    # Collaborative model
    have_rated = list(user_df.loc[user, 'ski_resort'])
    not_rated = final_user_df.copy()
    not_rated = not_rated.loc[~not_rated['ski_resort'].isin(have_rated)]
    not_rated = not_rated.drop_duplicates(subset=['ski_resort'])
    not_rated.reset_index(inplace=True)
    not_rated['predicted_rating'] = not_rated['ski_resort'].apply(lambda x: loaded_model.predict(user, x).est)
    not_rated.sort_values(by='predicted_rating', ascending=False, inplace=True)
    collaborative_recommendations = not_rated[['ski_resort', 'predicted_rating']]

    # Combine content-based and collaborative recommendations
    combined_recommendations = pd.merge(content_recommendations, collaborative_recommendations, on='ski_resort', how='left')
    combined_recommendations = combined_recommendations.drop_duplicates(subset=['ski_resort'])
    combined_recommendations.sort_values(by='predicted_rating', ascending=False, inplace=True)
    combined_recommendations.drop(columns=['ikon', 'mountain_collective', 'epic'], inplace=True)
    final_recs = combined_recommendations.drop(columns=['longitude_y', 'latitude_y'])
    map_df = combined_recommendations[["latitude_y", "longitude_y"]]
    return final_recs.head(n_recs), st.map(map_df)

In [137]:
hybrid_model_content()

Name: Stephanie Ciaccia
How many resort recommendations do you want? 3
What's your favorite ski resort? Park City Mountain
What month would you like to travel? December
Are you using a multi-resort pass?  Ikon
>> Loading dump


StreamlitAPIException: Map data must contain a latitude column named: 'LAT', 'LATITUDE', 'lat', 'latitude'. Existing columns: 'latitude_y', 'longitude_y'

In [120]:
st.title('Avant Ski')
st.header('Shred faster with Avant Ski')

# User inputs
user = st.text_input('Name')
n_recs = st.number_input('How many resort recommendations do you want?', min_value=1, step=1)
mountain_name = st.text_input("What's your favorite ski resort?")
travel_date = st.text_input('What month would you like to travel?')
mtn_pass = st.text_input('Are you using a multi-resort pass?')


# Button to trigger recommendation
if st.button("Get Recommendations"):
    # Call the hybrid model function and get recommendations
    recommendations = hybrid_model_content()

    # Display user inputs
    st.subheader("User Inputs")
    st.write("Name:", user)
    st.write("Favorite Ski Resort:", mountain_name)
    st.write("Month of Travel:", travel_date)
    st.write("Multi-Resort Pass:", mtn_pass)

    # Display the final recommendations
    st.subheader("Recommendations")
    st.dataframe(recommendations)

2023-05-28 20:42:53.923 
  command:

    streamlit run /Users/stephanie/opt/anaconda3/envs/streamlit/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
