In [54]:
# %pip install kagglehub
# %pip install Unidecode
# %pip install rapidfuzz

In [55]:
import kagglehub
import os
import pandas as pd
import ast
from scipy.spatial import cKDTree
import numpy as np

# Show every column when a DataFrame is displayed instead of eliding the middle of wide frames.
pd.options.display.max_columns = None

In [56]:
# Fetch the ski-resort dataset through kagglehub and report the directory it was saved to.
DATASET_HANDLE = "farheenshaukat/ski-resort"
path = kagglehub.dataset_download(DATASET_HANDLE)
print(f"Path to dataset files: {path}")

Path to dataset files: C:\Users\Cyber_User\.cache\kagglehub\datasets\farheenshaukat\ski-resort\versions\1


In [57]:
# The download holds more than one CSV (resorts.csv and snow.csv). os.listdir()
# returns entries in arbitrary order, so pick the resort table by name rather
# than by position in the listing.
RESORTS_CSV = 'resorts.csv'

csv_files = sorted(f for f in os.listdir(path) if f.endswith('.csv'))
if not csv_files:
    # Fail here with a clear message instead of a NameError on ski_df further down.
    raise FileNotFoundError(f"No CSV file found in the dataset directory: {path}")

# If the expected file name is absent, fall back to the alphabetically first CSV.
data_file = os.path.join(path, RESORTS_CSV if RESORTS_CSV in csv_files else csv_files[0])
ski_df = pd.read_csv(data_file, encoding='latin1')

# Trim leading/trailing whitespace from the header names so that column lookups
# such as ski_df['Latitude'] match exactly. The header of the current download
# already looks clean, so this is a defensive step.
ski_df.columns = ski_df.columns.str.strip()
print("Columns:", list(ski_df.columns))

# Summary statistics of the numeric columns.
ski_df.describe()


Columns: ['ID', 'Resort', 'Latitude', 'Longitude', 'Country', 'Continent', 'Price', 'Season', 'Highest point', 'Lowest point', 'Beginner slopes', 'Intermediate slopes', 'Difficult slopes', 'Total slopes', 'Longest run', 'Snow cannons', 'Surface lifts', 'Chair lifts', 'Gondola lifts', 'Total lifts', 'Lift capacity', 'Child friendly', 'Snowparks', 'Nightskiing', 'Summer skiing']


Unnamed: 0,ID,Latitude,Longitude,Price,Highest point,Lowest point,Beginner slopes,Intermediate slopes,Difficult slopes,Total slopes,Longest run,Snow cannons,Surface lifts,Chair lifts,Gondola lifts,Total lifts,Lift capacity
count,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0
mean,250.0,43.205684,-6.006784,48.721443,2160.589178,1200.631263,31.819639,37.921844,16.164329,85.905812,3.54509,179.136273,11.282565,9.721443,3.258517,24.262525,31650.961924
std,144.193157,15.97579,59.990459,21.718733,774.339805,596.142294,47.401764,44.358524,20.115855,100.874157,3.947887,372.321111,13.36536,11.461502,5.867967,27.672736,40781.986019
min,1.0,-45.05496,-149.740657,0.0,163.0,36.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,125.5,43.67265,1.380975,36.0,1594.0,800.0,10.0,12.0,3.0,30.0,0.0,0.0,3.0,3.0,0.0,10.0,11645.0
50%,250.0,46.347316,8.206372,45.0,2175.0,1121.0,18.0,25.0,9.0,55.0,3.0,15.0,7.0,6.0,1.0,15.0,18510.0
75%,374.5,47.327089,12.429237,54.0,2700.0,1500.0,30.0,45.0,21.0,100.0,6.0,180.0,14.0,11.5,4.0,26.0,32829.0
max,499.0,67.784406,176.876668,141.0,3914.0,3286.0,312.0,239.0,126.0,600.0,16.0,2383.0,89.0,74.0,40.0,174.0,252280.0


In [58]:
# Build the path with os.path.join so the separator is correct on every OS;
# the hard-coded backslash only resolved on Windows. The path is relative to
# the notebook's working directory.
rank_path = os.path.join('data', 'ski-resorts_ranks.csv')
rank_df = pd.read_csv(rank_path)

In [59]:
# Preview the first rows. A bare last expression uses the notebook's rich table
# display; print() falls back to wrapped plain text that is hard to read for a
# frame this wide.
ski_df.head()

   ID                        Resort   Latitude   Longitude  Country  \
0   1                      Hemsedal  60.928244    8.383487   Norway   
1   2              Geilosiden Geilo  60.534526    8.206372   Norway   
2   3                          Golm  47.057810    9.828167  Austria   
3   4  Red Mountain Resort-Rossland  49.105520 -117.846280   Canada   
4   5                       Hafjell  61.230369   10.529014   Norway   

       Continent  Price            Season  Highest point  Lowest point  \
0         Europe     46    November - May           1450           620   
1         Europe     44  November - April           1178           800   
2         Europe     48  December - April           2110           650   
3  North America     60  December - April           2075          1185   
4         Europe     45  November - April           1030           195   

   Beginner slopes  Intermediate slopes  Difficult slopes  Total slopes  \
0               29                   10              

In [60]:
# Preview the first rows. A bare last expression uses the notebook's rich table
# display; print() falls back to wrapped plain text that is hard to read for a
# frame this wide.
rank_df.head()

   rank                                            name  rating  \
0     1                             4 Vallées (Verbier)  1311.7   
1     2                   Matterhorn (Zermatt/Cervinia)  1296.8   
2     3  Les 3 Vallées (Val Thorens/Méribel/Courchevel)  1233.0   
3     4                             Chamonix Mont-Blanc  1214.2   
4     5                              Zell am See-Kaprun  1181.7   

                                  url                    location_coordinate  \
0              http://www.4vallees.ch   {'lat': '46.1013', 'long': '7.2263'}   
1  https://www.matterhornparadise.ch/   {'lat': '45.9845', 'long': '7.7481'}   
2        https://www.les3vallees.com/     {'lat': '45.295', 'long': '6.580'}   
3            https://www.chamonix.com   {'lat': '45.9237', 'long': '6.8694'}   
4   https://www.zellamsee-kaprun.com/  {'lat': '47.3231', 'long': '12.7768'}   

  location_country       location_region  elevation_top_m  \
0      Switzerland                Valais           3330

# Merging

In [61]:
# Work on copies so the frames loaded above stay untouched.
ski, rank = ski_df.copy(), rank_df.copy()

# 'location_coordinate' stores each position as the text of a dict literal,
# e.g. "{'lat': '46.1013', 'long': '7.2263'}"; turn that text into a real dict ...
rank_coords = rank['location_coordinate'].map(ast.literal_eval)

# ... and expose both values as float columns, named like the ones in `ski`.
rank['Latitude'] = rank_coords.map(lambda coord: float(coord['lat']))
rank['Longitude'] = rank_coords.map(lambda coord: float(coord['long']))

In [62]:
# Coordinates of both tables as (n, 2) arrays of [latitude, longitude].
rank_coords = rank[['Latitude', 'Longitude']].to_numpy()
ski_coords = ski[['Latitude', 'Longitude']].to_numpy()

# Index the ranked resorts, then look up the single closest one for every row of `ski`.
# NOTE(review): the tree measures plain Euclidean distance on raw degrees, so the
# result is in degrees and is not a true ground distance (a degree of longitude
# shrinks towards the poles).
tree = cKDTree(rank_coords)
distances, indices = tree.query(ski_coords, k=1)

# Store the match on `ski`: the row position in `rank` and how far away it is,
# so that poor matches can be filtered out later.
ski['rank_index'] = indices
ski['distance'] = distances

In [90]:
# Attach the matched ranking row to each resort. 'rank_index' is the row position
# in `rank` chosen by the nearest-neighbour search; overlapping column names get
# the suffixes '_ski' / '_rank'.
merged = ski.merge(rank, left_on='rank_index', right_index=True, suffixes=('_ski', '_rank'))

# Drop rows whose nearest ranked resort is too far away to be a plausible match.
# 'distance' is in degrees of latitude/longitude (1 degree of latitude is about
# 111 km), so a cut-off of 2 is very permissive; a tight "same resort" match would
# be closer to 0.05 degrees (~5 km). The value is kept as it was and only given a name.
MAX_MATCH_DISTANCE_DEG = 2
merged = merged[merged['distance'] < MAX_MATCH_DISTANCE_DEG]

In [72]:
# Sanity check: (rows, columns) of `ski`, which now includes the two match
# columns 'rank_index' and 'distance'.
ski.shape

(499, 27)

In [91]:
# Sanity check: (rows, columns) of the merged frame after the distance filter.
merged.shape

(441, 43)

In [66]:
# Matched resorts whose ticket price is recorded as 0.
# NOTE(review): 0 is presumably a placeholder for a missing price — confirm.
merged.loc[merged['Price'].eq(0)]

Unnamed: 0,ID,Resort,Latitude_ski,Longitude_ski,Country,Continent,Price,Season,Highest point,Lowest point,Beginner slopes,Intermediate slopes,Difficult slopes,Total slopes,Longest run,Snow cannons,Surface lifts,Chair lifts,Gondola lifts,Total lifts,Lift capacity,Child friendly,Snowparks,Nightskiing,Summer skiing,rank_index,distance,rank,name,rating,url,location_coordinate,location_country,location_region,elevation_top_m,elevation_difference_m,total_slope_length_km,number_of_lifts,number_of_slopes,annual_snowfall_cm,number_of_matches,Latitude_rank,Longitude_rank
334,335,Pragelato,45.016217,6.942453,Italy,Europe,0,December - April,2700,1335,14,32,4,50,0,0,4,1,0,5,3660,Yes,No,No,No,3090,0.008784,3091,Pragelato,820.0,https://www.scinordicopragelato.it/sciovia-baby/#,"{'lat': '45.009210', 'long': '6.947750'}",Italy,Piedmont,1584.0,52.0,0.3,1.0,1.0,120.0,1,45.00921,6.94775


In [92]:
# Rows where the two sources disagree on the country: a cheap signal that the
# nearest-neighbour match may have picked a different resort.
country_mismatch = merged['Country'].ne(merged['location_country'])
diff = merged.loc[country_mismatch]

# Leave out the 'United States' rows.
# NOTE(review): presumably the ranking table spells that country differently, so
# every US row would show up here — confirm.
diff.loc[diff['Country'].ne('United States')]

Unnamed: 0,ID,Resort,Latitude_ski,Longitude_ski,Country,Continent,Price,Season,Highest point,Lowest point,Beginner slopes,Intermediate slopes,Difficult slopes,Total slopes,Longest run,Snow cannons,Surface lifts,Chair lifts,Gondola lifts,Total lifts,Lift capacity,Child friendly,Snowparks,Nightskiing,Summer skiing,rank_index,distance,rank,name,rating,url,location_coordinate,location_country,location_region,elevation_top_m,elevation_difference_m,total_slope_length_km,number_of_lifts,number_of_slopes,annual_snowfall_cm,number_of_matches,Latitude_rank,Longitude_rank
9,10,Rossfeld - Berchtesgaden - Oberau,47.651306,13.058977,Germany,Europe,20,December - April,1554,1300,7,1,0,8,6,0,4,0,0,4,3229,Yes,Yes,Yes,No,1155,0.029117,1156,Zinkenlifte,901.1,https://www.duerrnberg.at/,"{'lat': '47.6350', 'long': '13.0831'}",Austria,Hallein,1321.0,484.0,8.0,3.0,3.0,200.0,1,47.635,13.0831
60,61,Avoriaz (Les Portes du Soleil),46.192544,6.770286,France,Europe,51,December - April,2466,1000,310,210,60,580,0,1074,89,74,11,174,252280,Yes,Yes,Yes,No,9,0.017626,10,Les Portes du Soleil,1148.4,https://www.portesdusoleil.com/,"{'lat': '46.2090', 'long': '6.7766'}",Switzerland,Valais,2254.0,1269.0,580.0,165.0,53.0,700.0,1,46.209,6.7766
83,84,Kranjska Gora,46.485132,13.784396,Slovenia,Europe,33,December - April,1215,800,10,8,2,20,2,0,15,5,0,20,17774,Yes,Yes,Yes,No,614,0.099009,615,Arnoldstein – Dreiländereck,943.5,http://www.3laendereck.at,"{'lat': '46.5475', 'long': '13.7075'}",Austria,Villach-Land,1550.0,870.0,15.0,7.0,3.0,250.0,1,46.5475,13.7075
161,162,La Molina-?Masella-Alp2500,42.342739,1.956206,Spain,Europe,45,November - April,2531,1616,76,47,18,141,0,1010,19,13,1,33,39750,Yes,Yes,Yes,No,1085,0.114616,1086,Puigmal,905.7,https://www.puigmal2900.com/,"{'lat': '42.3958', 'long': '2.0578'}",France,Pyrénées-Orientales,2545.0,710.0,20.0,4.0,10.0,150.0,1,42.3958,2.0578
354,355,Mariborsko Pohorje-Maribor,46.515339,15.578231,Slovenia,Europe,31,December - March,1327,325,23,13,5,41,0,92,16,5,1,22,23436,Yes,No,Yes,No,2947,0.54939,2948,Gedersberg,827.0,https://www.gemeindekurier.at/,"{'lat': '47.0300', 'long': '15.3860'}",Austria,Graz-Umgebung,403.0,25.0,0.3,1.0,1.0,50.0,1,47.03,15.386
379,380,S?pindleru?v Mly?n,50.725645,15.606757,Czech Republic,Europe,34,December - April,1235,702,8,13,2,23,0,0,17,6,0,23,21638,Yes,Yes,Yes,No,2930,0.865735,2931,Johannisstein,827.5,https://oybin.com/bewegen-entspannen/wintersport/,"{'lat': '50.8500', 'long': '14.7500'}",Germany,Görlitz,590.0,40.0,0.3,1.0,1.0,50.0,1,50.85,14.75
418,419,Malbun,47.102619,9.608307,Liechtenstein,Europe,42,December - April,2000,1600,11,9,3,23,4,16,6,3,0,9,8800,Yes,Yes,No,No,1506,0.120012,1507,Gurtis/Bazora,881.9,https://www.gurtis.info/,"{'lat': '47.2200', 'long': '9.6333'}",Austria,Feldkirch,1390.0,470.0,5.0,4.0,3.0,150.0,1,47.22,9.6333
437,438,Kobla-Bohinj,46.233862,13.965064,Slovenia,Europe,22,December - March,1480,540,12,10,1,23,0,30,3,3,0,6,5720,Yes,No,No,No,614,0.405843,615,Arnoldstein – Dreiländereck,943.5,http://www.3laendereck.at,"{'lat': '46.5475', 'long': '13.7075'}",Austria,Villach-Land,1550.0,870.0,15.0,7.0,3.0,250.0,1,46.5475,13.7075
486,487,Kanin-Sella Nevea-?Bovec,46.364971,13.481817,Slovenia,Europe,35,December - April,1103,460,4,6,0,10,0,0,3,4,4,11,11670,Yes,No,No,No,448,0.178805,449,Tarvisio,966.6,https://www.turismofvg.it/Montagna/Home,"{'lat': '46.5110', 'long': '13.5850'}",Italy,Friuli-Venezia Giulia,1752.0,998.0,23.0,13.0,3.0,250.0,1,46.511,13.585
494,495,Puigmal,42.395007,2.108883,France,Europe,0,Unknown,2700,1830,9,15,7,31,0,0,11,2,0,13,11865,Yes,No,No,No,1288,0.049518,1289,Vall de Núria,893.4,http://www.valldenuria.cat/,"{'lat': '42.3946', 'long': '2.1584'}",Spain,Province of Girona,2260.0,296.0,7.6,5.0,8.0,300.0,1,42.3946,2.1584
