In [1]:
pip install rioxarray rasterio pystac_client planetary_computer

Collecting rioxarray
  Downloading rioxarray-0.18.2-py3-none-any.whl.metadata (5.4 kB)
Collecting rasterio
  Downloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting pystac_client
  Downloading pystac_client-0.8.6-py3-none-any.whl.metadata (3.0 kB)
Collecting planetary_computer
  Downloading planetary_computer-1.0.0-py3-none-any.whl.metadata (7.4 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting pystac>=1.10.0 (from pystac[validation]>=1.10.0->pystac_client)
  Downloading pystac-1.12.2-py3-none-any.whl.metadata (4.6 kB)
Collecting python-dotenv (from planetary_computer)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading rioxarray-0.18.2-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rasterio-1.4.3-cp310-cp310-manylinux_2_17_x86_

In [2]:
# Supress Warnings
import warnings
warnings.filterwarnings('ignore')
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
# Data Science
import numpy as np
import pandas as pd
# Multi-dimensional arrays and datasets
import xarray as xr
# Geospatial raster data handling
import rioxarray as rxr
# Geospatial data analysis
import geopandas as gpd
# Geospatial operations
import rasterio
from rasterio import windows
from rasterio import features
from rasterio import warp
from rasterio.warp import transform_bounds
from rasterio.windows import from_bounds
# Image Processing
from PIL import Image
# Coordinate transformations
from pyproj import Proj, Transformer, CRS
# Feature Engineering
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Machine Learning
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import ExtraTreesRegressor
import xgboost as xgb
from sklearn.metrics import r2_score
# Planetary Computer Tools
import pystac_client
import planetary_computer as pc
from pystac.extensions.eo import EOExtension as eo
# Others
import os
from tqdm import tqdm

In [51]:
ground_df = pd.read_csv("/kaggle/input/ey-challenge/Training_data_uhi_index_2025-02-18.csv")
ground_df.head()

Unnamed: 0,Longitude,Latitude,datetime,UHI Index
0,-73.909167,40.813107,24-07-2021 15:53,1.030289
1,-73.909187,40.813045,24-07-2021 15:53,1.030289
2,-73.909215,40.812978,24-07-2021 15:53,1.023798
3,-73.909242,40.812908,24-07-2021 15:53,1.023798
4,-73.909257,40.812845,24-07-2021 15:53,1.021634


In [52]:
def map_satellite_data(tiff_path, csv_path):
    data = rxr.open_rasterio(tiff_path)
    tiff_crs = data.rio.crs
    df = pd.read_csv(csv_path)
    latitudes = df['Latitude'].values
    longitudes = df['Longitude'].values
    
    proj_wgs84 = Proj(init='epsg:4326')
    proj_tiff = Proj(tiff_crs)
    
    transformer = Transformer.from_proj(proj_wgs84, proj_tiff)
    B01_values = []
    B02_values = []
    B03_values = []
    B04_values = []
    B05_values = []
    B06_values = []
    B07_values = []
    B08_values = []
    B8A_values = []
    B11_values = []
    B12_values = []
    
    for lat, lon in tqdm(zip(latitudes, longitudes), total=len(latitudes), desc="Mapping values"):
        B01_value = data.sel(x=lon, y=lat, band=1, method="nearest").values
        B01_values.append(B01_value)
        
        B02_value = data.sel(x=lon, y=lat, band=2, method="nearest").values
        B02_values.append(B02_value)
        
        B03_value = data.sel(x=lon, y=lat, band=3, method="nearest").values
        B03_values.append(B03_value)
        
        B04_value = data.sel(x=lon, y=lat, band=4, method="nearest").values
        B04_values.append(B04_value)
        
        B05_value = data.sel(x=lon, y=lat, band=5, method="nearest").values
        B05_values.append(B05_value)
        
        B06_value = data.sel(x=lon, y=lat, band=6, method="nearest").values
        B06_values.append(B06_value)
        
        B07_value = data.sel(x=lon, y=lat, band=7, method="nearest").values
        B07_values.append(B07_value)
        
        B08_value = data.sel(x=lon, y=lat, band=8, method="nearest").values
        B08_values.append(B08_value)
        
        B8A_value = data.sel(x=lon, y=lat, band=8, method="nearest").values
        B8A_values.append(B8A_value)
        
        B11_value = data.sel(x=lon, y=lat, band=11, method="nearest").values
        B11_values.append(B11_value)
        
        B12_value = data.sel(x=lon, y=lat, band=12, method="nearest").values
        B12_values.append(B12_value)
        
    df = pd.DataFrame()
    df['B01'] = B01_values
    df['B02'] = B02_values
    df['B03'] = B03_values
    df['B04'] = B04_values
    df['B05'] = B05_values
    df['B06'] = B06_values
    df['B07'] = B07_values
    df['B08'] = B08_values
    df['B8A'] = B8A_values
    df['B11'] = B11_values
    df['B12'] = B12_values
    return df

In [53]:
final_data = map_satellite_data('/kaggle/input/ey-challenge/S2_sample.tiff', '/kaggle/input/ey-challenge/Training_data_uhi_index_2025-02-18.csv')
final_data

Mapping values: 100%|██████████| 11229/11229 [02:59<00:00, 62.72it/s]


Unnamed: 0,B01,B02,B03,B04,B05,B06,B07,B08,B8A,B11,B12
0,846.0,1042.0,1036.0,1036.0,1272.0,1502.0,1605.0,1906.0,1906.0,1265.0,1206.0
1,846.0,1042.0,1036.0,1036.0,1272.0,1502.0,1605.0,1906.0,1906.0,1265.0,1206.0
2,846.0,583.0,818.0,709.0,1054.0,1668.0,2097.0,2190.0,2190.0,991.0,777.0
3,846.0,581.0,733.0,657.0,1054.0,1668.0,2097.0,2182.0,2182.0,991.0,741.5
4,846.0,655.0,744.0,745.0,1021.0,1728.0,1943.0,2112.0,2112.0,1134.0,708.5
...,...,...,...,...,...,...,...,...,...,...,...
11224,481.0,473.0,708.0,528.0,990.0,2382.0,2494.0,3284.0,3284.0,1079.0,501.0
11225,481.0,540.0,742.0,610.0,990.0,2382.0,2494.0,2900.0,2900.0,1079.0,551.5
11226,481.0,540.0,742.0,610.0,990.0,2382.0,2494.0,2900.0,2900.0,1079.0,551.5
11227,481.0,540.0,742.0,610.0,990.0,2382.0,2494.0,2900.0,2900.0,1079.0,551.5


In [54]:
final_data['NDVI'] = (final_data['B08'] - final_data['B04']) / (final_data['B08'] + final_data['B04'])
final_data['NDVI'] = final_data['NDVI'].replace([np.inf, -np.inf], np.nan)

final_data['NDBI'] = (final_data['B11'] - final_data['B08']) / (final_data['B11'] + final_data['B08'])
final_data['NDBI'] = final_data['NDBI'].replace([np.inf, -np.inf], np.nan)

final_data['NDWI'] = (final_data['B03'] - final_data['B08']) / (final_data['B03'] + final_data['B08'])
final_data['NDWI'] = final_data['NDWI'].replace([np.inf, -np.inf], np.nan)

In [55]:
wind_df_bronx = pd.read_excel('/kaggle/input/ey-challenge/NY_Mesonet_Weather.xlsx', sheet_name='Bronx')
wind_df_bronx['Date / Time'] = pd.to_datetime(wind_df_bronx['Date / Time'])
wind_df_bronx['Time'] = wind_df_bronx['Date / Time'].dt.strftime('%H:%M')
wind_df_bronx_filtered = wind_df_bronx[(wind_df_bronx['Time'] >= '15:00') & (wind_df_bronx['Time'] <= '16:00')]
wind_df_bronx_filtered

Unnamed: 0,Date / Time,Air Temp at Surface [degC],Relative Humidity [percent],Avg Wind Speed [m/s],Wind Direction [degrees],Solar Flux [W/m^2],Time
108,2021-07-24 15:00:00,28.0,40.3,3.0,75,725,15:00
109,2021-07-24 15:05:00,28.1,40.2,1.7,92,558,15:05
110,2021-07-24 15:10:00,28.3,40.3,2.9,91,216,15:10
111,2021-07-24 15:15:00,28.0,40.7,3.1,114,236,15:15
112,2021-07-24 15:20:00,27.9,41.8,2.8,105,229,15:20
113,2021-07-24 15:25:00,27.3,44.4,3.7,162,511,15:25
114,2021-07-24 15:30:00,27.1,47.3,4.5,170,563,15:30
115,2021-07-24 15:35:00,26.9,47.7,3.5,149,292,15:35
116,2021-07-24 15:40:00,26.9,48.3,3.0,166,371,15:40
117,2021-07-24 15:45:00,27.3,47.4,3.5,146,646,15:45


In [56]:
wind_df_manhattan = pd.read_excel('/kaggle/input/ey-challenge/NY_Mesonet_Weather.xlsx', sheet_name='Manhattan')
wind_df_manhattan['Date / Time'] = pd.to_datetime(wind_df_manhattan['Date / Time'])
wind_df_manhattan['Time'] = wind_df_manhattan['Date / Time'].dt.strftime('%H:%M')
wind_df_manhattan_filtered = wind_df_manhattan[(wind_df_manhattan['Time'] >= '15:00') & (wind_df_manhattan['Time'] <= '16:00')]
wind_df_manhattan_filtered

Unnamed: 0,Date / Time,Air Temp at Surface [degC],Relative Humidity [percent],Avg Wind Speed [m/s],Wind Direction [degrees],Solar Flux [W/m^2],Time
108,2021-07-24 15:00:00,26.1,51.1,4.1,139,140,15:00
109,2021-07-24 15:05:00,26.3,51.1,2.5,161,128,15:05
110,2021-07-24 15:10:00,26.3,50.9,3.0,158,219,15:10
111,2021-07-24 15:15:00,26.6,50.5,3.1,154,584,15:15
112,2021-07-24 15:20:00,26.7,49.7,2.0,132,448,15:20
113,2021-07-24 15:25:00,27.2,46.4,1.4,175,725,15:25
114,2021-07-24 15:30:00,27.3,45.4,3.8,202,349,15:30
115,2021-07-24 15:35:00,26.8,47.6,2.4,209,511,15:35
116,2021-07-24 15:40:00,27.0,47.2,3.2,142,658,15:40
117,2021-07-24 15:45:00,27.1,47.6,3.2,163,565,15:45


In [57]:
def euclidean_distance(lat1, lon1, lat2, lon2):
    return np.sqrt((lat2 - lat1) ** 2 + (lon2 - lon1) ** 2)

def get_interpolated_wind_value(datetime, wind_df, name):
    datetime = pd.to_datetime(datetime)
    before_time = wind_df[wind_df['Date / Time'] <= datetime].iloc[-1]
    after_time = wind_df[wind_df['Date / Time'] > datetime].iloc[0]
    time_diff_before = (datetime - before_time['Date / Time']).total_seconds()
    time_diff_after = (after_time['Date / Time'] - datetime).total_seconds()
    total_time_diff = time_diff_before + time_diff_after
    interpolated_wind_value = (before_time[name] * time_diff_after + after_time[name] * time_diff_before) / total_time_diff
    return interpolated_wind_value

bronx_latitude, bronx_longitude = 40.87248, -73.89352
manhattan_latitude, manhattan_longitude = 40.76754, -73.96449

for name in ['Air Temp at Surface [degC]', 'Relative Humidity [percent]', 'Avg Wind Speed [m/s]', 'Wind Direction [degrees]', 'Solar Flux [W/m^2]']:
    ground_df[name] = ground_df.apply(
        lambda row: (
            # (1 / (euclidean_distance(row['Latitude'], row['Longitude'], bronx_latitude, bronx_longitude))) * get_interpolated_wind_value(row['datetime'], wind_df_bronx_filtered, name) +
            # (1 / (euclidean_distance(row['Latitude'], row['Longitude'], manhattan_latitude, manhattan_longitude))) * get_interpolated_wind_value(row['datetime'], wind_df_manhattan_filtered, name)
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], bronx_latitude, bronx_longitude))) * wind_df_bronx_filtered[name].mean() +
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], manhattan_latitude, manhattan_longitude))) * wind_df_manhattan_filtered[name].mean()
        ) / (
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], bronx_latitude, bronx_longitude))) +
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], manhattan_latitude, manhattan_longitude)))
        ), axis=1
    )

In [58]:
ground_df = ground_df.rename(columns={
    'Air Temp at Surface [degC]' : 'air', 
    'Relative Humidity [percent]' : 'humidity', 
    'Avg Wind Speed [m/s]' : 'wind', 
    'Wind Direction [degrees]' : 'dir', 
    'Solar Flux [W/m^2]' : 'solar'
})
ground_df.head()

Unnamed: 0,Longitude,Latitude,datetime,UHI Index,air,humidity,wind,dir,solar
0,-73.909167,40.813107,24-07-2021 15:53,1.030289,27.151579,46.340323,3.029015,151.687438,443.527262
1,-73.909187,40.813045,24-07-2021 15:53,1.030289,27.151246,46.34201,3.028945,151.703688,443.525144
2,-73.909215,40.812978,24-07-2021 15:53,1.023798,27.150867,46.343927,3.028867,151.722148,443.522738
3,-73.909242,40.812908,24-07-2021 15:53,1.023798,27.150478,46.345897,3.028786,151.741114,443.520266
4,-73.909257,40.812845,24-07-2021 15:53,1.021634,27.150151,46.347551,3.028717,151.757046,443.518189


In [60]:
uhi_data = pd.concat([ground_df,final_data], axis=1)
uhi_data.head()

Unnamed: 0,Longitude,Latitude,datetime,UHI Index,air,humidity,wind,dir,solar,B01,...,B05,B06,B07,B08,B8A,B11,B12,NDVI,NDBI,NDWI
0,-73.909167,40.813107,24-07-2021 15:53,1.030289,27.151579,46.340323,3.029015,151.687438,443.527262,846.0,...,1272.0,1502.0,1605.0,1906.0,1906.0,1265.0,1206.0,0.295717,-0.202144,-0.295717
1,-73.909187,40.813045,24-07-2021 15:53,1.030289,27.151246,46.34201,3.028945,151.703688,443.525144,846.0,...,1272.0,1502.0,1605.0,1906.0,1906.0,1265.0,1206.0,0.295717,-0.202144,-0.295717
2,-73.909215,40.812978,24-07-2021 15:53,1.023798,27.150867,46.343927,3.028867,151.722148,443.522738,846.0,...,1054.0,1668.0,2097.0,2190.0,2190.0,991.0,777.0,0.510866,-0.376925,-0.456117
3,-73.909242,40.812908,24-07-2021 15:53,1.023798,27.150478,46.345897,3.028786,151.741114,443.520266,846.0,...,1054.0,1668.0,2097.0,2182.0,2182.0,991.0,741.5,0.537161,-0.375355,-0.497084
4,-73.909257,40.812845,24-07-2021 15:53,1.021634,27.150151,46.347551,3.028717,151.757046,443.518189,846.0,...,1021.0,1728.0,1943.0,2112.0,2112.0,1134.0,708.5,0.478474,-0.301294,-0.478992


In [None]:
# columns_to_check = ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B11", "B12", 'NDVI', 'NDBI', 'NDWI']
# for col in columns_to_check:
#     uhi_data[col] = uhi_data[col].apply(lambda x: tuple(x) if isinstance(x, np.ndarray) and x.ndim > 0 else x)

# uhi_data = uhi_data.drop_duplicates(subset=columns_to_check, keep='first')
# uhi_data = uhi_data.reset_index(drop=True)
# uhi_data

In [61]:
correlation_with_uhi = uhi_data.drop(columns=['datetime', 'Longitude', 'Latitude']).corr()['UHI Index']
correlation_with_uhi

UHI Index    1.000000
air          0.455829
humidity    -0.455829
wind         0.455829
dir         -0.455829
solar        0.455829
B01          0.193179
B02          0.163307
B03          0.168818
B04          0.175350
B05          0.170996
B06          0.117021
B07          0.094459
B08          0.082083
B8A          0.082083
B11          0.187564
B12          0.149961
NDVI        -0.254888
NDBI         0.187625
NDWI         0.250060
Name: UHI Index, dtype: float64

In [62]:
uhi_data = uhi_data[['air', 'humidity', 'wind', 'dir', 'solar', 'B01', 'B02', 'B03', 'B04', 'B05', 'B11', 'B12', 'NDVI', 'NDBI', 'NDWI', 'UHI Index']]

X = uhi_data.drop(columns=['UHI Index']).values
y = uhi_data ['UHI Index'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [63]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [64]:
# model = AdaBoostRegressor(n_estimators=50, random_state=42)
# model.fit(X_train, y_train)

# model = xgb.XGBRegressor(n_estimators=100, random_state=42)
# model.fit(X_train, y_train)

# model = BaggingRegressor(base_estimator=DecisionTreeRegressor(), n_estimators=50, random_state=42)
# model.fit(X_train, y_train)

model = ExtraTreesRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# model = RandomForestRegressor(n_estimators=100, random_state=42)
# model.fit(X_train, y_train)

In [65]:
insample_predictions = model.predict(X_train)
Y_train = y_train.tolist()
r2_score(Y_train, insample_predictions)

1.0

In [66]:
outsample_predictions = model.predict(X_test)
Y_test = y_test.tolist()
r2_score(Y_test, outsample_predictions)

0.8728443275286528

In [67]:
test_file = pd.read_csv('/kaggle/input/ey-challenge/Submission_template_UHI2025-v2.csv')
test_file.head()

Unnamed: 0,Longitude,Latitude,UHI Index
0,-73.971665,40.788763,
1,-73.971928,40.788875,
2,-73.96708,40.78908,
3,-73.97255,40.789082,
4,-73.969697,40.787953,


In [68]:
val_data = map_satellite_data('/kaggle/input/ey-challenge/S2_sample.tiff', '/kaggle/input/ey-challenge/Submission_template_UHI2025-v2.csv')

Mapping values: 100%|██████████| 1040/1040 [00:17<00:00, 59.28it/s]


In [69]:
val_data['NDVI'] = (val_data['B08'] - val_data['B04']) / (val_data['B08'] + val_data['B04'])
val_data['NDVI'] = val_data['NDVI'].replace([np.inf, -np.inf], np.nan)

val_data['NDBI'] = (val_data['B11'] - val_data['B08']) / (val_data['B11'] + val_data['B08'])
val_data['NDBI'] = val_data['NDBI'].replace([np.inf, -np.inf], np.nan)

val_data['NDWI'] = (val_data['B03'] - val_data['B08']) / (val_data['B03'] + val_data['B08'])
val_data['NDWI'] = val_data['NDWI'].replace([np.inf, -np.inf], np.nan)

In [70]:
for name in ['Air Temp at Surface [degC]', 'Relative Humidity [percent]', 'Avg Wind Speed [m/s]', 'Wind Direction [degrees]', 'Solar Flux [W/m^2]']:
    test_file[name] = test_file.apply(
        lambda row: (
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], bronx_latitude, bronx_longitude))) * wind_df_bronx_filtered[name].mean() +
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], manhattan_latitude, manhattan_longitude))) * wind_df_manhattan_filtered[name].mean()
        ) / (
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], bronx_latitude, bronx_longitude))) +
            (1 / (euclidean_distance(row['Latitude'], row['Longitude'], manhattan_latitude, manhattan_longitude)))
        ), axis=1
    )

test_file = test_file.rename(columns={
    'Air Temp at Surface [degC]' : 'air', 
    'Relative Humidity [percent]' : 'humidity', 
    'Avg Wind Speed [m/s]' : 'wind', 
    'Wind Direction [degrees]' : 'dir', 
    'Solar Flux [W/m^2]' : 'solar'
})
test_file.head()

Unnamed: 0,Longitude,Latitude,UHI Index,air,humidity,wind,dir,solar
0,-73.971665,40.788763,,26.874672,47.742166,2.971326,165.186669,441.767747
1,-73.971928,40.788875,,26.875446,47.738249,2.971487,165.14895,441.772663
2,-73.96708,40.78908,,26.874395,47.743566,2.971268,165.200155,441.765989
3,-73.97255,40.789082,,26.877017,47.730294,2.971814,165.072351,441.782647
4,-73.969697,40.787953,,26.869208,47.769827,2.970188,165.453031,441.733029


In [71]:
cal_data = pd.concat([test_file, val_data], axis=1)
cal_data.head()

Unnamed: 0,Longitude,Latitude,UHI Index,air,humidity,wind,dir,solar,B01,B02,...,B05,B06,B07,B08,B8A,B11,B12,NDVI,NDBI,NDWI
0,-73.971665,40.788763,,26.874672,47.742166,2.971326,165.186669,441.767747,811.0,459.0,...,984.0,2089.0,2405.0,2502.0,2502.0,893.0,523.5,0.705521,-0.473932,-0.60436
1,-73.971928,40.788875,,26.875446,47.738249,2.971487,165.14895,441.772663,1208.0,562.0,...,1112.0,2076.0,2248.0,2906.0,2906.0,1188.0,566.0,0.635801,-0.419638,-0.59802
2,-73.96708,40.78908,,26.874395,47.743566,2.971268,165.200155,441.765989,899.0,955.0,...,979.0,995.0,1158.0,1246.0,1246.0,763.0,1155.0,0.023829,-0.240418,-0.084421
3,-73.97255,40.789082,,26.877017,47.730294,2.971814,165.072351,441.782647,1193.0,1132.0,...,1866.0,1939.0,2076.0,1774.0,1774.0,2346.0,1189.0,0.079732,0.138835,-0.130656
4,-73.969697,40.787953,,26.869208,47.769827,2.970188,165.453031,441.733029,1097.0,1506.0,...,1294.0,2204.0,2411.0,2834.0,2834.0,1848.0,1293.5,0.253428,-0.210594,-0.266309


In [72]:
submission_val_data = cal_data.loc[:,['air', 'humidity', 'wind', 'dir', 'solar', 'B01', 'B02', 'B03', 'B04', 'B05', 'B11', 'B12', 'NDVI', 'NDBI', 'NDWI']]
submission_val_data = submission_val_data.values
transformed_submission_data = sc.transform(submission_val_data)

In [73]:
final_predictions = model.predict(transformed_submission_data)
final_prediction_series = pd.Series(final_predictions)

submission_df = pd.DataFrame({'Longitude':test_file['Longitude'].values, 'Latitude':test_file['Latitude'].values, 'UHI Index': final_prediction_series.values})
submission_df

Unnamed: 0,Longitude,Latitude,UHI Index
0,-73.971665,40.788763,0.963384
1,-73.971928,40.788875,0.963151
2,-73.967080,40.789080,0.981153
3,-73.972550,40.789082,0.972865
4,-73.969697,40.787953,0.960540
...,...,...,...
1035,-73.919388,40.813803,1.022336
1036,-73.931033,40.833178,1.042334
1037,-73.934647,40.854542,1.010815
1038,-73.917223,40.815413,1.024988


In [74]:
submission_df.to_csv("submission.csv",index = False)

In [59]:
# final_data.to_csv("B_data.csv",index = False)
# ground_df.to_csv("ground_data_ver2.csv",index = False)