# Feature engineering

**Purpose of script:**

Creating new features 

- In: dataframe_plain
- Out: dataframe_extended (with additional feature columns)

In [1]:
import xarray
import rasterio
import gemgis as gg
import pandas as pd
import numpy as np
from tqdm import tqdm
from astral.sun import sun
from astral import LocationInfo
import pyproj

import datetime as dt

from os import listdir
from os.path import isfile, join
import scipy

##### Relevant paths

In [2]:
path_dataframe_plain = r"../Data/combined/dataframe_plain/"
mw_path = r"../Data/microwave-rs/mw_interpolated/"
path_elevation =  r"../Data/elevation_data/gimpdem_1km_compressed.tif"
out_path = r"../Data/combined/dataframe_extended/"

## Features

##### Row and column

In [3]:
def add_row_and_col(df):
        # add row and column features:
        df['col'] = df.groupby("x").ngroup() # xshape 2663 
        df['row'] = df.groupby("y").ngroup(ascending=False) # yshape 1462
        return df

##### Date

In [4]:
def add_date_year(df, melt_date):
    date = pd.to_datetime(melt_date).date()
    df['date'] = date
    df['year'] = date.year
    return df

##### Aggregated/pooled values

In [5]:
from typing import Union
from typing import Tuple
from scipy.stats import mode
from scipy.signal import convolve2d

def get_window(image: np.ndarray, window_size: int, center: Tuple[int, int]) -> np.ndarray:
    top = max(center[0] - window_size // 2, 0)
    bottom = min(center[0] + window_size // 2 + 1, image.shape[0])
    left = max(center[1] - window_size // 2, 0)
    right = min(center[1] + window_size // 2 + 1, image.shape[1])
    window = image[top:bottom, left:right]
    return window


# need to fix? : only calculate if the middle value is not nan - else all nan columns around 1 and 0 are going to have a value.

def convolve(image, window_size, convolution_fn: Union['mean', 'min', 'max', 'sum']):
    image = image[0].values
    image[image == -1] = np.nan
    
    if convolution_fn == 'mean':
        kernel = np.ones((window_size, window_size))  # kernel for mean convolution
        result = np.zeros_like(image, dtype=np.float64)
        # Compute the sum and count of non-NaN values in the kernel window
        counts = convolve2d(~np.isnan(image), kernel, mode='same', boundary='fill', fillvalue=0)
        sums = convolve2d(np.nan_to_num(image), kernel, mode='same', boundary='fill', fillvalue=0)
        # Calculate the mean, ignoring NaN values
        result[counts > 0] = sums[counts > 0] / counts[counts > 0]
        # Set the output to NaN where all values in the kernel window are NaN
        result[counts == 0] = np.nan
        return result
        
    elif convolution_fn == 'max':
        result = np.zeros_like(image, dtype=np.float64)
        for i in range(image.shape[0]):
            for j in range(image.shape[1]):
                window = get_window(image, window_size, (i, j))
                non_nan_values = window[~np.isnan(window)]
                if len(non_nan_values) == 0:
                    result[i, j] = np.nan
                else:
                    result[i, j] = np.nanmax(non_nan_values)

    elif convolution_fn == 'min':
        result = np.zeros_like(image, dtype=np.float64)
        for i in range(image.shape[0]):
            for j in range(image.shape[1]):
                window = get_window(image, window_size, (i, j))
                non_nan_values = window[~np.isnan(window)]
                if len(non_nan_values) == 0:
                    result[i, j] = np.nan
                else:
                    result[i, j] = np.nanmin(non_nan_values)
        return result

    elif convolution_fn == 'sum':
        result = np.zeros_like(image, dtype=np.float64)
        for i in range(image.shape[0]):
            for j in range(image.shape[1]):
                window = get_window(image, window_size, (i, j))
                non_nan_values = window[~np.isnan(window)]
                if len(non_nan_values) == 0:
                    result[i, j] = np.nan
                else:
                    result[i, j] = np.nansum(non_nan_values)
        return result
        
    else: 
        print('not available function')
    return

In [6]:
def convolution_to_df(convolution_raster, column_name):
    nrows, ncols = convolution_raster.shape
    # create an array of x and y positions
    x = np.tile(np.arange(ncols), nrows)
    y = np.repeat(np.arange(nrows), ncols)
    # create a DataFrame with x, y, and pixel values as columns
    df = pd.DataFrame({'col': x, 'row': y, column_name: convolution_raster.flatten()})
    return df 

##### Elevation data

In [7]:
def add_elevation(data):
    df = data.to_dataframe()
    df = df.reset_index()
    df = df[['x', 'y', 'band_data']]
    df.rename({'band_data': 'elevation_data'}, axis=1, inplace=True)
    return df

##### Slope

Slope is given as degree of incline angle: 0 means flat (no slope == horizontal), 90 means (most possible slope == vertical)

In [8]:
def get_slope(data):
    slope = gg.raster.calculate_slope(data)
    nrows, ncols = slope.shape
    # create an array of x and y positions
    x = np.tile(np.arange(ncols), nrows)
    y = np.repeat(np.arange(nrows), ncols)
    # create a DataFrame with x, y, and pixel values as columns
    df_slope = pd.DataFrame({'col': x, 'row': y, 'slope_data': slope.flatten()})
    return df_slope

##### Aspect

Aspect is given as cosine radian: 0 and 360 degree = 1, 180 degree = -1

In [9]:
def get_aspect(data):
    aspect = gg.raster.calculate_aspect(data)
    nrows, ncols = aspect.shape
    # create an array of x and y positions
    x = np.tile(np.arange(ncols), nrows)
    y = np.repeat(np.arange(nrows), ncols)
    # create a DataFrame with x, y, and pixel values as columns
    df_aspect = pd.DataFrame({'col': x, 'row': y, 'aspect_data': aspect.flatten()})
    df_aspect["aspect_data"] = np.cos(df_aspect["aspect_data"] * np.pi / 180.)
    return df_aspect

##### Distance from margin/shore

In [10]:
def distance_to_margin():
    data_microwave = xarray.open_dataarray(mw_path + '2019-06-08_mw.tif') # any microwave file
    mw_val_masked = data_microwave[0].values
    mw_val_masked = np.copy(mw_val_masked)
    mw_val_masked[mw_val_masked==1]=0
    dist_in_pixels = scipy.ndimage.morphology.distance_transform_edt(mw_val_masked==0, return_distances= True)
    return dist_in_pixels

##### Array to DF

In [11]:
def array_to_df(convolution_raster, column_name):
    nrows, ncols = convolution_raster.shape
    # create an array of x and y positions
    x = np.tile(np.arange(ncols), nrows)
    y = np.repeat(np.arange(nrows), ncols)
    # create a DataFrame with x, y, and pixel values as columns
    df = pd.DataFrame({'col': x, 'row': y, column_name: convolution_raster.flatten()})
    return df 

##### Get solar duration

## Main:

In [12]:
def get_files(mw_path, path_dataframe_plain):
    # get plain files:
    df_plain_files = [f for f in listdir(path_dataframe_plain) if isfile(join(path_dataframe_plain, f))]
    # microwave files:
    mw_files = [f for f in listdir(mw_path) if isfile(join(mw_path, f))]
    return  mw_files, df_plain_files

In [13]:
def main(mw_files_list, df_plain_files_list, path_elevation, out_path, write = False):
    # get plain files:
    df_plain_files = df_plain_files_list
    # microwave files:
    mw_files = mw_files_list
    # load elevation data:
    data_elevation_xarray = xarray.open_dataarray(path_elevation)
    data_elevation_rasterio = rasterio.open(path_elevation)
    # calculate distance to margin:
    distance_margin = distance_to_margin()

    for df_file in df_plain_files:
        melt_date =  df_file[5:15]
        print(melt_date)
        for mw_file in mw_files:
            if mw_file.startswith(melt_date):
                data_mw = xarray.open_dataarray(mw_path + mw_file)
                df = pd.read_parquet(path_dataframe_plain + df_file)
                # add row and column features:
                df = add_row_and_col(df)
                # get convolutions:
                df_conv_mean_3 = array_to_df(convolve(data_mw, 3, 'mean'), 'mean_3')
                df_conv_mean_9 = array_to_df(convolve(data_mw, 9, 'mean'), 'mean_9')
                df_conv_sum_5 = array_to_df(convolve(data_mw, 5, 'sum'), 'sum_5')
                # merge convolution:
                df_combined = pd.merge(df, df_conv_mean_3, how = 'left', on = ['row', 'col'])
                df_combined = pd.merge(df_combined, df_conv_mean_9, how = 'left', on = ['row', 'col'])
                df_combined = pd.merge(df_combined, df_conv_sum_5, how = 'left', on = ['row', 'col'])
                # remove water in mw:
                df_combined = df_combined.loc[df_combined['mw_value'] != -1]
                # add date:
                df = add_date_year(df_combined, melt_date)
                # add and merge elevation data:
                df_elevation = add_elevation(data_elevation_xarray)
                df = pd.merge(df, df_elevation, how = 'left', on = ['y', 'x'])
                # get and merge slope data:
                df_slope = get_slope(data_elevation_rasterio)
                df = pd.merge(df, df_slope[["slope_data"]], how="left", right_index=True, left_index=True)
                # get and merge aspect data:
                df_aspect = get_aspect(data_elevation_rasterio)
                df = pd.merge(df, df_aspect[["aspect_data"]], how="left", right_index=True, left_index=True) 
                # add and merge distance to margin data:
                df_distance = array_to_df(distance_margin, 'distance_to_margin')
                df = pd.merge(df, df_distance, how = 'left', on = ['row', 'col'])
                
                # write to parquet:
                if write == True:
                    df.to_parquet(out_path + 'melt_'+ melt_date + '_extended.parquet.gzip', index= False)                    
    return df
                
                

Main

In [None]:
#main(mw_path, path_dataframe_plain, path_elevation, out_path)
main(*get_files(mw_path, path_dataframe_plain), path_elevation, out_path)# , write = True)

## Testing

### Testing Lina

In [15]:
df = main(['2019-06-08_mw.tif'], ['melt_2019-06-08.parquet.gzip'], path_elevation, out_path, write= True)  

2019-06-08


In [17]:
df

Unnamed: 0,x,y,mw_value,opt_value,col,row,mean_3,mean_9,sum_5,date,year,elevation_data,slope_data,aspect_data,distance_to_margin
0,-636500.00,-662500.00,0.00,-1.00,0,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00
1,-635500.00,-662500.00,0.00,-1.00,1,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00
2,-634500.00,-662500.00,0.00,-1.00,2,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00
3,-633500.00,-662500.00,0.00,-1.00,3,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00
4,-632500.00,-662500.00,0.00,-1.00,4,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278720,58500.00,-3324500.00,0.00,-1.00,695,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.15,0.93,5.00
2278721,59500.00,-3324500.00,0.00,-1.00,696,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.13,0.89,4.00
2278722,60500.00,-3324500.00,0.00,-1.00,697,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.06,0.89,3.00
2278723,61500.00,-3324500.00,0.00,-1.00,698,2662,0.00,0.00,0.00,2019-06-08,2019,45.00,0.06,-0.45,2.00


In [103]:
# # for testing values around the 0-1 change in the data:

# tt = data_mw.values
# # indices = np.where(tt == 1)
# tt[0][74:80, 622:628]

### Testing Nina

In [18]:
# Define the source and destination coordinate reference systems
src_crs = pyproj.CRS.from_epsg(3413)  # WGS84 (longitude, latitude)
dst_crs = pyproj.CRS.from_epsg(4326)  # Web Mercator (used by most online maps)

# Define the transformer object
transformer = pyproj.Transformer.from_crs(src_crs, dst_crs)
# Convert all coordinates at once
lats, longs = transformer.transform(df["x"], df["y"])
# Define the location object
locations = [LocationInfo(lat, lon) for lat, lon in zip(lats, longs)]
# Parse all dates at once
dates = pd.to_datetime(df["date"])

# Define a function to calculate solar duration for a single location and date
def get_solar_duration(location, date):
    s = sun(location.observer, date=date)
    return (s['sunset'] - s['sunrise']).seconds / 60

#obs = ephem.Observer()
#sun = ephem.Sun()
# Apply the function to all locations and dates using vectorized operations
df["solar_duration"] = [get_solar_duration(loc, date) for loc, date in zip(locations, dates)]
df

Unnamed: 0,x,y,mw_value,opt_value,col,row,mean_3,mean_9,sum_5,date,year,elevation_data,slope_data,aspect_data,distance_to_margin,solar_duration
0,-636500.00,-662500.00,0.00,-1.00,0,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
1,-635500.00,-662500.00,0.00,-1.00,1,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
2,-634500.00,-662500.00,0.00,-1.00,2,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
3,-633500.00,-662500.00,0.00,-1.00,3,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
4,-632500.00,-662500.00,0.00,-1.00,4,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278720,58500.00,-3324500.00,0.00,-1.00,695,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.15,0.93,5.00,988.72
2278721,59500.00,-3324500.00,0.00,-1.00,696,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.13,0.89,4.00,988.72
2278722,60500.00,-3324500.00,0.00,-1.00,697,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.06,0.89,3.00,988.72
2278723,61500.00,-3324500.00,0.00,-1.00,698,2662,0.00,0.00,0.00,2019-06-08,2019,45.00,0.06,-0.45,2.00,988.72


In [76]:
# Define the source and destination coordinate reference systems
src_crs = pyproj.CRS.from_epsg(3413)  # WGS84 (longitude, latitude)
dst_crs = pyproj.CRS.from_epsg(4326)  # Web Mercator (used by most online maps)

# Define the transformer object
transformer = pyproj.Transformer.from_crs(src_crs, dst_crs)


def calculate_solar_duration(x, y, date):

      # Transform a pair of coordinates
      lat, lon = transformer.transform(x, y)

      # Define the suntime object
      location = LocationInfo(lat, lon)
      print(lat, lon)
      
      # specify the date for which to calculate sunrise and sunset
      date = dt.datetime.strptime(date, "%Y-%m-%d").date()

      # calculate the sunrise and sunset times
      s = sun(location.observer, date=date)
      sunrise = s['sunrise'].strftime('%H:%M:%S')
      sunset = s['sunset'].strftime('%H:%M:%S')
      print(sunrise)
      print(sunset)

      # calculate the solar duration
      duration = s['sunset'] - s['sunrise']

      # print the results
      print(f"Solar Duration in minutes: {duration.seconds / 60}")

      return duration.seconds / 60

calculate_solar_duration(-636500.00		, -662500.00	, "2019-06-08")

81.53389147728227 -88.85335536270664


TypeError: 'Sun' object is not callable

In [25]:
import pytz
import ephem
import datetime as dt

obs = ephem.Observer()
obs.lat = 64.1743
obs.lon = 51.7373
obs.date = dt.datetime(2019, 6, 10, 0, 0)

sun = ephem.Sun()
sun.compute(obs)
sunrise = obs.next_rising(sun).datetime()
sunset = obs.next_setting(sun).datetime()

print(f"Sunrise: {sunrise}")
print(f"Sunset: {sunset}")
print(f"Duration: {sunset- sunrise}")

AlwaysUpError: 'Sun' is above the horizon at 2019/6/10 18:22:10

In [33]:
df

Unnamed: 0,x,y,mw_value,opt_value,col,row,mean_3,mean_9,sum_5,date,year,elevation_data,slope_data,aspect_data,distance_to_margin,solar_duration
0,-636500.00,-662500.00,0.00,-1.00,0,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
1,-635500.00,-662500.00,0.00,-1.00,1,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
2,-634500.00,-662500.00,0.00,-1.00,2,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
3,-633500.00,-662500.00,0.00,-1.00,3,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
4,-632500.00,-662500.00,0.00,-1.00,4,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278720,58500.00,-3324500.00,0.00,-1.00,695,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.15,0.93,5.00,988.72
2278721,59500.00,-3324500.00,0.00,-1.00,696,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.13,0.89,4.00,988.72
2278722,60500.00,-3324500.00,0.00,-1.00,697,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.06,0.89,3.00,988.72
2278723,61500.00,-3324500.00,0.00,-1.00,698,2662,0.00,0.00,0.00,2019-06-08,2019,45.00,0.06,-0.45,2.00,988.72


In [52]:
lat = -636500.00
lon = -662500.00	
date = "2019-06-08"
url = f'https://api.sunrise-sunset.org/json?lat={lat}&lng={lon}&date={date}'
response = requests.get(url)

result = response.json(
).get('results').get('day_length')

t = datetime.datetime.strptime(result, '%H:%M:%S').time()

# get second count from datetime.time object t
seconds = t.hour * 3600 + t.minute * 60 + t.second
seconds

39460

In [57]:
# Define the source and destination coordinate reference systems
src_crs = pyproj.CRS.from_epsg(3413)  # WGS84 (longitude, latitude)
dst_crs = pyproj.CRS.from_epsg(4326)  # Web Mercator (used by most online maps)

# Define the transformer object
transformer = pyproj.Transformer.from_crs(src_crs, dst_crs)

# Transform a pair of coordinates
lat, lon = transformer.transform(-636500.00, -662500.00	)

# Define the suntime object
location = LocationInfo(lat, lon)
print(lat, lon)

81.53389147728227 -88.85335536270664


In [66]:
lat = 64.1750292
lon = -51.7355388
date = "2019-06-21"
url = f'https://api.sunrise-sunset.org/json?lat={lat}&lng={lon}&date={date}'
response = requests.get(url)

result = response.json(
).get('results').get('day_length')

t = datetime.datetime.strptime(result, '%H:%M:%S').time()

# get second count from datetime.time object t
seconds = t.hour * 3600 + t.minute * 60 + t.second
seconds

t

datetime.time(21, 25, 18)

In [51]:
t

datetime.time(10, 57, 40)

In [38]:
import requests


def get_day_length(x):
    lat = x['y']
    lon = x['x']
    date = x['date']
    url = f'https://api.sunrise-sunset.org/json?lat={lat}&lng={lon}&date={date}'
    response = requests.get(url)

    result = response.json(
    ).get('results').get('day_length')
    return result

df["day_length"] = df[:100].apply(get_day_length, axis=1)
df

Unnamed: 0,x,y,mw_value,opt_value,col,row,mean_3,mean_9,sum_5,date,year,elevation_data,slope_data,aspect_data,distance_to_margin,solar_duration,day_length
0,-636500.00,-662500.00,0.00,-1.00,0,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72,18:02:49
1,-635500.00,-662500.00,0.00,-1.00,1,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72,16:59:27
2,-634500.00,-662500.00,0.00,-1.00,2,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72,16:01:36
3,-633500.00,-662500.00,0.00,-1.00,3,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72,15:07:15
4,-632500.00,-662500.00,0.00,-1.00,4,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,988.72,14:15:10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278720,58500.00,-3324500.00,0.00,-1.00,695,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.15,0.93,5.00,988.72,
2278721,59500.00,-3324500.00,0.00,-1.00,696,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.13,0.89,4.00,988.72,
2278722,60500.00,-3324500.00,0.00,-1.00,697,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.06,0.89,3.00,988.72,
2278723,61500.00,-3324500.00,0.00,-1.00,698,2662,0.00,0.00,0.00,2019-06-08,2019,45.00,0.06,-0.45,2.00,988.72,


In [54]:
# Define the source and destination coordinate reference systems
src_crs = pyproj.CRS.from_epsg(3413)  # WGS84 (longitude, latitude)
dst_crs = pyproj.CRS.from_epsg(4326)  # Web Mercator (used by most online maps)

# Define the transformer object
transformer = pyproj.Transformer.from_crs(src_crs, dst_crs)

# Transform a pair of coordinates
lat, lon = transformer.transform(df["x"], df["y"])

df["lat"] = lat
df["lon"] = lon

df

Unnamed: 0,x,y,mw_value,opt_value,col,row,mean_3,mean_9,sum_5,date,year,elevation_data,slope_data,aspect_data,distance_to_margin,lat,lon
0,-636500.00,-662500.00,0.00,-1.00,0,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,81.53,-88.85
1,-635500.00,-662500.00,0.00,-1.00,1,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,81.54,-88.81
2,-634500.00,-662500.00,0.00,-1.00,2,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,81.55,-88.76
3,-633500.00,-662500.00,0.00,-1.00,3,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,81.55,-88.72
4,-632500.00,-662500.00,0.00,-1.00,4,0,0.00,0.00,0.00,2019-06-08,2019,14.00,0.00,1.00,150.00,81.56,-88.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278720,58500.00,-3324500.00,0.00,-1.00,695,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.15,0.93,5.00,59.98,-43.99
2278721,59500.00,-3324500.00,0.00,-1.00,696,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.13,0.89,4.00,59.98,-43.97
2278722,60500.00,-3324500.00,0.00,-1.00,697,2662,0.00,0.00,0.00,2019-06-08,2019,44.00,0.06,0.89,3.00,59.98,-43.96
2278723,61500.00,-3324500.00,0.00,-1.00,698,2662,0.00,0.00,0.00,2019-06-08,2019,45.00,0.06,-0.45,2.00,59.98,-43.94


In [55]:
test = df.copy()


import concurrent.futures
import requests


def get_day_length(lat, lon, date):
    url = f'https://api.sunrise-sunset.org/json?lat={lat}&lng={lon}&date={date}'
    response = requests.get(url)
    result = response.json().get('results').get('day_length')
    return result


with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = []
    for i, row in test[:1000].iterrows():
        print(i)
        lat, lon, date = row['lat'], row['lon'], row['date']
        futures.append(executor.submit(get_day_length, lat, lon, date))

    for i, future in enumerate(concurrent.futures.as_completed(futures)):
        test.loc[test.index[i], 'day_length'] = future.result()

test

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [28]:
# Define the source and destination coordinate reference systems
src_crs = pyproj.CRS.from_epsg(3413)  # WGS84 (longitude, latitude)
dst_crs = pyproj.CRS.from_epsg(4326)  # Web Mercator (used by most online maps)

# Define the transformer object
transformer = pyproj.Transformer.from_crs(src_crs, dst_crs)

# Transform a pair of coordinates
lat, lon = transformer.transform(df["x"], df["y"])
(lat, lon)

(array([81.53389148, 81.54025102, 81.54660541, ..., 59.98367852,
        59.98352011, 59.98335911]),
 array([-88.85335536, -88.80834901, -88.76327473, ..., -43.95743358,
        -43.94020497, -43.92297656]))

In [25]:
lat[0]

81.53389147728227

In [52]:
import datetime  
from suntime import Sun

lattitude = lat[0]
longitude = lon[0]
print(lattitude, longitude)

s = Sun(lat=lattitude,lon=longitude) 

# create list of dates from 2019-06-01 to 2019-06-30
dates = [datetime.date(2019,4,1) + datetime.timedelta(days=x) for x in range(0, 30)]
for date in dates:
    print(s._calc_sun_time(date))
    sunrise = s.get_sunrise_time(date)
    sunset = s.get_sunset_time(date)
    print('sunrise at ',sunrise.strftime('%H:%M:%S'))
    print("sunset at" ,sunset.strftime('%H:%M:%S'))
    # calculate solar duration in minutes
    duration = sunset - sunrise
    print(f"Solar Duration in minutes: {duration.seconds / 60}")

    print()

81.53389147728227 -88.85335536270664
2019-04-01 09:24:00+00:00
sunrise at  09:24:00
sunset at 02:41:00
Solar Duration in minutes: 1037.0

2019-04-02 09:10:00+00:00
sunrise at  09:10:00
sunset at 02:55:00
Solar Duration in minutes: 1065.0

2019-04-03 08:55:00+00:00
sunrise at  08:55:00
sunset at 03:10:00
Solar Duration in minutes: 1095.0

2019-04-04 08:40:00+00:00
sunrise at  08:40:00
sunset at 03:26:00
Solar Duration in minutes: 1126.0

2019-04-05 08:23:00+00:00
sunrise at  08:23:00
sunset at 03:43:00
Solar Duration in minutes: 1160.0

2019-04-06 08:04:00+00:00
sunrise at  08:04:00
sunset at 04:02:00
Solar Duration in minutes: 1198.0

2019-04-07 07:42:00+00:00
sunrise at  07:42:00
sunset at 04:26:00
Solar Duration in minutes: 1244.0

2019-04-08 07:15:00+00:00
sunrise at  07:15:00
sunset at 04:57:00
Solar Duration in minutes: 1302.0

2019-04-09 06:33:00+00:00


SunTimeException: The sun never sets on this location (on the specified date)