### This notebook demonstrates how the merging algorithm included in the GT-607 Toolbox works. The algorithm matches the records of the subaquatic probe and the drone based on their dates and the distance between them.
The code is fed .csv files and uses the coordinate and date-time data in those files to merge points registered by the drone and by the probe. First, the 
cartesian product of the records that share the same date in the two data sets is computed. Then the distance between the two points of each merged pair is calculated using the haversine formula, which determines the great-circle distance between two points on a sphere given their longitudes and latitudes.
Finally, pairs whose points are more than 100 meters apart are filtered out. 

In [2]:
import pandas as pd
import numpy as np
import math
import configparser

In [None]:
# Load the path configuration from the INI-style file 'paths.txt', which is
# looked up relative to the current working directory.
paths = configparser.ConfigParser()
# The context manager closes the file handle as soon as parsing is done
# (a bare open() would leave it open until garbage collection).
with open(r'paths.txt') as paths_file:
    paths.read_file(paths_file)
# Value of the 'field' option in the [paths] section.
field = paths.get('paths', 'field')
# List of the components of `field` separated by backslashes.
# NOTE(review): presumably `field` is a Windows-style path - confirm.
field_name = field.split('\\')

### Haversine formula. Receives latitude and longitude of two points as parameters.

In [None]:
def haversine_meters(lat1, long1, lat2, long2):
    """Return the great-circle distance in meters between two points.

    The distance is computed with the haversine formula on a sphere with a
    radius of 6371 km.

    Parameters
    ----------
    lat1, long1 : float or array-like
        Latitude and longitude of the first point, in decimal degrees.
    lat2, long2 : float or array-like
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float or array
        Distance in meters. Scalar inputs give a scalar; array-like inputs
        are processed element-wise (with NumPy broadcasting) and give an
        array of distances.
    """
    earth_radius_m = 6371 * 1000

    # Differences are taken in degrees first and converted to radians after.
    d_lat = np.radians(np.subtract(lat2, lat1))
    d_long = np.radians(np.subtract(long2, long1))

    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)

    a = (np.sin(d_lat / 2) ** 2
         + np.sin(d_long / 2) ** 2 * np.cos(lat1_rad) * np.cos(lat2_rad))
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g.
    # for nearly antipodal points), which would make sqrt(1 - a) NaN.
    a = np.clip(a, 0.0, 1.0)

    # np.arctan2 (unlike math.atan2) also works element-wise on arrays.
    central_angle = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return central_angle * earth_radius_m

### Data cleaning and cartesian product. 

In [3]:
def data_merger(medians, metadata, probe, merge):
    """Join drone field data with probe measurements taken on the same date.

    Parameters
    ----------
    medians
        Source passed to ``pd.read_csv``; its columns are appended to the
        metadata columns.
    metadata
        Source passed to ``pd.read_csv``. It must contain a ``DateTime``
        column; ``Latitude`` / ``Longitude`` columns are renamed to
        ``lat_d`` / ``long_d``.
    probe
        Source passed to ``pd.read_excel``. It must contain a ``date``
        column; ``lat`` / ``lon`` columns are renamed to ``lat_p`` /
        ``long_p``.
    merge
        Not used in the statements shown here.
        NOTE(review): presumably an output path - confirm.

    Notes
    -----
    The statements shown here build ``merged_data`` but do not return it.
    NOTE(review): confirm whether the function is meant to continue with
    the distance computation that follows it in the notebook.
    """
    medians = pd.read_csv(medians)
    # NOTE(review): `infer_datetime_format` is deprecated in pandas >= 2.0 - confirm the pandas version in use.
    metadata = pd.read_csv(metadata, parse_dates=['DateTime'], infer_datetime_format= True)
    # Column-wise concatenation: rows of the two files are paired by index.
    field_data = pd.concat([metadata, medians], axis=1)
    # Parse 'DateTime' with a colon-separated date layout.
    # NOTE(review): presumably covers values read_csv left unparsed - confirm.
    field_data['DateTime'] = pd.to_datetime(field_data['DateTime'], format = '%Y:%m:%d %H:%M:%S')
    # Drop the time of day, keeping only the calendar date.
    field_data['DateTime'] = field_data['DateTime'].dt.date
    field_data = field_data.rename(columns ={'Latitude':'lat_d', 'Longitude':'long_d'})
    # Convert the date objects back to pandas datetimes (time of day is now midnight).
    field_data.DateTime = pd.to_datetime(field_data.DateTime)
    print(field_data.DateTime.unique())

    # Preparing probe data
    probe = pd.read_excel(probe)
    
    # The format below is year-DAY-month. Works for the Pampulha new general table.
    probe['date'] = pd.to_datetime(probe['date'], format = '%Y-%d-%m %H:%M:%S') #Funciona para a pampulha new general table
    
    # Alternative format that works for the Tres Marias general table:
    #probe['date'] = pd.to_datetime(probe['date'], format='%d-%m-%Y') #funciona para tres marias general table


    # Alternative (non-exact matching) that works for "pamps" (presumably Pampulha):
    #probe['date'] = pd.to_datetime(probe['date'], format = '%Y-%d-%m %H:%M:%S', exact=False) # funciona para pamps

    probe = probe.rename(columns={'lat':'lat_p', 'lon':'long_p'})
    # 1-based identifier for each probe row, derived from the row index.
    probe['id'] = probe.index + 1

    # Merging data based on date: every drone row is paired with every probe
    # row that has an equal key (pd.merge defaults to an inner join).
    # NOTE(review): probe['date'] is not truncated to the day here, so rows
    # only match when the probe timestamp is exactly midnight - confirm that
    # the probe dates carry no time of day.
    merged_data = pd.merge(left = field_data, left_on = 'DateTime', right = probe, 
                            right_on = 'date') #Dataframes merged on same date

# NOTE(review): the visible code only assigns `merged_data` inside
# data_merger; confirm that the name is available at this indentation level.
# Distance between the probe point and the drone point of every merged row.
coordinate_pairs = zip(merged_data.lat_p, merged_data.long_p,
                       merged_data.lat_d, merged_data.long_d)
merged_data['Distance'] = [
    haversine_meters(lat1=probe_lat, long1=probe_long,
                     lat2=drone_lat, long2=drone_long)
    for probe_lat, probe_long, drone_lat, drone_long in coordinate_pairs
]
merged_data['Distance'] = merged_data['Distance'].round(decimals=3)
# Keep only the pairs whose rounded distance is at most 100 (meters).
merge_mask = merged_data[(merged_data.Distance <= 100)]