## Group feature extraction

In [1]:
import movekit as mkit
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Relative path to the example fish dataset; the preview below shows it already
# contains movement features (distance, average_speed, ...).
path = "./datasets/fish-5-features.csv"
# Load the CSV through movekit's reader.
data = mkit.read_data(path)
# Preview the first rows.
data.head()

Unnamed: 0,time,animal_id,x,y,distance,average_speed,average_acceleration,direction,stopped
0,1,312,405.29,417.76,0.0,0.210217,-0.018039,0.0,1
1000,1,511,369.99,428.78,0.0,0.020944,0.000236,0.0,1
2000,1,607,390.33,405.89,0.0,0.070235,0.004961,0.0,1
3000,1,811,445.15,411.94,0.0,0.3705,0.017482,0.0,1
4000,1,905,366.06,451.76,0.0,0.118,-0.006333,0.0,1


### Detecting outliers
The function `outlier_detection` flags outlying records. The features to consider, the detection method and the expected share of outliers can all be chosen by the user.

In [3]:
# Run outlier detection with the library's default settings.
# NOTE(review): outputs of later cells that are passed `data` show an
# `outlier` column, which suggests this call adds the column to `data`
# itself -- confirm, and pass `data.copy()` if `data` should stay unchanged.
outs = mkit.outlier_detection(data)

# Keep only the rows that were flagged as outliers.
outs[outs["outlier"].eq(1)]

Unnamed: 0,time,animal_id,outlier,x,y,distance,average_speed,average_acceleration,direction,stopped
2479,480,607,1,60.63,401.17,2.418677,2.503849,0.07379,-97.125016,0
2481,482,607,1,60.32,396.22,2.522856,2.479927,0.056199,-92.726311,0
1877,878,511,1,511.8,69.58,4.623505,4.293873,0.241837,2.231175,0
1878,879,511,1,516.29,70.07,4.516658,4.303061,0.187692,6.228122,0
1881,882,511,1,528.46,72.19,4.0224,4.136429,0.040597,11.61722,0
1882,883,511,1,532.33,73.18,3.994621,4.036757,0.009188,14.349332,0
1982,983,511,1,651.33,170.35,1.787093,1.732611,0.15886,80.010593,0
1983,984,511,1,651.51,172.52,2.177453,2.115413,0.198432,85.25821,0
1984,985,511,1,651.65,175.29,2.773536,2.539209,0.259326,87.106646,0
1985,986,511,1,651.82,178.7,3.414235,2.95489,0.327106,87.145975,0


In [4]:
# same function, different parameters
other_outs = mkit.outlier_detection(dataset = data, features = ["average_speed", "average_acceleration"], contamination = 0.05, n_neighbors = 8, method = "median", metric = "euclidean")

# printing all rows where outliers are present
other_outs[other_outs.loc[:,"outlier"] == 1].head()

Unnamed: 0,time,animal_id,outlier,x,y,distance,average_speed,average_acceleration,direction,stopped
2324,325,607,1,126.9,410.58,1.05,1.296308,-0.224662,180.0,0
2325,326,607,1,126.04,410.56,0.860233,1.065177,-0.244628,-178.66778,0
2326,327,607,1,125.45,410.65,0.596825,0.876628,-0.2642,171.326826,0
2327,328,607,1,124.93,410.77,0.533667,0.738006,-0.26343,167.005383,0
365,366,312,1,257.86,403.82,2.462458,2.763925,-0.176182,177.439699,0


### Group-level Analysis

Below we perform analysis at the group level. This consists of:
- Group-level averages,
- Centroid and medoid computation,
- A dynamic time warping matrix,
- A clustering over time based on absolute features,
- The centroid direction,
- The heading difference of each animal with respect to the current centroid,
- The group polarization for each timestep.

#### Obtain group-level records for each point in time
Records consist of the total group distance, mean speed, mean acceleration and mean distance from the centroid. If the input doesn't contain centroid or feature data, it is calculated first and a warning is shown.
The parameter `object_output` produces a PostGIS-compatible point.

In [5]:
# Aggregate the records into group-level values; the output below is indexed by time.
group_data = mkit.group_movement(data)
# Preview the first timesteps of the group-level table.
group_data.head()



Unnamed: 0_level_0,total_dist,mean_speed,mean_acceleration,mean_distance_centroid
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.0,0.157979,-0.000339,29.4616
2,1.174908,0.157641,-0.000339,29.585
3,1.025155,0.15561,-0.000339,29.6914
4,0.91896,0.153579,-0.000339,29.7782
5,0.830461,0.153341,-0.000339,29.8518


#### Obtain centroid, medoid and distance to centroid 
The result can be returned either for each unit per timestamp or as a PostGIS-compatible object (parameter `object_output`).

In [6]:
# object_output = False: presumably returns plain per-row columns rather than
# PostGIS-compatible objects -- verify against the movekit documentation.
movement = mkit.centroid_medoid_computation(data, object_output = False)
# Preview the added centroid, medoid and distance-to-centroid columns.
movement.head()

Unnamed: 0,time,animal_id,outlier,x,y,distance,average_speed,average_acceleration,direction,stopped,x_centroid,y_centroid,medoid,distance_to_centroid
0,1,312,0,405.29,417.76,0.0,0.210217,-0.018039,0.0,1,395.364,423.226,312,11.331
1,2,312,0,405.31,417.37,0.390512,0.192177,-0.018039,-87.064327,1,395.382,423.22,312,11.523
2,3,312,0,405.31,417.07,0.3,0.174723,-0.018039,-90.0,1,395.392,423.234,312,11.677
3,4,312,0,405.3,416.86,0.210238,0.159133,-0.018039,-92.726311,1,395.396,423.272,312,11.798
4,5,312,0,405.29,416.71,0.150333,0.155506,-0.018039,-93.814075,1,395.394,423.324,312,11.903


#### Get the centroid direction
If no centroid coordinates are found in the input, the function calculates the centroids first and emits a warning.

In [7]:
# Order by timestep, then animal, so that all animals of one timestep appear together.
centroid_dir = (
    mkit.compute_centroid_direction(data)
    .sort_values(by=["time", "animal_id"])
)
# Show ten rows; in the output the first timestep has no centroid direction yet.
centroid_dir.head(10)



Unnamed: 0,time,animal_id,outlier,x,y,distance,average_speed,average_acceleration,direction,stopped,x_centroid,y_centroid,centroid_direction
0,1,312,0,405.29,417.76,0.0,0.210217,-0.018039,0.0,1,395.364,423.226,
1000,1,511,0,369.99,428.78,0.0,0.020944,0.000236,0.0,1,395.364,423.226,
2000,1,607,0,390.33,405.89,0.0,0.070235,0.004961,0.0,1,395.364,423.226,
3000,1,811,0,445.15,411.94,0.0,0.3705,0.017482,0.0,1,395.364,423.226,
4000,1,905,0,366.06,451.76,0.0,0.118,-0.006333,0.0,1,395.364,423.226,
1,2,312,0,405.31,417.37,0.390512,0.192177,-0.018039,-87.064327,1,395.382,423.22,-18.434949
1001,2,511,0,370.01,428.82,0.044721,0.02118,0.000236,63.434949,1,395.382,423.22,-18.434949
2001,2,607,0,390.25,405.89,0.08,0.075196,0.004961,180.0,1,395.382,423.22,-18.434949
3001,2,811,0,445.48,412.26,0.459674,0.387983,0.017482,44.118596,1,395.382,423.22,-18.434949
4001,2,905,0,365.86,451.76,0.2,0.111667,-0.006333,180.0,1,395.382,423.22,-18.434949


#### Getting the heading difference
Calculate the difference in degrees between the animal's direction and the centroid's direction for each timestep. Stronger gain in y gives positive difference, weaker gain in y gives negative difference, since constant y is defined to be 0 degrees.

In [8]:
# Compute each animal's heading difference relative to the centroid direction.
heading_diff = mkit.get_heading_difference(data)
# Preview; in the output the first timestep has no centroid direction, so its difference is empty.
heading_diff.head()

Unnamed: 0,time,animal_id,outlier,x,y,distance,average_speed,average_acceleration,direction,stopped,x_centroid,y_centroid,medoid,distance_to_centroid,centroid_direction,heading_difference
0,1,312,0,405.29,417.76,0.0,0.210217,-0.018039,0.0,1,395.364,423.226,312,11.331,,
1,2,312,0,405.31,417.37,0.390512,0.192177,-0.018039,-87.064327,1,395.382,423.22,312,11.523,-18.434949,-68.629378
2,3,312,0,405.31,417.07,0.3,0.174723,-0.018039,-90.0,1,395.392,423.234,312,11.677,54.462322,-144.462322
3,4,312,0,405.3,416.86,0.210238,0.159133,-0.018039,-92.726311,1,395.396,423.272,312,11.798,83.990994,-176.717305
4,5,312,0,405.29,416.71,0.150333,0.155506,-0.018039,-93.814075,1,395.394,423.324,312,11.903,92.202598,173.983327


#### Computing polarization
Compute the polarization of the animals for each timestep. The value lies between 0 and 1.

In [9]:
# Compute the polarization; the output below carries it in a `polarization` column.
pol = mkit.compute_polarization(data)
# Preview the first rows.
pol.head()

Unnamed: 0,time,animal_id,outlier,x,y,distance,average_speed,average_acceleration,direction,stopped,polarization
0,1,312,0,405.29,417.76,0.0,0.210217,-0.018039,0.0,1,1.0
1,2,312,0,405.31,417.37,0.390512,0.192177,-0.018039,-87.064327,1,0.248837
2,3,312,0,405.31,417.07,0.3,0.174723,-0.018039,-90.0,1,0.32395
3,4,312,0,405.3,416.86,0.210238,0.159133,-0.018039,-92.726311,1,0.125276
4,5,312,0,405.29,416.71,0.150333,0.155506,-0.018039,-93.814075,1,0.037233


#### Obtain a matrix, based on dynamic time warping
Each animal ID appears in both the row and the column index; the entries reflect the similarity of the animals' trajectories based on the DTW algorithm (each animal has a value of 0 with itself, as shown on the diagonal).

In [10]:
# Pairwise DTW matrix between the animals' trajectories; shown as the cell output.
mkit.dtw_matrix(data)

Unnamed: 0,312,511,607,811,905
312,0.0,30843.085403,32859.600139,42461.524553,37916.447829
511,30843.085403,0.0,26931.014323,47116.708116,20967.960073
607,32859.600139,26931.014323,0.0,39859.787924,35711.718898
811,42461.524553,47116.708116,39859.787924,0.0,38379.806433
905,37916.447829,20967.960073,35711.718898,38379.806433,0.0
