In [1]:
#Required libraries
import json
import datetime
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from mpl_toolkits import mplot3d
from IPython.display import display
import seaborn as sns
import pylab as pl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
import warnings
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Methoden

In [48]:
%matplotlib notebook
warnings.filterwarnings("ignore", category=FutureWarning)  # ignores annoying FutureWarning regression classifier

# Methoden
def read_json_data(path: str) -> {}:
    '''
    reads the JSON data and stores it as a dictionary
    :param path: the path of the corresponding file
    :return: a dictionary containing the json data of the file
    '''
    with open(path) as data_file:
        data = json.load(data_file)
    return data


def euclidean_distance(point1: [], point2: []) -> float:
    '''
    measures the distance between two points
    :param point1: initial point
    :param point2: end point
    :return: a float corresponding to the value of the distance between the two points
    '''
    return ((point2[0] - point1[0]) ** 2 + (point2[1] - point1[1]) ** 2) ** 0.5


def plot_matches(matches: {}, matches_to_plot=0):
    '''
    Plots the line- and  figure distance with respect to the time in seconds
    :param match: a dictionary containing all the matches
    :param matches_to_plot: (optional) indicates the number of matches to be plotted
    '''
    %matplotlib notebook
    fig1, ax1 = plt.subplots(figsize=(10, 10))
    ax1.set_title('style: {!r}'.format('Game Matches'), color='C0')
    if matches_to_plot == 0 or matches_to_plot > len(matches):
        matches_to_plot = len(matches)
    for match in range(0, matches_to_plot):
        r = lambda: random.randint(0, 255)
        random_color = '#%02X%02X%02X' % (r(), r(), r())
        x = []
        y = []
        last_point = 0
        abstand = 0
        time = 0
        for point in range(len(matches[match]['points'])):
            if point != 0:
                last_point = 1

            abstand = abstand + euclidean_distance([matches[match]['points'][point - last_point]['x'],
                                                    matches[match]['points'][point - last_point]['y']],
                                                   [matches[match]['points'][point]['x'],
                                                    matches[match]['points'][point]['y']])
            y.append(abstand)
            time = matches[match]['points'][point]['deltaTime'] - matches[match]['points'][0]['deltaTime']
            x.append(time)
            # print('Match: ' + str(match) + '. Point: ' + str(point) + ' Abstand: ' + str(abstand) + ' Time: '+ str(x[point]))
            same_figures = matches[match]['sameFigures']
            for i in range(0, len(same_figures)):
                # print('figure timestamp: ' + str(same_figures[i]['timestampMatched']) + ' Matches: ' + str(matches[match]['points'][i]['timestamp']))
                if same_figures[i]['deltaTimeMatch'] == matches[match]['points'][point]['deltaTime']:
                    ax1.scatter(time, abstand, color=random_color)

        match_line = 'Match ' + str(match + 1)
        ax1.set_xlabel(' Time lapsed in seconds between points')
        ax1.set_ylabel('Distance between normalized x,y points')
        ax1.plot(x, y, random_color, label=match_line)
        ax1.legend()
        plt.show()


def plot_single_match_axis_points_with_respect_to_timedelta(points: [{}], point_axis: str):
    '''
    graph an axis of the point with respect to deltaTime
    :param points: the points of the match to be plotted
    :param point_axis: pass 'x' or 'y' according to the axis of the points to be graphed.
    :return: a graph of the points according to the axis indicated with respect to the deltatime values
    '''
    r = lambda: random.randint(0, 255)
    random_color = '#%02X%02X%02X' % (r(), r(), r())
    x_axis = [(i['deltaTime'] - points[0]['deltaTime']) for i in points]
    y_axis = [(i[point_axis]) for i in points]
    y_axis_label = ('x values' if point_axis == 'x' else 'y values')

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_title(y_axis_label + ' Point match', color='C0')
    ax.set_xlabel(' Time lapsed in seconds between ' + y_axis_label)
    ax.set_ylabel(y_axis_label)
    ax.plot(x_axis, y_axis, random_color, label='')
    ax.legend()
    plt.show()


def total_displacement(x: [], y: []):
    '''
    calculatesvariation in the position between the starting point and an end point.
    The displacement takes the direction into consideration, in other words,
     a negative result means that the final position is less than the initial position
    :param x: the list of points corresponding to the (normalized) x-axis of the screen
    :param y: the list of points corresponding to the (normalized) y-axis of the screen
    :return: tuple (x,y) of the total displacement of the points of the match
    '''
    dx, dy = x[- 1] - x[0], y[- 1] - y[0]
    return dx, dy


def overall_distance(x: [], y: []):
    '''
    sums the length covered by each value of the coordinates(x,y)
    :param x: the list of points corresponding to the (normalized) x-axis of the screen
    :param y: the list of points corresponding to the (normalized) y-axis of the screen
    :return: a tuple of total distances of the axes (x,y) of the points of the match
    '''
    overall_distance_x = sum(abs(x[i] - x[i + 1]) for i in range(0, len(x) - 1))
    overall_distance_y = sum(abs(y[i] - y[i + 1]) for i in range(0, len(y) - 1))
    return overall_distance_x, overall_distance_y


def mean_velocity(total_displacement_x: float, total_displacement_y: float, deltaTime: float):
    '''
    calculates the relationship between the total displacement made of x and y and the total time (deltaTime) it took to make it.
    the mean velocity can be negative indicating its direction
    :param total_displacement_x:
    :param total_displacement_y:
    :param deltaTime: the difference in time between the start and the end of the match line
    :return: a tuple of the resulted mean velocity for each coordinate (x,y)
    '''
    mean_velocity_x, mean_velocity_y = total_displacement_x / deltaTime, total_displacement_y / deltaTime
    return mean_velocity_x, mean_velocity_y


def mean_speed(total_distance_x: float, total_distance_y: float, deltatime: float):
    '''
    calculates the relationship between the total distance made of x and y and the total time (deltaTime) it took to make it
    :param total_distance_x:
    :param total_distance_y:
    :param deltatime:the difference in time between the start and the end of the match line
    :return: a tuple of the resulted mean speeds for each coordinate (x,y)
    '''
    mean_speed_x, mean_speed_y = total_distance_x / deltatime, total_distance_y / deltatime
    return mean_speed_x, mean_speed_y


def analyse_match(match: {}):
    '''
    Extracts match features to be used in predictive model observations
    :param match: the match dictionary
    :return: a dictionary of the match analysed
    '''
    x = [i['x'] for i in match['points']]
    y = [i['y'] for i in match['points']]
    time = [i['deltaTime'] - match['points'][0]['deltaTime'] for i in match['points']]
    deltatime = time[-1] - time[0]
    total_displacement_x, total_displacement_y = total_displacement(x, y)
    mean_velocity_x, mean_velocity_y = mean_velocity(total_displacement_x, total_displacement_y, deltatime)
    overall_distance_x, overall_distance_y = overall_distance(x, y)
    mean_speed_x, mean_speed_y = mean_speed(overall_distance_x, overall_distance_y, deltatime)
    _euclidean_distance = sum(euclidean_distance([x[i], y[i]], [x[i + 1], y[i + 1]]) for i in range(0, len(x) - 1))
    return total_displacement_x, total_displacement_y, mean_velocity_x, mean_velocity_y, overall_distance_x, \
           overall_distance_y, mean_speed_x, mean_speed_y, _euclidean_distance


def get_filename_list_from_directory(directory_name: str) -> []:
    '''
    :param directory_name:
    :return: a list containing all the path file names stored in the given directory
    '''
    pathlist = Path(directory_name).glob('**/*.json')
    return pathlist


def analyze_json_files_matches(directory: str) -> [{}]:
    '''
    generates a list of dictionaries corresponding to the matches analyzed. It is important to emphasize that only
    single player matches are taken into account.
    :param directory:
    :return: A list of dictionaries derived from the analysis of each match of the JSON files stored in the directory
    '''
    json_filename_list = get_filename_list_from_directory('data')
    analized_matches = []  # list of dictionaries of the matches analysed
    for json_file in json_filename_list:
        json_file_data = read_json_data(json_file)
        if json_file_data['score']['numberOfPlayers'] == 1 and json_file_data['userData'][
            'uid1'] != 'unbekannt':  # This only works for single player rounds!!!
            matches = json_file_data['touchDataList']['matched']
            for match in matches:
                analized_match = {}
                total_displacement_x, total_displacement_y, mean_velocity_x, mean_velocity_y, overall_distance_x, \
                overall_distance_y, mean_speed_x, mean_speed_y, _euclidean_distance = analyse_match(match)
                figure_points = [[figure['x'], figure['y']] for figure in match['sameFigures']]
                distance_between_figures = sum(
                    euclidean_distance(figure_points[i], figure_points[i + 1]) for i in
                    range(0, len(figure_points) - 1))
                analized_match['uid'] = json_file_data['userData']['uid1']
                analized_match['number_of_matched_figures'] = len(match['sameFigures'])
                analized_match['figures_distance'] = distance_between_figures
                analized_match['total_displacement_x'] = total_displacement_x
                analized_match['total_displacement_y'] = total_displacement_y
                analized_match['mean_velocity_x'] = mean_velocity_x
                analized_match['mean_velocity_y'] = mean_velocity_y
                analized_match['overall_distance_x'] = overall_distance_x
                analized_match['overall_distance_y'] = overall_distance_y
                analized_match['mean_speed_x'] = mean_speed_x
                analized_match['mean_speed_y'] = mean_speed_y
                analized_match['euclidean_distance'] = _euclidean_distance
                analized_matches.append(analized_match)
    return analized_matches


def dataset_points(directory: str)->[{}]:
    
    '''
    returns the raw data list of dictionaries corresponding to Game Records from the directory.
    :param directory:
    :return: A list of dictionaries with the raw data
    '''
    
    json_filename_list = get_filename_list_from_directory(directory)
    
    raw_data = [read_json_data(game_data) for game_data in json_filename_list]
    points = {}
    matched_points = []
    unmatched_points = []
    trajectory_counter = 0
    for game in raw_data:
        for match in game['touchDataList']['matched']:
            trajectory_counter += 1
            for point in match['points']:
                point['uid1'] = game['userData']['uid1']
                point['uid2'] = game['userData']['uid2']
                point['deltaTimeTrajectory'] = point['deltaTime'] - match['points'][0]['deltaTime']
                point['trajectory_id'] = trajectory_counter
                matched_points.append(point)
        for unmatch in game['touchDataList']['unmatched']:
            for point in match['points']:
                unmatched_points.append(point)
    points['matched'] = matched_points
    points['unmatched'] = unmatched_points
    
    return points



def dataset_points_with_figures(directory: str)->[{}]:
    
    '''
    returns the raw data list of dictionaries corresponding to Game Records from the directory.
    :param directory:
    :return: A list of dictionaries with the raw data
    '''
    
    json_filename_list = get_filename_list_from_directory(directory)
    
    raw_data = [read_json_data(game_data) for game_data in json_filename_list]
    points = {}
    matched_points = []
    unmatched_points = []
    trajectory_counter = 0
    figure_groups = []
    for game in raw_data:
        for match in game['touchDataList']['matched']:
            temp_figures = []
            for figure in match['sameFigures']:
                temp_figures.append([figure['x'], figure['y']])
            counter = 0
            for figure_group in figure_groups:
                if len(figure_group) == len(temp_figures):
                    if counter != len(figure_group):
                        for temp_figure in temp_figures:
                            if temp_figure in figure_group:
                                counter +=1
                    if counter != len(temp_figures):
                        counter = 0
                    if counter == len(temp_figures):
                        temp_figures = figure_group
            if counter == 0:
                figure_groups.append(temp_figures)
            trajectory_counter += 1
            trajectory_group = figure_groups.index(temp_figures)
            for point in match['points']:
                point['uid1'] = game['userData']['uid1']
                point['uid2'] = game['userData']['uid2']
                point['deltaTimeTrajectory'] = point['deltaTime'] - match['points'][0]['deltaTime']
                point['trajectory_id'] = trajectory_counter
                point['trajectory_group'] = trajectory_group
                #for i in range(0,len(temp_figures)):
                #    key_name = 'figure'+str(i+1)
                #    point[key_name] = temp_figures[i]
                matched_points.append(point)
        for unmatch in game['touchDataList']['unmatched']:
            for point in match['points']:
                unmatched_points.append(point)
    points['matched'] = matched_points
    points['unmatched'] = unmatched_points
    return points 


def plot_match_in_3d(match: {}):
    '''
    :param match:
    :return: 3d graph of the match. x: horizontal path, y: vertical path. z: elapsed time in seconds
    '''
    x = [i['x'] for i in match['points']]
    y = [i['y'] for i in match['points']]
    z = [i['deltaTime'] - match['points'][0]['deltaTime'] for i in match['points']]
    fig, ax = plt.subplots(figsize=(10, 10))
    # fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('timeDelta')
    ax.plot(x, y, z)


def handle_non_numerical_data(matches_df):
    '''
    transforms data types not belonging to int or float into numerical values
    :param matches_df: the pandas.DataFrame dataset
    :return: the DataFrame containing only numeric values for each row in the columns
    '''
    columns = matches_df.columns.values
    for column in columns:
        text_digit_values = {}

        def convert_to_int(value):
            return text_digit_values[value]

        if matches_df[column].dtype != np.int64 and matches_df[column].dtype != np.float64:
            column_contents = matches_df[column].values.tolist()
            unique_elements = set(column_contents)
            x = 0
            for unique in unique_elements:
                if unique not in text_digit_values:
                    text_digit_values[unique] = x
                    x += 1
            matches_df[column] = list(map(convert_to_int, matches_df[column]))
    return matches_df


def group_match_by_size(pandas_dataFrame: pd.DataFrame):
    '''
    split the data size into groups based on user ids
    :param pandas_dataFrame:
    :return:Number of matches made by each user
    '''
    return pandas_dataFrame.groupby('uid').size()


def display_whisker_plots(pandas_dataFrame: pd.DataFrame):
    '''
    Whisker box plot for visualizing the data distribution of the input variables
    :param pandas_dataFrame:
    :return: Whisker box plots (excluding columns: uid and number of figures)
    '''
    analized_matches_df.drop('uid', axis=1).drop('number_of_matched_figures', axis=1).plot(kind='box', subplots=True,
                                                                                           layout=(11, 11),
                                                                                           sharex=False, sharey=False,
                                                                                           figsize=(10, 10),
                                                                                           title='Box Plot for each input variable')
    plt.savefig('matches_box')
    plt.show()

# Scaling is applied to the test datasets

# separation of the data set into train and test data

def scale_and_split_data(pandas_dataframe: pd.DataFrame):
    '''
    MinMaxScaler independently normalizes each column with values between 0 and 1.
    :param pandas_dataframe:
    :return: the segregated subsets of the normalized data set
    '''
    X = np.array(pandas_dataframe.drop(['uid'], 1).astype(float))
    # X = preprocessing.scale(X)
    y = np.array(pandas_dataframe['uid'])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test


# Build Models


# Logistic Regression Classifier

def logistic_regression(X_train, X_test, y_train, y_test):
    '''
    predicts the solution of a categorical variable (mutually exclusive variables) based on other independent or predictive variables.
    :param X_train: train features
    :param X_test: test features
    :param y_train: labels (predictions) of the X train feature set
    :param y_test: labels (predictions) of the X test feature set
    :return: the logistic regression model
    '''
    logistic_regression = LogisticRegression(solver='lbfgs')
    logistic_regression.fit(X_train, y_train)
    print('Accuracy Logistic regression classifier on training set: {:.2f}'
          .format(logistic_regression.score(X_train, y_train)))
    print('Accuracy of Logistic regression classifier on test set: {:.2f}'
          .format(logistic_regression.score(X_test, y_test)))
    return logistic_regression


# Decision Tree Classifier


def decision_tree(X_train, X_test, y_train, y_test):
    '''
    Takes the most homogeneous data output set for every input(feature) node
    :param X_train: train features
    :param X_test: test features
    :param y_train: labels (predictions) of the X train feature set
    :param y_test: labels (predictions) of the X test feature set
    :return: the decision tree model
    '''
    decision_tree = DecisionTreeClassifier().fit(X_train, y_train)
    print('Accuracy of Decision Tree classifier on training set: {:.2f}'
          .format(decision_tree.score(X_train, y_train)))
    print('Accuracy of Decision Tree classifier on test set: {:.2f}'
          .format(decision_tree.score(X_test, y_test)))
    return decision_tree


def k_nearest_neighbor(X_train, X_test, y_train, y_test):
    '''
    Selects the K values closest to the value to predict.
    :param X_train: train features
    :param X_test: test features
    :param y_train: labels (predictions) of the X train feature set
    :param y_test: labels (predictions) of the X test feature set
    :return: the k nearest neighbor classifier
    '''
    k_nearest_neighbor = KNeighborsClassifier()
    k_nearest_neighbor.fit(X_train, y_train)
    print('Accuracy of K-NN classifier on training set: {:.2f}'
          .format(k_nearest_neighbor.score(X_train, y_train)))
    print('Accuracy of K-NN classifier on test set: {:.2f}'
          .format(k_nearest_neighbor.score(X_test, y_test)))
    return k_nearest_neighbor


# KNeighborsClassifier

def linear_discriminant_analysis(X_train, X_test, y_train, y_test):
    '''
    Finds linear combinations of variables to classify objects into groups
    :param X_train: train features
    :param X_test: test features
    :param y_train: labels (predictions) of the X train feature set
    :param y_test: labels (predictions) of the X test feature set
    :return: the linear discriminant analysis classifier
    '''
    linear_discriminant_analysis = LinearDiscriminantAnalysis()
    linear_discriminant_analysis.fit(X_train, y_train)
    print('Accuracy of LDA classifier on training set: {:.2f}'
          .format(linear_discriminant_analysis.score(X_train, y_train)))
    print('Accuracy of LDA classifier on test set: {:.2f}'
          .format(linear_discriminant_analysis.score(X_test, y_test)))
    return linear_discriminant_analysis


def naive_bayes(X_train, X_test, y_train, y_test):
    '''
    :param X_train: train features
    :param X_test: test features
    :param y_train: labels (predictions) of the X train feature set
    :param y_test: labels (predictions) of the X test feature set
    :return:
    '''
    naive_bayes = GaussianNB()
    naive_bayes.fit(X_train, y_train)
    print('Accuracy of GNB classifier on training set: {:.2f}'
          .format(naive_bayes.score(X_train, y_train)))
    print('Accuracy of GNB classifier on test set: {:.2f}'
          .format(naive_bayes.score(X_test, y_test)))
    return naive_bayes


def support_vector_machine(X_train, X_test, y_train, y_test):
    support_vector_machine = SVC()
    support_vector_machine.fit(X_train, y_train)
    print('Accuracy of SVM classifier on training set: {:.2f}'
          .format(support_vector_machine.score(X_train, y_train)))
    print('Accuracy of SVM classifier on test set: {:.2f}'
          .format(support_vector_machine.score(X_test, y_test)))
    return support_vector_machine


In [49]:
#Returns the dataset as json features
analyzed_matches = analyze_json_files_matches('data')
analized_matches_df = pd.DataFrame(analyzed_matches)
analized_matches_df = handle_non_numerical_data(analized_matches_df)
# print(analized_matches_df)
#analized_matches_df.head()

data_points = dataset_points_with_figures('data')

data_points_matched_df = pd.DataFrame(data_points['matched'])
data_points_matched_df = handle_non_numerical_data(data_points_matched_df)
#data_points_unmatched_df = pd.DataFrame(data_points['unmatched'], columns=['x','y'])
data_points_matched_df.head(10000)
#analized_matches_df.head()
#random_subset = data_points_matched_df.sample(n=30)
#random_subset.head(30)


Unnamed: 0,deltaTime,deltaTimeTrajectory,trajectory_group,trajectory_id,uid1,uid2,x,y
0,3.522298,0.000000,0,1,6,1,0.804539,0.820801
1,3.588984,0.066686,0,1,6,1,0.799414,0.813477
2,3.622319,0.100021,0,1,6,1,0.797950,0.796387
3,3.655739,0.133441,0,1,6,1,0.794656,0.769043
4,3.689029,0.166731,0,1,6,1,0.790630,0.730469
5,3.722405,0.200107,0,1,6,1,0.789531,0.683105
6,3.755755,0.233457,0,1,6,1,0.790264,0.640137
7,3.789138,0.266840,0,1,6,1,0.796120,0.607422
8,3.822471,0.300173,0,1,6,1,0.802709,0.593262
9,3.855841,0.333543,0,1,6,1,0.811127,0.587402


In [4]:
#data_points_matched_df.head(200)

#random_subset = analized_matches_df.sample(n=10)
#random_subset.head(10)

In [5]:
# Feature predictions

X_train, X_test, y_train, y_test = scale_and_split_data(analized_matches_df)

logistic_regression(X_train, X_test, y_train, y_test)

k_nearest_neighbor(X_train, X_test, y_train, y_test)

decision_tree(X_train, X_test, y_train, y_test)

linear_discriminant_analysis(X_train, X_test, y_train, y_test)

naive_bayes(X_train, X_test, y_train, y_test)

support_vector_machine(X_train, X_test, y_train, y_test)

Accuracy Logistic regression classifier on training set: 0.36
Accuracy of Logistic regression classifier on test set: 0.36
Accuracy of K-NN classifier on training set: 0.59
Accuracy of K-NN classifier on test set: 0.42
Accuracy of Decision Tree classifier on training set: 1.00
Accuracy of Decision Tree classifier on test set: 0.35
Accuracy of LDA classifier on training set: 0.37
Accuracy of LDA classifier on test set: 0.35
Accuracy of GNB classifier on training set: 0.30
Accuracy of GNB classifier on test set: 0.26
Accuracy of SVM classifier on training set: 0.34
Accuracy of SVM classifier on test set: 0.34


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [6]:
game_data_plot_sample = read_json_data('data/Game9.json')
game_data_plot_sample_df = pd.DataFrame(game_data_plot_sample)
match_data_plot_sample_df = game_data_plot_sample_df['touchDataList']['matched']

In [7]:
sns.countplot(x=analized_matches_df['uid'], data=analized_matches_df)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a1ab4f5c0>

In [8]:
display_whisker_plots(game_data_plot_sample_df)


<IPython.core.display.Javascript object>

In [9]:
plot_single_match_axis_points_with_respect_to_timedelta(match_data_plot_sample_df[0]['points'], point_axis="x")

<IPython.core.display.Javascript object>

No handles with labels found to put in legend.


In [10]:
plot_matches(match_data_plot_sample_df,4)

<IPython.core.display.Javascript object>

In [11]:
plot_match_in_3d(match_data_plot_sample_df[0])

<IPython.core.display.Javascript object>

![alt text](https://www.profesorenlinea.cl/imagengeometria/Distancia_entre_dos_puntos_image001.gif "Logo Title Text 1")

In [12]:
# sns.jointplot(x="x", y="y", data=data_points_matched_df);
sns.jointplot(x="x", y="y", data=data_points_matched_df, kind="kde");
# Number of matches grouped by user
group_match_by_size(analized_matches_df)




<IPython.core.display.Javascript object>

uid
0     518
1     672
2     927
3    1176
4    1082
5    1072
6    1127
7    1008
8     760
9     624
dtype: int64

In [13]:
fig2 = plt.figure(figsize=(8, 6))
ax2 = fig2.add_subplot(111, projection='3d')
#x	y	uid1	deltaTime	deltaTimeTrajectory
j = 10
xrand = [data_points_matched_df['x'][random.randint(0, len(data_points_matched_df['x']))] for i in range(0,j)]
print(xrand)
xs = data_points_matched_df['x'][:100]
ys = data_points_matched_df['y'][:100]
zs = data_points_matched_df['deltaTimeTrajectory'][:100]

x=['A','B','B','C','A','B']
y=[15,30,25,18,22,13]


ax2.scatter(xs,ys,zs,
            color='b',      # <==  4th DIMENSION             
            edgecolors='w',
            alpha=0.1,
            s = 10)

ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_zlabel('time in seconds')

plt.show()

<IPython.core.display.Javascript object>

[0.583089292049408, 0.2009516805410385, 0.7082723379135132, 0.4234992563724518, 0.5721083283424377, 0.7210834622383118, 0.4688872694969177, 0.5721083283424377, 0.5490483045578003, 0.4150805175304413]


# Cómo medir los puntos?


# Cuáles son los features necesarios?

# Entender el/los modelo(s)


In [14]:
def plot_match_points():
    # Create data
    N = 60
    g1 = (0.6 + 0.6 * np.random.rand(N), np.random.rand(N))
    g2 = (0.4 + 0.3 * np.random.rand(N), 0.5 * np.random.rand(N))
    g3 = (0.3 * np.random.rand(N), 0.3 * np.random.rand(N))

    data = (g1, g2, g3)
    colors = ("red", "green", "blue")
    groups = ("coffee", "tea", "water")

    # Create plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1, axisbg="1.0")

    for data, color, group in zip(data, colors, groups):
        x, y = data
        ax.scatter(x, y, alpha=0.8, c=color, edgecolors='none', s=30, label=group)

    plt.title('Matplot scatter plot')
    plt.legend(loc=2)
    plt.show()


In [15]:
#!pip3 install dtw

In [17]:
from scipy.spatial.distance import euclidean


In [None]:
#!conda install -c bioconda fastdtw


In [18]:
import numpy as np
from scipy.spatial.distance import euclidean

from fastdtw import fastdtw

x = np.array([[1,1], [2,2], [3,3], [4,4], [5,5]])
y = np.array([[2,2], [3,3], [4,4]])
distance, path = fastdtw(x, y, dist=euclidean)
print(path)

[(0, 0), (1, 0), (2, 1), (3, 2), (4, 2)]


In [None]:
#deltaTime	deltaTimeTrajectory	trajectory_group	trajectory_id	uid1	uid2	x	y
