In [None]:
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn import linear_model
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [None]:
# Training samples; the two screen-size columns are dropped right after loading.
df = pd.read_csv('./csv/data/_fixed_train_data.csv').drop(
    columns=['screen_height', 'screen_width'])

In [None]:
# Evaluation frame, prepared the same way as `df`.
# NOTE(review): this reads the very same CSV as the training frame, so every
# score computed against `df_test` is an in-sample score. Confirm whether a
# separate held-out file was intended here.
df_test = pd.read_csv('./csv/data/_fixed_train_data.csv').drop(
    columns=['screen_height', 'screen_width'])

In [None]:
# (rows, columns) of the training frame after the screen-size columns were dropped.
df.shape

In [None]:
# First rows of the training frame, as a quick sanity check of the loaded columns.
df.head()

In [None]:
# Summary statistics of the training frame.
df.describe()

In [None]:
# Regression-style pair plot: every iris coordinate column against each
# `point_*` column.
iris_columns = ['left_iris_y', 'right_iris_y', 'left_iris_x', 'right_iris_x']
point_columns = ['point_x', 'point_y']
sns.pairplot(df, x_vars=iris_columns, y_vars=point_columns, kind='reg')
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between the frame's columns.
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='RdYlGn', linewidths=0.2)
plt.show()

In [None]:
# Overlay the left-iris (blue), right-iris (red) and point (green) coordinates
# in a single scatter plot.
for x_col, y_col, colour in (
    ('left_iris_x', 'left_iris_y', 'blue'),
    ('right_iris_x', 'right_iris_y', 'red'),
    ('point_x', 'point_y', 'green'),
):
    plt.scatter(df[x_col], df[y_col], color=colour)
plt.show()

In [None]:
# Horizontal model inputs: both iris x-coordinates as features, point_x as target.
y_train_x = df['point_x']
X_train_x = df.loc[:, ['left_iris_x', 'right_iris_x']]

In [None]:
# Standardise the horizontal training features; `sc` keeps the mean/scale
# learned from them.
sc = StandardScaler().fit(X_train_x)
X_train_x = sc.transform(X_train_x)

In [None]:
# First five rows of the standardised horizontal training features.
X_train_x[:5]

In [None]:
# First five point_x training targets.
y_train_x[:5]

In [None]:
# Horizontal evaluation inputs, taken from `df_test` with the same columns as
# the training features/target.
y_test_x = df_test['point_x']
X_test_x = df_test.loc[:, ['left_iris_x', 'right_iris_x']]

In [None]:
# Scale the evaluation features with the scaler that was fitted on the
# horizontal training features (`sc`). Fitting a fresh StandardScaler on the
# evaluation data would standardise it with different statistics than the ones
# the model is trained on.
# Reading the columns straight from `df_test` keeps this cell idempotent when
# it is re-run.
X_test_x = sc.transform(df_test[['left_iris_x', 'right_iris_x']])

In [None]:
# Degree-2 polynomial regression for the horizontal coordinate: expand the two
# scaled features, fit ordinary least squares, then score on the evaluation set.
model_x = make_pipeline(
    PolynomialFeatures(degree=2),
    linear_model.LinearRegression(),
)
y_pred_x = model_x.fit(X_train_x, y_train_x).predict(X_test_x)
r2_score(y_test_x, y_pred_x)

In [None]:
# Preview the first few horizontal predictions instead of dumping the whole
# array (consistent with the other peek cells).
y_pred_x[:5]

In [None]:
# Predicted vs. true point_x values with a fitted regression line.
sns.regplot(x=y_test_x, y=y_pred_x)
plt.show()

In [None]:
# Vertical model inputs: both iris y-coordinates as features, point_y as target.
y_train_y = df['point_y']
X_train_y = df.loc[:, ['left_iris_y', 'right_iris_y']]

In [None]:
# Standardise the vertical training features; from here on `sc` holds the
# statistics of the vertical features (the name is reused).
sc = StandardScaler().fit(X_train_y)
X_train_y = sc.transform(X_train_y)

In [None]:
# First five rows of the standardised vertical training features.
X_train_y[:5]

In [None]:
# First five point_y training targets.
y_train_y[:5]

In [None]:
# Vertical evaluation inputs, taken from `df_test` with the same columns as
# the training features/target.
y_test_y = df_test['point_y']
X_test_y = df_test.loc[:, ['left_iris_y', 'right_iris_y']]

In [None]:
# Scale the evaluation features with the scaler that was fitted on the
# vertical training features (`sc`). Fitting a fresh StandardScaler on the
# evaluation data would standardise it with different statistics than the ones
# the model is trained on.
# Reading the columns straight from `df_test` keeps this cell idempotent when
# it is re-run.
X_test_y = sc.transform(df_test[['left_iris_y', 'right_iris_y']])

In [None]:
# Degree-2 polynomial regression for the vertical coordinate: expand the two
# scaled features, fit ordinary least squares, then score on the evaluation set.
model = make_pipeline(
    PolynomialFeatures(degree=2),
    linear_model.LinearRegression(),
)
y_pred_y = model.fit(X_train_y, y_train_y).predict(X_test_y)
r2_score(y_test_y, y_pred_y)

In [None]:
# Predicted vs. true point_y values with a fitted regression line.
sns.regplot(x=y_test_y, y=y_pred_y)
plt.show()

In [None]:
# Collect the true and predicted coordinates under the column names used by
# the plots and by the DataFrame built from this dict.
data = {
    'True X': y_test_x,
    'Predicted X': y_pred_x,
    'True Y': y_test_y,
    'Predicted Y': y_pred_y,
}

# Draw the true points first, then the predictions on the same axes.
for x_key, y_key, legend_label in (
    ('True X', 'True Y', 'True Values'),
    ('Predicted X', 'Predicted Y', 'Predicted Values'),
):
    sns.scatterplot(x=x_key, y=y_key, data=data,
                    label=legend_label, alpha=0.7)

plt.title('True and Predicted Points for X and Y')
plt.xlabel('X Values')
plt.ylabel('Y Values')
plt.legend()

plt.show()

In [None]:
# Tabulate true/predicted coordinates and add a combined (True X, True Y) tuple
# column that serves as the grouping key for the per-target metrics.
df_data = pd.DataFrame(data).assign(
    **{'True XY': lambda frame: list(zip(frame['True X'], frame['True Y']))})

In [None]:
# First rows of the true/predicted table, including the 'True XY' tuple column.
df_data.head()

In [None]:
# (rows, columns) of the true/predicted table.
df_data.shape

In [None]:
# Keep only rows whose predictions are non-negative on both axes and lie
# within 100 units of the true value on each axis.
# NOTE(review): discarding large-error rows before computing the metrics below
# makes those metrics look better than the raw predictions; confirm this is
# intended.
df_data = (
    df_data
    .loc[lambda d: d['Predicted X'].ge(0) & d['Predicted Y'].ge(0)]
    .loc[lambda d: (d['Predicted X'] - d['True X']).abs().le(100)
         & (d['Predicted Y'] - d['True Y']).abs().le(100)]
)

In [None]:
# (rows, columns) of the true/predicted table at this point in the notebook.
df_data.shape

In [None]:
# Precision is calculated via the Root Mean Square from the
# successive data points [in degrees of visual angle θi between
# successive (x1,y1) to (xi+1, yi+1) samples], both for each eye
# individually and as a mean from the two

# Another option to describe the variation in the data is to
# measure the standard deviation of the data set, equivalent
# to the RMS normalized by the mean

def average_dist_from_truth(group):
    """Return the mean Euclidean distance between predicted and true points.

    Parameters
    ----------
    group : mapping of column name to array-like
        Must provide the 'True X', 'Predicted X', 'True Y' and 'Predicted Y'
        entries; each must support element-wise arithmetic.

    Returns
    -------
    float
        Average of sqrt(dx**2 + dy**2) over the rows of ``group``.
    """
    dx = group['Predicted X'] - group['True X']
    dy = group['Predicted Y'] - group['True Y']
    return np.mean(np.sqrt(dx ** 2 + dy ** 2))


def average_dist_from_center(group):
    """Return the mean distance of the predicted points to their own centroid.

    The centroid is the mean of 'Predicted X' and 'Predicted Y' within
    ``group``; the result measures how tightly the predictions cluster,
    independently of where the true point lies.

    The previous implementation computed this value but only printed it, and
    returned an unrelated quantity (the norm of all 'Predicted Y' and 'True Y'
    values stacked together). The computed average is now what is returned,
    and the per-group debug prints are removed.

    Parameters
    ----------
    group : mapping of column name to array-like
        Must provide the 'Predicted X' and 'Predicted Y' entries.

    Returns
    -------
    float
        Average Euclidean distance from each predicted point to the centroid
        of the predicted points.
    """
    predicted_x = group['Predicted X']
    predicted_y = group['Predicted Y']
    center_x = np.mean(predicted_x)
    center_y = np.mean(predicted_y)
    distances_to_center = np.sqrt(
        (predicted_x - center_x) ** 2 + (predicted_y - center_y) ** 2)
    return np.mean(distances_to_center)


# Evaluate both per-group functions once for every distinct (True X, True Y)
# target.
per_target = df_data.groupby('True XY')
accuracy = per_target.apply(average_dist_from_truth)
precision = per_target.apply(average_dist_from_center)

# Average the two per-target series, then rescale so the result has mean 1.
precision_xy = (accuracy + precision) / 2
precision_xy = precision_xy / np.mean(precision_xy)
print('accuracy', accuracy)
print('precision', precision)

In [None]:
# Nested lookup: data[str(true_x)][str(true_y)] -> predictions and precision
# for that target point.
# One groupby pass replaces the row-by-row loop, which re-filtered the whole
# frame (twice) for every row and rewrote the same entry once per sample.
# sort=False keeps the targets in order of first appearance, matching the
# insertion order the row loop produced.
data = {}

for (true_x, true_y), group in df_data.groupby(['True X', 'True Y'], sort=False):
    data.setdefault(str(true_x), {})[str(true_y)] = {
        'predicted_x': group['Predicted X'].values.tolist(),
        'predicted_y': group['Predicted Y'].values.tolist(),
        'PrecisionSD': precision_xy[(true_x, true_y)],
    }

In [None]:
# Pair the horizontal and vertical predictions into one row per sample
# (`data` is rebound here from the nested dict to a numeric array).
data = np.column_stack([y_pred_x, y_pred_y])

In [None]:
# Shape of the stacked prediction array.
data.shape

In [None]:
# Cluster the predicted points into five groups.
# random_state pins the k-means++ initialisation so the cluster labels and
# centroids are the same on every Restart & Run All.
model = KMeans(n_clusters=5, n_init='auto', init='k-means++', random_state=42)
y_kmeans = model.fit_predict(data)

In [None]:
plt.figure(figsize=(10, 6))

# One scatter call per cluster label, each with its own colour and legend entry.
cluster_styles = (
    ('red', 'Cluster 1'),
    ('blue', 'Cluster 2'),
    ('green', 'Cluster 3'),
    ('cyan', 'Cluster 4'),
    ('magenta', 'Cluster 5'),
)
for cluster_id, (colour, legend_label) in enumerate(cluster_styles):
    members = data[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1],
                s=90, c=colour, label=legend_label)

# Fitted cluster centres, drawn last so they sit on top of the members.
centroids = model.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1],
            s=120, c='yellow', label='Centroids')

plt.title('Clusters')

plt.xlabel('F1')
plt.ylabel('F2')

plt.legend()

plt.show()

### -------- test --------

In [None]:
def plot(ax, y_test_x, y_pred_x, y_test_y, y_pred_y, title=None):
    """Scatter true vs. predicted points on ``ax`` and annotate true points.

    For every distinct true point, the predictions whose true x (respectively
    true y) lies within ``error_range`` of that point are collected. When both
    collections hold more than one value, a circle is drawn around the true
    point and labelled with the R2 and MAE of those predictions against the
    true coordinates.

    Parameters
    ----------
    ax : matplotlib Axes or None
        Axes to draw on; ``None`` resolves to the current axes.
    y_test_x, y_test_y : array-like
        True x / y coordinates, one entry per sample.
    y_pred_x, y_pred_y : array-like
        Predicted x / y coordinates, aligned with the true coordinates.
    title : str, optional
        Axes title; a default title is used when omitted or empty.

    Returns
    -------
    None
    """
    if ax is None:
        # seaborn already falls back to the current axes for ax=None, but the
        # add_patch/text calls below need a real Axes object.
        ax = plt.gca()

    y_test_x = np.asarray(y_test_x)
    y_test_y = np.asarray(y_test_y)
    y_pred_x = np.asarray(y_pred_x)
    y_pred_y = np.asarray(y_pred_y)

    # Used both as the matching tolerance on true coordinates and as the
    # fraction of the data extent that sets the circle radius.
    error_range = 0.05

    data = {'True X': y_test_x, 'Predicted X': y_pred_x,
            'True Y': y_test_y, 'Predicted Y': y_pred_y}

    sns.scatterplot(x='True X', y='True Y', data=data,
                    label='True Values', alpha=0.7, ax=ax, color='red')
    sns.scatterplot(x='Predicted X', y='Predicted Y', data=data,
                    label='Predicted Values', alpha=0.7, ax=ax, color='green')

    # Nothing to annotate (and no extent to measure) without samples.
    if y_test_x.size:
        circle_radius = error_range * (
            (y_test_x.max() - y_test_x.min())
            + (y_test_y.max() - y_test_y.min())) / 2

        # Annotate each distinct true point once. Iterating over every sample
        # would redraw the identical circle and label once per repetition.
        # dict.fromkeys keeps first-appearance order.
        unique_true_points = dict.fromkeys(
            zip(y_test_x.tolist(), y_test_y.tolist()))

        for true_x, true_y in unique_true_points:
            # NOTE(review): the x and y selections are made independently, so
            # they may come from different samples. Confirm whether a joint
            # (x AND y) match was intended.
            x_within_range = y_pred_x[np.abs(y_test_x - true_x) <= error_range]
            y_within_range = y_pred_y[np.abs(y_test_y - true_y) <= error_range]

            if len(x_within_range) > 1 and len(y_within_range) > 1:
                combined_predictions = np.concatenate(
                    [x_within_range, y_within_range])
                combined_true = np.concatenate([
                    np.full(len(x_within_range), true_x),
                    np.full(len(y_within_range), true_y),
                ])

                r2_combined = r2_score(combined_true, combined_predictions)
                mae_combined = mean_absolute_error(
                    combined_true, combined_predictions)

                circle = plt.Circle((true_x, true_y), circle_radius,
                                    color='yellow', fill=False)
                ax.add_patch(circle)

                ax.text(true_x + 0.1, true_y + 0.1, f'R2={r2_combined:.2f}\nMAE={mae_combined:.2f}',
                        fontsize=8, color='blue')

    title = title if title else 'True and Predicted Points for X and Y'
    ax.set_title(title)
    ax.legend()

In [None]:
def _fit_axis(df, feature_cols, target_col, test_size=0.2, random_state=42):
    """Fit a scaled linear regression for one axis; return (y_test, y_pred, r2)."""
    X_train, X_test, y_train, y_test = train_test_split(
        df[feature_cols], df[target_col],
        test_size=test_size, random_state=random_state)

    # The pipeline fits the scaler on the training split only, so no
    # statistics from the held-out rows leak into the model.
    model = make_pipeline(StandardScaler(), linear_model.LinearRegression())
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return y_test, y_pred, r2_score(y_test, y_pred)


def analysis(df, ax=None, title=None):
    """Fit per-axis linear gaze models on ``df`` and plot held-out predictions.

    Two independent linear regressions are trained on an 80/20 split
    (``random_state=42``): point_x from the iris x-coordinates and point_y
    from the iris y-coordinates. Both splits use the same seed on the same
    frame, so the held-out x and y values refer to the same rows. The held-out
    true and predicted points are then drawn with ``plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'left_iris_x', 'right_iris_x', 'left_iris_y',
        'right_iris_y', 'point_x' and 'point_y'.
    ax : matplotlib Axes, optional
        Axes to draw on; defaults to the current axes.
    title : str, optional
        Title forwarded to ``plot``.

    Returns
    -------
    dict
        ``{'r2_x': ..., 'r2_y': ...}``, the held-out R2 score of each axis
        model (previously computed and discarded).
    """
    if ax is None:
        ax = plt.gca()

    y_test_x, y_pred_x, r2_x = _fit_axis(
        df, ['left_iris_x', 'right_iris_x'], 'point_x')
    y_test_y, y_pred_y, r2_y = _fit_axis(
        df, ['left_iris_y', 'right_iris_y'], 'point_y')

    plot(ax, y_test_x, y_pred_x, y_test_y, y_pred_y, title)

    return {'r2_x': r2_x, 'r2_y': r2_y}

In [None]:
# Reload the training frame so this cell does not depend on earlier mutations
# of `df`.
df = pd.read_csv('./csv/data/_fixed_train_data.csv').drop(
    columns=['screen_height', 'screen_width'])

df_list = [df]

# One subplot row per frame, laid out in a single column.
num_rows, num_cols = len(df_list), 1

fig_height = 5 * num_rows
fig, axes = plt.subplots(
    num_rows, num_cols, figsize=(10, fig_height), squeeze=False)

# squeeze=False keeps `axes` two-dimensional, so column 0 always exists.
for ax, df in zip(axes[:, 0], df_list):
    analysis(df, ax)

plt.tight_layout()
plt.show()