In [None]:
# Mount Google Drive at /content/drive; every data path used later in this
# notebook lives under that mount point. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import csv
import os
import glob
import numpy as np
import matplotlib.pyplot as pp
from skimage.measure import regionprops
from skimage.draw import ellipse_perimeter
from skimage.measure import regionprops
from skimage.draw import polygon_perimeter
import pandas as pd
import math
import time
import statistics
import random
pie = math.pi  # full-precision pi from the standard library instead of a hand-typed literal


In [None]:
def get_eccentricity(x_coords, y_coords):
    """Eccentricity of the axis-aligned ellipse spanned by a set of points.

    The ellipse is centred on the mean of the (integer-truncated) coordinates
    and its semi-axes are half the x and y extents. Its rasterised perimeter
    is handed to ``regionprops`` and that region's ``eccentricity`` is
    returned.

    Parameters
    ----------
    x_coords, y_coords : sequence of numbers
        Point coordinates; converted to ``int64`` before use.

    Returns
    -------
    The ``eccentricity`` property reported by ``regionprops``.

    Raises
    ------
    ValueError
        If either coordinate sequence is empty (raised by the numpy
        reductions).
    """
    x_coords = np.array(x_coords, dtype=np.int64)
    y_coords = np.array(y_coords, dtype=np.int64)

    # Centre of the ellipse.
    x_mean = np.mean(x_coords)
    y_mean = np.mean(y_coords)

    # Semi-axes: half the extent along each direction.
    x_radius = (np.max(x_coords) - np.min(x_coords)) / 2
    y_radius = (np.max(y_coords) - np.min(y_coords)) / 2

    # The ellipse is always drawn axis-aligned (orientation 0).
    rr, cc = ellipse_perimeter(int(y_mean), int(x_mean),
                               int(y_radius), int(x_radius), orientation=0)

    # The perimeter reaches negative rows/columns whenever the centre is
    # closer to the origin than the radius. Negative indices would wrap around
    # to the far side of the image and distort the shape, so translate the
    # outline to start at (0, 0); eccentricity does not depend on position.
    rr = rr - rr.min()
    cc = cc - cc.min()

    # Binary image holding just the ellipse outline.
    img = np.zeros((rr.max() + 1, cc.max() + 1), dtype=int)
    img[rr, cc] = 1

    return regionprops(img)[0].eccentricity


def get_solidity(x_coords, y_coords):
    """Ratio of marked pixels to the pixel count of their convex hull.

    Only the listed sample points are set in the image (the outline is not
    filled), so the numerator is the number of distinct integer points.

    Parameters
    ----------
    x_coords, y_coords : sequence of numbers
        Point coordinates; converted to ``int64`` before use.

    Returns
    -------
    float
        ``area / hull_area`` where both come from a single ``regionprops``
        region of the point image.

    Raises
    ------
    ValueError
        If either coordinate sequence is empty (raised by the numpy
        reductions).
    """
    x_coords = np.array(x_coords, dtype=np.int64)
    y_coords = np.array(y_coords, dtype=np.int64)

    # Shift to the origin: negative coordinates would otherwise wrap around
    # when used as indices. The ratio does not depend on position.
    x_coords = x_coords - x_coords.min()
    y_coords = y_coords - y_coords.min()

    # Binary image with one pixel per sample point.
    img = np.zeros((y_coords.max() + 1, x_coords.max() + 1), dtype=int)
    img[y_coords, x_coords] = 1

    # One regionprops pass provides both quantities. convex_image is the hull
    # mask cropped to the bounding box, so counting its pixels gives the hull
    # area directly (the count is unaffected by flipping or re-positioning).
    props = regionprops(img)[0]
    area = props.area
    hull_area = np.count_nonzero(props.convex_image)

    return area / hull_area


def _moment_features(values):
    """Return ``(mean, skewness, kurtosis)`` of a 1-D sample.

    The standard deviation is the population one (divide by n); the third and
    fourth central-moment sums are divided by ``(n - 1) * std**3`` and
    ``(n - 1) * std**4`` respectively. Samples with fewer than two values or
    zero spread give 0.0 for skewness and kurtosis instead of inf/nan.
    """
    values = np.asarray(values, dtype=float)
    count = values.size
    if count == 0:
        return 0.0, 0.0, 0.0
    mean = float(values.mean())
    centred = values - mean
    std = math.sqrt(float(np.sum(centred ** 2)) / count)
    if count < 2 or std == 0:
        return mean, 0.0, 0.0
    skewness = float(np.sum(centred ** 3)) / ((count - 1) * std ** 3)
    kurtosis = float(np.sum(centred ** 4)) / ((count - 1) * std ** 4)
    return mean, skewness, kurtosis


def _angle_between_slopes(m1, m2):
    """Angle between two lines given their slopes; pi/2 when perpendicular."""
    denominator = 1 + m1 * m2
    if denominator == 0:
        return math.pi / 2
    return math.atan((m2 - m1) / denominator)


def features(path):
    """Extract the 25-value feature vector of one signature file.

    The file is parsed with ``np.loadtxt``; columns 0-4 are read as
    x, y, timestamp, pen status (non-zero = pen down) and pressure.

    Parameters
    ----------
    path : str
        Path of the whitespace-separated signature file.

    Returns
    -------
    list
        In order: meanx/xmax, skew x, kurtosis x, mean y, skew y, kurtosis y,
        mean pressure, skew pressure, kurtosis pressure, mean velocity,
        skew velocity, kurtosis velocity, five direction-histogram counts,
        pen-status change count, y extent, x extent, first-stroke angle,
        last-stroke angle, slope sign-change count, fraction of positive-slope
        moves, fraction of non-positive-slope moves.

    Raises
    ------
    ValueError
        If the file has fewer than five columns, no rows, or no pen-down
        samples.
    """
    # Release the file handle as soon as parsing is done; ndmin=2 keeps a
    # single-row file two-dimensional.
    with open(f"{path}") as f:
        data = np.loadtxt(f, ndmin=2)
    if data.size == 0 or data.shape[1] < 5:
        raise ValueError(f"{path}: expected at least 5 columns of samples")

    xs, ys, ts, pen, prs = (data[:, col] for col in range(5))
    n_samples = len(data)
    xmin, xmax = xs.min(), xs.max()
    ymin, ymax = ys.min(), ys.max()

    pen_down = pen != 0
    if not pen_down.any():
        raise ValueError(f"{path}: no pen-down samples")

    # ------------------------section 1: position / pressure moments -----------------
    # Pen-down samples only, shifted so the bounding box starts at the origin.
    x = xs[pen_down] - xmin
    y = ys[pen_down] - ymin
    timeseries = ts[pen_down]
    pressure = prs[pen_down]

    meanx, skewnessx, kurtosisx = _moment_features(x)
    meany, skewnessy, kurtosisy = _moment_features(y)
    meanpressure, skewnessp, kurtosisp = _moment_features(pressure)

    # ------------------------section 2: direction histogram -------------------------
    # Direction of travel into every pen-down sample, measured from the
    # previous sample. arctan2 covers the full circle, matching the
    # [-pi, pi] bin range (plain atan only spans [-pi/2, pi/2], which leaves
    # the two outer bins permanently empty). A zero displacement maps to 0.
    dx = np.diff(xs)
    dy = np.diff(ys)
    theta = np.arctan2(dy, dx)[pen_down[1:]]
    num_bins = 5  # five equal 72-degree sectors
    bins = np.linspace(-np.pi, np.pi, num_bins + 1)
    hist, _ = np.histogram(theta, bins=bins)
    direction_hist_list = [int(count) for count in hist]

    # ------------------------section 3: velocity ------------------------------------
    # Speed between consecutive pen-down samples, scaled by the total
    # pen-down duration. Pairs sharing a timestamp are skipped because their
    # speed is undefined.
    duration = timeseries.max() - timeseries.min()
    step = np.hypot(np.diff(x), np.diff(y))
    dt = np.diff(timeseries)
    timed = dt != 0
    velocity = step[timed] * duration / dt[timed]
    avgvelocity, skewnessv, kurtosisv = _moment_features(velocity)

    # ------------------------section 4: pen changes and stroke angles ---------------
    pencount = int(np.count_nonzero(pen[1:] != pen[:-1]))

    # First angle: between the lines from the bounding-box corner (xmin, ymin)
    # to the first sample and to the first sample that is followed by a
    # pen-up sample.
    # NOTE(review): a vertical line gets the placeholder slope pi/2 here but
    # 300 for the last angle below - both kept as found; confirm which is meant.
    if xs[0] == xmin:
        m1 = math.pi / 2
    else:
        m1 = (ys[0] - ymin) / (xs[0] - xmin)
    m2 = 0
    for i in range(n_samples - 1):
        if pen[i + 1] == 0 and xs[i] != xmin:
            m2 = (ys[i] - ymin) / (xs[i] - xmin)
            break
    first_angle = _angle_between_slopes(m1, m2)

    # Last angle: same construction using the final sample and, scanning
    # backwards, the first sample whose predecessor is pen-up. Valid indices
    # stop at n_samples - 1, and i - 1 must stay >= 0.
    if xs[-1] != xmin:
        m1 = (ys[-1] - ymin) / (xs[-1] - xmin)
    else:
        m1 = 300
    m2 = 0  # do not reuse the slope found for the first angle
    for i in range(n_samples - 1, 0, -1):
        if pen[i - 1] == 0 and xs[i] != xmin:
            m2 = (ys[i] - ymin) / (xs[i] - xmin)
            break
    last_angle = _angle_between_slopes(m1, m2)

    # Slope of every consecutive segment (pi/2 stands in for vertical ones).
    # A segment counts as "upward" when its slope is positive; round_point
    # counts sign changes between successive slopes, starting from +1.
    with np.errstate(divide="ignore", invalid="ignore"):
        slopes = np.where(dx == 0, math.pi / 2, dy / dx)
    previous = np.concatenate(([1.0], slopes))[:-1]
    upward = int(np.count_nonzero(slopes > 0))
    round_point = int(np.count_nonzero(slopes * previous < 0))
    upward_move = upward / n_samples
    downward_move = (slopes.size - upward) / n_samples

    # --------------------------making features vector--------------------------------
    # NOTE(review): meanx is measured from xmin but divided by the unshifted
    # xmax, while meany is left unnormalised - kept as found.
    feature_vector = [
        float(np.divide(meanx, xmax)),
        skewnessx,
        kurtosisx,
        meany,
        skewnessy,
        kurtosisy,
        meanpressure,
        skewnessp,
        kurtosisp,
        avgvelocity,
        skewnessv,
        kurtosisv,
    ]
    feature_vector += direction_hist_list
    feature_vector += [
        pencount,
        float(ymax - ymin),
        float(xmax - xmin),
        first_angle,
        last_angle,
        round_point,
        upward_move,
        downward_move,
    ]
    return feature_vector

In [None]:
# Project root on the mounted Google Drive.
file = "/content/drive/My Drive/UGP23"

# Create the output tree on first run and announce each folder that is new.
for _folder_name in ("Features3", "Features3/Training", "Features3/Testing"):
    _folder_path = os.path.join(file, _folder_name)
    if not os.path.exists(_folder_path):
        os.makedirs(_folder_path)
        print(f'New folder "{_folder_name}" created')

# Input signatures and the training-CSV output folder.
data = "/content/drive/My Drive/UGP23/DS1_Modification_TimeFunctions"
traindata = "/content/drive/My Drive/UGP23/Features3/Training"

In [None]:

# Define the paths to the output folders and to the folder containing all the data folders
traindata = "/content/drive/My Drive/UGP23/Features3/Training"
testdata = "/content/drive/My Drive/UGP23/Features3/Testing"
data_folder_path = "/content/drive/My Drive/UGP23/DS1_Modification_TimeFunctions"

# Step 1: Get a list of all folder IDs
folder_ids = [f"usuario1{i:03}" for i in range(1, 351)]

# Column names; must stay in step with the vector returned by features().
CSV_HEADER = 'meanx,skewx,kurtosisx,meany,skewy,kurtosisy,meanp,skewp,kurtosisp,avgv,skewnessv,kurtosisv,H1,H2,H3,H4,H5,pencount,ysize,xsize,firstangle,lastangle,roundpoint,upmove,downmove,output\n'

# Fixed seed so the impostor selection, and therefore every CSV, is reproducible.
IMPOSTOR_SEED = 0
impostor_rng = random.Random(IMPOSTOR_SEED)


def write_feature_csv(folder_id, out_dir, split_name, genuine_indices,
                      impostor_count, impostor_index, all_folder_ids, rng):
    """Write one user's feature CSV for a train or test split.

    Rows labelled 1 come from the user's own signatures ``genuine_indices``;
    rows labelled 0 come from signature number ``impostor_index`` of
    ``impostor_count`` other users chosen with ``rng``.

    Parameters
    ----------
    folder_id : str
        The user's folder name; its last four characters form the file tag.
    out_dir : str
        Folder receiving ``<split_name><tag>.csv``.
    split_name : str
        "Training" or "Testing"; used in the file name and progress message.
    genuine_indices : iterable of int
        Signature numbers of the user to label 1.
    impostor_count : int
        Number of other users to draw for the 0-labelled rows.
    impostor_index : int
        Signature number taken from each drawn user.
    all_folder_ids : list of str
        Every user folder name, including ``folder_id``.
    rng : random.Random
        Source of randomness for the impostor draw.
    """
    user_tag = folder_id[-4:]
    csv_file_path = os.path.join(out_dir, f"{split_name}{user_tag}.csv")

    # Draw only from the other users so every file gets exactly
    # impostor_count negative rows (drawing from everyone and skipping the
    # user's own id produced one row fewer whenever that id was picked).
    other_ids = [fid for fid in all_folder_ids if fid != folder_id]

    with open(csv_file_path, "w") as f:
        print(f"Creating {split_name} csv file for usuario" + str(user_tag))
        f.write(CSV_HEADER)

        # Genuine signatures of this user.
        for i in genuine_indices:
            text_file_path = os.path.join(
                data_folder_path, folder_id, f"u{user_tag}_sg{i}.txt")
            feature = features(path=text_file_path)
            f.write(','.join(map(str, feature)) + ',1\n')

        # One signature from each randomly drawn other user.
        for other_folder_id in rng.sample(other_ids, k=impostor_count):
            other_text_file_path = os.path.join(
                data_folder_path, other_folder_id,
                f"u{other_folder_id[-4:]}_sg{impostor_index}.txt")
            feature = features(path=other_text_file_path)
            f.write(','.join(map(str, feature)) + ',0\n')


# Step 2: Training - signatures 1-20 of each user plus signature 1 of 50 other users
for folder_id in folder_ids:
    write_feature_csv(folder_id, traindata, "Training", range(1, 21),
                      50, 1, folder_ids, impostor_rng)

# Step 3: Testing - signatures 21-25 of each user plus signature 2 of 15 other users
for folder_id in folder_ids:
    write_feature_csv(folder_id, testdata, "Testing", range(21, 26),
                      15, 2, folder_ids, impostor_rng)


Creating Testing csv file for usuario1001


  m1=(data[len(data)-1][1]-ymin)/(data[len(data)-1][0]-xmin)
  last_angle=math.atan((m2-m1)/(1+m1*m2))


Creating Testing csv file for usuario1002
Creating Testing csv file for usuario1003
Creating Testing csv file for usuario1004
Creating Testing csv file for usuario1005
Creating Testing csv file for usuario1006
Creating Testing csv file for usuario1007
Creating Testing csv file for usuario1008
Creating Testing csv file for usuario1009
Creating Testing csv file for usuario1010
Creating Testing csv file for usuario1011
Creating Testing csv file for usuario1012
Creating Testing csv file for usuario1013
Creating Testing csv file for usuario1014
Creating Testing csv file for usuario1015
Creating Testing csv file for usuario1016
Creating Testing csv file for usuario1017
Creating Testing csv file for usuario1018
Creating Testing csv file for usuario1019
Creating Testing csv file for usuario1020
Creating Testing csv file for usuario1021
Creating Testing csv file for usuario1022
Creating Testing csv file for usuario1023
Creating Testing csv file for usuario1024
Creating Testing csv file for usua

  m1=(data[len(data)-1][1]-ymin)/(data[len(data)-1][0]-xmin)


Creating Testing csv file for usuario1035
Creating Testing csv file for usuario1036
Creating Testing csv file for usuario1037
Creating Testing csv file for usuario1038
Creating Testing csv file for usuario1039
Creating Testing csv file for usuario1040
Creating Testing csv file for usuario1041
Creating Testing csv file for usuario1042
Creating Testing csv file for usuario1043
Creating Testing csv file for usuario1044
Creating Testing csv file for usuario1045
Creating Testing csv file for usuario1046
Creating Testing csv file for usuario1047
Creating Testing csv file for usuario1048
Creating Testing csv file for usuario1049
Creating Testing csv file for usuario1050
Creating Testing csv file for usuario1051
Creating Testing csv file for usuario1052
Creating Testing csv file for usuario1053
Creating Testing csv file for usuario1054
Creating Testing csv file for usuario1055
Creating Testing csv file for usuario1056
Creating Testing csv file for usuario1057
Creating Testing csv file for usua