In [10]:
import os.path
import os
import gzip
import io
import shutil
import pandas as pd
import numpy as np
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
from operator import itemgetter
from ts2vg import NaturalVG
from ts2vg import HorizontalVG
from math import sqrt
from dotenv import load_dotenv

In [11]:
class Google:
    """Small helper that authenticates with OAuth and builds a Google API client.

    The instance only stores configuration; no network or file access happens
    until :meth:`connect` is called.
    """

    def __init__(self, cred_file_path, version, service_name, scopes, token_path='token.json') -> None:
        """Store the settings needed to build the API client.

        Args:
            cred_file_path: Path to the OAuth client-secrets file handed to
                ``InstalledAppFlow.from_client_secrets_file``.
            version: API version string passed to ``build`` (e.g. ``'v3'``).
            service_name: API name passed to ``build`` (e.g. ``'drive'``).
            scopes: List of OAuth scopes requested for the credentials.
            token_path: File used to cache the authorized user's tokens
                between runs. Defaults to ``'token.json'`` in the current
                working directory.
        """
        self.SCOPES = scopes
        self.cred_file_path = cred_file_path
        self.version = version
        self.service_name = service_name
        self.token_path = token_path

    def connect(self):
        """Authenticate and return a service object for the configured API.

        Cached credentials are read from ``self.token_path`` when that file
        exists. Missing or invalid credentials are refreshed (if expired and a
        refresh token is available) or obtained through the interactive
        local-server OAuth flow, and the result is written back to
        ``self.token_path``.

        Returns:
            The object returned by ``build`` on success, or ``None`` if
            ``build`` raised ``HttpError`` (the error is printed).
        """
        creds = None
        # The token file stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the
        # first time.
        if os.path.exists(self.token_path):
            creds = Credentials.from_authorized_user_file(self.token_path, self.SCOPES)

        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(self.cred_file_path, self.SCOPES)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run.
            with open(self.token_path, 'w') as token:
                token.write(creds.to_json())

        try:
            return build(self.service_name, self.version, credentials=creds)
        except HttpError as error:
            # TODO(developer) - Handle errors from the API.
            print(f'An error occurred: {error}')
            # Callers must check for None before using the returned service.
            return None


In [12]:
# Load settings from the dotenv file before any os.getenv lookups below.
load_dotenv()

# Build the Drive v3 client; the client-secrets path comes from the environment.
drive_scopes = ['https://www.googleapis.com/auth/drive']
client_secrets_path = os.getenv('credentials_file_path')
connection_class = Google(client_secrets_path, 'v3', 'drive', drive_scopes)
service = connection_class.connect()

In [None]:
# List every file whose parent is the configured Drive folder.
folder_id = os.getenv('folder_id')
query = f"'{folder_id}' in parents"

# Walk all result pages with a single loop. A page without a 'files' key
# contributes nothing instead of crashing the extend() call.
files = []
next_page_token = None
while True:
    res = service.files().list(q=query, pageToken=next_page_token).execute()
    files.extend(res.get('files', []))
    next_page_token = res.get('nextPageToken')
    if not next_page_token:
        break

# Keep only the gzip archives, ordered by file name.
files = [file for file in files if file['mimeType'] == 'application/gzip']
files = sorted(files, key=itemgetter('name'))

In [None]:
# Download every gzip file, split its tab-separated rows into three columns
# and collect one single-column frame per file and per axis. The frames are
# joined once after the loop instead of re-concatenating on every iteration.
x_columns = []
y_columns = []
z_columns = []
for file in files:
    try:
        file_object_request = service.files().get_media(fileId=file["id"])
        file_bytes = io.BytesIO()
        downloader = MediaIoBaseDownload(file_bytes, file_object_request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
    except Exception as ex:
        print(ex)
        # Bare raise keeps the original traceback intact.
        raise

    # Column label: characters 1-3 of the file name plus the integer parsed
    # from characters 6-7, lower-cased and joined with "_".
    filename_to_be_saved = str.lower("_".join([file["name"][1:4], str(int(file["name"][6:8]))]))

    file_bytes.seek(0)
    with gzip.open(file_bytes, 'rb') as f:
        file_content = f.read()

    # Only rows with exactly three tab-separated fields are kept; values stay
    # as the text read from the file.
    rows = [line.split("\t") for line in file_content.decode().split('\n')]
    samples = [row for row in rows if len(row) == 3]

    x_columns.append(pd.DataFrame(data=[row[0] for row in samples], columns=[filename_to_be_saved]))
    y_columns.append(pd.DataFrame(data=[row[1] for row in samples], columns=[filename_to_be_saved]))
    z_columns.append(pd.DataFrame(data=[row[2] for row in samples], columns=[filename_to_be_saved]))

# Column-wise join; shorter columns are padded with NaN by the outer join.
# pd.concat rejects an empty list, so fall back to an empty frame.
df_x = pd.concat(x_columns, axis=1) if x_columns else pd.DataFrame()
df_y = pd.concat(y_columns, axis=1) if y_columns else pd.DataFrame()
df_z = pd.concat(z_columns, axis=1) if z_columns else pd.DataFrame()

In [None]:
# Write each axis table to its own CSV file, without the row index.
axis_exports = (
    (df_x, "x_values.csv"),
    (df_y, "y_values.csv"),
    (df_z, "z_values.csv"),
)
for axis_frame, csv_name in axis_exports:
    axis_frame.to_csv(csv_name, index=False)

In [None]:
# Download the gzip-compressed wear list identified by the 'file_id' setting.
wear_lis_object = service.files().get_media(fileId=os.getenv('file_id'))
wear_lis_bytes = io.BytesIO()
downloader = MediaIoBaseDownload(wear_lis_bytes, wear_lis_object)
done1 = False
while not done1:
    status, done1 = downloader.next_chunk()
wear_lis_bytes.seek(0)
with gzip.open(wear_lis_bytes, 'rb') as f:
    wear_list_text = f.read().decode()

# Records are separated by blank lines. Blank records (e.g. produced by a
# trailing blank line at the end of the file) are skipped, because indexing
# into them below would raise IndexError.
wear_list = [record.split("\n") for record in wear_list_text.split("\n\n") if record.strip()]

# Map "<field0>_<field4>" of each record's first line (lower-cased) to the
# first space-separated token of the record's third line.
wear_list_dict = {}
for value in wear_list:
    header_fields = value[0].split(" ")
    file_name = str.lower("_".join([header_fields[0], header_fields[4]]))
    wear_value = value[2].split(" ")[0]
    wear_list_dict[file_name] = wear_value

In [13]:
# Read the x-axis table from the CSV path configured in the environment
# and preview its first rows.
x_values_csv_path = os.getenv('x_Value_file_path')
df_x = pd.read_csv(x_values_csv_path)
df_x.head(6)

Unnamed: 0,av1_1,av1_2,av1_3,av1_4,av1_5,av1_6,av1_7,av1_8,av1_9,av1_10,...,cz1_2,cz1_3,cz1_4,cz1_5,cz1_6,cz1_7,cz1_8,cz1_9,cz1_10,cz1_11
0,1.210937,0.981445,1.52832,0.615234,1.638184,0.969238,1.132812,1.259766,1.433105,1.276855,...,0.690918,0.766602,0.427246,1.911621,2.265625,1.967773,0.654297,1.367187,2.133789,1.560059
1,0.952148,1.069336,1.467285,0.92041,0.771484,1.308594,1.120605,1.242676,1.398926,1.228027,...,0.761719,2.104492,1.748047,0.471191,1.328125,1.069336,1.726074,1.621094,1.188965,1.796875
2,0.751953,1.330566,0.974121,1.535645,0.651855,1.191406,1.09375,1.254883,1.369629,1.23291,...,1.635742,2.167969,2.155762,0.900879,0.461426,1.577148,2.160645,1.176758,1.838379,1.71875
3,0.693359,1.220703,0.92041,1.103516,1.306152,0.766602,1.140137,1.242676,1.262207,1.218262,...,1.711426,0.74707,0.876465,2.268066,1.765137,2.094727,0.683594,1.804199,1.379395,1.662598
4,0.761719,0.844727,1.313477,0.76416,1.247559,1.208496,1.166992,1.235352,1.396484,1.247559,...,0.871582,0.48584,0.544434,1.889648,2.226562,1.352539,1.677246,1.306152,1.51123,1.755371


In [None]:
def graph_entropy(adjacency_matrix):
    """Compute a normalized entropy and a Fisher-information score of a graph.

    With ``n`` nodes and adjacency matrix ``A``:

    * entropy: ``sum_k log(colsum_k(A)) / (n * log(n - 1))``
    * random-walk matrix ``P``: each row of ``A`` divided by its row sum
    * per-node information for row ``i``:
      ``0.5 * sum_j (sqrt(P[i, j+1]) - sqrt(P[i, j])) ** 2`` over
      ``j = 0 .. n-2``, skipping the term where ``j == i``
    * Fisher information: mean of the per-node information over all nodes

    Args:
        adjacency_matrix: Square 2-D array-like of non-negative edge weights.

    Returns:
        Tuple ``(normalized_graph_entropy, normalized_graph_fisher_information)``.

    Raises:
        ValueError: If the input is not a square 2-D matrix.

    Notes:
        The entropy normalisation divides by ``log(n - 1)``, so it is not
        finite for graphs with fewer than three nodes. A node without edges
        (zero row/column sum) likewise yields ``-inf``/``nan`` results.
    """
    adjacency = np.asarray(adjacency_matrix, dtype=float)
    if adjacency.ndim != 2 or adjacency.shape[0] != adjacency.shape[1]:
        raise ValueError("Input matrix should be a square matrix")

    no_of_nodes = adjacency.shape[0]

    column_sums = adjacency.sum(axis=0)
    normalized_graph_entropy = np.log(column_sums).sum() / (no_of_nodes * np.log(no_of_nodes - 1))

    # Row-normalise so every row holds the transition probabilities of a
    # random walk leaving that node.
    random_walk_mat = adjacency / adjacency.sum(axis=1, keepdims=True)

    # Squared differences between square roots of neighbouring columns,
    # computed for all rows at once: entry [i, j] is the (j+1, j) term of row i.
    root = np.sqrt(random_walk_mat)
    squared_steps = (root[:, 1:] - root[:, :-1]) ** 2

    # The term with j == i is excluded from every row's sum.
    skipped = np.arange(no_of_nodes - 1)
    squared_steps[skipped, skipped] = 0.0

    node_information = 0.5 * squared_steps.sum(axis=1)
    normalized_graph_fisher_information = node_information.sum() / no_of_nodes

    return normalized_graph_entropy, normalized_graph_fisher_information

In [None]:
# Build one feature row per column of df_x: the two graph statistics of the
# column's natural visibility graph, the column name and its wear value.
col_names = df_x.columns
feature_rows = []
for position, column_name in enumerate(col_names):
    # Columns of unequal length are NaN-padded in the table; drop those
    # entries so only real samples reach the visibility graph.
    time_series = df_x.iloc[:, position].dropna().to_numpy()

    g_natural_vg = NaturalVG()
    g_natural_vg.build(time_series)
    natural_vg_adjacency = g_natural_vg.as_igraph().get_adjacency()
    normalized_graph_entropy, graph_fisher_information = graph_entropy(np.array(natural_vg_adjacency.data))

    # Columns without an entry in the wear list get a wear value of 0.
    feature_rows.append([
        normalized_graph_entropy,
        graph_fisher_information,
        column_name,
        wear_list_dict.get(column_name, 0),
    ])

df_x_features = pd.DataFrame(
    data=feature_rows,
    columns=["graph_entropy", "graph_fisher_information", "file_name", "wear_value"],
)
df_x_features

In [None]:
# Persist the feature table as CSV, without the row index.
x_features_csv_name = "x_features.csv"
df_x_features.to_csv(x_features_csv_name, index=False)