In [25]:
import io
import os
import random
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from google.cloud import storage
from assets.credential_key import key

In [16]:
# NOTE(review): presumably prepares the Google Cloud credentials for this
# session; the helper comes from assets.credential_key and its body is not
# visible here — confirm before relying on it.
key.get_credential_key()
# Google Cloud Storage client
client = storage.Client()
# Bucket handle read as a global by the functions and cells below
bucket = client.bucket("fall-detection-bucket")

In [5]:
def format_number(number):
    """Render a value as a string with exactly two decimal places.

    Args:
        number: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        The value formatted as ``"<x>.<yy>"`` when it can be converted to a
        float; otherwise ``number`` itself, unchanged.
    """
    try:
        return f"{float(number):.2f}"
    except (TypeError, ValueError, OverflowError):
        # float() raises ValueError for non-numeric text, TypeError for
        # objects such as None or lists, and OverflowError for integers too
        # large for a float. All of them mean "leave the value as it is".
        return number

In [6]:
def get_folder_names():
    """Collect the names of the folders located directly under ``sensor_data/``.

    Reads the module-level ``bucket``.

    Returns:
        set[str]: The distinct second path components of every object whose
        name has the form ``sensor_data/<folder>/...``. Objects that sit
        directly under ``sensor_data/`` (including the ``sensor_data/``
        placeholder itself) do not contribute a name.
    """
    prefix = 'sensor_data/'

    folder_names = set()
    # Passing the prefix lets the listing skip every unrelated object instead
    # of walking the whole bucket.
    for blob in bucket.list_blobs(prefix=prefix):
        if not blob.name.startswith(prefix):
            continue
        parts = blob.name.split('/')
        # parts is ['sensor_data', '<folder>', '<rest>', ...]. With fewer than
        # three parts the second component is a file name (or empty for the
        # placeholder object), not a folder.
        if len(parts) >= 3 and parts[1]:
            folder_names.add(parts[1])

    return folder_names

In [7]:
def create_folder_in_gcs(bucket_name, folder_name):
    """Create an empty placeholder object that acts as a folder in the bucket.

    Args:
        bucket_name: Either a bucket object exposing ``blob()`` or a plain
            bucket name. When a plain name is given, the module-level
            ``bucket`` is used to create the object.
        folder_name: Path of the folder to create. A trailing ``/`` is added
            when missing so the zero-byte object is treated as a folder
            rather than as an empty file with that name.
    """
    # Accept a bucket object as well as a name, so both calling styles work.
    target_bucket = bucket_name if hasattr(bucket_name, 'blob') else bucket
    display_name = getattr(bucket_name, 'name', bucket_name)

    placeholder_name = folder_name if folder_name.endswith('/') else folder_name + '/'
    blob = target_bucket.blob(placeholder_name)
    blob.upload_from_string('')
    print(f"Folder {folder_name} berhasil dibuat di bucket {display_name}")

In [8]:
def get_list_data(bucket, task_names):
    """Group the raw sensor blobs of a bucket by task code.

    Args:
        bucket: Bucket object whose ``list_blobs(prefix=...)`` yields objects
            with a ``name`` attribute.
        task_names: Iterable of task codes (e.g. ``'T01'``). A blob belongs
            to a task when the code occurs anywhere in the blob name.

    Returns:
        dict: ``{task_name: [blob, ...]}`` with one entry per requested task,
        containing only blobs whose name starts with ``sensor_data/``.
    """
    prefix = "sensor_data/"

    # A substring test for "sensor_data/" would also match
    # "normalized_sensor_data/...", so a second run would pick up its own
    # output. Anchor the match at the start of the name instead.
    raw_blobs = [
        blob for blob in bucket.list_blobs(prefix=prefix)
        if blob.name.startswith(prefix)
    ]

    return {
        task_name: [blob for blob in raw_blobs if task_name in blob.name]
        for task_name in task_names
    }

In [9]:
def replace_zeros_with_interpolation(df, columns):
    """Treat zeros as missing readings and fill them by linear interpolation.

    Each requested column that exists in ``df`` is coerced to numeric
    (unparseable entries become NaN), its zeros are turned into NaN, and all
    NaN values are filled linearly in both directions. Columns that are not
    in ``df`` are skipped.

    Args:
        df: DataFrame to process; it is modified in place.
        columns: Iterable of column names to clean.

    Returns:
        The same ``df`` object that was passed in.
    """
    present = [name for name in columns if name in df.columns]
    for name in present:
        # Coerce to numbers first so text cells turn into NaN
        numeric = pd.to_numeric(df[name], errors='coerce')
        with_gaps = numeric.replace(0, np.nan)
        filled = with_gaps.interpolate(method='linear', limit_direction='both')
        df[name] = filled.infer_objects(copy=False)
    return df

In [74]:
def normalize_and_upload(blob):
    """Normalize one raw sensor CSV and upload the result to the bucket.

    The CSV is downloaded, zeros in the accelerometer/gyroscope columns are
    replaced by interpolated values, those columns are formatted to two
    decimals, and the result is written to
    ``normalized_sensor_data/<folder>/normalized_<file>`` in the module-level
    ``bucket``.

    Blobs whose name does not start with ``sensor_data/`` are skipped. Any
    error is reported with ``print`` and not re-raised, so one bad file does
    not stop a batch.

    Args:
        blob: Blob object with a ``name`` and a ``download_as_bytes()`` method.
    """
    source_prefix = 'sensor_data/'
    try:
        # A substring test would also accept "normalized_sensor_data/..." and
        # normalize this function's own output a second time.
        if not blob.name.startswith(source_prefix):
            print(f"File {blob.name} tidak berada dalam folder sensor_data. Melewati normalisasi.")
            return

        # download_as_string is a deprecated alias of download_as_bytes
        blob_data = blob.download_as_bytes()
        df = pd.read_csv(io.StringIO(blob_data.decode('utf-8')))

        # Columns to normalize
        normalize_columns = ["AccX", "AccY", "AccZ", "GyrX", "GyrY", "GyrZ"]

        # Replace zero readings with interpolated values
        df = replace_zeros_with_interpolation(df, normalize_columns)

        # Format the numbers of the selected columns only
        df[normalize_columns] = df[normalize_columns].apply(lambda col: col.map(format_number))

        # Destination: normalized_sensor_data/<folder>/normalized_<file name>
        path_parts = blob.name.split('/')
        folder_name = "normalized_sensor_data/" + path_parts[1]
        destination_blob_name = folder_name + "/normalized_" + path_parts[-1]

        # to_csv without a target returns the CSV text directly
        normalized_blob = bucket.blob(destination_blob_name)
        normalized_blob.upload_from_string(df.to_csv(index=False), content_type='text/csv')

        print(f"File {blob.name} telah dinormalisasi dan disimpan kembali di GCS sebagai {destination_blob_name}.")
    except Exception as e:
        print(f"Terjadi kesalahan saat melakukan normalisasi: {e}")


In [10]:
# Discover the folder names that exist under sensor_data/ in the bucket
folder_names = get_folder_names()

# Create a matching folder for each of them under normalized_sensor_data/.
# create_folder_in_gcs expects the bucket *name* as its first argument, so
# pass bucket.name rather than the Bucket object (whose repr would otherwise
# end up in the printed message).
for folder_name in folder_names:
    create_folder_in_gcs(bucket.name, f"normalized_sensor_data/{folder_name}")

# Task codes to process
tasks = ['T01', 'T06', 'T20', 'T21', 'T22']

# Collect the raw blobs of each task from Google Cloud Storage
tasks_data = get_list_data(bucket, tasks)

# Normalize and upload every file, task by task
for task_name, task_data in tasks_data.items():
    print(f'Task {task_name}: {len(task_data)} files')
    for blob in task_data:
        normalize_and_upload(blob)
    print("")

KeyboardInterrupt: 

In [11]:
def list_data_dir():
    """Group the blobs under ``normalized_sensor_data/`` by task code.

    Reads the module-level ``bucket``. A blob is assigned to every task whose
    code occurs anywhere in the blob name.

    Returns:
        tuple: ``(task_blobs, tasks_amount)`` where ``task_blobs`` maps each
        task code to the list of matching blobs and ``tasks_amount`` maps
        each task code to the number of matching blobs.
    """
    task_codes = ('T01', 'T06', 'T20', 'T21', 'T22')
    task_blobs = {code: [] for code in task_codes}

    # Only objects below the normalized folder are listed
    for item in bucket.list_blobs(prefix="normalized_sensor_data/"):
        for code in task_codes:
            if code in item.name:
                task_blobs[code].append(item)

    # The per-task count is the length of the collected list
    tasks_amount = {code: len(found) for code, found in task_blobs.items()}
    return task_blobs, tasks_amount

# Fetch the normalized blobs once, then report the file count of every task
task_dir, task_amount = list_data_dir()
for _task_code in ('T01', 'T06', 'T20', 'T21', 'T22'):
    # The label shows the task number without its leading "T"
    print(f'Task {_task_code[1:]}: {task_amount[_task_code]}')


Task 01: 32
Task 06: 151
Task 20: 159
Task 21: 161
Task 22: 161


In [12]:
# Row count of every normalized T22 file, used to compute the average below
length = []

for blob in task_dir['T22']:
    # Download the content of the blob
    blob_content = blob.download_as_string()

    # Parse the CSV content from an in-memory buffer
    data = pd.read_csv(io.BytesIO(blob_content))

    # Record the number of rows of this file
    length.append(data.shape[0])

# Average number of rows across the T22 files
average_length = np.mean(np.array(length))
print("Average number of rows:", average_length)

Average number of rows: 719.9627329192547


In [20]:
# Bind the blob list of each task to its own name for the cells that follow
task_01, task_06, task_20, task_21, task_22 = (
    task_dir[code] for code in ("T01", "T06", "T20", "T21", "T22")
)

In [21]:
# Inspect the blobs collected for task T01 (prints the repr of the whole list)
print(task_01)

[<Blob: fall-detection-bucket, normalized_sensor_data/SA06/normalized_S06T01R01.csv, 1717898465636295>, <Blob: fall-detection-bucket, normalized_sensor_data/SA07/normalized_S07T01R01.csv, 1717898467234048>, <Blob: fall-detection-bucket, normalized_sensor_data/SA08/normalized_S08T01R01.csv, 1717898468688117>, <Blob: fall-detection-bucket, normalized_sensor_data/SA09/normalized_S09T01R01.csv, 1717898470138932>, <Blob: fall-detection-bucket, normalized_sensor_data/SA10/normalized_S10T01R01.csv, 1717898471623768>, <Blob: fall-detection-bucket, normalized_sensor_data/SA11/normalized_S11T01R01.csv, 1717898472662079>, <Blob: fall-detection-bucket, normalized_sensor_data/SA12/normalized_S12T01R01.csv, 1717898474099214>, <Blob: fall-detection-bucket, normalized_sensor_data/SA13/normalized_S13T01R01.csv, 1717898475528893>, <Blob: fall-detection-bucket, normalized_sensor_data/SA14/normalized_S14T01R01.csv, 1717898477091719>, <Blob: fall-detection-bucket, normalized_sensor_data/SA15/normalized_S15

In [22]:
def group_tasks(task_blobs, task_name, output_dir='../../assets/pkl/update'):
    """Download a task's CSV blobs, drop unused columns and pickle the arrays.

    Every blob is read as CSV, the columns listed in ``drop_cols`` are
    removed, and the remaining table is converted to a numpy array. The list
    of arrays (one per blob, in input order) is written with ``joblib.dump``
    to ``<output_dir>/<task_name>.pkl``.

    Args:
        task_blobs: Iterable of blob objects providing ``download_as_bytes()``
            that returns CSV content.
        task_name: Base name (without extension) of the pickle file.
        output_dir: Directory that receives the pickle; created when missing.
            Defaults to the location this notebook has always written to.

    Raises:
        KeyError: If a CSV lacks one of the columns in ``drop_cols``.
    """
    # Columns that are not needed downstream
    drop_cols = ['TimeStamp(s)', 'FrameCounter', 'EulerX', 'EulerY', 'EulerZ']

    # Create the destination up front so an unusable path fails before any
    # download. exist_ok avoids the check-then-create race of a separate
    # os.path.exists test.
    os.makedirs(output_dir, exist_ok=True)

    all_data = []
    for blob in task_blobs:
        # download_as_string is a deprecated alias of download_as_bytes
        blob_content = blob.download_as_bytes()

        # Parse the CSV content from an in-memory buffer
        df = pd.read_csv(io.BytesIO(blob_content))

        # Drop the unused columns without mutating in place, then keep the
        # remaining values as a numpy array
        all_data.append(df.drop(columns=drop_cols).to_numpy())

    # Persist the list of arrays as a .pkl file
    joblib.dump(all_data, os.path.join(output_dir, f'{task_name}.pkl'))

    print('Done!')

In [23]:
# Pickle the arrays of every task, one .pkl file per task
for _pkl_name, _blobs in (
    ('task_01', task_01),
    ('task_06', task_06),
    ('task_20', task_20),
    ('task_21', task_21),
    ('task_22', task_22),
):
    group_tasks(_blobs, _pkl_name)

Done!
Done!
Done!
Done!
Done!
