# TNT hand gestures study

# flow of program
DELETE BEFORE SUBMISSION

-   code to import data from 3 csv files held in the data folder
-   data is then combined in a dictionary and then converted to a pandas dataframe
-   the data is then cleaned and the columns are renamed
-   the data is then split into a training and testing set
-   a linear regression model is then fitted to the training data
-   the model is then used to predict the test data
-   the mean squared error and r2 score are then calculated
-   the results are then printed to the console

**Dependencies**

In [None]:
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install seaborn
!pip install scikit-learn
!pip install tensorflow

**Importing relevant libraries.**

In [None]:

#-----------------Information-----------------#

'''
    Title: Linear Regression Model for Predicting Absolute Acceleration
    Data Collection Declaration:

    This project is being developed for a Data Science and Machine Learning class.
    The data used in this project was collected by the student developers at the University of Nottingham. 

    Legal Aspects:

    The data collection process complied with all applicable laws and university policies. 
    Any personal data that was collected has been anonymized to protect the privacy of the individuals involved. 

    Please note that the use of this data must comply with all relevant data protection and privacy laws. 
    Unauthorized use, disclosure, or duplication of this data is strictly prohibited.
'''
'''
    Data Information:

    Data within the dataset being examined is of the format of a csv file with the following columns:
    Column Names and Types:
    
    '''

#-----------------Information-----------------#

#-----------------Importing Libraries-----------------#

import io
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from tensorflow import keras

#-----------------Importing Libraries-----------------#

In [None]:

#-----------------Flags-----------------#

# When True, print_sys_msg writes its system/control messages to the console.
SYS_MSG = True
# Toggle for plotting graphs (not referenced in the visible part of this file).
PLOT = True
# Toggle for plotting test graphs (not referenced in the visible part of this file).
TEST_PLOT = False

#-----------------Flags-----------------#

#-----------------Basic Functions-----------------#

# Function to print system messages
def print_sys_msg(msg):
    """Print ``msg`` under a 'System control message' banner if SYS_MSG is set.

    Args:
        msg: Text to show. It is concatenated to the banner, so it must be a str.
    """
    # Guard clause: stay silent (and do not touch msg) while messages are disabled.
    if not SYS_MSG:
        return
    banner = '-' * 10 + 'System control message \n'
    print(banner + msg)

# Function to print normal messages
def print_msg(msg):
    """Print ``msg`` under a 'Message' banner, regardless of any flag.

    Args:
        msg: Text to show. It is concatenated to the banner, so it must be a str.
    """
    banner = '-' * 10 + 'Message \n'
    print(banner + msg)

#-----------------Basic Functions-----------------#

## Preparation Phase  
We create base classes to manage our data, visualisation, analysis, and prediction.  
First we create the DataHandler class. It will manage the import, in-memory manipulation, and final storage of the data.  
\- [member name]

In [None]:
#-----------------DataHandler Class-----------------#
'''
Class DataHandler
    purpose: import and manage data (data manipulation, data wrangling, and data preprocessing)

    initialization example:
        data = DataHandler()
    
    functions:
    - import_data: import data from csv files and combine them into a single dataframe
        dependencies used: pandas
        function call example: data.import_data(['data/member1.csv', 'data/member2.csv', 'data/member3.csv'])
        input: list of file names
        output: none
    
    - import_data_system: import data from the system
        dependencies used: pandas, os
        function call example: data.import_data_system(['data/member1', 'data/member2', 'data/member3'])
        input: list of directory names
        output: none
        
    - data_shape: print the shape of the dataframe
        dependencies used: pandas
        function call example: data.data_shape()
        input: none
        output: none

    - data_head: print the first 5 rows of the dataframe
        dependencies used: pandas
        function call example: data.data_head()
        input: none
        output: none

    - data_info: print the information of the dataframe
        dependencies used: pandas
        function call example: data.data_info()
        input: none
        output: none

    - data_describe: print the description of the dataframe
        dependencies used: pandas
        function call example: data.data_describe()
        input: none
        output: none

    - data_null: print the null values in the dataframe
        dependencies used: pandas
        function call example: data.data_null()
        input: none
        output: none
    
    - data_corr: print the correlation matrix of the dataframe
        dependencies used: pandas
        function call example: data.data_corr()
        input: none
        output: none

    - drop_duplicates: drop duplicates        
        dependencies used: pandas
        function call example: data.drop_duplicates()
        input: none
        output: none

    - drop_null: drop null values     
        dependencies used: pandas
        function call example: data.drop_null()
        input: none
        output: none
    
    - drop_outliers: drop outliers
        dependencies used: pandas
        function call example: data.drop_outliers()
        input: none
        output: none

    - drop_negative_time: drop negative time values
        dependencies used: pandas
        function call example: data.drop_negative_time()
        input: none
        output: none
        
- Managed by: Samarth
- Created on: 03/02/2024
- Modified on: 03/02/2024
- Contact:  psxs2@nottingham.ac.uk
'''
class DataHandler:
    """Import, inspect, clean, normalise and store the study data with pandas.

    Attributes:
        data: dict mapping 'data_1', 'data_2', ... to the dataframe read from
            each imported csv file (None until an import method has run).
        df: dataframe holding all imported files concatenated row-wise; every
            inspection, cleaning and storing method works on this frame
            (None until an import method has run).

    The inspection methods (data_*) report through print_sys_msg, so their
    output is only shown while the SYS_MSG flag is True.
    """

    # Per-file dataframes keyed 'data_<n>'; filled by the import methods.
    data = None
    # Combined dataframe; filled by the import methods.
    df = None

    #-----------------Data Import Functions-----------------#

    def _load_csv_files(self, paths):
        """Read each csv once, keep the frames in self.data and their concatenation in self.df.

        Leaves self.data and self.df untouched when ``paths`` is empty.
        """
        frames = {
            'data_' + str(position): pd.read_csv(path)
            for position, path in enumerate(paths, start=1)
        }
        if not frames:
            print_sys_msg('DataHandler:_load_csv_files: no csv files given, nothing imported')
            return
        self.data = frames
        # concat copies, so self.df stays independent of the frames kept in self.data
        self.df = pd.concat(list(frames.values()), ignore_index=True)

    # usage example - data = DataHandler(); data.import_data(['data/member1.csv', 'data/member2.csv', 'data/member3.csv'])
    def import_data(self, files):
        """Import the listed csv files and combine them into self.df.

        Args:
            files: Sequence of csv file paths; they are read in the given order.

        Returns:
            None. Results are stored in self.data and self.df.
        """
        paths = list(files)
        print_sys_msg('DataHandler:import_data: importing data from ' + str(len(paths))
                      + ' csv files and combining them into a single dataframe')
        self._load_csv_files(paths)
        print_sys_msg('DataHandler:import_data: data imported successfully')

    # usage example - data = DataHandler(); data.import_data_system(['data/member1', 'data/member2', 'data/member3'])
    def import_data_system(self, directorys):
        """Import every csv file found directly inside the listed directories.

        Files are collected across ALL directories before loading, so earlier
        directories are no longer overwritten by later ones. Within a
        directory the files are taken in sorted name order, and entries that
        do not end in '.csv' are skipped.

        Args:
            directorys: Sequence of directory paths to scan (not recursive).

        Returns:
            None. Results are stored in self.data and self.df.
        """
        print_sys_msg('DataHandler:import_data_system: importing data from the system')
        csv_paths = []
        for directory in directorys:
            # os.listdir order is arbitrary; sorting keeps the 'data_<n>' keys reproducible
            for file_name in sorted(os.listdir(directory)):
                if file_name.lower().endswith('.csv'):
                    csv_paths.append(os.path.join(directory, file_name))
        self._load_csv_files(csv_paths)

    #-----------------Data Import Functions-----------------#
    #-----------------Basic Data Wrangling Functions-----------------#

    def data_shape(self):
        """Report the (rows, columns) shape of self.df."""
        print_sys_msg('DataHandler:data_shape: printing the shape of the dataframe')
        print_sys_msg(str(self.df.shape))

    def data_head(self):
        """Report the first 5 rows of self.df."""
        print_sys_msg('DataHandler:data_head: printing the first 5 rows of the dataframe')
        print_sys_msg(str(self.df.head()))

    def data_info(self):
        """Report the DataFrame.info() summary of self.df."""
        print_sys_msg('DataHandler:data_info: printing the information of the dataframe')
        # DataFrame.info() writes to a buffer and returns None, so capture the
        # text instead of printing str(None).
        summary = io.StringIO()
        self.df.info(buf=summary)
        print_sys_msg(summary.getvalue())

    def data_describe(self):
        """Report the DataFrame.describe() statistics of self.df."""
        print_sys_msg('DataHandler:data_describe: printing the description of the dataframe')
        print_sys_msg(str(self.df.describe()))

    def data_null(self):
        """Report the number of null values per column of self.df."""
        print_sys_msg('DataHandler:data_null: printing the null values in the dataframe')
        print_sys_msg(str(self.df.isnull().sum()))

    def data_corr(self):
        """Report the correlation matrix of the numeric columns of self.df."""
        print_sys_msg('DataHandler:data_corr: printing the correlation matrix of the dataframe')
        # numeric_only avoids an exception if a non-numeric column is present
        print_sys_msg(str(self.df.corr(numeric_only=True)))

    def data_missing(self):
        """Report, per row, whether the row contains at least one missing value."""
        print_sys_msg('DataHandler:data_missing: printing the missing values in the dataframe')
        print_sys_msg(str(self.df.isna().any(axis=1)))

    #-----------------Basic Data Wrangling Functions-----------------#

    #-----------------Data Preprocessing Functions-----------------#

    def drop_duplicates(self):
        """Replace self.df with a copy that has duplicate rows removed."""
        print_sys_msg('DataHandler:drop_duplicates: dropping duplicates')
        self.df = self.df.drop_duplicates()

    def drop_null(self):
        """Replace self.df with a copy that has every row containing a null removed."""
        print_sys_msg('DataHandler:drop_null: dropping null values')
        self.df = self.df.dropna()

    # NOTE: drop_outliers is disabled (kept from the original as a sketch, thresholds unverified).
    # def drop_outliers(self):
    #     print_sys_msg('DataHandler:drop_outliers: dropping outliers')
    #     self.df = self.df[(self.df['Linear Acceleration x (m/s^2)'] > -10) & (self.df['Linear Acceleration x (m/s^2)'] < 10)]
    #     self.df = self.df[(self.df['Linear Acceleration y (m/s^2)'] > -10) & (self.df['Linear Acceleration y (m/s^2)'] < 10)]
    #     self.df = self.df[(self.df['Linear Acceleration z (m/s^2)'] > -10) & (self.df['Linear Acceleration z (m/s^2)'] < 10)]
    #     self.df = self.df[(self.df['Absolute acceleration (m/s^2)'] > 0) & (self.df['Absolute acceleration (m/s^2)'] < 10)]

    def drop_negative_time(self):
        """Keep only rows whose 'Time (s)' value is zero or positive.

        Rows with a missing time are dropped as well because the comparison
        is False for NaN.
        """
        print_sys_msg('DataHandler:drop_negative_time: dropping negative time values')
        # '>= 0' rather than '> 0': a sample at t == 0 is not negative and must be kept
        self.df = self.df[self.df['Time (s)'] >= 0]

    # missing values handling - drop rows with missing values
    def drop_missing(self, threshold=3):
        """Drop rows that have fewer than ``threshold`` non-missing values.

        Args:
            threshold: Minimum number of non-NA values a row needs to be kept.
        """
        print_sys_msg('DataHandler:drop_missing: dropping missing values')
        self.df = self.df.dropna(thresh=threshold).copy()

    # missing values handling - fill missing values with mean of the column
    def fill_missing(self):
        """Fill missing values in numeric columns with that column's mean."""
        print_sys_msg('DataHandler:fill_missing: filling missing values with mean of the column')
        self.df = self.df.fillna(self.df.mean(numeric_only=True))

    # missing values handling - fill missing values with median of the column
    def fill_missing_median(self):
        """Fill missing values in numeric columns with that column's median."""
        print_sys_msg('DataHandler:fill_missing_median: filling missing values with median of the column')
        self.df = self.df.fillna(self.df.median(numeric_only=True))

    # missing values handling - fill missing values with mode of the column
    def fill_missing_mode(self):
        """Fill missing values with the first mode of each column."""
        print_sys_msg('DataHandler:fill_missing_mode: filling missing values with mode of the column')
        self.df = self.df.fillna(self.df.mode().iloc[0])

    # TODO: a further missing-value strategy was planned here but never written.

    # data normalization - min-max normalization
    def min_max_normalization(self):
        """Rescale every column of self.df to the range [0, 1].

        NOTE(review): expects numeric columns only; a constant column divides
        by zero and becomes NaN.
        """
        print_sys_msg('DataHandler:min_max_normalization: min-max normalization')
        self.df = (self.df - self.df.min()) / (self.df.max() - self.df.min())

    # data normalization - standardization
    def standardization(self):
        """Centre every column of self.df on its mean and divide by its standard deviation.

        NOTE(review): expects numeric columns only.
        """
        print_sys_msg('DataHandler:standardization: standardization')
        self.df = (self.df - self.df.mean()) / self.df.std()

    #-----------------Data Preprocessing Functions-----------------#

    #-----------------Data Splitting Functions-----------------#

    #-----------------Data Splitting Functions-----------------#

    #-----------------Storing Data Functions-----------------#

    def store_data_with_name(self, file_name):
        """Write self.df to ``file_name`` as csv, without the index column.

        Args:
            file_name: Destination path of the csv file.
        """
        print_sys_msg('DataHandler:store_data: storing data to a csv file')
        self.df.to_csv(file_name, index=False)

    def store_data_with_current_date_time(self):
        """Write self.df to 'data_<YYYY-MM-DD_HH-MM-SS>.csv' in the working directory."""
        print_sys_msg('DataHandler:store_data_with_current_date_time: storing data to a csv file with current date and time')
        # str(Timestamp) contains spaces and ':' which are not valid in file
        # names on every platform, so format the timestamp explicitly.
        timestamp = pd.Timestamp.now().strftime('%Y-%m-%d_%H-%M-%S')
        self.df.to_csv('data_' + timestamp + '.csv', index=False)

    def store_data_with_index(self):
        """Placeholder: storing under an auto-incremented index is not implemented yet.

        Planned behaviour (from the original notes): find the highest index
        used by the files in the data folder, add 1 and store self.df under
        that new index.
        """
        print_sys_msg('DataHandler:store_data_with_index: storing data to a csv file with index')
        # TODO: implement; until then make it visible that nothing was written
        print_sys_msg('DataHandler:store_data_with_index: not implemented yet, no file written')

    #-----------------Storing Data Functions-----------------#

#-----------------DataHandler Class-----------------#

Next we make a basic visualization class that will manage the plotting and core visualization functions.

In [None]:
#-----------------DataVisualization Class-----------------#
'''
Class DataVisualization

    purpose: visualize data (data visualization)
    charts included: line, scatter, bar, histogram, box plot, violin plot, bullet, table, sparkline, connected scatter plot, box, pie, doughnut, gauge, waffle
    
    functions:
    -   plt_create_linear_sub_plots: create linear sub plots
        dependencies used: matplotlib
        function call example: DataVisualization(DataHandler).plt_create_linear_sub_plots('left_to_right', [['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['scatter', 'Time (s)', 'Absolute acceleration (m/s^2)']])
        input: arrangement type (arrangment), list of plots (plot_list)
        output: none

    -   plt_create_grid_sub_plots: create grid sub plots
        dependencies used: matplotlib, math
        function call example: DataVisualization(DataHandler).plt_create_grid_sub_plots([['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['scatter', 'Time (s)', 'Absolute acceleration (m/s^2)']])
        input: list of plots (plot_list)
        output: none

    -   sns_create_linear_sub_plots: create linear sub plots
        dependencies used: seaborn
        function call example: DataVisualization(DataHandler).sns_create_linear_sub_plots('left_to_right', [['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['scatter', 'Time (s)', 'Absolute acceleration (m/s^2)']])
        input: arrangement type (arrangment), list of plots (plot_list)
        output: none

    -   sns_create_grid_sub_plots: create grid sub plots
        dependencies used: seaborn, math
        function call example: DataVisualization(DataHandler).sns_create_grid_sub_plots([['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['scatter', 'Time (s)', 'Absolute acceleration (m/s^2)']])
        input: list of plots (plot_list)
        output: none
    
- Managed by: 
- Created on: 03/02/2024
- Modified on: 03/02/2024
- Contact: @nottingham.ac.uk
'''

class DataVisualization:
    """Draw groups of subplots from the dataframe held by a DataHandler.

    Every plot is described by a list ``[plot_type, x_column, y_column]``.
    Descriptions with fewer than three entries are skipped. Single-column
    plot types (hist, box, violin, pie) only use ``x_column``, but
    ``y_column`` is still used for the axis label and the title.

    Drawable plot types:
        matplotlib (plt_* methods): line, scatter, bar, hist, box, violin, pie
        seaborn    (sns_* methods): line, scatter, bar, hist, box, violin

    Attributes:
        DataHandler: object whose ``df`` attribute is the dataframe to plot.
    """

    # Data source; set per instance in __init__.
    DataHandler = None

    # Plot types named in the project plan that have no drawing call here.
    # The original code called Axes/seaborn functions for them that do not
    # exist (or, for table, cannot take a single column) and therefore crashed.
    _PLANNED_PLOT_TYPES = ('bullet', 'table', 'sparkline', 'connected scatter plot',
                           'pie', 'doughnut', 'gauge', 'waffle')

    def __init__(self, DataHandler):
        """Remember the DataHandler whose dataframe will be plotted."""
        self.DataHandler = DataHandler

    #-----------------Private helpers-----------------#

    def _make_linear_axes(self, arrangment, number_of_plots):
        """Create a one-row or one-column figure; return its axes as a flat array, or None on bad input."""
        if arrangment == 'left_to_right':
            rows, columns = 1, number_of_plots
        elif arrangment == 'top_to_bottom':
            rows, columns = number_of_plots, 1
        else:
            print_sys_msg('DataVisualization:create_base_plots: invalid arrangement type')
            return None
        if number_of_plots == 0:
            print_sys_msg('DataVisualization:create_base_plots: no plots requested')
            return None
        # squeeze=False always returns a 2-D array, so a single plot is indexable like any other
        _, axes = plt.subplots(rows, columns, figsize=(20, 20), squeeze=False)
        return axes.ravel()

    def _make_grid_axes(self, number_of_plots):
        """Create a square figure large enough for all plots; return its axes flattened row by row."""
        print_sys_msg('DataVisualization:create_base_plots: number_of_plots: '+str(number_of_plots))
        if number_of_plots == 0:
            print_sys_msg('DataVisualization:create_base_plots: no plots requested')
            return None
        side = math.ceil(math.sqrt(number_of_plots))
        print_sys_msg('DataVisualization:create_base_plots: temp: '+str(side))
        _, axes = plt.subplots(side, side, figsize=(20, 20), squeeze=False)
        # row-major flattening: plot k lands in cell (k // side, k % side)
        return axes.ravel()

    def _plt_draw(self, axis, plot_type, x_column, y_column):
        """Draw one plot with matplotlib; return False if the type cannot be drawn."""
        frame = self.DataHandler.df
        two_column = {'line': axis.plot, 'scatter': axis.scatter, 'bar': axis.bar}
        one_column = {'box': axis.boxplot, 'violin': axis.violinplot, 'pie': axis.pie}
        if plot_type in two_column:
            two_column[plot_type](frame[x_column], frame[y_column])
        elif plot_type == 'hist':
            axis.hist(frame[x_column], bins=10)
        elif plot_type in one_column:
            one_column[plot_type](frame[x_column])
        else:
            return False
        return True

    def _sns_draw(self, axis, plot_type, x_column, y_column):
        """Draw one plot with seaborn; return False if the type cannot be drawn."""
        frame = self.DataHandler.df
        two_column = {'line': sns.lineplot, 'scatter': sns.scatterplot, 'bar': sns.barplot}
        one_column = {'hist': sns.histplot, 'box': sns.boxplot, 'violin': sns.violinplot}
        if plot_type in two_column:
            two_column[plot_type](x=x_column, y=y_column, data=frame, ax=axis)
        elif plot_type in one_column:
            one_column[plot_type](x=x_column, data=frame, ax=axis)
        else:
            return False
        return True

    def _render(self, axes, plot_list, draw):
        """Draw every plot description on its axis, label it, hide spare axes and show the figure."""
        for axis, description in zip(axes, plot_list):
            if len(description) < 3:
                print_sys_msg('DataVisualization:create_base_plots: plot description needs [type, x, y], skipping')
                continue
            plot_type, x_column, y_column = description[0], description[1], description[2]
            if not draw(axis, plot_type, x_column, y_column):
                if plot_type in self._PLANNED_PLOT_TYPES:
                    print_sys_msg('DataVisualization:create_base_plots: plot type not available: '+plot_type)
                else:
                    print_sys_msg('DataVisualization:create_base_plots: invalid plot type')
            axis.set_xlabel(x_column)
            axis.set_ylabel(y_column)
            axis.set_title(x_column+' vs '+y_column)
        # a square grid can have more cells than plots; hide the empty ones
        for spare_axis in axes[len(plot_list):]:
            spare_axis.set_visible(False)
        plt.show()

    #-----------------Private helpers-----------------#

    #-----------------matplot based Base Data Visualization Functions-----------------#

    # usage example - DataVisualization(data).plt_create_linear_sub_plots('left_to_right', [['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['scatter', 'Time (s)', 'Absolute acceleration (m/s^2)']])
    def plt_create_linear_sub_plots(self, arrangment ,plot_list):
        """Draw the plots side by side or stacked, using matplotlib.

        Args:
            arrangment: 'left_to_right' (one row) or 'top_to_bottom' (one
                column); anything else is reported and nothing is drawn.
            plot_list: List of ``[plot_type, x_column, y_column]`` lists.

        Returns:
            None. The figure is shown with plt.show().
        """
        print_sys_msg('DataVisualization:create_base_plots: creating base plots')
        axes = self._make_linear_axes(arrangment, len(plot_list))
        if axes is None:
            return
        self._render(axes, plot_list, self._plt_draw)

    def plt_create_grid_sub_plots(self, plot_list):
        """Draw the plots in a square grid (ceil(sqrt(n)) per side), using matplotlib.

        Args:
            plot_list: List of ``[plot_type, x_column, y_column]`` lists,
                placed row by row.

        Returns:
            None. The figure is shown with plt.show().
        """
        print_sys_msg('DataVisualization:create_base_plots: creating base plots')
        axes = self._make_grid_axes(len(plot_list))
        if axes is None:
            return
        self._render(axes, plot_list, self._plt_draw)

    #-----------------matplot based Base Data Visualization Functions-----------------#

    #-----------------seaborn based Base Data Visualization Functions-----------------#

    def sns_create_linear_sub_plots(self, arrangment ,plot_list):
        """Draw the plots side by side or stacked, using seaborn.

        Args:
            arrangment: 'left_to_right' (one row) or 'top_to_bottom' (one
                column); anything else is reported and nothing is drawn.
            plot_list: List of ``[plot_type, x_column, y_column]`` lists.

        Returns:
            None. The figure is shown with plt.show().
        """
        print_sys_msg('DataVisualization:create_base_plots: creating base plots')
        axes = self._make_linear_axes(arrangment, len(plot_list))
        if axes is None:
            return
        self._render(axes, plot_list, self._sns_draw)

    def sns_create_grid_sub_plots(self, plot_list):
        """Draw the plots in a square grid (ceil(sqrt(n)) per side), using seaborn.

        Args:
            plot_list: List of ``[plot_type, x_column, y_column]`` lists,
                placed row by row.

        Returns:
            None. The figure is shown with plt.show().
        """
        print_sys_msg('DataVisualization:create_base_plots: creating base plots')
        axes = self._make_grid_axes(len(plot_list))
        if axes is None:
            return
        self._render(axes, plot_list, self._sns_draw)

    #-----------------seaborn based Base Data Visualization Functions-----------------#

## DATA Initiation Phase  

### Data import  
Importing three separate files, one per team member, each containing the data that member exported from phyphox.

In [None]:
# Create the handler and load the three per-member csv files: import_data keeps each
# file's frame in data.data and the row-wise concatenation of all of them in data.df.
data = DataHandler()
data.import_data(['data/member1.csv', 'data/member2.csv', 'data/member3.csv'])

### Data Wrangling  

In [None]:
# print the (rows, columns) shape of the combined dataframe
# (reported through print_sys_msg, so it is only shown while SYS_MSG is True)
data.data_shape()

In [None]:
# print the first 5 rows of the combined dataframe
data.data_head()

In [None]:
# print the DataFrame.info() summary of the combined dataframe
data.data_info()

In [None]:
# print the DataFrame.describe() statistics of the combined dataframe
data.data_describe()

In [None]:
# print the number of null values in each column
data.data_null()

In [None]:
# print the correlation matrix of the dataframe columns
data.data_corr()

In [None]:
# remove duplicate rows from the dataframe (this call drops them, it does not print them)
data.drop_duplicates()

### Data Preprocessing

In [None]:
# drop every row that contains at least one null value
data.drop_null()

In [None]:
# filter out rows with negative 'Time (s)' values
# (see DataHandler.drop_negative_time for the exact cut-off)
data.drop_negative_time()

**Managing missing values**

In [None]:
# # drop missing values
# data.drop_missing()

# # fill missing values with mean of the column 
# data.fill_missing()

# # fill missing values with median of the column
# data.fill_missing_median()

# # fill missing values with mode of the column
# data.fill_missing_mode()

**Normalizing data**

In [None]:
# # min-max normalization
# data.min_max_normalization()

# # standardization
# data.standardization()

### Data Visualization before Analysis

In [None]:
# give the visualisation helper access to the handler so it can read data.df when plotting
dv = DataVisualization(data)

In [None]:
# one row of three matplotlib subplots (line, scatter, bar), each plotting
# 'Absolute acceleration (m/s^2)' against 'Time (s)'
dv.plt_create_linear_sub_plots('left_to_right',[['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['scatter', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['bar', 'Time (s)', 'Absolute acceleration (m/s^2)']])

In [None]:
# matplotlib grid of three subplots: bar, box and bar (three plots give a 2x2 grid).
# The box plot only uses the 'Time (s)' column.
dv.plt_create_grid_sub_plots([['bar', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['box', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['bar', 'Time (s)', 'Absolute acceleration (m/s^2)']])

In [None]:
# one row of three seaborn subplots: line, bar and bar, each plotting
# 'Absolute acceleration (m/s^2)' against 'Time (s)'
dv.sns_create_linear_sub_plots('left_to_right',[['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['bar', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['bar', 'Time (s)', 'Absolute acceleration (m/s^2)']])

In [None]:
# seaborn grid of three subplots: line, scatter and bar (three plots give a 2x2 grid)
dv.sns_create_grid_sub_plots([['line', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['scatter', 'Time (s)', 'Absolute acceleration (m/s^2)'], ['bar', 'Time (s)', 'Absolute acceleration (m/s^2)']])


### Saving Data after processing

In [None]:
# write the processed dataframe to data/cleaned_data.csv (the index column is not written)
data.store_data_with_name('data/cleaned_data.csv')