## Notebook: Extract per-file bearing features and split the dataset into day-wise data

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
%matplotlib inline

In [2]:
# Start from an empty frame; the feature-extraction cell populates `combined_data`.
combined_data = pd.DataFrame()

# Directory holding the raw snapshot files (one tab-separated file per timestamp).
# The location can be overridden without editing the notebook by setting the
# BEARING_DATA_DIR environment variable; otherwise the original local path is used.
data_dir = os.environ.get(
    'BEARING_DATA_DIR',
    'D:/Springboard/Capstone_Works/Project/Proj_DataSet/2nd_test',
)

In [3]:
# Column order of the feature table: mean of absolute values, RMS and kurtosis,
# each reported for bearings 1-4.
feature_columns = ['Bearing1_Mean','Bearing2_Mean','Bearing3_Mean','Bearing4_Mean',
                   'Bearing1_RMS','Bearing2_RMS','Bearing3_RMS','Bearing4_RMS',
                   'Bearing1_Kurt','Bearing2_Kurt','Bearing3_Kurt','Bearing4_Kurt']


def _extract_features(file_path):
    """Summarise one tab-separated snapshot file as 12 scalar features.

    For each of the first four columns of the file this computes:
      * the mean of the absolute values,
      * the root-mean-square value,
      * the Pearson kurtosis (``fisher=False``, so a normal distribution gives 3).

    Parameters
    ----------
    file_path : str
        Path of the file to read.

    Returns
    -------
    list of float
        The 12 features, ordered like ``feature_columns``.
    """
    # NOTE(review): read_csv is called with its default header handling, so the
    # first line of every file is consumed as column labels and is not part of
    # the statistics. If the raw files carry no header row, `header=None` is
    # needed here -- confirm against the data before changing.
    snapshot = pd.read_csv(file_path, sep='\t')

    # One 1-D numpy array per bearing channel (first four columns).
    channels = [snapshot.iloc[:, col].values for col in range(4)]

    mean_abs = [np.mean(np.absolute(ch)) for ch in channels]
    rms = [np.sqrt(np.sum(ch**2) / len(ch)) for ch in channels]
    kurt = [scipy.stats.kurtosis(ch, fisher=False) for ch in channels]
    return mean_abs + rms + kurt


# Sort the listing so the row order does not depend on the OS, and skip
# sub-directories, which read_csv cannot open.
file_names = sorted(
    name for name in os.listdir(data_dir)
    if os.path.isfile(os.path.join(data_dir, name))
)

# Collect every row first and build the frame once: DataFrame.append was removed
# in pandas 2.0, and growing a frame row by row is quadratic. Rebuilding from
# scratch also makes the cell safe to re-run.
feature_rows = [_extract_features(os.path.join(data_dir, name)) for name in file_names]

# One row per input file, indexed by the file name.
combined_data = pd.DataFrame(feature_rows, index=file_names, columns=feature_columns)

In [4]:
# Preview the combined feature table (one row per input file, indexed by file name).
combined_data

Unnamed: 0,Bearing1_Mean,Bearing2_Mean,Bearing3_Mean,Bearing4_Mean,Bearing1_RMS,Bearing2_RMS,Bearing3_RMS,Bearing4_RMS,Bearing1_Kurt,Bearing2_Kurt,Bearing3_Kurt,Bearing4_Kurt
2004.02.12.10.32.39,0.058333,0.071832,0.083242,0.043067,0.074180,0.090945,0.109403,0.054105,3.628672,3.506748,6.213373,3.065809
2004.02.12.10.42.39,0.058995,0.074006,0.084435,0.044541,0.075382,0.093417,0.109815,0.056102,3.648479,3.253247,4.395699,3.107450
2004.02.12.10.52.39,0.060236,0.074227,0.083926,0.044443,0.076228,0.093720,0.109864,0.056146,3.513839,3.310628,5.639673,3.257147
2004.02.12.11.02.39,0.061455,0.073844,0.084457,0.045081,0.078726,0.092949,0.110662,0.056808,4.157776,3.235305,5.683643,3.806145
2004.02.12.11.12.39,0.061361,0.075609,0.082837,0.045118,0.078475,0.095350,0.107507,0.056841,3.603207,3.226185,4.578415,3.138989
...,...,...,...,...,...,...,...,...,...,...,...,...
2004.02.19.05.42.39,0.453335,0.161016,0.137440,0.119047,0.725019,0.218299,0.170574,0.148355,15.576977,5.452473,2.902161,2.906575
2004.02.19.05.52.39,0.337583,0.132400,0.144992,0.092125,0.462021,0.170820,0.177875,0.116632,6.759714,3.911109,2.680940,3.259509
2004.02.19.06.02.39,0.351111,0.152266,0.151299,0.100817,0.483847,0.193646,0.187409,0.130585,7.891370,3.330462,2.864218,3.697000
2004.02.19.06.12.39,0.001857,0.003732,0.003656,0.001786,0.002103,0.004018,0.003950,0.002154,6.637219,1.129031,1.064626,4.652532


In [5]:
# Convert, sort and trim only while the index still holds the raw file names.
# Without this guard every re-run of the cell would drop two more rows and
# silently shrink both the table and the saved CSV.
if not isinstance(combined_data.index, pd.DatetimeIndex):
    # File names encode the timestamp as YYYY.MM.DD.HH.MM.SS.
    combined_data.index = pd.to_datetime(combined_data.index, format='%Y.%m.%d.%H.%M.%S')

    # Sort the index in chronological order
    combined_data = combined_data.sort_index()

    # Drop the last 2 rows.
    # NOTE(review): the final snapshot in the preview has amplitudes far below
    # the rest, which is presumably why the tail is trimmed -- confirm the
    # intended number of rows.
    combined_data = combined_data[:-2]

# Save the full feature table as a csv file (overwrites any previous export).
combined_data.to_csv('Full_Dataset_All_Features.csv')

In [6]:
# Preview the table after the datetime conversion, chronological sort and trailing-row trim.
combined_data

Unnamed: 0,Bearing1_Mean,Bearing2_Mean,Bearing3_Mean,Bearing4_Mean,Bearing1_RMS,Bearing2_RMS,Bearing3_RMS,Bearing4_RMS,Bearing1_Kurt,Bearing2_Kurt,Bearing3_Kurt,Bearing4_Kurt
2004-02-12 10:32:39,0.058333,0.071832,0.083242,0.043067,0.074180,0.090945,0.109403,0.054105,3.628672,3.506748,6.213373,3.065809
2004-02-12 10:42:39,0.058995,0.074006,0.084435,0.044541,0.075382,0.093417,0.109815,0.056102,3.648479,3.253247,4.395699,3.107450
2004-02-12 10:52:39,0.060236,0.074227,0.083926,0.044443,0.076228,0.093720,0.109864,0.056146,3.513839,3.310628,5.639673,3.257147
2004-02-12 11:02:39,0.061455,0.073844,0.084457,0.045081,0.078726,0.092949,0.110662,0.056808,4.157776,3.235305,5.683643,3.806145
2004-02-12 11:12:39,0.061361,0.075609,0.082837,0.045118,0.078475,0.095350,0.107507,0.056841,3.603207,3.226185,4.578415,3.138989
...,...,...,...,...,...,...,...,...,...,...,...,...
2004-02-19 05:22:39,0.338265,0.136772,0.118172,0.116861,0.445929,0.173200,0.149481,0.146422,4.624054,3.128200,3.282547,2.957434
2004-02-19 05:32:39,0.301344,0.140436,0.111463,0.114495,0.386944,0.176706,0.139410,0.143402,3.469297,3.099120,3.126547,2.883139
2004-02-19 05:42:39,0.453335,0.161016,0.137440,0.119047,0.725019,0.218299,0.170574,0.148355,15.576977,5.452473,2.902161,2.906575
2004-02-19 05:52:39,0.337583,0.132400,0.144992,0.092125,0.462021,0.170820,0.177875,0.116632,6.759714,3.911109,2.680940,3.259509


In [7]:
# Inclusive (start, stop) label bounds for each calendar day of the recording.
# `None` leaves that side of the slice open, so the first window reaches back to
# the first row and the last window runs to the final row.
day_windows = [
    (None, '2004-02-12 23:52:39'),
    ('2004-02-13 00:02:39', '2004-02-13 23:52:39'),
    ('2004-02-14 00:02:39', '2004-02-14 23:52:39'),
    ('2004-02-15 00:02:39', '2004-02-15 23:52:39'),
    ('2004-02-16 00:02:39', '2004-02-16 23:52:39'),
    ('2004-02-17 00:02:39', '2004-02-17 23:52:39'),
    ('2004-02-18 00:02:39', '2004-02-18 23:52:39'),
    ('2004-02-19 00:02:39', None),
]

# Slice the datetime-indexed table once per window and bind the eight results
# to the names used by the export cell.
Day_1, Day_2, Day_3, Day_4, Day_5, Day_6, Day_7, Day_8 = [
    combined_data[window_start:window_stop]
    for window_start, window_stop in day_windows
]

In [11]:
# Folder that receives one CSV per day.
output_dir = 'Day_Wise_Data'

# Create the folder if needed. exist_ok=True keeps re-runs from failing, and the
# files are written on every run so they never go stale (previously nothing was
# saved once the folder existed).
os.makedirs(output_dir, exist_ok=True)

day_frames = [Day_1, Day_2, Day_3, Day_4, Day_5, Day_6, Day_7, Day_8]
for day_number, day_frame in enumerate(day_frames, start=1):
    # os.path.join builds a portable path; the former 'Day_Wise_Data\Day_N.csv'
    # literals relied on the invalid escape '\D' and only worked on Windows.
    day_frame.to_csv(os.path.join(output_dir, f'Day_{day_number}.csv'))