# ESS Data Analysis

## Import libraries

In [1]:
import os

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


## Get data path

In [None]:
def get_files_path(files_name, data_dir='../data/'):
    """Return the sorted paths of all files under *data_dir* matching a suffix.

    The directory tree rooted at ``data_dir`` is walked recursively and every
    file whose name ends with ``files_name`` is collected.

    Args:
        files_name: Suffix (or tuple of suffixes) the file name must end with.
        data_dir: Root directory to search. Defaults to ``'../data/'``, the
            location this notebook originally hard-coded.

    Returns:
        A list of matching paths (root joined with file name), sorted
        lexicographically. Empty if nothing matches or the root is missing.
    """
    return sorted(
        os.path.join(root, file)
        for root, _dirs, files in os.walk(data_dir)
        for file in files
        if file.endswith(files_name)
    )

In [None]:
def get_Sn_cell_path(files_name, n):
    """Return data file paths for one cell group, or for every group.

    Args:
        files_name: File-name suffix forwarded to ``get_files_path``.
        n: Either the literal ``'all'`` (no filtering) or a substring that
            must occur somewhere in a path for it to be kept.

    Returns:
        The list of paths from ``get_files_path``, filtered by ``n`` unless
        ``n`` is ``'all'``.
    """
    matches = get_files_path(files_name)
    if n != 'all':
        # Substring test against the full path (directories included).
        matches = [candidate for candidate in matches if n in candidate]
    return matches

In [None]:
# File-name suffixes used to locate each kind of exported CSV.
# NOTE(review): the 'temperature' suffix has no leading underscore, unlike the
# other two — confirm whether that is intentional.
FILES = dict(
    BMS='_BMS_influxdb_data.csv',
    voltage='_voltage_influxdb_data.csv',
    temperature='temperature_influxdb_data.csv',
)

# Path filters accepted by get_Sn_cell_path; index 0 ('all') disables filtering.
SnCELL = [
    'all',
    'S1_cell',
    'S2_cell',
    'S3_cell',
    'S4_cell',
    'S5_cell',
    'S6_cell',
]

In [None]:
# Collect every BMS CSV under the data directory; the bare name on the last
# line displays the resulting list as the cell output.
bms_files_path = get_files_path(FILES['BMS'])
bms_files_path

In [None]:
# Temperature CSV paths whose path contains 'S1_cell' (SnCELL[1]); displayed
# as the cell output.
cell1 = get_Sn_cell_path(FILES['temperature'], SnCELL[1])
cell1

## Load Data

In [None]:
def read_rendom_samples(files, n, random_state):
    """Load a random sample of rows from each CSV and merge them by time.

    Each file is read with its first column as the index, ``n`` rows are
    sampled from it, and the samples are concatenated into one frame.

    Args:
        files: Iterable of CSV paths; each file must contain a ``timestamp``
            column.
        n: Number of rows to sample from every file. ``DataFrame.sample``
            raises ``ValueError`` if a file has fewer rows than this.
        random_state: Seed passed to ``DataFrame.sample`` for reproducibility.

    Returns:
        A DataFrame sorted chronologically by ``timestamp`` (datetime dtype).
    """
    samples = [
        pd.read_csv(file, index_col=[0]).sample(n=n, random_state=random_state)
        for file in files
    ]
    df = pd.concat(samples, ignore_index=True)
    # Parse before sorting: sorting the raw strings is lexicographic and only
    # matches chronological order for zero-padded ISO-style timestamps.
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df.sort_values('timestamp')

In [None]:
def read_all_data(files):
    """Load every row of the given CSV files into one time-sorted DataFrame.

    Args:
        files: Iterable of CSV paths; each file is read with its first column
            as the index and must contain a ``timestamp`` column.

    Returns:
        The concatenation of all files, with ``timestamp`` converted to
        datetime and rows sorted chronologically by it.
    """
    frames = [pd.read_csv(file, index_col=[0]) for file in files]
    df = pd.concat(frames, ignore_index=True)
    # Parse before sorting: sorting the raw strings is lexicographic and only
    # matches chronological order for zero-padded ISO-style timestamps.
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df.sort_values('timestamp')

In [None]:
# Sample 20 rows from each BMS file (seed 42) and preview the first rows.
# `head` must be called: the bare attribute only displays the bound method.
df = read_rendom_samples(bms_files_path, 20, 42)
df.head()

In [None]:
# Print column dtypes and non-null counts for the sampled frame.
df.info()

In [None]:
# Load every row of every BMS file and show summary statistics as the cell
# output.
all_data = read_all_data(bms_files_path)
all_data.describe()

In [None]:
# Print column dtypes and non-null counts for the full BMS dataset.
all_data.info()

In [None]:
# Index the full dataset by time (in place) so it can be resampled, then take
# the per-column maximum over 60-second bins.
# NOTE: set_index mutates all_data, so re-running this cell without reloading
# the data raises KeyError because 'timestamp' is no longer a column.
all_data.set_index('timestamp', inplace=True)
# Lower-case 's' is the current seconds alias ('S' is deprecated in pandas 2.2)
# and matches the '60s' used by read_temperature.
s60 = all_data.resample('60s').max()

In [None]:
# Display the 60-second resampled maxima.
s60

## SOH 

In [None]:
# Line plot of the unfiltered SOH column over time for the full BMS dataset.
# NOTE(review): by this point 'timestamp' has been moved into the index of
# all_data by the earlier set_index cell — confirm the installed seaborn
# resolves x='timestamp' from the index name.
plt.figure(figsize=(12, 8))
sns.lineplot(x='timestamp', y='SOH', data=all_data)
plt.title('SOH')
plt.ylabel('SOH')
plt.xlabel('Time')
plt.show()

In [None]:
# Keep only rows whose SOH is non-zero and below 1000; displayed as the cell
# output. Presumably this drops dropouts/outliers seen in the raw plot —
# TODO confirm the valid SOH range.
filtered_data = all_data[(all_data['SOH'] != 0) & (all_data['SOH'] < 1000)]
filtered_data

In [None]:
# Same SOH-over-time line plot, drawn from filtered_data instead of all_data.
plt.figure(figsize=(12, 8))
sns.lineplot(x='timestamp', y='SOH', data=filtered_data)
plt.title('SOH')
plt.ylabel('SOH')
plt.xlabel('Time')
plt.show()

## Histogram

In [None]:
# Columns to include in the histogram grid.
features = [
    'System voltage', 'System Current', 'SOC', 'SOH',
    'Battery Subsystem Voltage.1', 'Battery Subsystem Voltage.2',
    'Battery Subsystem Voltage.3', 'Battery Subsystem Voltage.4',
    'Battery Subsystem Voltage.5', 'Battery Subsystem Voltage.6',
]

# DataFrame.hist creates its own figure when no `ax` is given, so no separate
# plt.figure() call is made here (it would only leave an empty extra figure).
df[features].hist(bins=20, layout=(4, 3), figsize=(16, 10))
plt.suptitle('Histograms of Features', y=1.02)
plt.show()

取出電壓爲0的資料

In [None]:
# Keep sampled rows whose 'System voltage' is below 7000 and write them to
# loss.csv in the working directory.
# NOTE(review): the note above this cell talks about voltage equal to 0, but
# the threshold applied here is 7000 — confirm which is intended.
filtered_data = df[df['System voltage'] < 7000]
filtered_data.to_csv('loss.csv')


In [None]:
# Drop sampled rows whose 'System voltage' is exactly 0 before plotting.
filtered_data = df[df['System voltage'] != 0]

# Columns to include in the histogram grid.
features = [
    'System voltage', 'System Current', 'SOC', 'SOH',
    'Battery Subsystem Voltage.1', 'Battery Subsystem Voltage.2',
    'Battery Subsystem Voltage.3', 'Battery Subsystem Voltage.4',
    'Battery Subsystem Voltage.5', 'Battery Subsystem Voltage.6',
]

# DataFrame.hist creates its own figure when no `ax` is given, so no separate
# plt.figure() call is made here (it would only leave an empty extra figure).
filtered_data[features].hist(bins=20, layout=(4, 3), figsize=(16, 10))
plt.suptitle('Histograms of Features', y=1.02)
plt.show()

## Temperature

In [None]:
def read_temperature(files_path):
    """Find, per 60-second bin, which column holds the highest reading.

    Each CSV is read with its first column as the index, indexed by its
    ``timestamp`` column, and resampled to 60-second bins using the per-column
    maximum. Bins in which every column is missing are skipped.

    Args:
        files_path: Iterable of CSV paths, each containing a ``timestamp``
            column plus the value columns to compare.

    Returns:
        A DataFrame with one row per non-empty bin and the columns
        ``timestamp`` (bin start), ``num`` (name of the column holding the
        largest value) and ``max`` (that value). The three columns are
        present even when no rows are produced, so callers can filter on
        ``max`` without a KeyError.
    """
    columns = ['timestamp', 'num', 'max']
    frames = []
    for file in files_path:
        df = pd.read_csv(file, index_col=[0])
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        binned = df.set_index('timestamp').resample('60s').max()
        # Bins with no samples come back all-NaN; they carry no maximum.
        binned = binned.dropna(how='all')
        if binned.empty:
            continue
        # Column-wise reductions along axis=1 replace the per-row Python loop.
        frames.append(pd.DataFrame({
            'timestamp': binned.index,
            'num': binned.idxmax(axis=1).to_numpy(),
            'max': binned.max(axis=1).to_numpy(),
        }))

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

In [None]:
# Temperature CSV paths whose path contains 'S1_cell' (SnCELL[1]).
temp_path = get_Sn_cell_path(FILES['temperature'], SnCELL[1])

In [None]:
# Build the per-minute maximum-temperature frames for cell groups 1-3.
# temp1 relies on temp_path still holding the S1_cell paths assigned in the
# previous cell; temp_path is then rebound for each following group.
temp1 = read_temperature(temp_path)
temp_path = get_Sn_cell_path(FILES['temperature'], SnCELL[2])
temp2 = read_temperature(temp_path)
temp_path = get_Sn_cell_path(FILES['temperature'], SnCELL[3])
temp3 = read_temperature(temp_path)

In [None]:
# Build the per-minute maximum-temperature frames for cell groups 4-6,
# rebinding temp_path before each read.
temp_path = get_Sn_cell_path(FILES['temperature'], SnCELL[4])
temp4 = read_temperature(temp_path)
temp_path = get_Sn_cell_path(FILES['temperature'], SnCELL[5])
temp5 = read_temperature(temp_path)
temp_path = get_Sn_cell_path(FILES['temperature'], SnCELL[6])
temp6 = read_temperature(temp_path)

In [None]:
import matplotlib.dates as mdates

In [None]:
# Overlay the per-minute maximum temperature of all six cell groups.
# The style is set before the figure is created so it applies to the whole plot.
sns.set_style('whitegrid')
plt.figure(figsize=(12, 8))

# Each series gets a label; without one plt.legend() has nothing to show and
# the six lines cannot be told apart.
temperature_series = (
    ('S1_cell', temp1),
    ('S2_cell', temp2),
    ('S3_cell', temp3),
    ('S4_cell', temp4),
    ('S5_cell', temp5),
    ('S6_cell', temp6),
)
for label, temp in temperature_series:
    # Readings of 50 or more are left out of the plot.
    sns.lineplot(x='timestamp', y='max', data=temp[temp['max'] < 50], label=label)

plt.title('Temperature')
plt.ylabel('Temperature')
plt.xlabel('Time')

plt.legend()
plt.show()

In [None]:
# Compare the distribution of per-minute maximum temperature for groups 2 and 5.
# A single figure is created; a second plt.figure() call would leave the first
# one empty.
plt.figure(figsize=(12, 8))

# x='max' selects the temperature column explicitly instead of handing the
# whole frame (timestamp / num / max) to histplot. `palette` is not passed
# because it only has an effect together with `hue`.
sns.histplot(data=temp2[temp2['max'] < 50], x='max', bins=10, label='temp2')
sns.histplot(data=temp5[temp5['max'] < 50], x='max', bins=10, label='temp5')

plt.title('Temperature Distribution')
plt.xlabel('Temperature (°C)')
plt.ylabel('Count')
plt.legend()  # add the legend
plt.show()

In [None]:
# Preview the first rows of the group-1 temperature frame.
temp1.head()

In [None]:
# Rows of temp1 whose maximum reading exceeds 50.
# NOTE(review): the plots keep max < 50 and this keeps max > 50, so a reading
# of exactly 50 lands in neither — confirm whether >= was intended.
error_temp = temp1[temp1['max'] > 50]

In [None]:
# Write the over-50 readings to error_temp.csv in the working directory.
error_temp.to_csv('error_temp.csv')

In [None]:
# Write the full group-1 temperature frame to tmp.csv in the working directory.
temp1.to_csv('tmp.csv')