In [16]:
import pandas as pd
import numpy as np
import os
import sys

In [17]:
def get_files(year):
    data_path = f'../data/raw/{year}'
    raw_files = os.listdir(data_path)
    df_discharging = pd.DataFrame()
    df_charging = pd.DataFrame()

    for idx, filename in enumerate(raw_files):
        filepath = os.path.join(data_path, filename)    
        
        if 'Charge' not in filename:
            if year == 2013:
                df_test = pd.read_csv(filepath, sep='\s+', index_col=False)
                test_id = filename.split(' ')[0]
            else:
                df_test = pd.read_csv(filepath, sep='\t', index_col=False)
                test_id = filename.split('_')[0]
            df_test['test_id'] = test_id
            df_discharging = pd.concat([df_discharging, df_test])
    return df_discharging

In [47]:
df_discharging_raw_2013 = get_files(2013)
df_discharging_raw_2013.head()

Unnamed: 0,Time[sec],Dyno_Speed[mph],Dyno_Tractive_Effort[N],Test_Cell_Temp[C],Test_Cell_RH[%],Phase_#,HV_Battery_Current[A],HV_Battery_Voltage[V],HV_Battery_SOC[%],Accelerator_Pedal_Position[%],test_id
0,-10.0,0.009,-12.17,22.256,40.693,0.0,-0.043,0.438,0.0,0.0,61403011
1,-9.9,0.01,-12.332,22.256,40.696,0.0,-0.037,0.437,0.0,0.0,61403011
2,-9.8,0.01,-12.272,22.256,40.694,0.0,-0.048,0.435,0.0,0.0,61403011
3,-9.7,0.009,-12.146,22.256,40.694,0.0,-0.052,0.438,0.0,0.0,61403011
4,-9.6,0.01,-12.24,22.255,40.693,0.0,-0.05,0.439,0.0,0.0,61403011


In [19]:
df_discharging_raw_2012 = get_files(2012)
df_discharging_raw_2012.head()

Unnamed: 0,Timestamp [sec],Dyno Speed [mph],Dyno Tractive Effort [N],Cell Temp [C],Cell RH [%],Phase #,HV Battery Voltage w/500A max clamp [V],HV Battery Current w/500A max clamp [A],HV Battery Voltage w/200A max clamp [V],HV Battery Current w/200A max clamp [A],DCDC Outlet Voltage [V],DCDC Outlet Current [A],Accelerator Pedal Position [%],Inverter Cooling Hose Temp [C],test_id
0,-10.0,-0.004166,-2.738338,-5.880604,9.674727,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.242471,61203025
1,-9.9,-0.004166,-2.738338,-5.879956,9.675699,1.0,0.0,0.0,0.0,0.0,12.027976,0.0,0.0,-3.254097,61203025
2,-9.8,-0.00449,-2.738338,-5.881252,9.673754,1.0,0.0,0.0,0.0,0.0,12.024676,0.0,0.0,-3.25991,61203025
3,-9.7,-0.004166,-2.738338,-5.879632,9.675375,1.0,0.0,0.0,0.0,0.0,12.021376,0.0,0.0,-3.254097,61203025
4,-9.6,-0.00449,-2.738338,-5.881576,9.675375,1.0,0.0,0.0,0.0,0.0,12.018077,0.0,0.0,-3.23885,61203025


In [20]:
def filter_columns(df, year):
    if year == 2012:
        valid_cols = ['Timestamp [sec]', 'Dyno Speed [mph]','Dyno Tractive Effort [N]',
                      'Cell Temp [C]', 'Cell RH [%]',
                      'HV Battery Voltage w/500A max clamp [V]','HV Battery Current w/500A max clamp [A]']
        rename_cols = ['t[s]','V[mph]','F[N]','T[°C]','SOC[%]','U[V]','I[A]']
    elif year == 2013:
        valid_cols = ['Time[sec]', 'Dyno_Speed[mph]','Dyno_Tractive_Effort[N]',
                      'Test_Cell_Temp[C]',
                      'HV_Battery_SOC[%]',
                     'HV_Battery_Current[A]', 'HV_Battery_Voltage[V]']
        rename_cols = ['t[s]','V[mph]','F[N]','T[°C]','SOC[%]','I[A]','U[V]']
    df_filtered = df[valid_cols]
    
    df_filtered.columns = rename_cols
    return df_filtered

In [21]:
def convert_mph_to_ms(df):
    df['V[m/s]'] = df['V[mph]'] / 2.237
    return df

In [22]:
def calculate_aceleration(df):    
    dt = df['t_acumulated[s]'] - df['t_acumulated[s]'].shift(1, fill_value=0)
    actual_velocity = df['V[m/s]']
    past_velocity = df['V[m/s]'].shift(1, fill_value=0)
    df['a[m/s2]'] = (actual_velocity - past_velocity) / dt
    df.loc[0, 'a[m/s2]'] = 0
    return df

In [23]:
def get_test_duration(df):
    df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])

    return df

In [24]:
def resample(df):
    df['t_acumulated[datetime]'] = pd.to_datetime(df['t_acumulated[s]'], unit='s')
    df.set_index('t_acumulated[datetime]', inplace=True)

    # Resample the DataFrame from 10 Hz to 1 Hz
    df_resampled = df.resample('1S').mean()
    
    df_resampled.reset_index(drop=True, inplace=True)
    df_resampled['t_acumulated[s]'] = df_resampled.index
    return df_resampled

In [25]:
def save_processed_files(df, test_ids, year):
    for charge, tests in test_ids.items():
        df_charge = df[df['test_id'].isin(tests)]
        df_charge_processed = (df_charge.pipe(filter_columns, year)
                                        .pipe(get_test_duration)
                                        .pipe(resample)
                                        .pipe(convert_mph_to_ms)
                                        .pipe(calculate_aceleration))
        df_charge_processed.to_excel(f'../data/processed/bronze/{charge}_1Hz.xlsx')
        df_charge_processed.to_parquet(f'../data/processed/bronze/{charge}_1Hz.parquet')
        print(f"{charge} saved!")

In [26]:
tests_charge_2012 =  {
    '2012_Charge-7°C': ['61203025', '61203026', '61203027', '61203028', '61203029', '61203030'],
    '2012_Charge 23°C': ['61203031', '61203032', '61203033', '61203034', '61203035', '61203036', '61203037', '61203038', '61203040'],
    '2012_Charge 35°C': ['61203052', '61203053', '61203054', '61203055']
}

tests_charge_2013 = {
    '2013_Charge-7°C': ['61403061', '61403062', '61403063', '61403064', '61403065', '61403066', '61403067'],
    '2013_Charge 23°C': ['61403011', '61403012', '61403013', '61403014', '61403015', '61403016', '61403017', '61403018', '61403019', '61403020'],
    '2013_Charge 35°C': ['61402070', '61402074', '61402072', '61403102', '61403103', '61403100', '61403021', '61403022', '61403023', '61403024',
                    '61403025', '61403026', '61403027', '61403028', '61403029', '61403030', '61403070', '61403072', '61403074']
}   

In [27]:
save_processed_files(df_discharging_raw_2012, tests_charge_2012, 2012)

save_processed_files(df_discharging_raw_2013, tests_charge_2013, 2013)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[datetime]'] = pd.to_datetime(df['t_acumulated[s]'], unit='s')


2012_Charge-7°C saved!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[datetime]'] = pd.to_datetime(df['t_acumulated[s]'], unit='s')


2012_Charge 23°C saved!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[datetime]'] = pd.to_datetime(df['t_acumulated[s]'], unit='s')


2012_Charge 35°C saved!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[datetime]'] = pd.to_datetime(df['t_acumulated[s]'], unit='s')


2013_Charge-7°C saved!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[datetime]'] = pd.to_datetime(df['t_acumulated[s]'], unit='s')


2013_Charge 23°C saved!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[datetime]'] = pd.to_datetime(df['t_acumulated[s]'], unit='s')


2013_Charge 35°C saved!


In [31]:
df_wrong = pd.read_parquet("../data/processed/bronze/2013_Charge 23°C_1Hz.parquet")
df_wrong

Unnamed: 0,t[s],V[mph],F[N],T[°C],SOC[%],I[A],U[V],t_acumulated[s],V[m/s],a[m/s2]
0,-9.550,0.0095,-12.2340,22.2554,0.0000,-0.0422,0.4418,0,0.004247,0.000000
1,-8.550,0.0095,-12.2511,22.2544,0.0000,-0.0411,0.4387,1,0.004247,0.000000
2,-7.550,0.0091,-12.1675,22.2519,19.8912,-0.0278,1.0194,2,0.004068,-0.000179
3,-6.550,0.0098,-12.2816,22.2500,99.7049,0.1973,200.9945,3,0.004381,0.000313
4,-5.550,0.0094,-12.2513,22.2485,97.1000,0.7864,394.5561,4,0.004202,-0.000179
...,...,...,...,...,...,...,...,...,...,...
9977,919.549,0.0100,-12.3846,24.4014,2.0000,0.2974,23.3660,9977,0.004470,0.000045
9978,920.549,0.0099,-12.3400,24.3874,2.0000,0.3004,22.9136,9978,0.004426,-0.000045
9979,921.549,0.0100,-12.3360,24.3860,2.0000,0.3000,22.8840,9979,0.004470,0.000045
9980,922.549,0.0100,-12.3360,24.3860,2.0000,0.3000,22.8840,9980,0.004470,0.000000


In [48]:
df = df_discharging_raw_2013.copy()
test_ids= {'2013_Charge 23°C': ['61403011', '61403012', '61403013', '61403014', '61403015', '61403016', '61403017', '61403018', '61403019', '61403020']}
year = 2013
for charge, tests in test_ids.items():
    df_charge = df[df['test_id'].isin(tests)]
    df_charge_processed = (df_charge.pipe(filter_columns, year)
                                   .pipe(get_test_duration))
                                    #.pipe(resample)
                                    #.pipe(convert_mph_to_ms)
                                    #.pipe(calculate_aceleration))
df_charge_processed.head(5000)
    


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['t_acumulated[s]'] = np.linspace(0, (df.shape[0] - 1) * 0.1, df.shape[0])


Unnamed: 0,t[s],V[mph],F[N],T[°C],SOC[%],I[A],U[V],t_acumulated[s]
0,-10.0,0.009,-12.170,22.256,0.0,-0.043,0.438,0.0
1,-9.9,0.010,-12.332,22.256,0.0,-0.037,0.437,0.1
2,-9.8,0.010,-12.272,22.256,0.0,-0.048,0.435,0.2
3,-9.7,0.009,-12.146,22.256,0.0,-0.052,0.438,0.3
4,-9.6,0.010,-12.240,22.255,0.0,-0.050,0.439,0.4
...,...,...,...,...,...,...,...,...
4995,489.5,34.732,-28.919,22.313,92.7,9.037,390.287,499.5
4996,489.6,34.732,5.480,22.314,92.7,11.815,390.043,499.6
4997,489.7,34.734,72.264,22.314,92.7,14.254,389.864,499.7
4998,489.8,34.744,123.483,22.314,92.7,15.470,389.761,499.8


In [51]:
def calculate_scaled_soc(df):
    Ah_cap = 51
    df['dt'] = df['soc'].shift(1, fill_value=0)
    df['soc'] = 0
    df['soc'] = df['soc'].shift(1, fill_value=0) + (df['I[A]'] +
                                                    df['I[A]'].shift(1, fill_value=0))*(df['dt']/(2*3600*Ah_cap))
    episilon = 0.02
    df['scaled_soc'] = df['soc']*(1-2*episilon) + episilon
    return df

def calculate_scaled_soc_v2(df):
    Ah_cap = 53
    dt = df['t_acumulated[s]'].diff().iloc[1]    
    df['past_I[A]'] = df['I[A]'].shift(1, fill_value=0)
    
    df.loc[df.index[0], 'soc'] = 0
    # Calculate the SOC column iteratively
    for i in range(1, len(df)):
        print(i)
        actual_current = df.loc[df.index[i], 'I[A]']
        past_current = df.loc[df.index[i-1], 'past_I[A]']
        df.loc[df.index[i], 'soc'] = df.loc[df.index[i-1], 'soc'] + (actual_current+past_current)*(dt/(2*3600*Ah_cap))
    
    episilon = 0.02
    df['scaled_soc'] = df['soc']*(1-2*episilon) + episilon
    return df

In [50]:
df_charge_processed

Unnamed: 0,t[s],V[mph],F[N],T[°C],SOC[%],I[A],U[V],t_acumulated[s],past_I[A],soc
0,-10.000,0.009,-12.170,22.256,0.0,-0.043,0.438,0.0,0.000,0.0
1,-9.900,0.010,-12.332,22.256,0.0,-0.037,0.437,0.1,-0.043,
2,-9.800,0.010,-12.272,22.256,0.0,-0.048,0.435,0.2,-0.037,
3,-9.700,0.009,-12.146,22.256,0.0,-0.052,0.438,0.3,-0.048,
4,-9.600,0.010,-12.240,22.255,0.0,-0.050,0.439,0.4,-0.052,
...,...,...,...,...,...,...,...,...,...,...
9329,922.899,0.010,-12.336,24.386,2.0,0.300,22.884,9980.8,0.300,
9330,922.999,0.010,-12.336,24.386,2.0,0.300,22.884,9980.9,0.300,
9331,923.099,0.010,-12.336,24.386,2.0,0.300,22.884,9981.0,0.300,
9332,923.199,0.010,-12.336,24.386,2.0,0.300,22.884,9981.1,0.300,


In [52]:
df_scaled = calculate_scaled_soc_v2(df_charge_processed)
df_scaled

1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['past_I[A]'] = df['I[A]'].shift(1, fill_value=0)
  df.loc[df.index[i], 'soc'] = df.loc[df.index[i-1], 'soc'] + (actual_current+past_current)*(dt/(2*3600*Ah_cap))


ValueError: cannot reindex on an axis with duplicate labels