<a href="https://colab.research.google.com/github/kevinknights29/Regression--Battery-Life-Prediction/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [200]:
import requests
import zipfile
from pathlib import Path
import scipy.io
import pandas as pd
import numpy as np

## Data Availability

In [201]:
# Download the NASA battery dataset archive into the working directory.
dataset_url = 'https://phm-datasets.s3.amazonaws.com/NASA/5.+Battery+Data+Set.zip'
dataset_zip = "5.+Battery+Data+Set.zip"

# (connect, read) timeouts in seconds so a stalled connection cannot hang the cell.
response = requests.get(dataset_url, timeout=(10, 120))
# Fail here on an HTTP error instead of saving an error page as the "zip",
# which would only surface later as a confusing BadZipFile.
response.raise_for_status()
with open(dataset_zip, "wb") as f:
    f.write(response.content)

In [202]:
# Unpack the downloaded archive into the working directory, then delete it.
archive_path = Path(dataset_zip)
with zipfile.ZipFile(archive_path, mode="r") as archive:
    archive.extractall(path=".")
archive_path.unlink()

In [203]:
# Unpack archives nested inside the extracted dataset, each into a sibling
# folder named after the archive, deleting every archive once unpacked.
# The loop variable is deliberately not called `zip`: that name would shadow
# the builtin for every later cell of the notebook.
while True:
    # Snapshot the matches first so the tree is not modified while rglob is
    # still walking it; rescan afterwards to pick up freshly extracted archives.
    nested_zips = sorted(Path(".").rglob("*.zip"))
    if not nested_zips:
        break
    for zip_path in nested_zips:
        output_folder = zip_path.parent / zip_path.stem
        with zipfile.ZipFile(zip_path, "r") as f:
            f.extractall(output_folder)
        zip_path.unlink()

## Data Preparation

In [204]:
  # Docs:
  # inside a matrix we have the following structure
  # [0][0][0][0][2][3][0][0][5]
  #                          ^
  #                        column
  #                 ^
  #                data
  #              ^
  #             type
  # 
  # type: [charge | discharge | impedance]
  # data structure
  # columns if charge: ["Voltage_measured", "Current_measured","Temperature_measured","Current_charge","Voltage_charge","Time"]
  # columns if discharge: ["Voltage_measured", "Current_measured","Temperature_measured","Current_charge","Voltage_charge","Time","Capacity"]
  # columns if impedance: ["Sense_current","Battery_current","Current_ratio","Battery_impedance","Rectified_impedance","Re","Rct"]

In [217]:
# Column names, in positional order, used to label each kind of record.
# "discharge" carries the same columns as "charge" plus a trailing "capacity".
_charge_columns = [
    "voltage_measured",
    "current_measured",
    "temperature_measured",
    "current_charge",
    "voltage_charge",
    "time",
]

column_structure = {
    "discharge": [*_charge_columns, "capacity"],
    "impedance": [
        "sense_current",
        "battery_current",
        "current_ratio",
        "battery_impedance",
        "rectified_impedance",
        "re",
        "rct",
    ],
    "charge": list(_charge_columns),
}

In [215]:
def load_mat(path):
    """Load one ``.mat`` file and pull out three data records from it.

    Parameters
    ----------
    path : str or path-like
        Location of the MATLAB file, passed straight to ``scipy.io.loadmat``.

    Returns
    -------
    tuple
        ``(data_key, data_dict)`` where ``data_key`` is the name of the
        variable read from the file and ``data_dict`` maps that name to a
        dict with the keys ``"discharge"``, ``"impedance"`` and ``"charge"``.

    Raises
    ------
    ValueError
        If the file holds no variable besides loadmat's metadata entries.
    """
    data_mat = scipy.io.loadmat(path)
    # loadmat adds "__header__", "__version__" and "__globals__" entries;
    # skip them explicitly instead of relying on dict ordering.
    variable_names = [name for name in data_mat if not name.startswith("__")]
    if not variable_names:
        raise ValueError(f"no data variable found in {path!s}")
    # Keep the original choice (the last variable) when several are present.
    data_key = variable_names[-1]
    entries = data_mat[data_key][0][0][0][0]
    # NOTE(review): entries 0, 1 and 2 are taken by position and labelled
    # discharge / impedance / charge; this assumes every file starts with
    # records in exactly that order -- TODO confirm per file.
    data_dict = {
        data_key: {
            "discharge": entries[0][3],
            "impedance": entries[1][3],
            "charge":    entries[2][3],
        }
    }
    return data_key, data_dict

In [207]:
def convert_mat_to_df(mat, columns):
    """Turn one nested record into a DataFrame with one column per name.

    For each position ``idx`` in ``columns``, the values found at
    ``mat[0][0][idx][0]`` become a ``pd.Series`` stored under that column
    name. Series of different lengths are aligned on their index, so the
    shorter ones are padded with NaN.
    """
    series_by_column = {}
    for position, name in enumerate(columns):
        series_by_column[name] = pd.Series(mat[0][0][position][0][:])
    return pd.DataFrame(data=series_by_column)

In [219]:
# Collect every .mat file under the working directory and use the first one found.
mats = list(Path(".").rglob("*.mat"))
mat = mats[0]
# Load that file, then tabulate its "discharge" record.
mat_data_key, mat_data = load_mat(mat)
mat_discharge_df = convert_mat_to_df(
    mat=mat_data[mat_data_key]["discharge"],
    columns=column_structure["discharge"],
)

In [220]:
# Preview the first rows of the discharge DataFrame.
mat_discharge_df.head()

Unnamed: 0,voltage_measured,current_measured,temperature_measured,current_charge,voltage_charge,time,capacity
0,3.843735,0.001207,5.905007,0.0004,0.0,0.0,0.799
1,3.843497,-0.001267,5.921671,0.0002,3.858,9.438,
2,3.335608,-1.987236,5.984983,1.9992,2.122,19.578,
3,3.306284,-1.986549,6.070582,1.9992,2.121,28.938,
4,3.281057,-1.988597,6.179706,1.999,2.099,38.281,


In [221]:
# Summary statistics for every column of the discharge DataFrame.
mat_discharge_df.describe()

Unnamed: 0,voltage_measured,current_measured,temperature_measured,current_charge,voltage_charge,time,capacity
count,280.0,280.0,280.0,280.0,280.0,280.0,1.0
mean,3.243936,-1.171521,11.879033,1.178181,1.153443,1316.701154,0.799
std,0.310617,0.979766,3.374187,0.985029,0.971125,770.607636,
min,2.493079,-1.99084,5.905007,0.0002,0.0,0.0,0.799
25%,3.037483,-1.98819,8.983963,0.0004,0.0,653.68725,0.799
50%,3.114535,-1.987056,11.538474,1.999,1.7915,1306.508,0.799
75%,3.590004,-0.000497,14.571119,1.999,2.01675,1979.0355,0.799
max,3.843735,0.004282,18.02447,1.9992,3.858,2660.313,0.799


In [222]:
# Column dtypes and non-null counts for the discharge DataFrame.
mat_discharge_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   voltage_measured      280 non-null    float64
 1   current_measured      280 non-null    float64
 2   temperature_measured  280 non-null    float64
 3   current_charge        280 non-null    float64
 4   voltage_charge        280 non-null    float64
 5   time                  280 non-null    float64
 6   capacity              1 non-null      float64
dtypes: float64(7)
memory usage: 15.4 KB
