In [0]:
import scipy.io as sio
import pandas as pd
import numpy as np
import pyspark.pandas as ps

# Load MATLAB file into Python

In [0]:
# Landing-zone folder and the raw MATLAB export stored in it.
path = "/dbfs/mnt/landing_zone/battery/"
file_name = "Oxford_Battery_Degradation_Dataset_1.mat"

# `path` already ends with a slash, so plain concatenation yields the full location.
full_path = f"{path}{file_name}"

# The loaded object is iterated by key further down to pick out the "Cell*" entries.
data = sio.loadmat(full_path)

## Create Utility functions

In [0]:
def flatten(data):
    """Peel singleton wrapper layers off ``data``.

    As long as ``len(data)`` is at most 1, the value is replaced by the first
    element of ``data.flatten()`` and the check is repeated. The first value
    whose length exceeds 1 is returned unchanged.

    Parameters
    ----------
    data : object supporting ``len()`` and ``.flatten()``
        Typically a nested array wrapper (assumption: the arrays produced by
        ``scipy.io.loadmat`` for MATLAB structs -- confirm against caller).

    Returns
    -------
    The innermost value reached whose length is greater than 1.
    """
    # More than one entry: nothing left to unwrap.
    if len(data) > 1:
        return data

    # Singleton layer: step into its only element and keep peeling.
    first_element = data.flatten()[0]
    return flatten(first_element)

In [0]:
def get_data(data):
    """Recursively turn a nested MATLAB struct into a single pandas DataFrame.

    The input is first unwrapped with ``flatten`` and then dispatched on the
    field names of its structured dtype, checked in this order:

    * a field named ``t`` marks a measurement leaf: its ``t``, ``v``, ``q``
      and ``T`` fields become the columns ``time_s``, ``voltage_V``,
      ``charge_mAh`` and ``temperature_C``;
    * a field named ``C1ch`` marks a level whose fields are each converted
      recursively and tagged with their field name in a ``Mode`` column;
    * a field named ``cyc0000`` marks a level whose fields are each converted
      recursively and tagged with their field name in a ``Cycle`` column.

    Parameters
    ----------
    data : structured array or record
        Nested struct, presumably as returned by ``scipy.io.loadmat``
        (verify against caller).

    Returns
    -------
    pandas.DataFrame
        All leaf measurements found below ``data``, concatenated. The index
        of each leaf frame is kept as-is (it is not reset).

    Raises
    ------
    ValueError
        If the unwrapped value has none of the marker fields above. This is
        raised at the point of failure instead of returning ``None``, which
        previously made the caller fail later with an unrelated ``TypeError``
        when it tried to add a column to the result.
    """
    data = flatten(data)

    # dtype.names is None for non-structured dtypes; normalise it so the
    # membership tests below cannot raise TypeError.
    names = data.dtype.names or ()

    if "t" in names:
        # Read the fields by name rather than by position so the result does
        # not depend on the order in which the fields were stored.
        return pd.DataFrame({
            'time_s': data["t"].flatten(),
            'voltage_V': data["v"].flatten(),
            'charge_mAh': data["q"].flatten(),
            'temperature_C': data["T"].flatten(),
        })

    # Both container levels are handled identically; only the name of the
    # column that records the field name differs.
    if 'C1ch' in names:
        label = "Mode"
    elif 'cyc0000' in names:
        label = "Cycle"
    else:
        raise ValueError(
            "get_data: unrecognised struct layout, fields = {!r}".format(names)
        )

    frames = []
    for name in names:
        frame = get_data(data[name])
        frame[label] = name
        frames.append(frame)
    return pd.concat(frames)

## Create DataFrame

In [0]:
# Only the entries whose key starts with "Cell" are converted.
keys = [name for name in data if name.startswith('Cell')]

# One DataFrame per cell, each tagged with the key it came from.
data_lst = [get_data(data[cell_key]).assign(Cell=cell_key) for cell_key in keys]

# Stack all cells into a single frame.
data_df = pd.concat(data_lst)

In [0]:
# `full_path` is rebound here: it previously pointed at the .mat file and now
# points at the parquet output, which the Spark read further down relies on.
parquet_name = "battery_lifecyle.parquet"
full_path = path + parquet_name

# NOTE(review): data_df is built with pd.concat without ignore_index, so its
# index is presumably non-unique and may be written as an extra
# `__index_level_0__` column -- confirm whether that column is wanted downstream.
data_df.to_parquet(full_path)

# Convert pandas DataFrame to Spark DataFrame

In [0]:
# `full_path` starts with "/dbfs"; drop exactly that prefix before handing the
# path to Spark (presumably because Spark resolves paths against the DBFS root
# rather than the local mount -- confirm).
dbfs_relative_path = full_path[len("/dbfs"):]
data_spark_df = spark.read.parquet(dbfs_relative_path)

In [0]:
# Bare last expression: the notebook renders this object's repr as the cell output.
data_spark_df

# Create table in Bronze

In [0]:
# Save the Spark DataFrame as a Delta table, replacing any existing contents.
(
    data_spark_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("adw_curious_minds.bronze.battery_lifecyle")
)