# Response to ION Energy's Assignment Question for the Data Analyst Position

In [None]:
#import required library
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime as dt
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates
%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so use whichever name this
# installation actually provides.
whitegrid_style = (
    "seaborn-v0_8-whitegrid"
    if "seaborn-v0_8-whitegrid" in plt.style.available
    else "seaborn-whitegrid"
)
plt.style.use(whitegrid_style)
# Apply seaborn's default theme.
sns.set()

In [None]:
# Read the assignment CSV into a DataFrame and preview its first rows.
csv_path = "Assignment file for Data Analyst - assignment file.csv"
df = pd.read_csv(csv_path)
df.head()

First, we add a new `Datetime` column by converting the millisecond `Timestamp` values to datetimes.

In [None]:
# Interpret the raw "Timestamp" values as milliseconds and store the parsed
# datetimes in a new "Datetime" column, then preview the result.
raw_timestamps = df["Timestamp"]
df["Datetime"] = pd.to_datetime(raw_timestamps, unit="ms")
df.head()

Let's first get an understanding of the data.

In [None]:
# A notebook cell only renders the value of its last expression, so the
# summary statistics are printed explicitly; otherwise they are computed and
# silently discarded.
print(df.describe())
# The column dtypes stay as the final expression and are rendered as usual.
df.dtypes

# PAIR1: Grid status & SOC(State of Charge) correlation

In [None]:
# Column names of the first pair, and the sub-frame holding just those columns.
pair1 = ["Grid status", "SOC"]
pair1_data = df.loc[:, pair1]
pair1_data.head()

### Before computing the correlation of each pair, let's first plot both variables against time

In [None]:
# Line plot of Grid status and SOC for the first rows of the data set only.
fig, ax = plt.subplots(figsize=(15, 10))
start, end = 0, 200
plot_rows = df[start:end]
ax.plot(plot_rows["Datetime"].values, plot_rows["Grid status"], color="green", label="Grid status")
ax.plot(plot_rows["Datetime"].values, plot_rows["SOC"], color="red", label="SOC")
# Property names given to ax.set() are case-sensitive in current Matplotlib
# releases: the keyword must be the lowercase "title", "Title" is rejected.
ax.set(xlabel="Datetime(day/month)",
       ylabel="Grid Status & SOC",
       title="Grid Status vs SOC over time")

ax.legend(loc="upper right")
# Clean up the x axis dates: one tick per day, labelled as day/month
ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
ax.xaxis.set_major_formatter(DateFormatter("%d/%m"))

### Looking at the first 200 values (the window plotted above), the battery's SOC stays at a perfect 100% for the first 63 points, after which it decreases together with a drop in Grid status. On subsequent recharging the SOC does not recover completely: only about 90% is regained, and this pattern keeps repeating.

In [None]:
# Line plot of Grid status and SOC over the whole data set.
fig, ax = plt.subplots(figsize=(15, 10))
# Slice up to len(df): an end index of -1 would silently leave out the last row.
start, end = 0, len(df)
plot_rows = df[start:end]
ax.plot(plot_rows["Datetime"].values, plot_rows["Grid status"], color="green", label="Grid status")
ax.plot(plot_rows["Datetime"].values, plot_rows["SOC"], color="red", label="SOC")
# Property names given to ax.set() are case-sensitive in current Matplotlib
# releases: the keyword must be the lowercase "title", "Title" is rejected.
ax.set(xlabel="Datetime(month/year)",
       ylabel="Grid Status & SOC",
       title="Grid Status vs SOC over time")

ax.legend(loc="upper right")
# Clean up the x axis dates: one tick per month, labelled as month/year
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax.xaxis.set_major_formatter(DateFormatter("%m/%y"))

In [None]:
# Write the most recently created line-plot figure to a PNG file.
fig.savefig(fname="SOC vs Grid status-lineplot.png")

### The plot above shows that, in most cases, the SOC stays between 0.6 and 0.9 whenever the Grid status drops and rises again.

### Now let's look at the correlation coefficients
Grid status is binary (0 or 1), whereas SOC is a real value between 0.0 and 1.0. The Pearson coefficient computed between a binary and a continuous variable is therefore the point-biserial correlation coefficient.

In [None]:
# Pairwise Pearson correlation matrix (the pandas default) for the pair1 columns.
pair1_corr = pair1_data.corr(method="pearson")
print(pair1_corr)

### A correlation coefficient of 0.228 signifies a weak positive correlation between SOC and Grid status. So SOC and Grid status do vary together, but only by a small amount.

### Let's visualize it with a heatmap

In [None]:
# Heatmap of the Grid status / SOC correlation matrix, annotated with the
# coefficient values.
pair1_visualisation = sns.heatmap(
    pair1_corr,
    xticklabels=pair1_corr.columns,
    yticklabels=pair1_corr.columns,
    center=0.0,
    cmap="Blues",
    # seaborn's documented keyword for the cell-separator width is
    # `linewidths`. The singular `linewidth` is not a heatmap parameter; it is
    # forwarded to Matplotlib alongside seaborn's own `linewidths` default.
    linewidths=0.5,
    robust=True,
    annot=pair1_corr.values,
    square=False,
)

### Saving pair1 visualisation to png

In [None]:
# Fetch the figure that owns the pair1 heatmap axes and write it out as a PNG.
pair1_fig = pair1_visualisation.get_figure()
pair1_fig.savefig(fname="pair1_corr_fig.png")

# PAIR2: Equivalent Cycles & SOH(State of Health) correlation

As with the previous pair, let's start with a line plot.

In [None]:
# Column names of the second pair, and the sub-frame holding just those columns.
pair2 = ["Equivalent cycle", "SOH"]
pair2_data = df.loc[:, pair2]
pair2_data.head()

In [None]:
# Line plot of Equivalent cycle and SOH over the whole data set.
fig, ax = plt.subplots(figsize=(15, 10))
# Slice up to len(df): an end index of -1 would silently leave out the last row.
start, end = 0, len(df)
plot_rows = df[start:end]
ax.plot(plot_rows["Datetime"].values, plot_rows["Equivalent cycle"], color="green", label="Equivalent Cycles")
ax.plot(plot_rows["Datetime"].values, plot_rows["SOH"], color="red", label="SOH")
# Property names given to ax.set() are case-sensitive in current Matplotlib
# releases: the keyword must be the lowercase "title", "Title" is rejected.
ax.set(xlabel="Datetime(month/year)",
       ylabel="Equivalent cycles & SOH",
       title="Equivalent Cycles vs SOH over time")

ax.legend(loc="upper right")
# Clean up the x axis dates: one tick per month, labelled as month/year
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax.xaxis.set_major_formatter(DateFormatter("%m/%y"))

In [None]:
# Write the most recently created line-plot figure to a PNG file.
fig.savefig(fname="SOH vs Equivalent Cycles-lineplot.png")

### From the plot above it is quite evident that an increase in equivalent cycles does not change the battery's state of health (SOH) much. This suggests that the battery is of very good quality.

In [None]:
# Pairwise Pearson correlation matrix (the pandas default) for the pair2 columns.
pair2_corr = pair2_data.corr(method="pearson")
pair2_corr.head()

### As expected, the correlation coefficient is very close to -1, signifying a negative impact of recharging cycles on the battery's health.

Let's visualize the correlation coefficients as a heatmap.

In [None]:
# Draw the pair2 correlation matrix as an annotated heatmap with square cells.
pair2_labels = pair2_corr.columns
pair2_heatmap_options = dict(
    xticklabels=pair2_labels,
    yticklabels=pair2_labels,
    center=0.0,
    robust=True,
    cmap="Blues",
    annot=True,
    square=True,
)
pair2_visualisation = sns.heatmap(pair2_corr, **pair2_heatmap_options)

### Saving pair2 visualisation to png

In [None]:
# Fetch the figure that owns the pair2 heatmap axes and write it out as a PNG.
pair2_fig = pair2_visualisation.get_figure()
pair2_fig.savefig(fname="pair2_corr_fig.png")

# PAIR3: SOC(State of Charge) & Temperature correlation

In [None]:
# Column names of the third pair, and the sub-frame holding just those columns.
pair3 = ["SOC", "Temperature"]
pair3_data = df.loc[:, pair3]
pair3_data.head()

### Preparing the lineplot

In [None]:
# Line plot of Temperature and SOC over the whole data set.
fig, ax = plt.subplots(figsize=(15, 10))
# Slice up to len(df): an end index of -1 would silently leave out the last row.
start, end = 0, len(df)
plot_rows = df[start:end]
ax.plot(plot_rows["Datetime"].values, plot_rows["Temperature"], color="red", label="Temperature")
ax.plot(plot_rows["Datetime"].values, plot_rows["SOC"], color="green", label="SOC")
# Property names given to ax.set() are case-sensitive in current Matplotlib
# releases: the keyword must be the lowercase "title", "Title" is rejected.
ax.set(xlabel="Datetime",
       ylabel="Temperature & SOC",
       title="Temperature vs SOC over time")

ax.legend(loc="upper right")
# Clean up the x axis dates: one tick per month, labelled as month/year
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax.xaxis.set_major_formatter(DateFormatter("%m/%y"))

In [None]:
# Write the most recently created line-plot figure to a PNG file.
fig.savefig(fname="Temperature vs SOC-lineplot.png")

### The plot above suggests that SOC does not change much with variance in temperature. Next, we compute the correlation coefficient.

In [None]:
# Pairwise Pearson correlation matrix (the pandas default) for the pair3 columns.
pair3_corr = pair3_data.corr(method="pearson")
pair3_corr.head()

### As expected, the coefficient of -0.37 suggests a weak negative relationship between SOC and temperature.

In [None]:
# Draw the pair3 correlation matrix as a square-celled heatmap whose
# annotations use the "f" number format.
pair3_labels = pair3_corr.columns
pair3_heatmap_options = dict(
    xticklabels=pair3_labels,
    yticklabels=pair3_labels,
    center=0.0,
    fmt="f",
    annot=True,
    square=True,
)
pair3_visualisation = sns.heatmap(pair3_corr, **pair3_heatmap_options)

### Saving pair3 visualisation to png

In [None]:
# Fetch the figure that owns the pair3 heatmap axes and write it out as a PNG.
pair3_fig = pair3_visualisation.get_figure()
pair3_fig.savefig(fname="pair3_corr_fig.png")