In [1]:
## Import appropriate packages and assign shorthand aliases
import math # math: for the gamma function used in the c4 control chart constant
import pandas as pd # pandas: for data ingestion and manipulation
import numpy as np # numpy: for mathematical and algebraic manipulation
import seaborn as sns # seaborn: for neat plots
import matplotlib.pyplot as plt # matplotlib.pyplot: for general plotting
import matplotlib.dates as mdates # matplotlib.datas: for adding dates to plots
from scipy import stats
# to create interactive plots:
%matplotlib widget 
from pandas.plotting import register_matplotlib_converters # to prevent a warning when you first plot a time series
register_matplotlib_converters()  # suppress warning related to development cycles out of sync between matplotlib and pandas

In [2]:
## Load the Mogalakwena South example data set, declaring known non-numeric strings as NaN up front
fname = '../dataset/SCF_matte1_ex.csv' # path to the CSV data set
# Strings to be treated as missing values while the file is read
na_markers = ['Tag not found','bad','Bad','Not Connect', '[-11059] No Good Data For Calculation', 'Configure']
df_data = (
    pd.read_csv(fname, na_values=na_markers)
    # convert the 'datetime' column to time stamps
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    # use the time stamps as the row index
    .set_index('datetime')
)
df_data.describe()

Unnamed: 0,matte_w,power
count,37623.0,37533.0
mean,1121.648672,15.014589
std,68.929727,5.181762
min,1001.0,-0.004958
25%,1076.0,13.223397
50%,1118.0,16.467579
75%,1155.0,19.376287
max,1667.0,22.665699


In [3]:
# Restrict the data to the analysis window (label-based slice on the datetime index)
window = slice('2024-03-14 05:00:00', '2024-04-09 05:00:00')
df_data = df_data.loc[window]
df_data.describe()


Unnamed: 0,matte_w,power
count,2061.0,2061.0
mean,1152.575934,17.729946
std,81.426491,5.188622
min,1001.0,-0.002998
25%,1112.0,19.209076
50%,1149.0,19.607941
75%,1189.0,19.971796
max,1667.0,22.665699


In [None]:
#exclude_timestamps = pd.to_datetime(['2024/03/18','2024/03/21','2024/04/01'])

In [None]:
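# NOTE: 'datetime' is the index of df_data (set when the file is loaded), not a column, so the line below would raise a KeyError if uncommented; filter on df_data.index instead.
#df_data = df_data[~df_data['datetime'].isin(exclude_timestamps)]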

In [14]:
# Keep only the rows whose 'power' value is above 18 (rows with NaN power are dropped too)
power_above_18 = df_data['power'].gt(18)
df_data = df_data.loc[power_above_18]
df_data.describe()

Unnamed: 0,matte_w,power
count,1735.0,1735.0
mean,1152.563689,19.773748
std,80.739584,0.574084
min,1001.0,18.157869
25%,1112.0,19.424745
50%,1147.0,19.719522
75%,1191.0,20.020898
max,1667.0,22.665699


In [15]:
## Time series plot of 'power'
# Create figure and axis
fig, ax = plt.subplots()
# Date format used when the x value is shown in the interactive cursor read-out
ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
# Rotate and align the date tick labels so they do not overlap
fig.autofmt_xdate()
# Plot time series
ax.plot(df_data['power'],alpha=0.8,label='fce power')
# Label the figure so it can be read on its own
ax.set_xlabel('datetime')
ax.set_ylabel('power')
ax.set_title('power time series')
# Use the axis-level legend; the trailing ';' keeps the Legend repr out of the cell output
ax.legend(loc='lower left');

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x1e4c1fe4c88>

In [8]:
# Summary statistics of df_data as it currently stands in the kernel
df_data.describe()


Unnamed: 0,matte_w,power
count,1735.0,1735.0
mean,1152.563689,19.773748
std,80.739584,0.574084
min,1001.0,18.157869
25%,1112.0,19.424745
50%,1147.0,19.719522
75%,1191.0,20.020898
max,1667.0,22.665699


In [9]:
## Shewhart s and x-bar charts for 'power'
# Subgroups are formed by resampling the series into 1-day bins
df_FP = df_data.loc[:, 'power']
# Nominal subgroup size; it only enters the c4 constant below
# NOTE(review): the number of rows per 1-day bin is not checked against n - confirm
n = 2
# c4 constant for the sample standard deviation; math.gamma is used because
# the np.math alias was removed in NumPy 2.0 (it pointed at the same function)
c4 = np.sqrt(2/(n-1))*math.gamma(n/2)/math.gamma((n-1)/2)
# Calculate sample standard deviations
si = df_FP.resample('1D').std()
# Calculate sample means
xbar = df_FP.resample('1D').mean()
# Define training data as the first 240 subgroups (all of them if fewer exist)
si_train = si[:240]
xbar_train = xbar[:240]
# Calculate sample statistics of sample statistics
sbar = si_train.mean()
xbarbar = xbar_train.mean()
# Calculate sigma values for xbar and s control charts
# NOTE(review): the c4-based estimates are left disabled, so sbar itself is
# used as sigma on both charts - confirm this is intended
sigmas = sbar #*np.sqrt(1-c4**2)/c4
sigmax = sbar #/(c4*np.sqrt(n))
# Control limits
UCLs = sbar+3*sigmas
CLs = sbar
# A standard deviation cannot be negative, so the lower s limit is floored at zero
LCLs = max(sbar-3*sigmas, 0.0)
UCLx = xbarbar + 3*sigmax
CLx = xbarbar
LCLx = xbarbar-3*sigmax
# Plot Shewhart charts: s chart on top, x-bar chart below, sharing the time axis
fig, ax = plt.subplots(2,1,sharex=True)
ax[0].format_xdata = mdates.DateFormatter('%Y-%m-%d')
fig.autofmt_xdate()
ax[0].plot(si,marker='.')
ax[0].axhline(UCLs,color='r')
ax[0].axhline(LCLs,color='r')
ax[0].axhline(CLs,color='k')
ax[0].title.set_text('Shewhart s chart')
ax[1].plot(xbar,marker='.')
ax[1].axhline(UCLx,color='r')
ax[1].axhline(LCLx,color='r')
ax[1].axhline(CLx,color='k')
ax[1].title.set_text('Shewhart x bar chart')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
# Display the x-bar chart upper control limit from the last Shewhart cell that was run
UCLx


In [16]:
## Shewhart s and x-bar charts for 'matte_w'
# Subgroups are formed by resampling the series into 1-hour bins
df_FP = df_data.loc[:, 'matte_w']
# Nominal subgroup size; it only enters the c4 constant below
# NOTE(review): the number of rows per 1-hour bin is not checked against n - confirm
n = 2
# c4 constant for the sample standard deviation; math.gamma is used because
# the np.math alias was removed in NumPy 2.0 (it pointed at the same function)
c4 = np.sqrt(2/(n-1))*math.gamma(n/2)/math.gamma((n-1)/2)
# Calculate sample standard deviations
si = df_FP.resample('1h').std()
# Calculate sample means
xbar = df_FP.resample('1h').mean()
# Define training data as the first 240 subgroups (all of them if fewer exist)
si_train = si[:240]
xbar_train = xbar[:240]
# Calculate sample statistics of sample statistics
sbar = si_train.mean()
xbarbar = xbar_train.mean()
# Calculate sigma values for xbar and s control charts
# NOTE(review): the c4-based estimates are left disabled, so sbar itself is
# used as sigma on both charts - confirm this is intended
sigmas = sbar #*np.sqrt(1-c4**2)/c4
sigmax = sbar #/(c4*np.sqrt(n))
# Control limits
UCLs = sbar+3*sigmas
CLs = sbar
# A standard deviation cannot be negative, so the lower s limit is floored at zero
LCLs = max(sbar-3*sigmas, 0.0)
UCLx = xbarbar + 3*sigmax
CLx = xbarbar
LCLx = xbarbar-3*sigmax
# Plot Shewhart charts: s chart on top, x-bar chart below, sharing the time axis
fig, ax = plt.subplots(2,1,sharex=True)
ax[0].format_xdata = mdates.DateFormatter('%Y-%m-%d')
fig.autofmt_xdate()
ax[0].plot(si,marker='.')
ax[0].axhline(UCLs,color='r')
ax[0].axhline(LCLs,color='r')
ax[0].axhline(CLs,color='k')
ax[0].title.set_text('Shewhart s chart')
ax[1].plot(xbar,marker='.')
ax[1].axhline(UCLx,color='r')
ax[1].axhline(LCLx,color='r')
ax[1].axhline(CLx,color='k')
ax[1].title.set_text('Shewhart x bar chart')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [17]:
# Display the x-bar chart upper control limit from the last Shewhart cell that was run
UCLx

1379.1290995421705

In [18]:
# Amber limit: x-bar centre line plus two sigma (xbarbar and sigmax come from the last Shewhart cell run)
amber_k = 2
amber_limit = xbarbar + amber_k*sigmax
amber_limit

1299.8008903722255

In [19]:
# Red limit: x-bar centre line plus three sigma (same expression as UCLx in the Shewhart cell)
red_k = 3
red_limit = xbarbar + red_k*sigmax
red_limit

1379.1290995421705