In [1]:
import os
# import sys

import pandas as pd
import numpy as np

import pickle as pkl

# import glob
# from datetime import datetime

import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# from sklearn.preprocessing import StandardScaler

# from tqdm import tqdm

# from sklearn.impute import SimpleImputer

# from scipy.signal import butter, lfilter

import json

from itertools import compress

from matplotlib.colors import to_rgb, to_hex

In [2]:
output_dir = '../Output'

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

# Data Processing

## Load Data

In [3]:
# Define the directory path
file_path = '../Data/T1.csv'

# Initialize an empty list to store each file's DataFrame
dataframes_list = []

names = [
    'Date/Time',
    'LV ActivePower',
    'Wind Speed',
    'Theoretical_Power_Curve',
    'Wind Direction (deg)',
]

df = pd.read_csv(file_path, sep=",", skiprows=1, names=names)

# Convert the 'datetime' column to datetime objects
df['Date/Time'] = pd.to_datetime(df['Date/Time'], format='%d %m %Y %H:%M')

# Now you can sort the DataFrame by the 'datetime' column
df = df.sort_values(by='Date/Time')


print(df.head())

            Date/Time  LV ActivePower  Wind Speed  Theoretical_Power_Curve  \
0 2018-01-01 00:00:00      380.047791    5.311336               416.328908   
1 2018-01-01 00:10:00      453.769196    5.672167               519.917511   
2 2018-01-01 00:20:00      306.376587    5.216037               390.900016   
3 2018-01-01 00:30:00      419.645905    5.659674               516.127569   
4 2018-01-01 00:40:00      380.650696    5.577941               491.702972   

   Wind Direction (deg)  
0            259.994904  
1            268.641113  
2            272.564789  
3            271.258087  
4            265.674286  


## Slide02_YearlyChangeInProduction

In [4]:
# Extract month from the 'Date/Time'
if 'Month' not in df:
    df['Month'] = df['Date/Time'].dt.month

# Group by month and calculate mean of 'LV ActivePower'
monthly_data = df.groupby('Month')['LV ActivePower'].sum()

# Create a bar chart
fig = go.Figure([go.Bar(x=[f'Month {m}' for m in monthly_data.index], y=monthly_data.values)])

# Update layout
fig.update_layout(
    title='Monthly Sum of LV ActivePower',
    xaxis_title='Month',
    yaxis_title='LV ActivePower (kW)',
    plot_bgcolor='white',
    xaxis=dict(showline=True, showgrid=False, linecolor='black'),
    yaxis=dict(showline=True, showgrid=True, gridcolor='gray', linecolor='black')
)

# Show plot
fig.show()

In [5]:
# Generate second year data by adding normal noise
np.random.seed(42)  # For reproducibility
noise = np.random.normal(0, 10000000, size=monthly_data.shape)
second_year_data = monthly_data + noise

# Calculate differences
differences = second_year_data - monthly_data

# Create a bar chart with colored bars
fig = go.Figure(data=[go.Bar(
    x=[f'Month {m}' for m in monthly_data.index],
    y=differences,
    marker_color=['green' if x > 0 else 'red' for x in differences]  # Set color based on the condition
)])

# Update layout
fig.update_layout(
    title='Difference Between Year 2 and Year 1 (with Noise)',
    xaxis_title='Month',
    yaxis_title='Difference in LV ActivePower (kW)',
    plot_bgcolor='white',
    xaxis=dict(showline=True, showgrid=False, linecolor='black'),
    yaxis=dict(showline=True, showgrid=True, gridcolor='gray', linecolor='black')
)

# Show plot
fig.show()

In [6]:
file_name = 'Slide02_YearlyChangeInProduction.json'

# Month names
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Prepare JSON data structure
json_data = {
    "x": [month_names[m-1] for m in monthly_data.index],  # m-1 to align month number with index
    "y": differences.tolist(),
    "x_series": ["x"],
    "y_series": ["y"],
    "labels": {
        "x": "Month",
        "y": "Difference in LV ActivePower (2023 - 2022)"
    },
    "series_colors": {
        "y": {month_names[m-1]: "green" if val > 0 else "red" for m, val in zip(monthly_data.index, differences)}
    }
}

# Serialize JSON data to a string
json_str = json.dumps(json_data, indent=4)
print(json_str)

# Optionally, write to a file
with open(os.path.join(output_dir, file_name), 'w') as f:
    json.dump(json_data, f, indent=4)

{
    "x": [
        "Jan",
        "Feb",
        "Mar",
        "Apr",
        "May",
        "Jun",
        "Jul",
        "Aug",
        "Sep",
        "Oct",
        "Nov",
        "Dec"
    ],
    "y": [
        4967141.530112327,
        -1382643.011711846,
        6476885.381006926,
        15230298.564080253,
        -2341533.7472333596,
        -2341369.5694918055,
        15792128.155073915,
        7674347.291529087,
        -4694743.859349521,
        5425600.435859647,
        -4634176.928124622,
        -4657297.5357025685
    ],
    "x_series": [
        "x"
    ],
    "y_series": [
        "y"
    ],
    "labels": {
        "x": "Month",
        "y": "Difference in LV ActivePower (2023 - 2022)"
    },
    "series_colors": {
        "y": {
            "Jan": "green",
            "Feb": "red",
            "Mar": "green",
            "Apr": "green",
            "May": "red",
            "Jun": "red",
            "Jul": "green",
            "Aug": "green",
            "Sep

## Slide 2 Coordinates

In [7]:
coordinates = [
[-116.73259,33.909111],
[-116.729286,33.916096],
[-116.735954,33.905323],
[-116.718285,33.917294],
[-116.732643,33.9118],
[-116.702385,33.91621],
[-116.735947,33.9048],
[-116.702385,33.912697],
[-116.732529,33.906998],
[-116.732521,33.905903],
[-116.736069,33.910629],
[-116.736122,33.912216],
[-116.73275,33.917213],
[-116.706398,33.912071],
[-116.729286,33.915031],
[-116.736145,33.913269],
[-116.714386,33.917294],
[-116.73259,33.910194],
[-116.732475,33.904316],
[-116.702446,33.911583],
[-116.725815,33.914333],
[-116.710388,33.912094],
[-116.718391,33.912594],
[-116.714401,33.911457],
[-116.710388,33.913795],
[-116.702415,33.912121],
[-116.710381,33.912636],
[-116.732651,33.912327],
[-116.710335,33.916775],
[-116.732513,33.905334],
[-116.732567,33.908039],
[-116.732491,33.904797],
[-116.736061,33.910099],
[-116.736,33.907455],
[-116.732605,33.909649],
[-116.732719,33.915562],
[-116.732681,33.915058],
[-116.70639,33.916195],
[-116.736191,33.915394],
[-116.732735,33.916641],
[-116.736092,33.911694],
[-116.714371,33.916134],
[-116.736191,33.915916],
[-116.710388,33.914394],
[-116.735992,33.906914],
[-116.736015,33.909058],
[-116.725815,33.91547],
[-116.729591,33.913994],
[-116.714401,33.915558],
[-116.702385,33.913296],
[-116.702431,33.910366],
[-116.736206,33.916458],
[-116.714386,33.912094],
[-116.732658,33.913403],
[-116.736023,33.908508],
[-116.725861,33.915989],
[-116.714386,33.914394],
[-116.702385,33.913895],
[-116.729286,33.915554],
[-116.718391,33.911995],
[-116.70639,33.917397],
[-116.736046,33.909576],
[-116.702385,33.915096],
[-116.73262,33.910721],
[-116.710396,33.91325],
[-116.70639,33.914494],
[-116.718369,33.913147],
[-116.718391,33.914894],
[-116.706383,33.913853],
[-116.714386,33.916695],
[-116.72583,33.91489],
[-116.70237,33.916851],
[-116.725891,33.917095],
[-116.718391,33.913696],
[-116.732689,33.912895],
[-116.702385,33.915695],
[-116.714386,33.913795],
[-116.706383,33.915058],
[-116.736076,33.911156],
[-116.714386,33.913197],
[-116.732735,33.916103],
[-116.725891,33.916538],
[-116.717186,33.915695],
[-116.710365,33.915005],
[-116.72934,33.917141],
[-116.732689,33.914494],
[-116.73262,33.91127],
[-116.714371,33.914967],
[-116.714386,33.912594],
[-116.70639,33.915596],
[-116.729286,33.914494],
[-116.710335,33.917351],
[-116.732689,33.913994],
[-116.736168,33.914337],
[-116.706398,33.912674],
[-116.70639,33.916794],
[-116.732513,33.906448],
[-116.706413,33.913261],
[-116.729286,33.916595],
[-116.702385,33.917397],
[-116.73259,33.908596],
[-116.736191,33.914894],
[-116.735924,33.904274],
[-116.73616,33.913807],
[-116.71035,33.915585],
[-116.702385,33.910995],
[-116.73613,33.91275],
[-116.735992,33.906399],
[-116.7024,33.914494],
[-116.710396,33.916172],
]

In [8]:
# Function to adjust color lightness
def adjust_color_lightness(color, amount=0.5):
    import colorsys
    try:
        c = to_rgb(color)
        c = colorsys.rgb_to_hls(*c)
        return to_hex(colorsys.hls_to_rgb(c[0], max(0, min(1, amount * c[1])), c[2]))
    except:
        return color

In [9]:
# Total number of points
n_points = len(coordinates)

target_colors = ['yellow']#['green', 'red','yellow'] #Adjust the colors included to achieve the desired plot

# Colors for demonstration, actual implementation would follow your data specifics
base_colors = ['green', 'red', 'yellow']
light_colors = [adjust_color_lightness(color, 1.5) for color in base_colors]
dark_colors = [adjust_color_lightness(color, 0.6) for color in base_colors]

# Colors for each point, default is green
np.random.seed(42)  # For reproducibility
base_color_indices = np.zeros(n_points).astype(int)
warning_indices = np.random.choice(n_points, size=int(0.1 * n_points), replace=False)

red_indices = warning_indices[:3]
yellow_indices = warning_indices[3:]
base_color_indices[red_indices] = 1
base_color_indices[yellow_indices] = 2

point_colors = [base_colors[bci] for bci in base_color_indices]
target_indices = [pc in target_colors for pc in point_colors]

# Calculate the boundaries
longitudes, latitudes = zip(*coordinates)
min_lat, max_lat = min(latitudes), max(latitudes)
min_lon, max_lon = min(longitudes), max(longitudes)

# Expand the bounds by 10%
lat_range = max_lat - min_lat
lon_range = max_lon - min_lon
min_lat -= lat_range * 0.1
max_lat += lat_range * 0.1
min_lon -= lon_range * 0.1
max_lon += lon_range * 0.1

# Prepare data for Plotly
data = [go.Scattergeo(
    lon=list(compress(longitudes,target_indices)),  # Longitude
    lat=list(compress(latitudes, target_indices)),  # Latitude
    mode='markers',
    marker=dict(
        size=7,
        color=[light_colors[base_colors.index(color)] for color in list(compress(point_colors, target_indices))],  # Light fill color
        line=dict(
            width=1,
            color=[dark_colors[base_colors.index(color)] for color in list(compress(point_colors, target_indices))]  # Dark outline color
        )
    )
)]

# Create layout
layout = go.Layout(
    title='GPS Coordinates Visualization',
    geo=dict(
        showland=True,
        landcolor="rgb(217, 217, 217)",
        subunitcolor="rgb(255, 255, 255)",
        countrycolor="rgb(255, 255, 255)",
        showlakes=True,
        lakecolor="rgb(255, 255, 255)",
        showsubunits=True,
        showcountries=True,
        resolution=50,
        projection=dict(
            type="mercator"
        ),
        lonaxis=dict(
            showgrid=True,
            gridwidth=0.5,
            range=[min_lon, max_lon],
            dtick=5
        ),
        lataxis=dict(
            showgrid=True,
            gridwidth=0.5,
            range=[min_lat, max_lat],
            dtick=5
        )
    )
)

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Display the figure
fig.show()

In [10]:
file_name = 'Slide02_TurbineCoordinates_yellow.json'

# Prepare the JSON data structure
plot_json = {
    "lat": list(compress(latitudes, target_indices)),
    "long": list(compress(longitudes, target_indices)),
    "x_series": "long",  # Indicative of what 'x' refers to in a geographical context
    "y_series": "lat",   # Indicative of what 'y' refers to in a geographical context
    "labels": {
        "x": "Longitude",
        "y": "Latitude",
        "title": "Turbine Health Map",
        "xaxis_range": [min_lon, max_lon],
        "yaxis_range": [min_lat, max_lat]
    },
    "map_features": {
        "land_color": "rgb(217, 217, 217)",
        "lake_color": "rgb(255, 255, 255)",
        "resolution": 50,
        "projection_type": "mercator"
    },
    "points": {
        "colors": [light_colors[base_colors.index(color)] for color in list(compress(point_colors, target_indices))],
        "size": 7,
        "outline_color": [dark_colors[base_colors.index(color)] for color in list(compress(point_colors, target_indices))],
        "outline_width": 1
    }
}

# Serialize JSON data to a string
json_str = json.dumps(plot_json, indent=4)
print(json_str)

# Optionally, write to a file
with open(os.path.join(output_dir, file_name), 'w') as f:
    json.dump(plot_json, f, indent=4)

{
    "lat": [
        33.912216,
        33.908039,
        33.909058,
        33.91325,
        33.913853,
        33.913197,
        33.917141,
        33.906448
    ],
    "long": [
        -116.736122,
        -116.732567,
        -116.736015,
        -116.710396,
        -116.706383,
        -116.714386,
        -116.72934,
        -116.732513
    ],
    "x_series": "long",
    "y_series": "lat",
    "labels": {
        "x": "Longitude",
        "y": "Latitude",
        "title": "Turbine Health Map",
        "xaxis_range": [
            -116.73958959999999,
            -116.6989864
        ],
        "yaxis_range": [
            33.9029617,
            33.9187093
        ]
    },
    "map_features": {
        "land_color": "rgb(217, 217, 217)",
        "lake_color": "rgb(255, 255, 255)",
        "resolution": 50,
        "projection_type": "mercator"
    },
    "points": {
        "colors": [
            "#ffff80",
            "#ffff80",
            "#ffff80",
            "#ffff8

## Slide 2 Quarter Summary

In [11]:
# Data setup
categories = ["Up time", "Unexpected failures", "Profit/loss"][::-1]  # Reverse the list
values = [0.8, 0.6, 1.3][::-1]  # Reverse the list to match the categories
is_good_increase = [True, False, True][::-1]  # Reverse to match

# Apply logarithm base 2 transformation
log_values = np.log2(values)

# Determine color based on value and whether increase is good
colors = ['green' if (v > 0 and good) or (v < 0 and not good) else 'red'
          for v, good in zip(log_values, is_good_increase)]

# Create the figure
fig = go.Figure()

# Add the bar for each category
for category, value, color in zip(categories, log_values, colors):
    # Base is the log2 of 1, which is 0
    base = 0
    width = value - base  # Calculate width from log2(1) = 0
    fig.add_trace(go.Bar(
        x=[width],  # width of the bar from the base
        y=[category],
        orientation='h',
        marker_color=color,
        base=base,  # starting point of the bar (log2(1) = 0)
        name=category
    ))

# Update layout to set x-axis and add customization
fig.update_layout(
    xaxis=dict(
        tickvals=np.log2([0.5, 1, 2]),
        ticktext=['Half', 'No Change', 'Double'],
        range=[np.log2(0.5), np.log2(2)],  # Set range from log2(0.5) to log2(2)
        showline=True, 
        showgrid=True, 
        gridcolor='gray', 
        linecolor='black'
    ),
    title="Quarterly Summary of Changes",
    xaxis_title="Change Scale (Log Scale)",
    yaxis_title="Categories",
    plot_bgcolor='white',
    # xaxis=dict(),
    yaxis=dict(showline=True)
)

# Show the plot
fig.show()

In [12]:
file_name = 'Slide02_QuarterlySummary.json'

# # Data setup
# categories = ["Up time", "Unexpected failures", "Profit/loss"][::-1]  # Reverse the list
# values = [0.8, 0.6, 1.3][::-1]  # Reverse the list to match the categories
# is_good_increase = [True, False, True][::-1]  # Reverse to match

# Apply logarithm base 2 transformation
log_values = np.log2(values)

# Determine color based on value and whether increase is good
colors = ['green' if (v > 0 and good) or (v < 0 and not good) else 'red'
          for v, good in zip(log_values, is_good_increase)]

# Prepare JSON data structure
json_data = {
    "x": log_values.tolist(),
    "y": categories,
    "x_series": ["x"],
    "y_series": ["y"],
    "labels": {
        "x": "Change Scale (Log Scale)",
        "y": "Categories"
    },
    "series_colors": colors,
    "xaxis": {
        "tickvals": np.log2([0.5, 1, 2]).tolist(),
        "ticktext": ['Half', 'No Change', 'Double'],
        "range": [np.log2(0.5), np.log2(2)]
    },
}

# Serialize JSON data to a string
json_str = json.dumps(json_data, indent=4)
print(json_str)

# Optionally, write to a file
with open(os.path.join(output_dir, file_name), 'w') as f:
    json.dump(json_data, f, indent=4)

{
    "x": [
        0.37851162325372983,
        -0.7369655941662062,
        -0.3219280948873623
    ],
    "y": [
        "Profit/loss",
        "Unexpected failures",
        "Up time"
    ],
    "x_series": [
        "x"
    ],
    "y_series": [
        "y"
    ],
    "labels": {
        "x": "Change Scale (Log Scale)",
        "y": "Categories"
    },
    "series_colors": [
        "green",
        "green",
        "red"
    ],
    "xaxis": {
        "tickvals": [
            -1.0,
            0.0,
            1.0
        ],
        "ticktext": [
            "Half",
            "No Change",
            "Double"
        ],
        "range": [
            -1.0,
            1.0
        ]
    }
}


## Slide 3 Mean Time to Failure by Turbine Model

In [13]:
# Data setup
models = ['V47-0.66', 'GE1.5-82.5', 'Z50', 'V117-4.3']  # Machine models
durations = [0.8, 2.3, 2.4, 4.8]  # Corresponding durations

# Create the figure
fig = go.Figure()

# Add the bar for each model
for model, duration in zip(models, durations):
    fig.add_trace(go.Bar(
        x=[duration],  # Duration value
        y=[model],  # Model name
        orientation='h',  # Horizontal bars
        name=model  # Legend entry
    ))

# Update layout to set x-axis and add customization
fig.update_layout(
    xaxis={
        'range': [0, 5],  # Set range from 0 to 5
        'title': "Mean Time Until Failure (years)",  # X-axis label
        'showline': True,  # Show the line at the axis
        'showgrid': True,  # Show gridlines
        'gridcolor': 'gray',  # Gridline color
        'linecolor': 'black'  # Line color at the axis
    },
    title="Mean Time Until Failure by Machine Model",
    yaxis_title="Machine Model",
    plot_bgcolor='white',
    yaxis={
        'showline': True,  # Show the line at the axis
        'autorange': "reversed"  # Reverse the y-axis to get the first model at the top
    }
)

# Show the plot
fig.show()

In [14]:
file_name = 'Slide03_MeanTimeToFailureByModel.json'

# # Data setup
# models = ['V47-0.66', 'GE1.5-82.5', 'Z50', 'V117-4.3']  # Machine models
# durations = [0.8, 2.3, 2.4, 4.8]  # Corresponding durations

# Prepare JSON data structure
json_data = {
    "x": durations,
    "y": models,
    "x_series": ["x"],
    "y_series": ["y"],
    "labels": {
        "x": "Mean Time Until Failure (years)",
        "y": "Turbine Model"
    },
}

# Serialize JSON data to a string
json_str = json.dumps(json_data, indent=4)
print(json_str)


# Optionally, write to a file
with open(os.path.join(output_dir, file_name), 'w') as f:
    json.dump(json_data, f, indent=4)

{
    "x": [
        0.8,
        2.3,
        2.4,
        4.8
    ],
    "y": [
        "V47-0.66",
        "GE1.5-82.5",
        "Z50",
        "V117-4.3"
    ],
    "x_series": [
        "x"
    ],
    "y_series": [
        "y"
    ],
    "labels": {
        "x": "Mean Time Until Failure (years)",
        "y": "Turbine Model"
    }
}


## Slide 3 Top 3 Root Cause of Failure

In [15]:
# Data setup
categories = ["Blade", "Bearing", "Gearbox", "Misc."]
values = [0.59, 0.21, 0.18, 0.02]

# Pair categories with their values and sort by values in descending order
paired_data = sorted(zip(categories, values), key=lambda x: x[1], reverse=False)

# Unzip the paired data back into categories and values for plotting
sorted_categories, sorted_values = zip(*paired_data)

# Prepare DataFrame
df = pd.DataFrame({
    'Category': sorted_categories,
    'Value': sorted_values,
    'Dummy': np.zeros(len(sorted_values))  # Dummy column for consistent x-axis positioning
})

# Define colors for each category manually
colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA']  # Example colors

# Create the figure
fig = go.Figure()


# Add a single vertical bar with segments for each category
for i, (category, value) in enumerate(zip(df['Category'], df['Value'])):
    fig.add_trace(go.Bar(
        x=[1],  # Single dummy value for all segments
        y=[value],
        name=category,
        orientation='v',
        hoverinfo='name+y',
        marker=dict(color=colors[i])  # Use manually defined colors
    ))

# Update layout for the plot
fig.update_layout(
    barmode='stack',  # Stack the bars vertically
    title="Top 3 Root Causes of Failure",
    xaxis=dict(
        title="",  # No need for x-axis title
        tickvals=[1],  # Single tick
        ticktext=["Total Failures"],  # Label for the single tick
        showticklabels=False  # Hide the tick label for clean visual
    ),
    yaxis=dict(
        title="Percentage",
        tickformat=".1%",  # Format y-axis ticks as percentages
    ),
    showlegend=True,  # Show legend to identify categories
)

# Show the plot
fig.show()

In [16]:
file_name = 'Slide03_Top3Failures.json'

# # Data setup and sorting as previously defined
# categories = ["Blade", "Bearing", "Gearbox", "Misc."]
# values = [0.59, 0.21, 0.18, 0.07]
# paired_data = sorted(zip(categories, values), key=lambda x: x[1], reverse=True)
# sorted_categories, sorted_values = zip(*paired_data)

# Define colors for each category manually
colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA']  # Example colors

# Prepare JSON data structure
json_data = {
    "x": [1],  # Single dummy value for x-axis for all segments
    "y": list(sorted_values),
    "x_series": ["x"],
    "y_series": ["y"],
    "labels": {
        "x": "",  # No need for x-axis title
        "y": "Percentage"
    },
    "categories": list(sorted_categories),
    "colors": colors,
    "barmode": "stack",
    "title": "Top 3 Root Causes of Failure",
    "yaxis_tickformat": ".1%",
    "showlegend": True
}

# Serialize JSON data to a string
json_str = json.dumps(json_data, indent=4)
print(json_str)

# Optionally, write to a file
with open(os.path.join(output_dir, file_name), 'w') as f:
    json.dump(json_data, f, indent=4)

{
    "x": [
        1
    ],
    "y": [
        0.02,
        0.18,
        0.21,
        0.59
    ],
    "x_series": [
        "x"
    ],
    "y_series": [
        "y"
    ],
    "labels": {
        "x": "",
        "y": "Percentage"
    },
    "categories": [
        "Misc.",
        "Gearbox",
        "Bearing",
        "Blade"
    ],
    "colors": [
        "#636EFA",
        "#EF553B",
        "#00CC96",
        "#AB63FA"
    ],
    "barmode": "stack",
    "title": "Top 3 Root Causes of Failure",
    "yaxis_tickformat": ".1%",
    "showlegend": true
}


## Slides 6 & 8 Predicted Bearing Fault

In [17]:
file_path = '../Data/Slide6-8.pkl'

with open(file_path,'rb') as f:
    df_cumsum = pkl.load(f)

In [18]:
# Find the maximum value for setting up y-axis ticks
max_value = df_cumsum.max().max()
next_hundred = (max_value // 100 + 1) * 100
threshold = 100#next_hundred / 2

In [19]:
fig = go.Figure()

# Plot each time series
for series in df_cumsum.columns:
    fig.add_trace(go.Scatter(x=df_cumsum.index, y=df_cumsum[series], mode='lines', name=series))
    # Find first index where series exceeds threshold
    exceed = df_cumsum[df_cumsum[series] >= threshold]
    if not exceed.empty:
        first_exceed = exceed.index[0]
        fig.add_trace(go.Scatter(x=[first_exceed], y=[df_cumsum.loc[first_exceed, series]], mode='markers', marker_symbol='circle', marker_color='red', marker_size=10))

# Add threshold line and shaded area
fig.add_shape(type="line",
              x0=df_cumsum.index[0], x1=df_cumsum.index[-1],
              y0=threshold, y1=threshold,
              line=dict(dash="dash", color="red", width=2))
fig.add_vrect(x0=df_cumsum.index[0], x1=df_cumsum.index[-1],
              fillcolor="red", opacity=0.2,
              line_width=0,
              y0=threshold/next_hundred, y1=1)  # Use next_hundred directly

# Update layout for y-axis ticks
fig.update_layout(
    title="Time Series Data with Threshold",
    xaxis_title="Date",
    yaxis_title="Value",
    yaxis=dict(tickvals=[0, threshold, next_hundred], range=[0, next_hundred])
)

# Show the plot
fig.show()

In [20]:
from datetime import datetime

In [21]:
file_name = 'Slide08_PredictedBearingFault_Novelets.json'
# Serialize the DataFrame to a simpler format for JSON
data_json = df_cumsum.reset_index().to_dict(orient='list')

# Convert Timestamps in the index to string format (ISO format)
data_json['index'] = [ts.isoformat() for ts in data_json['index']]

# Similarly, convert any other Timestamps to strings for threshold exceedance markers
markers = {
    series: df_cumsum[df_cumsum[series] > threshold].index[0].isoformat() 
    for series in df_cumsum.columns if not df_cumsum[df_cumsum[series] > threshold].empty
}

# Prepare the main JSON data structure
plot_json = {
    "x": data_json['index'],  # x values: datetime index as strings
    "y": {col: data_json[col] for col in df_cumsum.columns},  # y values: dictionary of time series data
    "x_series": "index",
    "y_series": list(df_cumsum.columns),
    "labels": {
        "x": "Date",
        "y": "Value",
        "title": "Time Series Data with Threshold",
        "yaxis_tickvals": [0, threshold, next_hundred],
        "yaxis_range": [0, next_hundred]
    },
    "threshold": {
        "value": threshold,
        "color": "red",
        "opacity": 0.2,
        "line_style": "dash",
        "line_width": 2
    },
    "markers": {
        "threshold_exceedance": {
            "color": "red",
            "size": 10,
            "symbol": "circle",
            "series": markers
        }
    }
}

# Serialize JSON data to a string
json_str = json.dumps(plot_json, indent=4)
print(json_str)

# Optionally, write to a file
with open(os.path.join(output_dir,file_name), 'w') as f:
    json.dump(plot_json, f, indent=4)

{
    "x": [
        "2019-04-17T00:00:00",
        "2019-04-21T10:11:55.347868825",
        "2019-04-25T20:23:50.695737650",
        "2019-04-30T06:35:46.043606476",
        "2019-05-04T16:47:41.391475301",
        "2019-05-09T02:59:36.739344126",
        "2019-05-13T13:11:32.087212952",
        "2019-05-17T23:23:27.435081777",
        "2019-05-22T09:35:22.782950602",
        "2019-05-26T19:47:18.130819428",
        "2019-05-31T05:59:13.478688253",
        "2019-06-04T16:11:08.826557078",
        "2019-06-09T02:23:04.174425904",
        "2019-06-13T12:34:59.522294729",
        "2019-06-17T22:46:54.870163555",
        "2019-06-22T08:58:50.218032380",
        "2019-06-26T19:10:45.565901205",
        "2019-07-01T05:22:40.913770031",
        "2019-07-05T15:34:36.261638856",
        "2019-07-10T01:46:31.609507681",
        "2019-07-14T11:58:26.957376507",
        "2019-07-18T22:10:22.305245332",
        "2019-07-23T08:22:17.653114157",
        "2019-07-27T18:34:13.000982983",
        "2019

## Slide 4: Proportion of Failure Types by Model# Data setup

In [22]:
# Data setup
models = ['V47-0.66','GE1.5-82.5','Z50','V117-4.3']
categories = ["Blade", "Bearing", "Gearbox", "Misc."]
base_values = [0.6, 0.2, 0.15, 0.05]
temp_list = []
for m in models:
    for c in categories:
        temp_list.append([m, c, np.random.uniform(0,1)])

m_list, c_list, v_list = zip(*temp_list)
temp_dict = {
    'Model':m_list,
    'Category':c_list,
    'Value':v_list,
}

df = pd.DataFrame(temp_dict)
# Group by 'Model' and normalize 'Value' to sum to 1 within each group
df['Normalized Value'] = df.groupby('Model')['Value'].transform(lambda x: x / x.sum())

# Define colors for each category manually
colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA']  # Example colors

# Create the figure
fig = go.Figure()


# Add a single vertical bar with segments for each category
for i, (model, category, value) in enumerate(zip(df['Model'], df['Category'], df['Normalized Value'])):
    fig.add_trace(go.Bar(
        x=[model],  # Single dummy value for all segments
        y=[value],
        name=category,
        orientation='v',
        hoverinfo='name+y',
        marker=dict(color=colors[i%4])  # Use manually defined colors
    ))

# Update layout for the plot
fig.update_layout(
    barmode='stack',  # Stack the bars vertically
    title="Top 3 Root Causes of Failure",
    xaxis=dict(
        title="",  # No need for x-axis title
        tickvals=models,  # Single tick
        ticktext=["Total Failures"],  # Label for the single tick
        showticklabels=False  # Hide the tick label for clean visual
    ),
    yaxis=dict(
        title="Percentage",
        tickformat=".1%",  # Format y-axis ticks as percentages
    ),
    showlegend=True,  # Show legend to identify categories
)

# Show the plot
fig.show()

In [23]:
# Data setup
models = ['V47-0.66','GE1.5-82.5','Z50','V117-4.3']
categories = ["Blade", "Bearing", "Gearbox", "Misc."]
temp_list = []
for m in models:
    for c in categories:
        temp_list.append([m, c, np.random.uniform(0,.05)])

m_list, c_list, v_list = zip(*temp_list)
temp_dict = {
    'Model':m_list,
    'Category':c_list,
    'Value':v_list,
}

df = pd.DataFrame(temp_dict)

# Create the figure object
fig = go.Figure()

# Find unique categories and models
categories = df['Category'].unique()
models = df['Model'].unique()

# Colors for each model - assuming a limited number of models
colors = ['blue', 'orange', 'green', 'red']

# Plot each model's data within each category
for i, model in enumerate(models):
    # Filter the DataFrame for the current model
    df_filtered = df[df['Model'] == model]

    # Add a bar for each category within the model
    fig.add_trace(go.Bar(
        x=df_filtered['Category'],  # Categories as x-axis
        y=df_filtered['Value'],  # Normalized values as height of the bars
        name=model,  # Legend name
        marker_color=colors[i % len(colors)]  # Color of the bars
    ))

# Update the layout of the figure
fig.update_layout(
    barmode='group',  # Group bars by x-axis category
    title="Failures by Model",
    xaxis=dict(title="Failure Category"),  # Rotate category names 90 degrees
    yaxis=dict(title="Percent Failure by Model", tickformat=".0%"),  # Show y-values as percentages
    legend_title="Model"
)

# Show the plot
fig.show()

In [24]:
file_name = 'Slide04_FailureTypeByModel.json'

color_map = {model: colors[i % len(colors)] for i, model in enumerate(models)}

# Prepare the JSON data structure
plot_json = {
    "x_series": "Category",  # Categories are mapped to the x-axis
    "y_series": "Value",  # Values are mapped to the y-axis
    "labels": {
        "x": "Failure Category",
        "y": "Percent Failure by Model",
        "title": "Failures by Model",
        "legend_title": "Model"
    },
    "data": [
        {
            "model": model,
            "category_data": [
                {
                    "category": row['Category'],
                    "value": row['Value'],
                    "color": color_map[model]
                }
                for index, row in df[df['Model'] == model].iterrows()
            ]
        }
        for model in models
    ],
    "layout": {
        "barmode": "group",
        "xaxis_tickformat": ".0%",  # Assuming value transformation as needed elsewhere
    }
}

# Serialize JSON data to a string
json_str = json.dumps(plot_json, indent=4)
print(json_str)

# Optionally, write to a file
with open(os.path.join(output_dir, file_name), 'w') as f:
    json.dump(plot_json, f, indent=4)

{
    "x_series": "Category",
    "y_series": "Value",
    "labels": {
        "x": "Failure Category",
        "y": "Percent Failure by Model",
        "title": "Failures by Model",
        "legend_title": "Model"
    },
    "data": [
        {
            "model": "V47-0.66",
            "category_data": [
                {
                    "category": "Blade",
                    "value": 0.038039252430844876,
                    "color": "blue"
                },
                {
                    "category": "Bearing",
                    "value": 0.028063859878474814,
                    "color": "blue"
                },
                {
                    "category": "Gearbox",
                    "value": 0.03854835899772805,
                    "color": "blue"
                },
                {
                    "category": "Misc.",
                    "value": 0.02468977981821954,
                    "color": "blue"
                }
            ]
        },
    

## Slide 5 & 7 Bearing Health: Failures

In [25]:
# Parameters
plot_mode = 'pred' #Change this based on slide 5 or 7, 'pred' or 'fail'

num_machines = 50  # Example number of machines
num_bearings = 4   # Example number of bearings per machine

parameter_dict = {
    'fail': {
        'seed':42,
        'color':'red',
        'json_file_name':'Slide05_BearingHealthFailures.json',
        },
    'pred': {
        'seed':43,
        'color':'yellow',
        'json_file_name':'Slide07_BearingHealthPredictions.json',
        },
}

# Generate the dataset
np.random.seed(parameter_dict[plot_mode]['seed'])  # For reproducibility
dataset = np.random.uniform(0, 0.6, [num_machines, num_bearings]) > 0.5
dataset[:,1] = np.random.uniform(0, 0.8, [num_machines]) > 0.5

# Create figure
fig = go.Figure()

# Sort machines by the number of True values
sorted_indices = np.argsort(-dataset.sum(axis=1))  # Negative for descending sort

for i, idx in enumerate(sorted_indices):
    machine_data = dataset[idx]
    # Add each machine as a separate trace (stacked bar)
    fig.add_trace(go.Bar(
        x=[i]*num_bearings,  # Position on the x-axis
        y=[1]*num_bearings,  # Each bearing takes one unit space on y-axis
        name=f"Turbine {idx+1}",
        marker=dict(
            color=[parameter_dict[plot_mode]['color'] if status else 'green' for status in machine_data],
            line=dict(color='black', width=1)  # Black line to visually separate bearings
        ),
        orientation='v'  # Vertical bars
    ))

# Update layout
fig.update_layout(
    title="Turbine Bearings Status",
    xaxis=dict(title="Turbines", tickmode='array', tickvals=list(range(num_machines)), ticktext=[f"Turbine {i+1}" for i in sorted_indices]),
    yaxis=dict(title="Bearings", range=[0, num_bearings]),
    barmode='stack',  # Stack the bars
    bargap=0  # No gap between bars
)

# Show the plot
fig.show()

In [26]:
file_name = parameter_dict[plot_mode]['json_file_name']

# Prepare JSON data structure
plot_json = {
    "x_series": "Machine Index",  # Identifies what the x-axis represents
    "y_series": "Bearing Count",  # Identifies what the y-axis represents
    "labels": {
        "title": "Machines and Bearings Status",
        "x": "Machines",
        "y": "Bearings"
    },
    "data": [
        {
            "machine_index": int(i),  # Machine index for identification
            "bearings": [
                {
                    "bearing_index": int(j),
                    "status": parameter_dict[plot_mode]['color'] if status else "green"
                }
                for j, status in enumerate(dataset[sorted_indices[i]])
            ]
        }
        for i in range(num_machines)
    ],
    "layout": {
        "barmode": "stack",
        "bargap": 0,
        "xaxis": {
            "tickmode": "array",
            "tickvals": list(range(num_machines)),
            "ticktext": [f"Machine {i+1}" for i in sorted_indices]
        },
        "yaxis": {
            "range": [0, num_bearings]
        }
    }
}

# Serialize JSON data to a string
json_str = json.dumps(plot_json, indent=4)
print(json_str)

# Optionally, write to a file
with open(os.path.join(output_dir, file_name), 'w') as f:
    json.dump(plot_json, f, indent=4)

{
    "x_series": "Machine Index",
    "y_series": "Bearing Count",
    "labels": {
        "title": "Machines and Bearings Status",
        "x": "Machines",
        "y": "Bearings"
    },
    "data": [
        {
            "machine_index": 0,
            "bearings": [
                {
                    "bearing_index": 0,
                    "status": "yellow"
                },
                {
                    "bearing_index": 1,
                    "status": "yellow"
                },
                {
                    "bearing_index": 2,
                    "status": "yellow"
                },
                {
                    "bearing_index": 3,
                    "status": "green"
                }
            ]
        },
        {
            "machine_index": 1,
            "bearings": [
                {
                    "bearing_index": 0,
                    "status": "yellow"
                },
                {
                    "bearing_index": 1,
