In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta

In [3]:
# Load validated schedule and processed sensor data
with open("/content/drive/MyDrive/SmartFactory/Results/Week8/week8_validated_schedule.json", 'r') as f:
    schedule_data = json.load(f)
schedule = pd.DataFrame(schedule_data)
schedule["rescheduled_time"] = pd.to_datetime(schedule["rescheduled_time"])

In [4]:
final_schedule_df = pd.DataFrame(schedule)


In [5]:
schedule.head()

Unnamed: 0,job_id,machine_id,original_time,rescheduled_time,duration,failure_prob,urgency_score,priority,delayed,delay_minutes,hour
0,JOB-A-M1A-0,A-M1A,1438344,1970-01-01 00:00:00.001438344,74,0.236,0.273,High,False,0.0,0
1,JOB-A-M1A-20,A-M1A,1508994,1970-01-01 00:00:00.001508994,56,0.4,0.429,High,False,0.0,0
2,JOB-A-M1A-40,A-M1A,1584829,1970-01-01 00:00:00.001584829,46,0.457,0.449,Medium,False,0.0,0
3,JOB-A-M1A-60,A-M1A,1668822,1970-01-01 00:00:00.001668824,50,0.56,0.681,Medium,True,3e-05,0
4,JOB-A-OUX-0,A-OUX,1433552,1970-01-01 00:00:00.001433552,63,0.29,0.307,Low,False,0.0,0


In [6]:
with open("/content/drive/MyDrive/SmartFactory/Dataset/processed_sensor_data.json", 'r') as f:
    sensor = pd.DataFrame(json.load(f))
sensor["timestamp"] = pd.to_datetime(sensor["timestamp"])

In [7]:
sensor.head()

Unnamed: 0,timestamp,machine_id,temperature,vibration,rpm,pressure,humidity,voltage,failure,year,...,voltage_lag_1,voltage_lag_2,temperature_roc,vibration_roc,rpm_roc,pressure_roc,humidity_roc,voltage_roc,temperature_cummax,vibration_cummax
0,2015-05-26 20:55:13.644,B-MY9,71.47,0.94,2694,7.5,65.72,215.55,0,2015,...,,,,,,,,,71.47,0.94
1,2015-05-27 20:15:37.644,N-PR7,64.6,2.44,3054,6.96,44.25,226.71,0,2015,...,,,,,,,,,64.6,2.44
2,2015-05-28 07:53:40.644,Y-09D,76.14,1.93,2952,6.41,51.6,243.14,0,2015,...,,,,,,,,,76.14,1.93
3,2015-05-29 09:45:29.644,X-PYA,73.72,1.87,2607,7.13,59.94,229.21,0,2015,...,,,,,,,,,73.72,1.87
4,2015-05-30 06:22:48.644,G-2H4,77.37,0.57,3063,4.08,47.57,221.11,0,2015,...,,,,,,,,,77.37,0.57


# Live System Performance Summary

In [8]:
def compute_performance_metrics(schedule_df, sensor_df):
    # Total jobs processed
    total_jobs = schedule_df['job_id'].nunique()

    # Job delay statistics
    avg_delay = schedule_df['delay_minutes'].mean()
    max_delay = schedule_df['delay_minutes'].max()
    jobs_delayed_pct = (schedule_df['delay_minutes'] > 0).mean() * 100

    # Failure rate in monitored period
    failure_rate = sensor_df['failure'].mean() * 100

    metrics = {
        "Total Jobs Scheduled": total_jobs,
        "Average Job Delay (minutes)": round(avg_delay, 2),
        "Maximum Job Delay (minutes)": max_delay,
        "Percentage Jobs Delayed (%)": round(jobs_delayed_pct, 2),
        "Machine Failure Rate (%)": round(failure_rate, 2)
    }
    return metrics

performance_metrics = compute_performance_metrics(schedule, sensor)
print("System Performance Metrics:")
for k, v in performance_metrics.items():
    print(f"- {k}: {v}")

System Performance Metrics:
- Total Jobs Scheduled: 279
- Average Job Delay (minutes): 0.0
- Maximum Job Delay (minutes): 6e-05
- Percentage Jobs Delayed (%): 30.47
- Machine Failure Rate (%): 9.94


# Time Series Dashboard for Monitoring Key Metrics

In [9]:
# Prepare daily schedule metrics
schedule['date'] = schedule['rescheduled_time'].dt.date
daily_metrics = schedule.groupby('date').agg(
    jobs_scheduled=('job_id', 'count'),
    avg_delay=('delay_minutes', 'mean'),
    jobs_delayed_pct=('delay_minutes', lambda x: (x > 0).mean() * 100)
).reset_index()

# Prepare daily failure counts
failures_daily = sensor[sensor['failure'] == 1].groupby(sensor['timestamp'].dt.date).size().reset_index(name='failures')
failures_daily.rename(columns={'index': 'date', 0: 'failures'}, inplace=True)
failures_daily.columns = ['date', 'failures']
failures_daily['date'] = pd.to_datetime(failures_daily['date'])

# Ensure both 'date' columns are datetime64
daily_metrics['date'] = pd.to_datetime(daily_metrics['date'])
failures_daily['date'] = pd.to_datetime(failures_daily['date'])

# Now merge on 'date'
daily_summary = pd.merge(
    daily_metrics,
    failures_daily,
    on='date',
    how='left'
).fillna(0)

daily_summary.rename(columns={'failures': 'failures_count'}, inplace=True)

In [10]:
# Plot interactive time series for jobs scheduled, delay %, failures

fig_ts = go.Figure()
fig_ts.add_trace(go.Scatter(x=daily_summary['date'], y=daily_summary['jobs_scheduled'], mode='lines+markers', name='Jobs Scheduled'))
fig_ts.add_trace(go.Scatter(x=daily_summary['date'], y=daily_summary['avg_delay'], mode='lines+markers', name='Avg Delay (min)', yaxis='y2'))
fig_ts.add_trace(go.Scatter(x=daily_summary['date'], y=daily_summary['failures_count'], mode='lines+markers', name='Failures', yaxis='y3'))

fig_ts.update_layout(
    title='Daily System Performance Monitoring',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Jobs Scheduled', side='left'),
    yaxis2=dict(title='Average Delay (min)', overlaying='y', side='right'),
    yaxis3=dict(title='Failures', overlaying='y', side='right', position=0.95, anchor='free'),
    legend=dict(x=0.01, y=0.99)
)

fig_ts.write_html("/content/drive/MyDrive/SmartFactory/Results/Week10/Monitoring/daily_performance_monitoring.html")
fig_ts.show()

# Alert Generation: Job Delay & Failure Anomalies

In [11]:
# Thresholds
DELAY_THRESHOLD = 30  # minutes
FAILURE_RATE_THRESHOLD = 5  # %

alerts = []

# Alert for jobs delayed more than threshold
delayed_jobs = schedule[schedule['delay_minutes'] > DELAY_THRESHOLD]
for _, row in delayed_jobs.iterrows():
    alert = {
        "alert_type": "Job Delay",
        "job_id": row['job_id'],
        "machine_id": row['machine_id'],
        "delay_minutes": row['delay_minutes'],
        "rescheduled_time": str(row['rescheduled_time']),
        "priority": row['priority']
    }
    alerts.append(alert)

In [12]:
# Alert if daily failure count exceeds threshold (simple anomaly)
if daily_summary['failures_count'].max() > FAILURE_RATE_THRESHOLD:
    alert = {
        "alert_type": "High Failure Count",
        "date": str(daily_summary.loc[daily_summary['failures_count'].idxmax()]['date']),
        "failures_count": int(daily_summary['failures_count'].max())
    }
    alerts.append(alert)

In [13]:
# Save alerts to JSON
with open("/content/drive/MyDrive/SmartFactory/Results/Week10/Alerts/alerts.json", "w") as f:
    json.dump(alerts, f, indent=4)

print(f"Total Alerts Generated: {len(alerts)}")

Total Alerts Generated: 0


# Deep Dive Visualization: Delay Distribution by Priority

In [14]:
fig_delay_dist = px.box(schedule, x='priority', y='delay_minutes',
                        title="Distribution of Job Delays by Priority",
                        labels={'priority': 'Job Priority', 'delay_minutes': 'Delay (minutes)'},
                        color='priority')
fig_delay_dist.write_html("/content/drive/MyDrive/SmartFactory/Results/Week10/Monitoring/delay_distribution_priority.html")
fig_delay_dist.show()

# Machine Health Dashboard - Rolling Failure Rates

In [15]:
sensor['date'] = sensor['timestamp'].dt.date
rolling_failure_rate = sensor.groupby(['machine_id', 'date'])['failure'].mean().reset_index()
rolling_failure_rate['date'] = pd.to_datetime(rolling_failure_rate['date'])
rolling_failure_rate['rolling_failure_rate_7d'] = rolling_failure_rate.groupby('machine_id')['failure'].transform(lambda x: x.rolling(window=7, min_periods=1).mean())

fig_health = px.line(rolling_failure_rate, x='date', y='rolling_failure_rate_7d', color='machine_id',
                     title='7-Day Rolling Failure Rate per Machine',
                     labels={'rolling_failure_rate_7d': '7-Day Rolling Failure Rate'})
fig_health.write_html("/content/drive/MyDrive/SmartFactory/Results/Week10/Monitoring/machine_health_rolling_failure.html")
fig_health.show()

# Feedback Loop: User Correction Analysis (Mockup example)

In [16]:
# Suppose a feedback file where operators mark if job delay was due to system or external factors
feedback_mock = pd.DataFrame({
    'job_id': schedule['job_id'].sample(20, random_state=42),
    'delay_reason': np.random.choice(['System', 'External', 'Unknown'], size=20, p=[0.6, 0.3, 0.1])
})

feedback_summary = feedback_mock['delay_reason'].value_counts(normalize=True).reset_index()
feedback_summary.columns = ['delay_reason', 'proportion']

fig_feedback = px.pie(feedback_summary, values='proportion', names='delay_reason',
                      title='Operator Feedback on Delay Reasons')
fig_feedback.write_html("/content/drive/MyDrive/SmartFactory/Results/Week10/Monitoring/delay_reason_feedback.html")
fig_feedback.show()

In [17]:
# Export Summary Report (CSV)

performance_report = pd.DataFrame([performance_metrics])
performance_report.to_csv("/content/drive/MyDrive/SmartFactory/Results/Week10/Performance/system_performance_summary.csv", index=False)

print("Week 10 Deployment & Monitoring script executed successfully.")
print("Dashboards and alerts saved in respective folders.")


Week 10 Deployment & Monitoring script executed successfully.
Dashboards and alerts saved in respective folders.


In [18]:
schedule['rescheduled_time'] = pd.to_datetime(schedule['rescheduled_time'], unit='s')
schedule['scheduled_start'] = schedule['rescheduled_time']
schedule['scheduled_end'] = schedule['scheduled_start'] + pd.to_timedelta(schedule['duration'], unit='m')

np.random.seed(42)
schedule['actual_start'] = schedule['scheduled_start'] + pd.to_timedelta(np.random.randint(-5, 10, size=len(schedule)), unit='m')
schedule['actual_end'] = schedule['actual_start'] + pd.to_timedelta(schedule['duration'], unit='m')

schedule['scheduled_duration'] = schedule['duration']
schedule['actual_duration'] = (schedule['actual_end'] - schedule['actual_start']).dt.total_seconds() / 60
schedule['duration_diff'] = schedule['actual_duration'] - schedule['scheduled_duration']

schedule['delay_minutes'] = schedule['duration_diff'].apply(lambda x: max(x, 0))

schedule['job_status'] = np.where(
    (schedule['delay_minutes'] > 10) | (schedule['failure_prob'] > 0.7),
    'Failure', 'Success'
)

for col in ['scheduled_start', 'scheduled_end', 'actual_start', 'actual_end']:
    schedule[col] = schedule[col].dt.strftime('%Y-%m-%d %H:%M:%S')

In [19]:
schedule.to_json("/content/drive/MyDrive/SmartFactory/Results/Week10/week10_final_schedule.json", orient='records', indent=2)

print(f"Augmented schedule saved")

Augmented schedule saved
