In [13]:
import pandas as pd
import numpy as np
import random
import plotly.express as px
from datetime import datetime, timedelta

# Seed both random number generators used in this notebook so that
# "Restart Kernel -> Run All" regenerates exactly the same sample data.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Define the start and end date of production
start_date = datetime(2022, 1, 1, 0, 0, 0)
end_date = datetime(2022, 3, 31, 23, 59, 59)

# Define the time interval between each part production (100..120 inclusive).
# NOTE(review): nothing in this cell reads this list -- the generation loop
# spaces parts by a fixed 100 seconds. Kept because other cells may use it.
time_interval = list(range(100, 121))

# Define the time interval of the break time as [start, end].
# NOTE(review): both bounds fall on 2022-01-01, so only that single day has a
# break window -- confirm whether a daily break was intended.
break_time = [datetime(2022, 1, 1, 6, 30, 0), datetime(2022, 1, 1, 7, 0, 0)]

# Define the product status: one entry per 100-second production slot, holding
# the probability that the part produced in that slot is good.
n_parts = int((end_date - start_date).total_seconds() / 100)
product_status = [0.9] * n_parts  # 90% of the products are good
# Every 1000th slot gets a different probability instead: 0.7 or 0.95, each
# picked with equal chance. All other slots keep the 0.9 baseline.
for i in range(0, n_parts, 1000):
    product_status[i] = 0.7 if random.random() < 0.5 else 0.95

# Define the defect area
defect_area = ['A', 'B', 'C', 'D', 'E', 'F']

# Define the defect location: each area has ten locations named
# '<area>1' .. '<area>10' (e.g. 'A1', ..., 'A10').
defect_location = {
    area: [area + str(number) for number in range(1, 11)]
    for area in defect_area
}

# Build one record per 100-second production slot between start_date and
# end_date, skipping any slot that falls inside the break window.
slot_count = int((end_date - start_date).total_seconds() / 100)
area_weights = [0.1, 0.1, 0.1, 0.1, 0.3, 0.3]  # one weight per entry of defect_area

data = []
for slot in range(slot_count):
    produced_at = start_date + timedelta(seconds=slot * 100)

    # Slots inside the break window (both bounds inclusive) produce nothing
    if break_time[0] <= produced_at <= break_time[1]:
        continue

    # The part is good with the probability stored for this slot
    if random.random() < product_status[slot]:
        status = 'good'
    else:
        status = 'bad'

    # Every part, good or bad, is tagged with a weighted-random area and a
    # uniformly chosen location inside that area
    area_label = np.random.choice(defect_area, p=area_weights)
    location_label = np.random.choice(defect_location[area_label])

    data.append([produced_at, status, area_label, location_label])

# Turn the collected records into a pandas dataframe
df = pd.DataFrame(
    data,
    columns=['time', 'product_status', 'defect_area', 'defect_location'],
)

# Scatter plot of every part over time: one row per defect area, colored by status
fig = px.scatter(data_frame=df, x='time', y='defect_area', color='product_status')

# Report the dataset size, then the share of 'bad' parts within each area
print('Total number of products:', len(df))
for area in defect_area:
    area_df = df[df['defect_area'] == area]
    # Plain Python ints keep the division semantics of a len()/len() ratio
    bad_count = int((area_df['product_status'] == 'bad').sum())
    bad_percent = bad_count / len(area_df) * 100
    print('Defect ratio of area {}: {:.2f}%'.format(area, bad_percent))

# Write the generated rows to a csv file, leaving out the dataframe index
df.to_csv('sample_data.csv', index=False)


Total number of products: 77740
Defect ratio of area A: 10.64%
Defect ratio of area B: 10.07%
Defect ratio of area C: 10.34%
Defect ratio of area D: 9.75%
Defect ratio of area E: 10.06%
Defect ratio of area F: 9.86%


In [14]:
# Display whatever figure is currently bound to `fig` via the notebook's rich
# output (the defect-area scatter plot when run right after the cell above).
fig

In [17]:
# Daily defect rate per defect area: the percentage of parts marked 'bad'
# among all parts recorded for that (calendar day, area) pair.
# The mean of a boolean "is bad" column is the bad fraction, so the built-in
# groupby mean replaces a per-group Python lambda. The result column keeps the
# name 'product_status' so code that reads this frame is unaffected.
daily_defect_rate = (
    df.assign(product_status=df['product_status'].eq('bad'))
    .groupby([pd.Grouper(key='time', freq='D'), 'defect_area'])['product_status']
    .mean()
    .mul(100)
    .reset_index()
)
print(daily_defect_rate)

# One line per defect area. The axis/legend labels are overridden because the
# 'product_status' column holds a percentage here, not a good/bad status.
fig = px.line(
    daily_defect_rate,
    x='time',
    y='product_status',
    color='defect_area',
    labels={
        'time': 'Date',
        'product_status': 'Defect rate (%)',
        'defect_area': 'Defect area',
    },
    title='Daily defect rate by defect area',
)
fig.show()


          time defect_area  product_status
0   2022-01-01           A       12.500000
1   2022-01-01           B        7.594937
2   2022-01-01           C       19.148936
3   2022-01-01           D        2.352941
4   2022-01-01           E        8.823529
..         ...         ...             ...
535 2022-03-31           B       10.309278
536 2022-03-31           C        7.594937
537 2022-03-31           D        9.782609
538 2022-03-31           E       12.698413
539 2022-03-31           F        9.787234

[540 rows x 3 columns]
