## Required Libraries

In [98]:
!pip install fastapi uvicorn scikit-learn pandas pydantic




In [99]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, OneHotEncoder

from fastapi import FastAPI, File, UploadFile
from pydantic import BaseModel


## Load Data


In [100]:
# Build a reproducible synthetic dataset from the global NumPy RNG
np.random.seed(42)
m = 100  # number of rows to generate

# The columns are drawn from the RNG in the order listed here;
# reordering the entries would change the generated values.
data = {
    "Machine_ID": np.arange(1, m + 1),
    "Temperature": np.random.uniform(low=50, high=100, size=m),
    "Run_Time": np.random.uniform(low=10, high=200, size=m),
    "Downtime_Flag": np.random.choice([0, 1], size=m, p=[0.7, 0.3]),
    "Pressure": np.random.uniform(low=20, high=150, size=m),
    "Vibration_Level": np.random.uniform(low=0, high=10, size=m),
    "Maintenance_Schedule": np.random.choice(["Yes", "No"], size=m, p=[0.4, 0.6]),
}

df = pd.DataFrame(data)

# Persist the frame to disk (row index omitted) so it can be re-read as a CSV
df.to_csv("manufacturing_data.csv", index=False)



In [101]:
# Read the CSV from disk and preview its first five rows
df = pd.read_csv("manufacturing_data.csv")
print(df.head(5))


   Machine_ID  Temperature    Run_Time  Downtime_Flag    Pressure  \
0           1    68.727006   15.971545              0   26.718624   
1           2    97.535715  130.917978              0   89.076102   
2           3    86.599697   69.727636              0   90.282566   
3           4    79.932924  106.628431              1  102.865887   
4           5    57.800932  182.437630              0  114.391873   

   Vibration_Level Maintenance_Schedule  
0         1.031239                   No  
1         9.025529                   No  
2         5.052524                  Yes  
3         8.264575                   No  
4         3.200496                   No  


## Data Processing

In [102]:
# Identify categorical columns (non-numeric, i.e. object dtype)
categorical_cols = df.select_dtypes(include=["object"]).columns.tolist()

# Handle categorical columns: One-Hot Encoding.
# Skipped entirely when the frame has no object-dtype columns.
if categorical_cols:
  encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
  encoded_data = encoder.fit_transform(df[categorical_cols])
  # Reuse df's index: pd.concat(axis=1) aligns on index labels, so a fresh
  # RangeIndex here would misalign rows (and introduce NaNs) whenever df's
  # index is not the default 0..n-1.
  encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(categorical_cols),
    index=df.index,
  )
  # Replace the original categorical columns with their encoded counterparts
  df = pd.concat([df.drop(columns=categorical_cols), encoded_df], axis=1)


In [103]:
# Locate the label column by naming convention: the first column whose name
# contains "Flag" (case-sensitive) or "target" (any case) is taken as the target.
target_column = next(
  (name for name in df.columns if "Flag" in name or "target" in name.lower()),
  None,
)

if target_column is None:
  raise ValueError("Target column not found in the dataset")

# Split the frame into the feature matrix and the label vector.
# NOTE(review): only the label is dropped, so Machine_ID remains in X as a
# feature — confirm that is intended.
y = df[target_column]
X = df.drop(columns=[target_column])


print(target_column)

Downtime_Flag


In [104]:
# Standardise every numeric feature column with a StandardScaler.
# NOTE(review): the scaler is fitted on all rows of X before the train/test
# split further down, so test rows contribute to the fitted statistics —
# confirm this is acceptable or fit on the training rows only.
scaler = StandardScaler()
numerical_cols = list(X.select_dtypes(include=[np.number]).columns)
scaled_values = scaler.fit_transform(X[numerical_cols])
X[numerical_cols] = scaled_values


## Predictive Model


In [105]:
# Hold out 10% of the rows for evaluation; the fixed random_state makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)


In [60]:
# Train a logistic-regression classifier on the training split
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Print evaluation results.
# zero_division=0 reports 0.0 for a class that is never predicted (the same
# value the default produces) without emitting an UndefinedMetricWarning
# for every affected metric.
print(f"Accuracy: {accuracy}")
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.699942


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:
               precision    recall  f1-score   support

           0       0.70      1.00      0.82    699942
           1       0.00      0.00      0.00    300058

    accuracy                           0.70   1000000
   macro avg       0.35      0.50      0.41   1000000
weighted avg       0.49      0.70      0.58   1000000


Confusion Matrix:
 [[699942      0]
 [300058      0]]
