<a href="https://colab.research.google.com/github/kavyakapoor200/Predictive_analysis/blob/main/predictive_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [188]:
import pandas as pd
import random

# Generate a dataset with 100 rows
data = {
    "Machine_ID": list(range(1, 501)),  # Machine IDs from 1 to 100
    "Temperature": [random.randint(60, 100) for _ in range(500)],  # Random temperatures between 60 and 100
    "Run_Time": [random.randint(50, 500) for _ in range(500)],  # Random runtime between 50 and 500 hours
    "Downtime_Flag": [random.choice([0, 1]) for _ in range(500)]  # Random binary flag (0 or 1)
}

# Convert it to a DataFrame
df = pd.DataFrame(data)
# Save the dataset to a CSV file
df.to_csv("big_sample_data1.csv", index=False)

# Display the first 10 rows of the dataset
print("Dataset created and saved as 'big_sample_data.csv'. First 10 rows:")
print(df.head(10))

Dataset created and saved as 'big_sample_data.csv'. First 10 rows:
   Machine_ID  Temperature  Run_Time  Downtime_Flag
0           1           69        65              1
1           2           70       420              0
2           3           95        78              0
3           4           97       388              1
4           5           63       409              1
5           6           70        78              1
6           7           60       103              0
7           8           98       391              0
8           9           70       304              1
9          10           94       192              0


In [192]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
df = pd.read_csv("big_sample_data1.csv")

# Define features (X) and target (y)
X = df[["Temperature", "Run_Time"]]
y = df["Downtime_Flag"]

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Dataset loaded and split into training and testing sets.")
print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")



Dataset loaded and split into training and testing sets.
Training samples: 400, Testing samples: 100


In [191]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)
print("Model training completed.")

from sklearn.metrics import accuracy_score, f1_score
# Make predictions on the testing data
y_pred = model.predict(X_test)
print(f"Model features: {X.columns}")


Model training completed.
Model features: Index(['Temperature', 'Run_Time'], dtype='object')


In [193]:
# Calculate accuracy and F1 score
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Model Evaluation:\nAccuracy: {accuracy:.2f}\nF1 Score: {f1:.2f}")

Model Evaluation:
Accuracy: 0.40
F1 Score: 0.57


In [194]:
import pickle
import joblib
joblib.dump(model, "logistic_model.pkl")
print("Model saved successfully.")


Model saved successfully.


In [195]:
# Example: Predict downtime for a new data point
new_data = pd.DataFrame({"Temperature": [100], "Run_Time": [150]})
prediction = loaded_model.predict(new_data)
print(prediction)
probability= loaded_model.predict_proba(new_data)
print(probability)
confidence = max(loaded_model.predict_proba(new_data)[0])

print(f"Prediction: {'Downtime' if prediction[0] == 1 else 'No Downtime'}, Confidence: {confidence:.2f}")

[0]
[[0.52869497 0.47130503]]
Prediction: No Downtime, Confidence: 0.53


In [None]:
!pip install fastapi uvicorn scikit-learn pandas pydantic python-multipart




In [196]:
from fastapi import FastAPI, File, UploadFile, HTTPException, Form
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import pickle
import joblib
from contextlib import asynccontextmanager

# Use the lifespan context manager for app startup and shutdown
@asynccontextmanager
async def lifespan(app: FastAPI):
    global model
    model = None  # Initialize the model as None
    try:
        # Load the model if available
        model = joblib.load("logistic_model.pkl")
        print("Model loaded successfully.")
        print("Expected features:", model.feature_names_in_)
    except FileNotFoundError:
        print("No pre-trained model found. Train the model first.")
    yield  # Continue with FastAPI's normal lifespan behavior

# Create FastAPI app with the lifespan event
app = FastAPI(lifespan=lifespan)

# Add CORS middleware (optional, useful for frontend integration)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables
data = None

# Endpoint to upload data
@app.post("/upload")
async def upload_data(file: UploadFile = File(...)):
    global data
    if file.content_type != "text/csv":
        raise HTTPException(status_code=400, detail="Only CSV files are accepted.")

    try:
        # Read the CSV data
        data = pd.read_csv(file.file)
        if 'Downtime_Flag' not in data.columns:
            raise ValueError("The uploaded file must contain a 'Downtime_Flag' column.")

        return {"message": "File uploaded successfully.", "data_preview": data.head().to_dict()}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")

# Endpoint to train the model
@app.post("/train")
def train_model():
    global model, data
    if data is None:
        raise HTTPException(status_code=400, detail="No data uploaded. Please upload data first.")

    try:
        X = data[["Temperature", "Run_Time"]]
        y = data["Downtime_Flag"]

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train a Logistic Regression model
        model = LogisticRegression()
        model.fit(X_train, y_train)

        # Evaluate the model
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='binary')

        # Save the model
        joblib.dump(model, "logistic_model.pkl")

        return {"message": "Model trained successfully.", "accuracy": accuracy, "f1_score": f1}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error during training: {str(e)}")

# Endpoint to make predictions
class PredictInput(BaseModel):
    Temperature: float
    Run_Time: float

@app.post("/predict")
def predict(input: PredictInput):
    global model
    if model is None:
        raise HTTPException(status_code=400, detail="Model not trained. Please train the model first.")

    try:
        # Prepare the input data
        input_data = pd.DataFrame([input.dict()])

        # Make the prediction
        prediction = model.predict(input_data)[0]
        confidence = max(model.predict_proba(input_data)[0])

        return {"Downtime": "Yes" if prediction == 1 else "No", "Confidence": confidence}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")


In [None]:
import pickle
from pyngrok import ngrok
import uvicorn
import threading
# Function to run the FastAPI app using Uvicorn
def run():
    uvicorn.run(app, host="localhost", port=8000)


# Start the FastAPI app in a background thread
thread = threading.Thread(target=run)
thread.start()

# Set up ngrok to tunnel the app
public_url = ngrok.connect(8000)
print(f"FastAPI app is live at: {public_url}")

INFO:     Started server process [3851]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('127.0.0.1', 8000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


Model loaded successfully.
Expected features: ['Temperature' 'Run_Time']
FastAPI app is live at: NgrokTunnel: "https://5152-34-86-232-7.ngrok-free.app" -> "http://localhost:8000"


In [None]:
!apt-get update
!apt-get install curl


0% [Working]            Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
0% [Connecting to archive.ubuntu.com (185.125.190.82)] [1 InRelease 5,484 B/129 kB 4%] [Connected to                                                                                                    Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
0% [Connecting to archive.ubuntu.com (185.125.190.82)] [1 InRelease 69.2 kB/129 kB 54%] [2 InRelease0% [Connecting to archive.ubuntu.com (185.125.190.82)] [1 InRelease 124 kB/129 kB 96%] [Connected to                                                                                                    Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 http://securi

In [None]:
from google.colab import files

uploaded = files.upload()
file_name = list(uploaded.keys())[0]
print(f"Uploaded file: {file_name}")


Saving big_sample_data1.csv to big_sample_data1 (4).csv
Uploaded file: big_sample_data1 (4).csv


In [197]:
# Example of using variables in the curl command
file_name = "big_sample_data1.csv"
public_url = "5152-34-86-232-7.ngrok-free.app"

# Properly formatted curl command with f-string
!curl -X POST "{public_url}/upload" \
-H "Content-Type: multipart/form-data" \
-F "file=@{file_name};type=text/csv"


In [198]:
!curl -X POST "5152-34-86-232-7.ngrok-free.app"


In [199]:
!curl -X POST "https://c518-34-86-232-7.ngrok-free.app/predict" \
-H "Content-Type: application/json" \
-d '{"Temperature": 75.0, "Run_Time": 120.5}'



INFO:     34.86.232.7:0 - "POST /predict HTTP/1.1" 200 OK
{"Downtime":"Yes","Confidence":0.5407108299161656}

<ipython-input-25-e3b58afb32ce>:67: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  input_data = pd.DataFrame([input.dict()])
