<a href="https://colab.research.google.com/github/roronoa2003/immobilthon-4.0/blob/main/immobilithon_4_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import pandas as pd
import plotly.express as px
import base64
import io
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import pickle
import os

# Feature Engineering
def add_features(df):
    """Add 5-row rolling statistics of the sensor columns.

    Rolling means are added for RPM, temperature, speed and fuel
    consumption; rolling standard deviations for RPM and temperature.

    The new columns are written onto ``df`` itself (where the first four
    rows of each stay NaN, because the window is not yet full). The value
    returned is a copy in which every NaN in the frame is replaced by 0.
    """
    window = 5
    # new column -> source column, in the order the columns are appended
    mean_columns = {
        'RPM_Moving_Avg': 'Engine RPM',
        'Temperature_Moving_Avg': 'Engine Temperature',
        'Speed_Moving_Avg': 'Vehicle Speed',
        'Fuel_Moving_Avg': 'Fuel Consumption',
    }
    std_columns = {
        'RPM_Std_Dev': 'Engine RPM',
        'Temperature_Std_Dev': 'Engine Temperature',
    }

    for new_name, source in mean_columns.items():
        df[new_name] = df[source].rolling(window=window).mean()
    for new_name, source in std_columns.items():
        df[new_name] = df[source].rolling(window=window).std()

    return df.fillna(0)

# Severity and Recommendations
def determine_severity(issue):
    """Return the severity label for a health issue name.

    Known issues map to "Critical", "Moderate" or "Minor"; any other
    value yields "Unknown".
    """
    levels = dict.fromkeys(("Engine Overload", "Overheating"), "Critical")
    levels["High Speed"] = "Moderate"
    levels["High Fuel Consumption"] = "Minor"
    try:
        return levels[issue]
    except KeyError:
        return "Unknown"

def maintenance_recommendations(issue):
    """Return the maintenance advice text for a health issue name.

    Issues without a dedicated entry get the generic
    "Consult a mechanic." advice.
    """
    advice_by_issue = dict([
        ("Engine Overload", "Inspect engine load and reduce strain."),
        ("Overheating", "Check coolant levels and radiator functionality."),
        ("High Speed", "Advise safer driving speeds."),
        ("High Fuel Consumption", "Inspect fuel system and driving habits."),
    ])
    if issue in advice_by_issue:
        return advice_by_issue[issue]
    return "Consult a mechanic."

# Load and preprocess data
def load_and_preprocess_data(file_path):
    """Read the training workbook and prepare it for model fitting.

    Steps, in order: read the Excel file, add the rolling-window columns
    via ``add_features``, derive a rule-based 'Health Issue' label per
    row, encode that label as integers, and standardise all feature
    columns in place.

    Side effects: writes the fitted ``LabelEncoder`` to
    'label_encoder.pkl' and the fitted ``StandardScaler`` to 'scaler.pkl'
    in the current working directory.

    Returns:
        A ``(df, feature_names, target_column)`` tuple where
        ``target_column`` is 'Health Issue Encoded'.
    """
    sensor_columns = ['Engine RPM', 'Engine Temperature', 'Vehicle Speed', 'Fuel Consumption']
    rolling_columns = [
        'RPM_Moving_Avg', 'Temperature_Moving_Avg', 'Speed_Moving_Avg',
        'Fuel_Moving_Avg', 'RPM_Std_Dev', 'Temperature_Std_Dev'
    ]
    label_column = 'Health Issue'
    encoded_column = 'Health Issue Encoded'

    # (column, threshold, label) checked in priority order: the first rule
    # whose threshold is exceeded decides the label.
    rules = (
        ('Engine RPM', 6000, "Engine Overload"),
        ('Engine Temperature', 100, "Overheating"),
        ('Vehicle Speed', 120, "High Speed"),
        ('Fuel Consumption', 12, "High Fuel Consumption"),
    )

    def determine_health(row):
        for column, limit, label in rules:
            if row[column] > limit:
                return label
        return "Healthy"

    frame = add_features(pd.read_excel(file_path))
    feature_names = sensor_columns + rolling_columns

    # Labels are derived from the raw (not yet standardised) readings.
    frame[label_column] = frame.apply(determine_health, axis=1)

    encoder = LabelEncoder()
    frame[encoded_column] = encoder.fit_transform(frame[label_column])
    with open('label_encoder.pkl', 'wb') as handle:
        pickle.dump(encoder, handle)

    standardiser = StandardScaler()
    frame[feature_names] = standardiser.fit_transform(frame[feature_names])
    with open('scaler.pkl', 'wb') as handle:
        pickle.dump(standardiser, handle)

    return frame, feature_names, encoded_column

# Train model
def train_model(df, features, target):
    """Fit, evaluate and persist the stacked vehicle-health classifier.

    The data is split 60/40 into train and test sets (``random_state=42``).
    A ``StackingClassifier`` combining a random forest, gradient boosting
    and logistic regression, with a random forest as final estimator, is
    fitted on the training part.

    Side effects: prints the test accuracy and classification report,
    shows a confusion-matrix plot, and writes the fitted model to
    'vehicle_health_model.pkl' in the current working directory.

    Args:
        df: Frame containing the feature columns and the target column.
        features: Names of the feature columns.
        target: Name of the (encoded) target column.

    Returns:
        The fitted ``StackingClassifier``.
    """
    X = df[features]
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

    base_models = [
        ('rf', RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42, n_jobs=-1)),
        ('gb', GradientBoostingClassifier(n_estimators=50, max_depth=3, random_state=42)),
        ('lr', LogisticRegression(max_iter=100))
    ]

    stack_model = StackingClassifier(
        estimators=base_models, final_estimator=RandomForestClassifier(n_estimators=20, random_state=42, n_jobs=-1)
    )

    stack_model.fit(X_train, y_train)
    y_pred = stack_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    print(classification_report(y_test, y_pred))

    # Confusion matrix
    ConfusionMatrixDisplay.from_estimator(stack_model, X_test, y_test)
    plt.show()

    # Use a context manager so the file is flushed and closed even if
    # pickling fails part-way through.
    with open("vehicle_health_model.pkl", "wb") as file:
        pickle.dump(stack_model, file)
    return stack_model

# Load or train model
# predict_health() reads the module-level names `model`, `label_encoder` and
# `scaler`, so all three must be bound whichever branch runs. Retrain when
# any artifact is missing; training rewrites all three pickle files.
if not all(
    os.path.exists(path)
    for path in ("vehicle_health_model.pkl", "label_encoder.pkl", "scaler.pkl")
):
    df, features, target = load_and_preprocess_data("Final_Updated_Vehicle_Data_10000_Rows.xlsx")
    model = train_model(df, features, target)
else:
    with open("vehicle_health_model.pkl", "rb") as file:
        model = pickle.load(file)

# NOTE: pickle executes code on load; only use artifact files produced by
# this notebook.
# The encoder and scaler are read back from disk in both cases, because the
# training path only keeps them as locals of load_and_preprocess_data().
with open('label_encoder.pkl', 'rb') as file:
    label_encoder = pickle.load(file)
with open('scaler.pkl', 'rb') as file:
    scaler = pickle.load(file)

# Prediction function
def predict_health(data):
    """Predict a health issue, severity and recommendation for each row.

    The rolling-window columns are added with ``add_features``, all
    feature columns are standardised with the module-level ``scaler``,
    and the module-level ``model`` predicts an encoded label that
    ``label_encoder`` turns back into an issue name.

    Note that ``add_features`` writes its new columns onto the frame
    passed in, and that the feature columns of the returned frame hold
    standardised values rather than the raw readings.

    Returns:
        A frame with 'Health Issue Prediction', 'Severity' and
        'Recommendation' columns added.
    """
    raw_columns = ['Engine RPM', 'Engine Temperature', 'Vehicle Speed', 'Fuel Consumption']
    derived_columns = [
        'RPM_Moving_Avg', 'Temperature_Moving_Avg', 'Speed_Moving_Avg',
        'Fuel_Moving_Avg', 'RPM_Std_Dev', 'Temperature_Std_Dev'
    ]
    all_features = raw_columns + derived_columns
    issue_column = 'Health Issue Prediction'

    data = add_features(data)
    data[all_features] = scaler.transform(data[all_features])

    encoded_labels = model.predict(data[all_features])
    data[issue_column] = label_encoder.inverse_transform(encoded_labels)

    predicted_issue = data[issue_column]
    data['Severity'] = predicted_issue.apply(determine_severity)
    data['Recommendation'] = predicted_issue.apply(maintenance_recommendations)
    return data

# Dash app
app = dash.Dash(__name__)
app.title = "Enhanced Vehicle Health Dashboard"

# Dashed drop-zone styling for the upload widget.
_upload_box_style = {
    'width': '100%', 'height': '60px', 'lineHeight': '60px',
    'borderWidth': '1px', 'borderStyle': 'dashed', 'borderRadius': '5px',
    'textAlign': 'center', 'margin': '10px'
}

# Upload control plus the container the callback fills with the file summary.
_upload_section = html.Div([
    dcc.Upload(
        id='upload-data',
        children=html.Div(['Drag and Drop or ', html.A('Select a File (.xlsx)')]),
        style=_upload_box_style,
        multiple=False
    ),
    html.Div(id='output-data-upload')
])

# Page: title, upload section, then one empty container per callback output.
app.layout = html.Div(
    [
        html.H1("Enhanced Vehicle Health Dashboard", style={'textAlign': 'center'}),
        _upload_section,
    ]
    + [
        html.Div(id=section_id, style={'marginTop': 20})
        for section_id in ('service-pamphlet', 'health-chart', 'trend-analysis')
    ]
)

@app.callback(
    [
        Output(component_id, 'children')
        for component_id in (
            'output-data-upload', 'service-pamphlet', 'health-chart', 'trend-analysis'
        )
    ],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')]
)
def update_output(contents, filename):
    """Turn an uploaded workbook into the four dashboard sections.

    Args:
        contents: Upload payload of the form "<header>,<base64 data>", or
            ``None`` when nothing has been uploaded yet.
        filename: Name of the uploaded file, shown in the summary.

    Returns:
        A 4-tuple matching the callback outputs: upload summary, service
        pamphlet, issue bar chart and RPM trend chart. When there is no
        upload, a required column is missing, or processing raises, the
        first element carries a message and the other three are empty
        strings.
    """
    def message_only(text):
        # Show a message in the first output and blank out the rest.
        return html.Div(text), "", "", ""

    if contents is None:
        return message_only("Please upload an Excel file to proceed.")

    try:
        _header, encoded = contents.split(',')
        workbook_bytes = base64.b64decode(encoded)
        df_uploaded = pd.read_excel(io.BytesIO(workbook_bytes))

        required_columns = ['Engine RPM', 'Engine Temperature', 'Vehicle Speed', 'Fuel Consumption']
        missing = [col for col in required_columns if col not in df_uploaded.columns]
        if missing:
            return message_only("Uploaded file is missing required columns.")

        # Predict on a copy so the uploaded frame keeps its raw readings.
        df_predicted = predict_health(df_uploaded.copy())
        is_healthy = df_predicted['Health Issue Prediction'] == "Healthy"
        issues_summary = df_predicted[~is_healthy]['Health Issue Prediction'].value_counts()
        normal_count = len(df_predicted[is_healthy])

        if issues_summary.empty:
            chart = html.Div("No issues detected.")
        else:
            fig = px.bar(
                issues_summary, x=issues_summary.index, y=issues_summary.values,
                labels={'x': 'Issues', 'y': 'Count'}, title="Summary of Health Issues"
            )
            chart = dcc.Graph(figure=fig)

        issue_items = [
            html.Li(f"{issue}: {count} occurrences ({determine_severity(issue)}) - {maintenance_recommendations(issue)}")
            for issue, count in issues_summary.items()
        ]
        pamphlet = html.Div([
            html.H3("Service Pamphlet"),
            html.H4("Issues Detected:"),
            html.Ul(issue_items),
            html.H4("Normal Functioning:"),
            html.P(f"Number of healthy readings: {normal_count}")
        ])

        # Trend analysis on the raw uploaded readings
        trend_fig = px.line(df_uploaded, x=df_uploaded.index, y='Engine RPM', title="RPM Trends Over Time")
        trend_chart = dcc.Graph(figure=trend_fig)

        upload_summary = html.Div([
            html.H5(f"Uploaded File: {filename}"),
            html.H6(f"Total Records: {df_uploaded.shape[0]}"),
            html.Div("Sample Data:"),
            html.Pre(df_uploaded.head().to_string(index=False))
        ])
        return upload_summary, pamphlet, chart, trend_chart
    except Exception as e:
        # Callback boundary: report any failure in the page.
        return message_only(f"Error processing file: {str(e)}")

if __name__ == '__main__':
    # Start the Dash development server when executed as a script.
    # `app.run` is the supported entry point; `app.run_server` is the
    # deprecated spelling and is not available in newer Dash releases.
    app.run(debug=False)


<IPython.core.display.Javascript object>

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from datetime import timedelta

# Read the vehicle log from the 'Worksheet' sheet of the workbook
df = pd.read_excel(
    'Updated_Vehicle_Data_with_Random_Step_Distance_Travelled.xlsx',
    sheet_name='Worksheet',
)

# Parse the Timestamp column so rows can be ordered chronologically
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# One NaN-free, time-sorted frame per signal. reset_index() without `drop`
# gives a fresh 0..n-1 index and keeps the old labels in an 'index' column.
df_oil = (
    df[['Timestamp', 'Engine Oil Capacity']]
    .dropna()
    .sort_values(by='Timestamp')
    .reset_index()
)
df_tyre = (
    df[['Timestamp', 'Tyre Pressure']]
    .dropna()
    .sort_values(by='Timestamp')
    .reset_index()
)

# Scale each signal into [0, 1] with its own scaler so the predictions can
# be mapped back to original units later
scaler_oil = MinMaxScaler(feature_range=(0, 1))
oil_scaled = scaler_oil.fit_transform(df_oil[['Engine Oil Capacity']])

scaler_tyre = MinMaxScaler(feature_range=(0, 1))
tyre_scaled = scaler_tyre.fit_transform(df_tyre[['Tyre Pressure']])

# Function to create sequences for LSTM
def create_sequences(data, time_step=10):
    """Slice column 0 of ``data`` into overlapping windows and next values.

    Window ``k`` holds rows ``k .. k + time_step - 1`` and its target is
    row ``k + time_step``. ``len(data) - time_step`` pairs are produced,
    so the final window stops one row short of the last row.

    Returns:
        ``(X, y)`` as NumPy arrays; both are empty when ``data`` has no
        more than ``time_step`` rows.
    """
    n_windows = len(data) - time_step
    windows = [data[start:start + time_step, 0] for start in range(n_windows)]
    targets = [data[start + time_step, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Build (window, next value) training pairs for both signals
time_step = 10
X_oil, y_oil = create_sequences(oil_scaled, time_step)
X_tyre, y_tyre = create_sequences(tyre_scaled, time_step)

# Add a trailing feature axis: [samples, time steps] -> [samples, time steps, 1]
X_oil = X_oil.reshape(X_oil.shape[0], X_oil.shape[1], 1)
X_tyre = X_tyre.reshape(X_tyre.shape[0], X_tyre.shape[1], 1)

# Building the LSTM model
def build_lstm_model(input_shape):
    """Build and compile a two-layer LSTM regressor.

    Architecture: LSTM(50, returning sequences) -> LSTM(50) -> Dense(25)
    -> Dense(1), compiled with the Adam optimizer and mean squared error
    loss.

    Args:
        input_shape: Shape of one sample, ``(time steps, features)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Imported here because the cell's import list does not include it.
    from tensorflow.keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer: passing `input_shape`
    # to the first LSTM makes Keras warn "Do not pass an
    # `input_shape`/`input_dim` argument to a layer".
    model.add(Input(shape=input_shape))
    model.add(LSTM(50, return_sequences=True))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def _forecast_recursive(model, seed_window, steps, window_size):
    """Roll ``model`` forward ``steps`` times, feeding predictions back in.

    Each prediction is appended to the window and the oldest value is
    dropped, so later predictions are based on earlier predicted values.
    Returns the predictions (still in scaled units) as a list.
    """
    window = np.asarray(seed_window).reshape(-1)
    forecasts = []
    for _ in range(steps):
        # verbose=0: otherwise Keras prints a progress bar for every step.
        next_value = model.predict(window.reshape(1, window_size, 1), verbose=0)[0][0]
        forecasts.append(next_value)
        window = np.append(window[1:], next_value)
    return forecasts


def _first_date_at_or_below(last_timestamp, values, threshold):
    """Return the date of the first value <= ``threshold``.

    Value ``k`` is dated ``last_timestamp + k days``. Falls back to the
    text "Prediction beyond reasonable range" when no value qualifies.
    NOTE(review): this treats one forecast step as one day - confirm the
    data is sampled daily.
    """
    return next(
        (
            last_timestamp + timedelta(days=offset)
            for offset, value in enumerate(np.ravel(values))
            if value <= threshold
        ),
        "Prediction beyond reasonable range",
    )


# Training the LSTM model for Engine Oil Capacity
lstm_model_oil = build_lstm_model((X_oil.shape[1], 1))
lstm_model_oil.fit(X_oil, y_oil, epochs=20, batch_size=16, verbose=1)

# Predicting future values for oil capacity.
# The seed X_oil[-1] is the last training window, which ends one row before
# the final reading, so forecast offset 0 lines up with the last timestamp.
future_steps = 365
oil_predictions = _forecast_recursive(lstm_model_oil, X_oil[-1], future_steps, time_step)

# Inverse transform predictions back to original units
oil_predictions_inverse = scaler_oil.inverse_transform(np.array(oil_predictions).reshape(-1, 1))

# Finding the future date when Engine Oil Capacity reaches zero
future_date_oil_lstm = _first_date_at_or_below(
    df_oil['Timestamp'].iloc[-1], oil_predictions_inverse, 0
)

# Training the LSTM model for Tyre Pressure
lstm_model_tyre = build_lstm_model((X_tyre.shape[1], 1))
lstm_model_tyre.fit(X_tyre, y_tyre, epochs=20, batch_size=16, verbose=1)

# Predicting future values for tyre pressure (same seeding as above)
tyre_predictions = _forecast_recursive(lstm_model_tyre, X_tyre[-1], future_steps, time_step)

# Inverse transform predictions back to original units
tyre_predictions_inverse = scaler_tyre.inverse_transform(np.array(tyre_predictions).reshape(-1, 1))

# Finding the future date when Tyre Pressure reaches 26
future_date_tyre_lstm = _first_date_at_or_below(
    df_tyre['Timestamp'].iloc[-1], tyre_predictions_inverse, 26
)

# Output the predicted dates
print("Future date when Engine Oil Capacity reaches zero:", future_date_oil_lstm)
print("Future date when Tyre Pressure reaches 26:", future_date_tyre_lstm)


Epoch 1/20



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - loss: 0.0610
Epoch 2/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0239
Epoch 3/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0153
Epoch 4/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.0077
Epoch 5/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0050
Epoch 6/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.0034
Epoch 7/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0034
Epoch 8/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0034
Epoch 9/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.0023
Epoch 10/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0022



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0981
Epoch 2/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.0861
Epoch 3/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 0.0869
Epoch 4/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0871
Epoch 5/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0890
Epoch 6/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 0.0865
Epoch 7/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0873
Epoch 8/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0872
Epoch 9/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 0.0863
Epoch 10/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0869


In [None]:
!pip install dash

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Downloadi

In [None]:
!pip install dash pandas plotly openpyxl tensorflow scikit-learn

