<a href="https://colab.research.google.com/github/k-ganda/database_design_pld5/blob/main/database_design.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import requests
import joblib

class UserBehaviorPredictor:
    def __init__(self):
        # Load the dataset
        self.df = pd.read_csv('user_behavior_dataset.csv')
        self.model = None
        self.scaler = StandardScaler()

    def prepare_data(self):
        # Separate features and target
        X = self.df.drop(['User ID', 'User Behavior Class'], axis=1)
        y = self.df['User Behavior Class']

        # Convert categorical variables
        X = pd.get_dummies(X, columns=['Device Model', 'Operating System', 'Gender'])

        # Scale numerical features
        numerical_cols = ['App Usage Time (min/day)', 'Screen On Time (hours/day)',
                         'Battery Drain (mAh/day)', 'Number of Apps Installed',
                         'Data Usage (MB/day)', 'Age']
        X[numerical_cols] = self.scaler.fit_transform(X[numerical_cols])

        return X, y

    def train_model(self):
        # Prepare data
        X, y = self.prepare_data()

        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Train Random Forest model (better than Decision Tree for this case)
        self.model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42
        )
        self.model.fit(X_train, y_train)

        # Print model performance
        train_score = self.model.score(X_train, y_train)
        test_score = self.model.score(X_test, y_test)
        print(f"Training accuracy: {train_score:.3f}")
        print(f"Testing accuracy: {test_score:.3f}")

        # Save the model and scaler
        joblib.dump(self.model, 'user_behavior_rf_model.joblib')
        joblib.dump(self.scaler, 'scaler.joblib')

    def fetch_latest_entry(self, api_url=" https://database-design-pld5.onrender.com/docs#/"):
        """
        Fetch the latest entry from the API
        Note: Replace the API URL with your actual endpoint
        """
        try:
            response = requests.get(api_url)
            if response.status_code == 200:
                data = response.json()
                print("Latest entry fetched:", data)
                return data
            else:
                print(f"Error fetching data: {response.status_code}")
                return None
        except Exception as e:
            print(f"Error connecting to API: {str(e)}")
            return None

    def prepare_input_data(self, input_data):
        """
        Prepare a single input entry for prediction
        """
        # Convert input to DataFrame
        input_df = pd.DataFrame([input_data])

        # Drop User ID if present
        if 'User ID' in input_df.columns:
            input_df = input_df.drop('User ID', axis=1)

        # Create dummy variables
        input_df = pd.get_dummies(input_df, columns=['Device Model', 'Operating System', 'Gender'])

        # Ensure all columns from training are present
        for col in self.model.feature_names_in_:
            if col not in input_df.columns:
                input_df[col] = 0

        # Reorder columns to match training data
        input_df = input_df[self.model.feature_names_in_]

        return input_df

    def predict(self, input_data):
        """
        Make prediction for new input data
        """
        # Prepare input data
        prepared_data = self.prepare_input_data(input_data)

        # Make prediction
        prediction = self.model.predict(prepared_data)
        probabilities = self.model.predict_proba(prepared_data)

        # Get prediction confidence
        confidence = np.max(probabilities) * 100

        return {
            'predicted_class': int(prediction[0]),
            'confidence': f"{confidence:.2f}%",
            'probabilities': {f"Class {i}": f"{prob:.2f}%"
                            for i, prob in enumerate(probabilities[0])}
        }

# Example usage
if __name__ == "__main__":
    # Initialize predictor
    predictor = UserBehaviorPredictor()

    # Train the model
    print("Training model...")
    predictor.train_model()


    print("\nFetching latest entry...")

    sample_entry = {
        'Device Model': 'iPhone 12',
        'Operating System': 'iOS',
        'App Usage Time (min/day)': 187,
        'Screen On Time (hours/day)': 4.3,
        'Battery Drain (mAh/day)': 1367,
        'Number of Apps Installed': 58,
        'Data Usage (MB/day)': 988,
        'Age': 31,
        'Gender': 'Female'
    }

    print("\nMaking prediction for sample entry:")
    prediction_result = predictor.predict(sample_entry)
    print("\nPrediction Results:")
    print(f"Predicted User Behavior Class: {prediction_result['predicted_class']}")
    print(f"Prediction Confidence: {prediction_result['confidence']}")
    print("\nClass Probabilities:")
    for class_name, probability in prediction_result['probabilities'].items():
        print(f"{class_name}: {probability}")

Training model...
Training accuracy: 1.000
Testing accuracy: 1.000

Fetching latest entry...

Making prediction for sample entry:

Prediction Results:
Predicted User Behavior Class: 5
Prediction Confidence: 100.00%

Class Probabilities:
Class 0: 0.00%
Class 1: 0.00%
Class 2: 0.00%
Class 3: 0.00%
Class 4: 1.00%
