# Model Training and Calibrating

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Load the scraped data
def load_data(file_path):
    """Read the scraped dataset from a CSV source.

    Args:
        file_path: Location of the CSV data; passed straight through to
            ``pandas.read_csv``.

    Returns:
        The parsed contents as a ``pandas.DataFrame``.
    """
    frame = pd.read_csv(file_path)
    return frame

# Data preprocessing
def preprocess_data(df):
    """Clean, encode, and scale the scraped data for model training.

    Steps, in order:
      1. Drop every row that contains at least one missing value.
      2. Label-encode the ``category`` and ``brand`` columns, when present.
      3. Standardize the ``price`` column, when present.

    The caller's DataFrame is left untouched; all changes are applied to a
    private copy, which is what gets returned.

    Args:
        df: Raw scraped data as a ``pandas.DataFrame``.

    Returns:
        A tuple ``(processed_df, label_encoders, scaler)``:
          * ``processed_df`` - the cleaned, encoded, and scaled copy of ``df``.
          * ``label_encoders`` - dict mapping each encoded column name to its
            fitted ``LabelEncoder``.
          * ``scaler`` - the ``StandardScaler`` fitted on ``price``. It is
            returned unfitted when ``df`` has no ``price`` column.
    """
    # Work on a copy so the caller's frame is never mutated (the original
    # used dropna(inplace=True) and then overwrote columns on the input).
    df = df.dropna().copy()

    # Encoding categorical features
    label_encoders = {}
    for col in ['category', 'brand']:  # Modify based on available categorical columns
        if col in df.columns:
            le = LabelEncoder()
            df[col] = le.fit_transform(df[col])
            label_encoders[col] = le

    # Scaling numerical features.
    # NOTE: 'price' is standardized here, so anything computed from it later
    # is in standardized units; use scaler.inverse_transform to map values
    # back to the original price scale.
    scaler = StandardScaler()
    if 'price' in df.columns:
        df['price'] = scaler.fit_transform(df[['price']])

    return df, label_encoders, scaler

# Train machine learning model
def train_model(df):
    """Fit a random-forest regressor that predicts ``price`` from all other columns.

    The data is split 80/20 into train and test partitions with a fixed seed,
    the model is fitted on the training part, and MAE, MSE, and R-squared on
    the held-out part are printed to stdout.

    Args:
        df: Preprocessed ``pandas.DataFrame`` containing a ``price`` column;
            every remaining column is used as a feature.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    features = df.drop(columns=['price'])  # everything except the target
    target = df['price']

    split = train_test_split(features, target, test_size=0.2, random_state=42)
    features_train, features_test, target_train, target_test = split

    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(features_train, target_train)

    # Score the model on the held-out partition
    predicted = forest.predict(features_test)

    # Each metric is computed and printed in turn, in a fixed order
    report = (
        ("Mean Absolute Error:", mean_absolute_error),
        ("Mean Squared Error:", mean_squared_error),
        ("R-squared Score:", r2_score),
    )
    for label, metric in report:
        print(label, metric(target_test, predicted))

    return forest

# Save the trained model
def save_model(model, scaler, label_encoders, filename='trained_model.pkl'):
    """Persist the model and its preprocessing objects in a single file.

    The three objects are bundled into one dict with the keys ``'model'``,
    ``'scaler'``, and ``'label_encoders'`` and written with ``joblib.dump``.

    Args:
        model: The trained estimator.
        scaler: The scaler produced during preprocessing.
        label_encoders: Mapping of column name to its label encoder.
        filename: Destination path of the dump file.
    """
    bundle = {
        'model': model,
        'scaler': scaler,
        'label_encoders': label_encoders,
    }
    joblib.dump(bundle, filename)
    print("Model saved successfully!")

if __name__ == "__main__":
    # Entry point: the pipeline below runs only when this module is executed
    # directly, not when it is imported.
    # Load the scraped CSV and preprocess it; the encoders and scaler returned
    # by preprocess_data are kept so they can be saved alongside the model.
    file_path = "scraped_data.csv"  # Update with actual path
    data = load_data(file_path)
    data, encoders, scaler = preprocess_data(data)
    
    # Train the model on the preprocessed data, then persist it together with
    # the preprocessing objects (save_model's default file: trained_model.pkl).
    trained_model = train_model(data)
    save_model(trained_model, scaler, encoders)
