In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [2]:
file_path = '/content/car_purchasing.csv'
data = pd.read_csv(file_path, encoding='ISO-8859-1')

In [3]:
data = data.drop(columns=['customer name', 'customer e-mail'])


In [4]:
X = data.drop(columns=['car purchase amount'])
y = data['car purchase amount']

In [5]:
categorical_features = ['country']
numerical_features = ['gender', 'age', 'annual Salary', 'credit card debt', 'net worth']

In [6]:
# Preprocessing pipeline
categorical_transformer = OneHotEncoder(drop='first', handle_unknown='ignore')
numerical_transformer = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)


In [7]:
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [10]:
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Calculate accuracy (for classification problems)
accuracy = np.mean((y_pred >= np.mean(y_test)) == (y_test >= np.mean(y_test)))

print("Model Evaluation:")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")
print(f"Accuracy: {accuracy:.2f}")

# Save the results and pipeline for future use
import joblib
joblib.dump(model, 'sales_forecast_model.pkl')

Model Evaluation:
Mean Squared Error: 5549526.71
R^2 Score: 0.95
Accuracy: 0.95




['sales_forecast_model.pkl']

In [12]:
from google.colab import drive
import shutil
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define the source and destination paths
source_file = '/content/sales_forecast_model.pkl'  # Path to your file in Colab
destination_path = '/content/drive/MyDrive/Colab Notebooks'  # Path to Colab Notebooks folder

# Copy the file to Google Drive
try:
    # Create the destination folder if it doesn't exist
    os.makedirs(destination_path, exist_ok=True)

    # Copy the file
    shutil.copy2(source_file, destination_path)
    print(f"File successfully copied to 'Colab Notebooks' folder in Google Drive")
except FileNotFoundError:
    print(f"Source file {source_file} not found")
except PermissionError:
    print("Permission denied. Make sure you have write access to Google Drive")
except Exception as e:
    print(f"An error occurred: {str(e)}")

Mounted at /content/drive
File successfully copied to 'Colab Notebooks' folder in Google Drive
