<a href="https://colab.research.google.com/github/prasanna8985/Wood-cost-prediction/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Vocabularies the synthetic rows are sampled from.
wood_types = [
    "Teak", "Oak", "Pine", "Bamboo", "Maple",
    "Walnut", "Mahogany", "Cedar", "Birch", "Cherry",
    "Ash", "Poplar", "Beech", "Spruce", "Ebony",
    "Redwood", "Alder", "Fir", "Elm", "Acacia",
]
use_cases = [
    "Furniture", "Shelves", "Tool handles",
    "Cutting boards", "Decorative items", "Panels",
    "Doors", "Cabinetry", "Crafts",
]
durability_levels = ["Medium", "High", "Very High"]

# Price multiplier applied per durability grade ("Medium" leaves the price as is).
_DURABILITY_MARKUP = {"Medium": 1.0, "High": 1.1, "Very High": 1.2}


def _draw_row():
    """Sample one synthetic record as a list matching the order of ``columns``.

    The random draws happen in a fixed order (wood type, length, width,
    thickness, use case, durability, base cost) so that a given seed always
    reproduces the same dataset.

    Note that the cost depends only on a random base amount, the three
    dimensions and the durability grade; wood type and use case are sampled
    but do not influence the price.
    """
    species = np.random.choice(wood_types)
    length_cm = np.random.randint(80, 251)
    width_cm = np.random.randint(10, 61)
    thickness_cm = round(np.random.uniform(1.0, 4.5), 1)
    purpose = np.random.choice(use_cases)
    grade = np.random.choice(durability_levels)

    # Random base amount plus a linear contribution from each dimension.
    price = np.random.uniform(20, 100)
    price = price + 0.5 * length_cm + 0.3 * width_cm + 2 * thickness_cm
    price *= _DURABILITY_MARKUP[grade]

    return [species, length_cm, width_cm, thickness_cm, purpose, grade, round(price, 2)]


# Seed the global NumPy generator, then build 200 rows.
np.random.seed(42)
data = [_draw_row() for _ in range(200)]

# Tabulate the rows and append the derived volume (length * width * thickness).
columns = ["Wood Type", "Length (cm)", "Width (cm)", "Thickness (cm)", "Use Case", "Durability", "Cost"]
df = pd.DataFrame(data, columns=columns).assign(
    Volume=lambda frame: frame["Length (cm)"].mul(frame["Width (cm)"]).mul(frame["Thickness (cm)"])
)

# Target is the cost column; every remaining column is a predictor.
y = df["Cost"]
X = df.drop(columns=["Cost"])

# Column groups routed to each preprocessing branch.
numerical_features = ["Length (cm)", "Width (cm)", "Thickness (cm)", "Volume"]
categorical_features = ["Wood Type", "Use Case", "Durability"]

# Numeric columns are standardised; categorical columns are one-hot encoded.
# handle_unknown="ignore" keeps prediction from failing on a category that
# was not present when the encoder was fitted.
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer([
    ("num", numerical_transformer, numerical_features),
    ("cat", categorical_transformer, categorical_features),
])

# Preprocessing and the regressor live in one pipeline so the same
# transformations are applied at fit time and at predict time.
model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(random_state=42)),
])

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Search space for the random forest: 3 * 4 * 3 * 3 = 108 candidates,
# each scored with 3-fold cross-validation on the training split.
param_grid = {
    "regressor__n_estimators": [100, 200, 300],
    "regressor__max_depth": [None, 10, 20, 30],
    "regressor__min_samples_split": [2, 5, 10],
    "regressor__min_samples_leaf": [1, 2, 4],
}
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="r2",
    cv=3,
    n_jobs=-1,  # use every available core
)
grid_search.fit(X_train, y_train)

# Pipeline with the best-scoring hyperparameters found by the search.
best_model = grid_search.best_estimator_

# Score the chosen pipeline on the held-out rows.
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Example function to predict cost based on user input
def predict_cost(user_input, model=None):
    """Predict wood cost based on user input.

    Args:
        user_input (dict): Dictionary with keys ["Wood Type", "Length (cm)",
            "Width (cm)", "Thickness (cm)", "Use Case", "Durability"].
            The dictionary is not modified.
        model: Optional fitted estimator exposing ``predict(DataFrame)``.
            Defaults to the notebook-level ``best_model`` so existing calls
            keep working; pass another estimator to reuse this helper with a
            different model.

    Returns:
        float: Predicted cost as a built-in Python float.

    Raises:
        KeyError: If any of the three dimension keys is missing, since the
            derived "Volume" feature cannot be computed without them.
    """
    dimension_keys = ("Length (cm)", "Width (cm)", "Thickness (cm)")
    missing = [key for key in dimension_keys if key not in user_input]
    if missing:
        # Report every missing dimension at once instead of failing on the first.
        raise KeyError(f"user_input is missing required dimension(s): {missing}")

    # Resolve the global lazily so an explicit model never touches best_model.
    estimator = best_model if model is None else model

    input_df = pd.DataFrame([user_input])
    # Rebuild the engineered feature exactly as it was built for training.
    input_df["Volume"] = input_df["Length (cm)"] * input_df["Width (cm)"] * input_df["Thickness (cm)"]

    # predict() returns one value per row; unwrap the single row as a plain float.
    return float(estimator.predict(input_df)[0])

# Worked example: a 150 cm x 30 cm x 2.5 cm teak furniture board of "High" durability.
example_input = dict([
    ("Wood Type", "Teak"),
    ("Length (cm)", 150),
    ("Width (cm)", 30),
    ("Thickness (cm)", 2.5),
    ("Use Case", "Furniture"),
    ("Durability", "High"),
])

# Run the tuned pipeline on the single record and report the estimate.
predicted_cost = predict_cost(example_input)
print(f"Predicted Cost: {predicted_cost}")


Mean Squared Error: 475.7019887265322
R-squared: 0.616962719876132
Predicted Cost: 173.52096496068813


In [None]:
# Second scenario: a longer (200 cm) but thinner (2 cm) teak furniture board,
# with the width and durability grade unchanged from the previous example.
example_input = dict([
    ("Wood Type", "Teak"),
    ("Length (cm)", 200),
    ("Width (cm)", 30),
    ("Thickness (cm)", 2),
    ("Use Case", "Furniture"),
    ("Durability", "High"),
])

# Run the tuned pipeline on the single record and report the estimate.
predicted_cost = predict_cost(example_input)
print(f"Predicted Cost: {predicted_cost}")


Predicted Cost: 171.3775824005944
