<a href="https://colab.research.google.com/github/meshachaderele/timber-price/blob/main/IDF_2024_Timber_Price_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# The Problem

**Case Study:**
Standardizing Timber Product Pricing for a Forestry Company

**Background:**
You have recently joined a Forestry company as a Forest Product Officer. The company faces a significant challenge in establishing a standardized method for pricing its timber products. This inconsistency in pricing has led to confusion and inefficiencies within the company's operations.

**Objective:**
Your task is to develop a comprehensive strategy to address the pricing inconsistency issue and establish a standardized pricing framework for the company's timber products.

Note: This notebook and its data files are available on my GitHub at https://bit.ly/idf24-timber-price

# Load the Data

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Read the training dataset directly from the project's GitHub repository

data = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/meshachaderele/timber-price/main/timber_price_data.csv'
)


# Explore the Data

In [None]:
# Exploratory Data Analysis: preview the first five rows of the dataset
data.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Flag, per column, whether any value is missing
data.isnull().any()

In [None]:
# List the column names available in the dataset
data.columns

In [None]:
import seaborn as sns

# Columns whose distributions we want to inspect
variables = ['DBH', 'Height', 'Age', 'Latitude', 'Longitude', 'Timber Price']

# Draw one histogram (with a KDE overlay) per variable on a 3x2 grid
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 10))
for ax, var in zip(axes.flat, variables):
    sns.histplot(data[var], bins=20, kde=True, ax=ax)
    ax.set_title(f'Histogram of {var}')
    ax.set_xlabel(var)
    ax.set_ylabel('Frequency')

fig.tight_layout()
plt.show()

In [None]:
# Check for correlation between the numeric columns

import matplotlib.pyplot as plt

# `Species` and `Soil_Type` are categorical (they are one-hot encoded before
# modeling). DataFrame.corr() raises a ValueError on non-numeric columns in
# pandas >= 2.0, so compute the matrix on the numeric columns only.
correlation_matrix = data.select_dtypes(include='number').corr()

# Plot the matrix as an annotated heatmap; the color scale is pinned to
# [-1, 1] so that zero correlation sits at the midpoint of the palette
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.show()

# Data Preprocessing for Modeling

In [None]:

# Separate the target we want to predict (y) from the predictor columns (X)
y = data.loc[:, 'Timber Price']
X = data.loc[:, ['DBH', 'Height', 'Age', 'Species', 'Soil_Type', 'Latitude', 'Longitude']]



In [None]:
# Encoding categorical variables

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Columns that hold category labels and therefore need one-hot encoding
cat_features = ['Species', 'Soil_Type']

# One-hot encoder for the categorical columns; handle_unknown='ignore' means a
# category not seen during fitting does not raise an error at prediction time
cat_transformer = Pipeline([('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Encode the categorical columns and pass every remaining column through unchanged
preprocessor = ColumnTransformer(
    [('cat', cat_transformer, cat_features)],
    remainder='passthrough',
)



In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



# Model Training and Testing

In [None]:
# Load libraries
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Random forest regressor with 100 trees, seeded so results are repeatable
rf_model = RandomForestRegressor(random_state=42, n_estimators=100)

# Chain the preprocessing and the regressor so they are fitted and applied as one unit
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', rf_model),
])

# Fit on the training split only
pipeline.fit(X_train, y_train)



In [None]:
# Model Evaluation: score the fitted pipeline on the held-out test split
y_pred = pipeline.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the price itself

for label, value in (
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R² Score:", r2),
):
    print(label, value)


In [None]:
# Visualize predicted vs. actual price on the test split
from scipy import stats

# Least-squares line through the (actual, predicted) points
slope, intercept, rvalue, pvalue, stderr = stats.linregress(y_test, y_pred)

fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.5)
ax.plot(y_test, slope * y_test + intercept)
ax.set_xlabel("Actual Price")
ax.set_ylabel("Predicted Price")
ax.set_title("Actual vs. Predicted Timber Price")
plt.show()

# Make Predictions With the Trained Model

In [None]:

# Read the validation rows we want price predictions for
pred_data = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/meshachaderele/timber-price/main/timber_price_val_data.csv'
)


In [None]:
# Preview the first five rows of the validation data
pred_data.head(n=5)

In [None]:
# Make the prediction
# Pass only the columns the pipeline was trained on. `pred_data` gets a
# 'Timber Price' column attached right after this step, so selecting the
# features explicitly keeps this cell's input identical when it is re-run and
# guards against any extra columns in the validation file.
timber_price_predict = pipeline.predict(pred_data[list(X_train.columns)])

In [None]:
# Attach the predicted prices as a 'Timber Price' column and preview the result
pred_data = pred_data.assign(**{'Timber Price': timber_price_predict})
pred_data.head(n=5)

# Deploy Model into Interface

In [None]:
import joblib

# Persist the fitted pipeline (preprocessing + model) so the app can load it from disk
joblib.dump(value=pipeline, filename='timber_price_model.joblib')

In [None]:
# Install Streamlit, which the app script written in the next cell imports
!pip install streamlit

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib

@st.cache_resource
def load_model():
    """Load the trained pipeline from disk once and reuse it across reruns.

    Streamlit re-executes this script on every user interaction; caching the
    loaded object avoids deserializing the model file each time.

    Returns:
        The object stored in 'timber_price_model.joblib'.
    """
    # NOTE: joblib.load unpickles the file, so only load model files you trust.
    return joblib.load('timber_price_model.joblib')


# Load the trained model
model = load_model()

def predict_price(features):
    """Return the model's price estimate for a single set of inputs.

    Args:
        features: Mapping of feature name to value for one tree.

    Returns:
        The first (and only) element of the model's prediction for that row.
    """
    # The model expects tabular input, so wrap the record in a one-row frame
    row = pd.DataFrame(data=[features])
    estimates = model.predict(row)
    return estimates[0]

def main():
    """Render the prediction form and show a price when 'Predict' is clicked."""
    st.title('Timber Price Prediction App')
    st.write('Enter the following details to predict Timber Price:')

    # Collect the inputs straight into the feature mapping. Dict entries are
    # evaluated top to bottom, so the widgets are created in this order, and
    # the keys are the feature names passed on to predict_price().
    features = {
        'DBH': st.number_input('Diameter at Breast Height (DBH)', min_value=0.1, step=0.1),
        'Height': st.number_input('Height (in feet)', min_value=1.0, step=1.0),
        'Age': st.number_input('Age (in years)', min_value=1, step=1),
        'Species': st.selectbox('Species', ['Pine', 'Oak', 'Maple', 'Spruce']),
        'Soil_Type': st.selectbox('Soil Type', ['Sandy', 'Clay', 'Loam']),
        'Latitude': st.number_input('Latitude', min_value=-90.0, max_value=90.0, step=0.001),
        'Longitude': st.number_input('Longitude', min_value=-180.0, max_value=180.0, step=0.001),
    }

    # Nothing more to do until the user asks for a prediction
    if not st.button('Predict'):
        return

    estimate = predict_price(features)
    st.success(f'Predicted Timber Price: ${estimate:.2f}')

# Run the app only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()


In [None]:
# Print the response from ipv4.icanhazip.com to the cell output
# NOTE(review): presumably this machine's public IPv4 address, which localtunnel
# may ask for when the tunnel URL is opened — confirm
!wget -q -O - ipv4.icanhazip.com
# Start the Streamlit app in the background and open a localtunnel to port 8501
# NOTE(review): assumes Streamlit serves on port 8501 — confirm
! streamlit run app.py & npx localtunnel --port 8501
