In [28]:
import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Load the dataset.
# The CSV location can be overridden with the LAPTOP_DATA_PATH environment
# variable so the notebook is not tied to one machine; the original location
# remains the default, so existing runs behave exactly as before.
data_path = os.environ.get(
    "LAPTOP_DATA_PATH",
    "C:\\Users\\Kevin\\Desktop\\laptop_price.csv",
)
# The file is decoded as ISO-8859-1 (Latin-1) rather than pandas' default UTF-8
laptop_data = pd.read_csv(data_path, encoding='ISO-8859-1')



In [29]:
   
# Show every column's dtype; this confirms how 'Price_euros' was parsed
print(laptop_data.dtypes)

# Columns with object dtype are the ones that get one-hot encoded. The target
# is filtered out in case it was read as text and ended up among them.
object_columns = laptop_data.select_dtypes(include=['object']).columns
categorical_cols = [name for name in object_columns if name != 'Price_euros']



laptop_ID             int64
Company              object
Product              object
TypeName             object
Inches              float64
ScreenResolution     object
Cpu                  object
Ram                  object
Memory               object
Gpu                  object
OpSys                object
Weight               object
Price_euros         float64
dtype: object


In [30]:
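# Preprocess the data
# NOTE: `categorical_cols` is already built in the cell above; the commented-out
# duplicate below is kept for reference only. Its unconditional `.remove` would
# raise ValueError here, because 'Price_euros' is float64 and therefore never
# appears among the object-typed columns.
#categorical_cols = laptop_data.select_dtypes(include=['object']).columns.tolist()
#categorical_cols.remove('Price_euros')  # Assuming 'Price_euros' is the target variable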

In [31]:
# Single-step pipeline that one-hot encodes categorical features.
# handle_unknown='ignore' keeps prediction from failing on category values
# that were not present when the encoder was fitted.
categorical_transformer = Pipeline(
    [("onehot", OneHotEncoder(handle_unknown="ignore"))]
)


In [32]:
# Route the categorical columns through the one-hot pipeline; all remaining
# columns are forwarded untouched (remainder="passthrough")
preprocessor = ColumnTransformer(
    [("cat", categorical_transformer, categorical_cols)],
    remainder="passthrough",
)

In [33]:
# Target is the price column; every other column is used as a feature.
# NOTE(review): this keeps laptop_ID in X, and since it is numeric it reaches
# the regressor through remainder='passthrough'. It looks like a row
# identifier -- confirm whether it should really be a model input.
y = laptop_data['Price_euros']
X = laptop_data.drop(columns='Price_euros')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [34]:
# Chain preprocessing and the regressor so they are fitted and applied as one
# estimator
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", LinearRegression()),
])

# Fit on the training split only
pipeline.fit(X_train, y_train)

# Score the held-out split
y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics
print(f'R² score: {r2}')
print(f'Mean Absolute Error: {mae}')

R² score: 0.7958079521914924
Mean Absolute Error: 213.9498524031765


In [35]:
# Persist the fitted pipeline (preprocessing + regressor) so the Streamlit app
# can load it without retraining
joblib.dump(
    value=pipeline,
    filename='laptop_price_prediction_model.joblib',
)


['laptop_price_prediction_model.joblib']

In [37]:
import streamlit as st
import pandas as pd
import joblib

def load_model(model_path='laptop_price_prediction_model.joblib'):
    """Load the trained prediction pipeline from disk.

    Args:
        model_path: Location of the joblib file to load. Defaults to the file
            name used when the trained pipeline is saved.

    Returns:
        The object that joblib deserialises from ``model_path``.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only point this at model files from a trusted source.
    return joblib.load(model_path)

# Load the pipeline when the script's top-level code runs; main() reads it as a
# module-level global.
model = load_model()

def main():
    """Render the Streamlit form and display a price prediction on request.

    Collects brand, processor, RAM, storage and screen size from the user,
    fills the remaining model inputs with fixed placeholder values, and, when
    the 'Predict Price' button is pressed, shows the prediction of the
    module-level ``model``.
    """
    st.title('Laptop Price Predictor')

    # User inputs for available features
    brand = st.selectbox('Brand', ['Apple', 'Dell', 'Lenovo', 'HP', 'Asus'])
    processor_type = st.selectbox('Processor Type', ['Intel Core i7', 'Intel Core i5', 'AMD Ryzen'])
    ram = st.selectbox('RAM', ['8GB', '16GB', '32GB'])
    storage = st.selectbox('Storage', ['256GB SSD', '512GB SSD', '1TB HDD'])
    screen_size = st.selectbox('Screen Size', ['13"', '15"', '17"'])

    # Feature columns in the order of the training data. The input frame is
    # built in this order because some scikit-learn versions refuse to
    # transform a frame whose columns are ordered differently from fit time
    # when the ColumnTransformer uses `remainder`.
    feature_columns = [
        'laptop_ID', 'Company', 'Product', 'TypeName', 'Inches',
        'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight',
    ]

    # Values for every model input: user selections where available, fixed
    # placeholders otherwise.
    # NOTE(review): the categorical literals below (e.g. Weight '1.5',
    # OpSys 'Windows', Gpu 'Integrated', the Cpu options) only influence the
    # prediction if they match category strings from the training CSV exactly
    # -- verify them against the data.
    placeholders = {
        'laptop_ID': 1,  # Example laptop ID
        'Company': brand,  # Dynamically use the brand as company
        'Product': 'Generic Model',  # Example product model
        'TypeName': 'Ultrabook',  # Example placeholder
        'Inches': float(screen_size.strip('"')),  # '15"' -> 15.0
        'ScreenResolution': '1920x1080',  # Example screen resolution
        'Cpu': processor_type,  # Use processor type for CPU
        'Ram': ram,  # Use user input for RAM
        'Memory': storage,  # Use storage as memory
        'Gpu': 'Integrated',  # Example GPU
        'OpSys': 'Windows',  # Operating system
        'Weight': '1.5',  # Example weight
    }

    # One-row frame with the columns explicitly ordered as in training
    input_df = pd.DataFrame([placeholders], columns=feature_columns)

    if st.button('Predict Price'):
        # Make prediction
        predicted_price = model.predict(input_df)[0]

        # The model is trained on the 'Price_euros' target, so the amount is
        # shown in euros rather than dollars.
        st.success(f'Predicted Laptop Price: €{predicted_price:.2f}')

# Entry point: build the UI only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()

# To launch the app, save this cell's code as laptop_price_predict.py and run
# from a terminal: streamlit run laptop_price_predict.py
