# 🚩 Reviewer's Final Comment:
* Overall Good.
* Lack of data pre processing is evident. For example, didn't check for missing values.
* MSE is too high due to lack of preprocessing.
* I would have loved implementing visualizing the correlation matrix in a heatmap.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

# Update with the correct path to your dataset
file_path = '/content/drive/My Drive/laptop_prices.csv'

# Load the dataset
df = pd.read_csv(file_path)
print(df.head())

  Company      Product   TypeName  Inches  Ram     OS  Weight  Price_euros  \
0   Apple  MacBook Pro  Ultrabook    13.3    8  macOS    1.37      1339.69   
1   Apple  Macbook Air  Ultrabook    13.3    8  macOS    1.34       898.94   
2      HP       250 G6   Notebook    15.6    8  No OS    1.86       575.00   
3   Apple  MacBook Pro  Ultrabook    15.4   16  macOS    1.83      2537.45   
4   Apple  MacBook Pro  Ultrabook    13.3    8  macOS    1.37      1803.60   

     Screen  ScreenW  ...  RetinaDisplay CPU_company CPU_freq      CPU_model  \
0  Standard     2560  ...            Yes       Intel      2.3        Core i5   
1  Standard     1440  ...             No       Intel      1.8        Core i5   
2   Full HD     1920  ...             No       Intel      2.5  Core i5 7200U   
3  Standard     2880  ...            Yes       Intel      2.7        Core i7   
4  Standard     2560  ...            Yes       Intel      3.1        Core i5   

  PrimaryStorage  SecondaryStorage PrimaryStorageT

In [None]:
# Get the most frequent brand name
most_frequent_brand = df['Company'].value_counts().head(5)

print(f"The top 5  brand name is: {most_frequent_brand}")

The most frequent brand name is: Company
Dell      291
Lenovo    289
HP        268
Asus      152
Acer      101
Name: count, dtype: int64


In [None]:
# Calculate the average price of laptops for each brand
average_price_by_brand = df.groupby('Company')['Price_euros'].mean().reset_index()

# Rename columns for clarity (optional)
average_price_by_brand.columns = ['Company', 'average_price']

print(average_price_by_brand)

      Company  average_price
0        Acer     633.464455
1       Apple    1564.198571
2        Asus    1123.829737
3       Chuwi     314.296667
4        Dell    1199.225120
5     Fujitsu     729.000000
6      Google    1677.666667
7          HP    1080.314664
8      Huawei    1424.000000
9          LG    2099.000000
10     Lenovo    1093.862215
11        MSI    1728.908148
12   Mediacom     295.000000
13  Microsoft    1612.308333
14      Razer    3346.142857
15    Samsung    1413.444444
16    Toshiba    1267.812500
17       Vero     217.425000
18     Xiaomi    1133.462500


In [None]:

# Find the brand with the highest average price

#average_price_by_brand['average_price']: This extracts the average_price column from the average_price_by_brand DataFrame.
#idxmax(): This finds the index of the row with the highest value in the average_price column.
#loc[]: This is used to access a specific row of the DataFrame by its index. In this case, it retrieves the row corresponding to the highest average price.
highest_price_brand = average_price_by_brand.loc[average_price_by_brand['average_price'].idxmax()]

# Find the brand with the lowest average price
lowest_price_brand = average_price_by_brand.loc[average_price_by_brand['average_price'].idxmin()]

# Display the results
print(f"Brand with the highest average price: {highest_price_brand['Company']} (${highest_price_brand['average_price']:.2f})")
print(f"Brand with the lowest average price: {lowest_price_brand['Company']} (${lowest_price_brand['average_price']:.2f})")

Brand with the highest average price: Razer ($3346.14)
Brand with the lowest average price: Vero ($217.43)


In [None]:
# Select the relevant numeric columns (Price, CPU frequency, Ram, Inches, Weight)
numeric_columns = ['Price_euros', 'CPU_freq', 'Ram', 'Inches', 'Weight']
# Calculate the correlation matrix
correlation_matrix = df[numeric_columns].corr()
# Display the correlation matrix
print("Correlation Matrix:")
print(correlation_matrix)


Correlation Matrix:
             Price_euros  CPU_freq       Ram    Inches    Weight
Price_euros     1.000000  0.428847  0.740287  0.070091  0.211883
CPU_freq        0.428847  1.000000  0.366254  0.303115  0.318649
Ram             0.740287  0.366254  1.000000  0.245009  0.389370
Inches          0.070091  0.303115  0.245009  1.000000  0.827339
Weight          0.211883  0.318649  0.389370  0.827339  1.000000


In [None]:
# Sort the correlation values of Price_euros with other features
correlation_with_price = correlation_matrix['Price_euros'].sort_values(ascending=False)
print("\nCorrelation of numeric features with Price_euros:")
print(correlation_with_price)


Correlation of numeric features with Price_euros:
Price_euros    1.000000
Ram            0.740287
CPU_freq       0.428847
Weight         0.211883
Inches         0.070091
Name: Price_euros, dtype: float64


In [None]:
# Check for missing values in PrimaryStorage and SecondaryStorage (optional)
print(df[['PrimaryStorage', 'SecondaryStorage']].isnull().sum())

PrimaryStorage      0
SecondaryStorage    0
dtype: int64


In [None]:
# Create the new StorageTotal column by summing PrimaryStorage and SecondaryStorage
df['StorageTotal'] = df['PrimaryStorage'] + df['SecondaryStorage']

# Display the first few rows to check the new column
print(df[['PrimaryStorage', 'SecondaryStorage', 'StorageTotal']].head())

   PrimaryStorage  SecondaryStorage  StorageTotal
0             128                 0           128
1             128                 0           128
2             256                 0           256
3             512                 0           512
4             256                 0           256


In [None]:
# Select features and target

df_clean = df[['Ram', 'Inches', 'CPU_freq', 'PrimaryStorage', 'GPU_company', 'Price_euros']].dropna()

X = df_clean[['Ram', 'Inches', 'CPU_freq', 'PrimaryStorage', 'GPU_company']]
y = df_clean['Price_euros']

# Handle categorical variable GPU_company using one-hot encoding
X = pd.get_dummies(X, columns=['GPU_company'], drop_first=True)

# Split data into training and testing sets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from sklearn.linear_model import LinearRegression


# Create and train the linear regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Predict on the test set
y_pred_lr = lr_model.predict(X_test)


# Evaluate the model
print("Linear Regression:")
print("R-squared:", r2_score(y_test, y_pred_lr))
print("MSE:", mean_squared_error(y_test, y_pred_lr))

Linear Regression:
R-squared: 0.6372275325456753
MSE: 218068.7374502096


In [None]:
# Drop rows with missing values in relevant columns
df_clean = df[['Inches', 'Ram', 'PrimaryStorage', 'Weight', 'TypeName']].dropna()

# Separate the features (X) and target (y)
X = df_clean[['Inches', 'Ram', 'PrimaryStorage', 'Weight']]
y = df_clean['TypeName']

In [None]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
# Create and train the Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = rf_model.predict(X_test)

# Evaluate the model
print("Random Forest Classifier:")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

Random Forest Classifier:
Accuracy: 0.7928286852589641
                    precision    recall  f1-score   support

2 in 1 Convertible       0.53      0.35      0.42        23
            Gaming       0.82      0.80      0.81        41
           Netbook       0.50      0.25      0.33         4
          Notebook       0.85      0.93      0.89       139
         Ultrabook       0.69      0.71      0.70        38
       Workstation       0.25      0.17      0.20         6

          accuracy                           0.79       251
         macro avg       0.61      0.53      0.56       251
      weighted avg       0.78      0.79      0.78       251



In [None]:
from sklearn.linear_model import LogisticRegression

# Create and train the Logistic Regression model
lr_model = LogisticRegression(max_iter=200)
lr_model.fit(X_train, y_train)

# Predict on the test set
y_pred_lr = lr_model.predict(X_test)

# Evaluate the model
print("\nLogistic Regression:")
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))



Logistic Regression:
Accuracy: 0.6533864541832669
                    precision    recall  f1-score   support

2 in 1 Convertible       0.00      0.00      0.00        23
            Gaming       0.75      0.59      0.66        41
           Netbook       0.00      0.00      0.00         4
          Notebook       0.66      0.98      0.79       139
         Ultrabook       0.36      0.11      0.16        38
       Workstation       0.00      0.00      0.00         6

          accuracy                           0.65       251
         macro avg       0.30      0.28      0.27       251
      weighted avg       0.54      0.65      0.57       251



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
