In [108]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

In [117]:
dataset_filename = "datasets/Mobile phone price.csv"
data = pd.read_csv(dataset_filename, encoding='utf-8')


selected_columns = ['Brand_LabelEncoded','Storage ','RAM ', 'Screen Size (inches)' ,'Battery Capacity (mAh)']
target_column = 'Price ($)'
columns_to_drop = ['Model', 'Camera (MP)']
data = data.drop(columns=columns_to_drop)
data = data.dropna()


In [118]:

data['Price ($)'] = data['Price ($)'].str.replace(',', '.', regex=False)
data['Price ($)'] = data['Price ($)'].str.replace('$', '', regex=False)
data['Price ($)'] = pd.to_numeric(data['Price ($)'], errors='coerce')

data['Storage '] = data['Storage '].str.replace('GB', '', regex=False)
data['RAM '] = data['RAM '].str.replace('GB', '', regex=False)
data['Screen Size (inches)'] = data['Screen Size (inches)'].str.replace(',', '.')

le = LabelEncoder()
data['Brand_LabelEncoded'] = le.fit_transform(data['Brand'])
brand_code_mapping = {brand_name: brand_code for brand_name, brand_code in zip(data['Brand'], data['Brand_LabelEncoded'])}



# Примените нормализацию к данным

for selected_column in selected_columns:    
    data[selected_column] = pd.to_numeric(data[selected_column], errors='coerce')
    print(f"{selected_column} {data[selected_column].isna().sum()}")
    
data = data.dropna()
# data.head()



Brand_LabelEncoded 0
Storage  0
RAM  0
Screen Size (inches) 2
Battery Capacity (mAh) 0


In [119]:
X = data[selected_columns]  
y = data[target_column]  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)



mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)


Mean Squared Error: 23699.537943940522
R-squared: 0.7300458571279869


In [120]:
for selected_column in selected_columns:
    X = data[selected_column].array.reshape(-1, 1) 
    y = data[target_column]  
  
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"{selected_column} Mean Squared Error:", round(mse,3))
    print(f"{selected_column} R-squared:", round(r2,3))
    print()

Brand_LabelEncoded Mean Squared Error: 72258.163
Brand_LabelEncoded R-squared: 0.177

Storage  Mean Squared Error: 49062.562
Storage  R-squared: 0.441

RAM  Mean Squared Error: 68486.783
RAM  R-squared: 0.22

Screen Size (inches) Mean Squared Error: 91173.052
Screen Size (inches) R-squared: -0.039

Battery Capacity (mAh) Mean Squared Error: 65466.541
Battery Capacity (mAh) R-squared: 0.254

