In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import learning_curve
import xgboost as xgb
import lightgbm as lgb



In [None]:
# Load the laptop price dataset from a CSV file in the working directory.
df = pd.read_csv('laptopprices.csv')

In [None]:
# Preview the first ten rows of the dataset.
df.head(10)

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# List the column labels; later cells reference them by their exact names.
df.columns

In [None]:
# Distribution of the target column 'Price ($)' with a KDE overlay.
# plt.figure (lowercase) creates the figure and makes it pyplot's current
# figure, so the requested size applies to the plot below. plt.Figure only
# instantiates a detached Figure object that seaborn never draws on.
plt.figure(figsize=(12, 6))
sns.histplot(df['Price ($)'], bins=50, kde=True, color='blue')

plt.title('Price Distribution')
plt.xlabel('Price ($)')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Heatmap of pairwise correlations between the numeric columns and the price.
numerical_features = [
    'RAM (GB)',
    'Screen Size (inch)',
    'Battery Life (hours)',
    'Weight (kg)',
    'Price ($)',
]
corr_matrix = df[numerical_features].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(data=corr_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Same correlation matrix as the heatmap, printed as plain numbers.
numerical_features = [
    'RAM (GB)',
    'Screen Size (inch)',
    'Battery Life (hours)',
    'Weight (kg)',
    'Price ($)',
]
corr_matrix = df.loc[:, numerical_features].corr()
print(corr_matrix)

In [None]:
# Price distribution per brand as a boxen (letter-value) plot.
# plt.figure (lowercase) is required here: plt.Figure builds a Figure that is
# not registered with pyplot, so the 14x6 size was silently ignored.
plt.figure(figsize=(14, 6))
sns.boxenplot(x='Brand', y='Price ($)', data=df)
plt.xticks(rotation=45)  # rotate brand labels so they do not overlap
plt.title('Price Distribution by Brand')
plt.show()

In [None]:
# Screen size against price, one colour per GPU value.
plt.figure(figsize=(12, 6))
sns.scatterplot(
    data=df,
    x='Screen Size (inch)',
    y='Price ($)',
    hue='GPU',
    palette='tab10',
    alpha=0.7,
)
plt.title('Screen Size vs Price by GPU')
plt.xlabel('Screen Size (inch)')
plt.ylabel('Price ($)')
# Anchor the legend outside the axes so it does not cover the points.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1))
plt.show()

In [None]:
# Bar chart of the mean price for each processor value.
plt.figure(figsize=(14, 6))
price_by_processor = df.groupby('Processor')['Price ($)']
price_by_processor.mean().plot.bar(color='orange')
plt.title('Average Price by Processor')
plt.xlabel('Processor')
plt.ylabel('Average Price ($)')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Bar chart of the lowest price observed for each processor value.
plt.figure(figsize=(14, 6))
price_by_processor = df.groupby('Processor')['Price ($)']
price_by_processor.min().plot.bar(color='teal')
plt.title('Minimum Price by Processor')
plt.xlabel('Processor')
plt.ylabel('Minimum Price ($)')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Bar chart of the highest price observed for each processor value.
plt.figure(figsize=(14, 6))
price_by_processor = df.groupby('Processor')['Price ($)']
price_by_processor.max().plot.bar(color='red')
plt.title('Maximum Price by Processor')
plt.xlabel('Processor')
plt.ylabel('Maximum Price ($)')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Line chart of the mean price at each distinct battery-life value.
plt.figure(figsize=(14, 6))
price_by_battery = df.groupby('Battery Life (hours)')['Price ($)'].mean()
price_by_battery.plot.line(color='blue', marker='o')
plt.title('Average Price by Battery Life (hours)')
plt.xlabel('Battery Life (hours)')
plt.ylabel('Average Price ($)')
plt.grid(True)
plt.show()

In [None]:
# Number of rows per operating system.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; confirm against the installed version before changing the call.
plt.figure(figsize=(14, 6))
sns.countplot(
    data=df,
    x='Operating System',
    palette='viridis',
)
plt.title('Number of Laptops by Operating System')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Build the feature matrix `x` and target vector `y` for the scikit-learn models.
categorical_features = ['Brand', 'Processor', 'Storage', 'GPU', 'Resolution', 'Operating System']
numerical_features = ['RAM (GB)', 'Screen Size (inch)', 'Battery Life (hours)', 'Weight (kg)']

# One-hot encode the categorical columns into a dense array, dropping the first
# level of each feature. scikit-learn renamed `sparse` to `sparse_output`
# (deprecated in 1.2, removed in 1.4), so try the new keyword first and fall
# back to the old one on releases that do not know it.
try:
    encoder = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:
    encoder = OneHotEncoder(sparse=False, drop='first')
categorical_Encoder = encoder.fit_transform(df[categorical_features])

# Standardise the numeric columns.
# NOTE(review): the encoder and scaler are fit on the full dataset before the
# train/test split, so test rows influence the fitted statistics. Consider
# fitting on the training split only (e.g. with a Pipeline/ColumnTransformer).
scaler = StandardScaler()
numerical_scaled = scaler.fit_transform(df[numerical_features])

# Scaled numeric columns first, then the one-hot columns.
x = np.hstack((numerical_scaled, categorical_Encoder))
y = df['Price ($)'].values

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
# Baseline model: ordinary least squares fit on the training split.
model = LinearRegression()
model.fit(x_train, y_train)

In [None]:
# Score the baseline model on the held-out test split.
y_pred = model.predict(x_test)

r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

In [None]:
# Predicted vs actual test prices; points on the dashed diagonal are exact predictions.
price_range = [y.min(), y.max()]

plt.figure(figsize=(10, 6))
sns.scatterplot(x=y_test, y=y_pred, color='green', alpha=0.7)
plt.plot(price_range, price_range, linestyle='--', color='red')
plt.title('Actual vs Predicted Prices')
plt.xlabel('Actual Price ($)')
plt.ylabel('Predicted Price ($)')
plt.show()


In [None]:
# Hyper-parameter grid for the random forest: 2 x 3 x 2 = 12 candidates.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}

# Seed the forest so the grid-search ranking and the downstream test metrics
# are reproducible across kernel restarts; 42 matches the seed used elsewhere
# in this notebook. Without it every run bootstraps differently.
model = RandomForestRegressor(random_state=42)

# 5-fold cross-validated search over the grid, scored by R² and run on all cores.
grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, scoring='r2')
grid_search.fit(x_train, y_train)

In [None]:
# Report the best parameter combination and its mean cross-validated R²
# (the printed labels are in Turkish: "best parameters" / "best score").
print("En iyi parametreler:", grid_search.best_params_)
print("En iyi skor (R²):", grid_search.best_score_)

In [None]:
# Summarise the grid search: one row per candidate, best-ranked first.
summary_columns = ['params', 'mean_test_score', 'std_test_score', 'rank_test_score']
results_df = (
    pd.DataFrame(grid_search.cv_results_)[summary_columns]
    .sort_values(by='rank_test_score')
)
print(results_df.head())

In [None]:
# Evaluate the refit best estimator on the held-out test split.
# Note: this overwrites `y_pred` and `r2` from the linear-regression cell.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(x_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("Test R²:", r2)

In [None]:

# Mean cross-validated test score of every grid candidate, in grid order
# (the printed label is Turkish for "cross-validation scores").
cv_scores = grid_search.cv_results_['mean_test_score']
print("Cross-Validation Skorları:", cv_scores)

In [None]:
def _segment_r2(y_true, y_hat, mask):
    """Return R² restricted to the rows selected by ``mask``.

    R² is undefined for fewer than two samples and ``r2_score`` raises on an
    empty selection, so NaN is returned in those cases instead of crashing
    the cell when a price segment happens to be missing from the test split.
    """
    if np.count_nonzero(mask) < 2:
        return float('nan')
    return r2_score(y_true[mask], y_hat[mask])


# R² for low-priced laptops (< $1500)
low_price_mask = (y_test < 1500)
r2_low = _segment_r2(y_test, y_pred, low_price_mask)

# R² for high-priced laptops (> $3500)
high_price_mask = (y_test > 3500)
r2_high = _segment_r2(y_test, y_pred, high_price_mask)

print("Düşük Fiyatlılar R²:", r2_low)
print("Yüksek Fiyatlılar R²:", r2_high)

In [None]:
from sklearn.metrics import mean_squared_error

# Error of the current `y_pred` on the test split, in squared and original price units.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

print("Test MSE:", mse)
print("Test RMSE:", rmse)

In [None]:

from sklearn.model_selection import cross_val_predict, KFold
import numpy as np

# Count how many low- and high-priced laptops land in the validation part of
# the fifth fold of a shuffled 5-fold split of the training data.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
fold_splits = list(cv.split(x_train))

fold = 4  # zero-based index of the fifth fold
train_idx, test_idx = fold_splits[fold]

fold_prices = y_train[test_idx]
low_price_count = np.sum(fold_prices < 1500)
high_price_count = np.sum(fold_prices > 3500)
print(f"5. Fold - Düşük Fiyatlılar: {low_price_count}, Yüksek Fiyatlılar: {high_price_count}")

In [None]:
# Box plot of the price column to eyeball its spread and any outliers.
sns.boxplot(x=df['Price ($)'])
plt.show()

In [None]:



# Prepare a DataFrame-based feature set for the gradient-boosting models.
# Note: this rebinds `y`, `y_train` and `y_test` from the earlier NumPy-based
# split to pandas objects; `X_train`/`X_test` (capital X) are new names.
categorical_cols = [
    'Brand',
    'Operating System',
    'GPU',
    'Processor',
    'Storage',
    'Resolution',
]

# One-hot encode the categorical columns, dropping the first level of each.
df_encoded = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

target_col = 'Price ($)'
X = df_encoded.drop(columns=target_col)
y = df_encoded[target_col]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Train an XGBoost regressor with fixed hyper-parameters and score it on the test split.
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 200,
    'max_depth': 5,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'gamma': 0.1,
    'random_state': 42,
}
model_xgb = xgb.XGBRegressor(**xgb_params)
model_xgb.fit(X_train, y_train)

y_pred_xgb = model_xgb.predict(X_test)

r2_xgb = r2_score(y_true=y_test, y_pred=y_pred_xgb)
rmse_xgb = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred_xgb))

print("XGBoost Test R²:", r2_xgb)
print("XGBoost Test RMSE:", rmse_xgb)


In [None]:
# Train a LightGBM regressor with fixed hyper-parameters and score it on the test split.
lgb_params = {
    'objective': 'regression',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'n_estimators': 200,
    'max_depth': -1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 0.1,
    'reg_lambda': 0.1,
    'random_state': 42,
}
model_lgb = lgb.LGBMRegressor(**lgb_params)
model_lgb.fit(X_train, y_train)

y_pred_lgb = model_lgb.predict(X_test)

r2_lgb = r2_score(y_true=y_test, y_pred=y_pred_lgb)
rmse_lgb = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred_lgb))

print("LightGBM Test R²:", r2_lgb)
print("LightGBM Test RMSE:", rmse_lgb)

In [None]:

# LightGBM predictions against actual test prices; the dashed diagonal marks a perfect prediction.
y_pred_lgb = model_lgb.predict(X_test)
price_range = [y.min(), y.max()]

plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=y_test,
    y=y_pred_lgb,
    color='blue',
    alpha=0.7,
    label='LightGBM Tahminleri',
)
plt.plot(price_range, price_range, linestyle='--', color='red', label='Mükemmel Tahmin')
plt.title('LightGBM: Gerçek vs Tahmin Edilen Fiyatlar')
plt.xlabel('Gerçek Fiyat ($)')
plt.ylabel('Tahmin Edilen Fiyat ($)')
plt.legend()
plt.show()

In [None]:

# XGBoost predictions against actual test prices; the dashed diagonal marks a perfect prediction.
y_pred_xgb = model_xgb.predict(X_test)
price_range = [y.min(), y.max()]

plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=y_test,
    y=y_pred_xgb,
    color='orange',
    alpha=0.7,
    label='XGBoost Tahminleri',
)
plt.plot(price_range, price_range, linestyle='--', color='red', label='Mükemmel Tahmin')
plt.title('XGBoost: Gerçek vs Tahmin Edilen Fiyatlar')
plt.xlabel('Gerçek Fiyat ($)')
plt.ylabel('Tahmin Edilen Fiyat ($)')
plt.legend()
plt.show()

In [None]:
# Overlay both boosted models' test predictions on one set of axes for comparison.
price_range = [y.min(), y.max()]
model_predictions = [
    (y_pred_lgb, 'blue', 'LightGBM Tahminleri'),
    (y_pred_xgb, 'orange', 'XGBoost Tahminleri'),
]

plt.figure(figsize=(10, 6))
for predictions, point_color, legend_label in model_predictions:
    sns.scatterplot(x=y_test, y=predictions, alpha=0.7, color=point_color, label=legend_label)
plt.plot(price_range, price_range, linestyle='--', color='red', label='Mükemmel Tahmin')
plt.title('LightGBM vs XGBoost: Gerçek vs Tahmin Edilen Fiyatlar')
plt.xlabel('Gerçek Fiyat ($)')
plt.ylabel('Tahmin Edilen Fiyat ($)')
plt.legend()
plt.show()

In [None]:

# Compare LightGBM's fit on the training split with its test performance;
# a large gap between the two would point to overfitting.
y_train_pred_lgb = model_lgb.predict(X_train)
y_test_pred_lgb = model_lgb.predict(X_test)

train_r2_lgb = r2_score(y_true=y_train, y_pred=y_train_pred_lgb)
test_r2_lgb = r2_score(y_true=y_test, y_pred=y_test_pred_lgb)

train_rmse_lgb = np.sqrt(mean_squared_error(y_true=y_train, y_pred=y_train_pred_lgb))
test_rmse_lgb = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_test_pred_lgb))

print("LightGBM Eğitim R²:", train_r2_lgb)
print("LightGBM Test R²:", test_r2_lgb)
print("LightGBM Eğitim RMSE:", train_rmse_lgb)
print("LightGBM Test RMSE:", test_rmse_lgb)

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated R² of the LightGBM configuration on the training split.
cv_scores_lgb = cross_val_score(
    estimator=model_lgb,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='r2',
)
print("LightGBM Çapraz Doğrulama R² Skorları:", cv_scores_lgb)
print("LightGBM Ortalama Çapraz Doğrulama R²:", cv_scores_lgb.mean())