In [28]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import r2_score
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [29]:
def load_data(file_path, feature_cols=slice(2, 3), target_cols=slice(5, None)):
    """Read a CSV file and split it into feature and target arrays.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; passed unchanged to ``pandas.read_csv``.
    feature_cols : slice or list of int, optional
        Positional (``iloc``) column selector for the features. The default
        ``slice(2, 3)`` keeps only the column at position 2.
    target_cols : slice or list of int, optional
        Positional (``iloc``) column selector for the target. The default
        ``slice(5, None)`` keeps every column from position 5 onwards.

    Returns
    -------
    X : numpy.ndarray
        Values of the selected feature columns.
    Y : numpy.ndarray
        Values of the selected target columns.

    Notes
    -----
    Slice and list selectors keep the result two-dimensional
    (``n_rows x n_selected_columns``), which is what the defaults produce.
    """
    data = pd.read_csv(file_path)
    # Slicing (rather than indexing with a bare int) keeps a DataFrame, so the
    # resulting arrays stay 2-D even when a single column is selected.
    X = data.iloc[:, feature_cols].to_numpy()
    Y = data.iloc[:, target_cols].to_numpy()
    return X, Y
# Load the feature array X and target array Y from the CSV file; the path is
# relative, so it is resolved against the notebook's working directory.
X, Y = load_data('../data/maaslar_yeni.csv')

In [30]:
# Linear Regression
def linear_regression(X, Y):
    """Fit a linear regression model and print its R2 score.

    The score is computed on the same X and Y that were used for fitting,
    so it is a training-set score rather than a hold-out estimate.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``LinearRegression.fit``.
    Y : array-like
        Target values passed to ``LinearRegression.fit``.

    Returns
    -------
    LinearRegression
        The fitted estimator.
    """
    model = LinearRegression().fit(X, Y)
    fitted_values = model.predict(X)
    print('Linear R2 değeri:', r2_score(Y, fitted_values))
    return model
# Fit the linear model on the full dataset; the fitted estimator is reused by
# the OLS summary cell that follows.
linear_model = linear_regression(X, Y)

Linear R2 değeri: 0.5285811733746243


In [31]:
# Print a statsmodels OLS summary with the linear model's predictions as the
# dependent variable (endog) and the raw feature X as the regressor (exog);
# no intercept column is added to X.
# NOTE(review): endog is the model's prediction, not the observed Y, so this
# summary describes the prediction-vs-X relationship rather than the fit to
# the data -- confirm this is intended.
model1 = sm.OLS(endog=linear_model.predict(X), exog=X)
print(model1.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.942
Model:                            OLS   Adj. R-squared (uncentered):              0.940
Method:                 Least Squares   F-statistic:                              468.1
Date:                Fri, 20 Sep 2024   Prob (F-statistic):                    1.93e-19
Time:                        16:47:11   Log-Likelihood:                         -287.43
No. Observations:                  30   AIC:                                      576.9
Df Residuals:                      29   BIC:                                      578.3
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [32]:
# Polynomial Regression
def polynomial_regression(X, Y, degree=4):
    """Fit a linear model on polynomial features of X and print its R2 score.

    The score is computed on the same data the model was fitted on.

    Parameters
    ----------
    X : array-like
        Feature matrix that is expanded with ``PolynomialFeatures``.
    Y : array-like
        Target values.
    degree : int, optional
        Degree handed to ``PolynomialFeatures`` (default 4).

    Returns
    -------
    tuple
        ``(fitted LinearRegression, fitted PolynomialFeatures)``; the
        transformer is returned so callers can expand new inputs the same way.
    """
    expander = PolynomialFeatures(degree=degree)
    expanded = expander.fit_transform(X)
    model = LinearRegression().fit(expanded, Y)
    score = r2_score(Y, model.predict(expanded))
    print(f'Polynomial (degree {degree}) R2 değeri:', score)
    return model, expander
# Fit the degree-4 polynomial model; keep the fitted transformer alongside the
# estimator because predictions need the same feature expansion.
poly_model, poly_transformer = polynomial_regression(X, Y, degree=4)

Polynomial (degree 4) R2 değeri: 0.8174873280442542


In [33]:
# Print a statsmodels OLS summary with the polynomial model's predictions as
# the dependent variable and the raw feature X (no intercept column) as the
# regressor.
# The transformer was already fitted inside polynomial_regression, so it is
# only applied here with transform(); re-fitting it at prediction time is
# unnecessary and would silently overwrite its fitted state.
# NOTE(review): the dependent variable is the model's prediction, not the
# observed Y -- confirm this is intended.
model2 = sm.OLS(poly_model.predict(poly_transformer.transform(X)), X)
print(model2.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.759
Model:                            OLS   Adj. R-squared (uncentered):              0.751
Method:                 Least Squares   F-statistic:                              91.39
Date:                Fri, 20 Sep 2024   Prob (F-statistic):                    1.82e-10
Time:                        16:47:11   Log-Likelihood:                         -311.94
No. Observations:                  30   AIC:                                      625.9
Df Residuals:                      29   BIC:                                      627.3
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [34]:
# Support Vector Regression (SVR)
def svr_regression(X, Y):
    """Fit an RBF-kernel SVR on standardised data and print its R2 score.

    X and Y are each standardised with their own ``StandardScaler`` before
    fitting. The printed score is computed in the scaled target space, on the
    same data used for fitting.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    Y : numpy.ndarray
        Target values; reshaped to a single column before scaling.

    Returns
    -------
    tuple
        ``(fitted SVR, feature scaler, target scaler)``. The scalers are
        returned so callers can scale new inputs and undo the target scaling.
    """
    feature_scaler = StandardScaler()
    target_scaler = StandardScaler()

    X_std = feature_scaler.fit_transform(X)
    # The scaler works on a column; flatten afterwards to get a 1-D target.
    y_std = target_scaler.fit_transform(Y.reshape(-1, 1)).ravel()

    model = SVR(kernel='rbf')
    model.fit(X_std, y_std)

    print('SVR R2 değeri:', r2_score(y_std, model.predict(X_std)))
    return model, feature_scaler, target_scaler
# Fit the SVR and keep both scalers: scaler_X to scale inputs before
# predicting, scaler_Y to map predictions back to the original target scale.
svr_model, scaler_X, scaler_Y = svr_regression(X, Y)

SVR R2 değeri: 0.5841869084594333


In [35]:
# SVR predictions: scale the original X with the fitted feature scaler,
# predict, then map the predictions back to the target's original scale.
predictions_scaled = svr_model.predict(scaler_X.transform(X))
predictions = scaler_Y.inverse_transform(predictions_scaled[:, np.newaxis])

# Build the OLS model with the predictions as the dependent variable and the
# original (unscaled) X as the regressor; no intercept column is added.
# NOTE(review): the dependent variable is the model's prediction, not the
# observed Y -- confirm this is intended.
model3 = sm.OLS(endog=predictions, exog=X)
print(model3.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.925
Model:                            OLS   Adj. R-squared (uncentered):              0.922
Method:                 Least Squares   F-statistic:                              356.5
Date:                Fri, 20 Sep 2024   Prob (F-statistic):                    7.76e-18
Time:                        16:47:11   Log-Likelihood:                         -283.92
No. Observations:                  30   AIC:                                      569.8
Df Residuals:                      29   BIC:                                      571.2
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [36]:
# Decision Tree Regression
def decision_tree_regression(X, Y):
    """Fit a decision-tree regressor and print its R2 score.

    The tree is created with ``random_state=0`` and scored on the same data
    it was fitted on.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    Y : array-like
        Target values.

    Returns
    -------
    DecisionTreeRegressor
        The fitted estimator.
    """
    tree = DecisionTreeRegressor(random_state=0)
    tree.fit(X, Y)
    fitted_values = tree.predict(X)
    print('Decision Tree R2 değeri:', r2_score(Y, fitted_values))
    return tree
# Fit the decision tree on the full dataset; the fitted estimator is reused by
# the OLS summary cell that follows.
dt_model = decision_tree_regression(X, Y)

Decision Tree R2 değeri: 0.8343186200100907


In [40]:
# Print a statsmodels OLS summary with the decision tree's predictions as the
# dependent variable (endog) and the raw feature X as the regressor (exog);
# no intercept column is added to X.
# NOTE(review): endog is the model's prediction, not the observed Y -- confirm
# this is intended.
model4 = sm.OLS(endog=dt_model.predict(X), exog=X)
print(model4.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.751
Model:                            OLS   Adj. R-squared (uncentered):              0.742
Method:                 Least Squares   F-statistic:                              87.29
Date:                Fri, 20 Sep 2024   Prob (F-statistic):                    3.01e-10
Time:                        16:48:04   Log-Likelihood:                         -312.62
No. Observations:                  30   AIC:                                      627.2
Df Residuals:                      29   BIC:                                      628.6
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [41]:
# Random Forest Regression
def random_forest_regression(X, Y, n_estimators=10):
    """Fit a random-forest regressor and print its R2 score.

    The forest is created with ``random_state=0`` and scored on the same data
    it was fitted on.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    Y : numpy.ndarray
        Target values; flattened with ``ravel()`` before fitting.
    n_estimators : int, optional
        Number of trees handed to ``RandomForestRegressor`` (default 10).

    Returns
    -------
    RandomForestRegressor
        The fitted estimator.
    """
    forest = RandomForestRegressor(n_estimators=n_estimators, random_state=0)
    # Fit on a flattened target; the score below still compares against Y as given.
    forest.fit(X, Y.ravel())
    fitted_values = forest.predict(X)

    print('Random Forest R2 değeri:', r2_score(Y, fitted_values))
    return forest
# Fit the random forest with the default n_estimators=10; the fitted estimator
# is reused by the OLS summary cell that follows.
rf_model = random_forest_regression(X, Y)

Random Forest R2 değeri: 0.8284081476481634


In [42]:
# Print a statsmodels OLS summary with the random forest's predictions as the
# dependent variable (endog) and the raw feature X as the regressor (exog);
# no intercept column is added to X.
# NOTE(review): endog is the model's prediction, not the observed Y -- confirm
# this is intended.
model5 = sm.OLS(endog=rf_model.predict(X), exog=X)
print(model5.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.719
Model:                            OLS   Adj. R-squared (uncentered):              0.709
Method:                 Least Squares   F-statistic:                              74.13
Date:                Fri, 20 Sep 2024   Prob (F-statistic):                    1.75e-09
Time:                        16:48:16   Log-Likelihood:                         -315.35
No. Observations:                  30   AIC:                                      632.7
Df Residuals:                      29   BIC:                                      634.1
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------