## Özet Tanımı

In [1]:
#Veri kümesini indireceğiz.
#Gerekli ve gereksiz bağımsız değişkenleri sileceğiz. (Pi Value) veya (Korelasyon Değeri) ile.
#5 farklı yönteme göre regresyon modeli inşa edeceğiz.
#Yöntemlerin başarılarını karşılaştıracağız.

# Gerekli Kütüphaneler

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Veri Setinin Eklenmesi

In [3]:
df = pd.read_csv('Maas_Odev.csv')

In [4]:
df

Unnamed: 0,Calisan ID,unvan,UnvanSeviyesi,Kidem,Puan,maas
0,1,Cayci,1,5,70,2250
1,2,Sekreter,2,5,70,2500
2,3,Uzman Yardimcisi,3,5,70,3000
3,4,Uzman,4,5,70,4000
4,5,Proje Yoneticisi,5,5,70,5500
5,6,Sef,6,5,70,7500
6,7,Mudur,7,5,70,10000
7,8,Direktor,8,5,70,15000
8,9,C-level,9,5,70,25000
9,10,CEO,10,5,70,50000


## Korelasyon İle Veri Silme

In [5]:
#Korealasyon ile.
df = df.drop(columns=['unvan','Calisan ID'],axis=1)
df.corr()

Unnamed: 0,UnvanSeviyesi,Kidem,Puan,maas
UnvanSeviyesi,1.0,-0.1252,0.034948,0.727036
Kidem,-0.1252,1.0,0.322796,0.117964
Puan,0.034948,0.322796,1.0,0.201474
maas,0.727036,0.117964,0.201474,1.0


In [6]:
#Kidem ve puan çok az etkiliyor.
df = df.drop(columns=['Kidem','Puan'],axis=1)
df.head()

Unnamed: 0,UnvanSeviyesi,maas
0,1,2250
1,2,2500
2,3,3000
3,4,4000
4,5,5500


## Bağımlı Ve Bağımsız Değişkenlerin Belirlenmesi

In [7]:
x = df['UnvanSeviyesi'].values
y = df['maas'].values
x = x.reshape(-1,1)
y = y.reshape(-1,1)

## Modellerin Oluşturulması

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.33,random_state=0)

In [10]:
from sklearn.preprocessing import StandardScaler

In [11]:
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.fit_transform(xtest)

In [12]:
from sklearn.linear_model import LinearRegression

In [13]:
l_model = LinearRegression()
l_model.fit(x,y)

In [14]:
from sklearn.tree import DecisionTreeRegressor

In [15]:
dt_reg = DecisionTreeRegressor(random_state=0)
dt_reg.fit(x,y)

In [16]:
from sklearn.ensemble import RandomForestRegressor

In [17]:
rf_reg = RandomForestRegressor(n_estimators=10,random_state=0)
rf_reg.fit(x,y.ravel())

In [18]:
from sklearn.svm import SVR

In [19]:
sv_reg = SVR()
sv_reg.fit(x,y.ravel())

In [20]:
from sklearn.preprocessing import PolynomialFeatures

In [21]:
poly_reg = PolynomialFeatures(degree=4)
x_poly = poly_reg.fit_transform(x)
lin_reg = LinearRegression()
lin_reg.fit(x_poly,y)

## Yöntemlerin R2 Ve Ols İle Karşılaştırılması

In [22]:
from sklearn.metrics import r2_score

In [23]:
print("----R2 Skorları----")
print("Polynomial Regression: " + str(r2_score(y,lin_reg.predict(x_poly))))
print("Support Vector Regression: " + str(r2_score(y,sv_reg.predict(x))))
print("Random Forest Regression: " + str(r2_score(y,rf_reg.predict(x))))
print("Decision Tree Regressor: " + str(r2_score(y,dt_reg.predict(x))))
print("Linear Regression: " + str(r2_score(y,l_model.predict(x))))

----R2 Skorları----
Polynomial Regression: 0.8174873280442536
Support Vector Regression: -0.16614926412751907
Random Forest Regression: 0.8284081476481634
Decision Tree Regressor: 0.8343186200100907
Linear Regression: 0.5285811733746242


In [24]:
import statsmodels.api as sm

In [25]:
model = sm.OLS(rf_reg.predict(x),x)
print(model.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.719
Model:                            OLS   Adj. R-squared (uncentered):              0.709
Method:                 Least Squares   F-statistic:                              74.13
Date:                Sun, 23 Feb 2025   Prob (F-statistic):                    1.75e-09
Time:                        15:49:43   Log-Likelihood:                         -315.35
No. Observations:                  30   AIC:                                      632.7
Df Residuals:                      29   BIC:                                      634.1
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [26]:
model = sm.OLS(dt_reg.predict(x),x)
print(model.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.751
Model:                            OLS   Adj. R-squared (uncentered):              0.742
Method:                 Least Squares   F-statistic:                              87.29
Date:                Sun, 23 Feb 2025   Prob (F-statistic):                    3.01e-10
Time:                        15:49:43   Log-Likelihood:                         -312.62
No. Observations:                  30   AIC:                                      627.2
Df Residuals:                      29   BIC:                                      628.6
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [27]:
model = sm.OLS(lin_reg.predict(x_poly),x)
print(model.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.759
Model:                            OLS   Adj. R-squared (uncentered):              0.751
Method:                 Least Squares   F-statistic:                              91.39
Date:                Sun, 23 Feb 2025   Prob (F-statistic):                    1.82e-10
Time:                        15:49:43   Log-Likelihood:                         -311.94
No. Observations:                  30   AIC:                                      625.9
Df Residuals:                      29   BIC:                                      627.3
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [28]:
model = sm.OLS(sv_reg.predict(x),x)
print(model.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.787
Model:                            OLS   Adj. R-squared (uncentered):              0.779
Method:                 Least Squares   F-statistic:                              107.0
Date:                Sun, 23 Feb 2025   Prob (F-statistic):                    3.04e-11
Time:                        15:49:43   Log-Likelihood:                         -277.50
No. Observations:                  30   AIC:                                      557.0
Df Residuals:                      29   BIC:                                      558.4
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [29]:
model = sm.OLS(l_model.predict(x),x)
print(model.fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.942
Model:                            OLS   Adj. R-squared (uncentered):              0.940
Method:                 Least Squares   F-statistic:                              468.1
Date:                Sun, 23 Feb 2025   Prob (F-statistic):                    1.93e-19
Time:                        15:49:43   Log-Likelihood:                         -287.43
No. Observations:                  30   AIC:                                      576.9
Df Residuals:                      29   BIC:                                      578.3
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [30]:
#Ols ve r2_score farklı çıkmasının sebebi birinin y değerleri ve tahmin değerleri ile karşılaştırması.
#Diğerinin ise tahmin değerlerini x değerlerine uyarlamaya çalışmasıdır.