## Kütüphaneler

In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Veri Ön İşleme

### Veri yükleme

In [31]:
# Read the raw dataset from the relative data folder and preview the first rows.
veriler = pd.read_csv(filepath_or_buffer="../veri/veriler.csv")
veriler.head(n=3)

Unnamed: 0,ulke,boy,kilo,yas,cinsiyet
0,tr,130,30,10,e
1,tr,125,36,11,e
2,tr,135,34,10,k


### Veri ön işleme

In [32]:
# Keep the height column as a one-column DataFrame (list selector, not a Series).
boy = veriler.loc[:, ["boy"]]
boy.head(n=3)

Unnamed: 0,boy
0,130
1,125
2,135


In [33]:
# Height and weight together as a two-column DataFrame.
boyKilo = veriler.loc[:, ["boy", "kilo"]]
boyKilo.head(n=3)

Unnamed: 0,boy,kilo
0,130,30
1,125,36
2,135,34


### Encoder

Ülke 

In [34]:
# Country column (position 0) as a single-column object array.
# `.values` may hand back a view of the DataFrame's underlying data, and a
# later cell assigns label codes into this array in place; taking a copy here
# guarantees that `veriler` itself is never modified by that assignment.
ulke = veriler.iloc[:, 0:1].values.copy()
ulke.T

array([['tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'us', 'us',
        'us', 'us', 'us', 'us', 'fr', 'fr', 'fr', 'fr', 'fr', 'fr', 'fr']],
      dtype=object)

In [35]:
from sklearn import preprocessing

# Integer-encode the country labels and store the codes in the only column of `ulke`.
le = preprocessing.LabelEncoder()
le.fit(veriler.iloc[:, 0])
ulke[:, 0] = le.transform(veriler.iloc[:, 0])
ulke.T


array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0]],
      dtype=object)

In [36]:
# One-hot encode the integer country codes; the encoder output is converted
# to a dense array and rebinds `ulke`.
ohe = preprocessing.OneHotEncoder()
ohe.fit(ulke)
ulke = ohe.transform(ulke).toarray()
ulke

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

Cinsiyet

In [37]:
# Gender column (last position) as a single-column object array.
# `.values` may hand back a view of the DataFrame's underlying data, and a
# later cell assigns label codes into this array in place; taking a copy here
# guarantees that `veriler` itself is never modified by that assignment.
c = veriler.iloc[:, -1:].values.copy()
c.T

array([['e', 'e', 'k', 'k', 'e', 'e', 'e', 'e', 'k', 'e', 'k', 'k', 'k',
        'k', 'k', 'e', 'e', 'e', 'e', 'k', 'k', 'k']], dtype=object)

In [38]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Integer-encode the gender labels and store the codes in the only column of `c`.
leCinsiyet = LabelEncoder()
leCinsiyet.fit(veriler.iloc[:, -1])
c[:, -1] = leCinsiyet.transform(veriler.iloc[:, -1])
c.T

array([[0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1]],
      dtype=object)

In [39]:
# One-hot encode the integer gender codes; the encoder output is converted
# to a dense array and rebinds `c`.
oheCinsiyet = OneHotEncoder()
oheCinsiyet.fit(c)
c = oheCinsiyet.transform(c).toarray()
c

array([[1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.]])

### Numpy dizilerinin dataframe dönüştürülmesi

In [40]:
# Wrap the one-hot country matrix in a DataFrame.
# Row labels follow `veriler` and column labels come from the fitted label
# encoder (`le.classes_` is ordered by integer code, which is also the one-hot
# column order), so neither the row count nor the category order is hard-coded.
df_ulke = pd.DataFrame(data=ulke, index=veriler.index, columns=le.classes_)
df_ulke.head(3)

Unnamed: 0,fr,tr,us
0,0.0,1.0,0.0
1,0.0,1.0,0.0
2,0.0,1.0,0.0


In [41]:
# Numeric columns at positions 1..3 (boy, kilo, yas) as their own DataFrame.
# The row labels are taken from `veriler` instead of a hard-coded range(22),
# so the cell keeps working if the CSV gains or loses rows.
data = veriler.iloc[:, 1:4].values
df_data = pd.DataFrame(data=data, index=veriler.index, columns=["boy", "kilo", "yas"])
df_data.head(3)

Unnamed: 0,boy,kilo,yas
0,130,30,10
1,125,36,11
2,135,34,10


In [42]:
# First one-hot column of `c` is used as the single gender indicator.
# The row labels are taken from `veriler` instead of a hard-coded range(22).
cinsiyet = c[:, 0:1]
df_cinsiyet = pd.DataFrame(data=cinsiyet, index=veriler.index, columns=["cinsiyet"])  # 1: male, 0: female
df_cinsiyet.head(3)

Unnamed: 0,cinsiyet
0,1.0
1,1.0
2,0.0


### Dataframe birleştirme işlemi

In [43]:
# Place the country dummies and the numeric columns side by side.
ulke_data = pd.concat(objs=[df_ulke, df_data], axis="columns")
ulke_data.head(n=3)

Unnamed: 0,fr,tr,us,boy,kilo,yas
0,0.0,1.0,0.0,130,30,10
1,0.0,1.0,0.0,125,36,11
2,0.0,1.0,0.0,135,34,10


In [44]:
# Append the gender indicator as the last column of the combined frame.
ulke_data_cinsiyet = pd.concat(objs=[ulke_data, df_cinsiyet], axis="columns")
ulke_data_cinsiyet.head(n=3)

Unnamed: 0,fr,tr,us,boy,kilo,yas,cinsiyet
0,0.0,1.0,0.0,130,30,10,1.0
1,0.0,1.0,0.0,125,36,11,1.0
2,0.0,1.0,0.0,135,34,10,0.0


### Verilerin eğitim ve test için bölünmesi

In [45]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows with a fixed seed.
# Features: country dummies + boy/kilo/yas; target: the gender indicator.
(x_train, x_test, y_train, y_test) = train_test_split(
    ulke_data,
    cinsiyet,
    test_size=0.33,
    random_state=0,
)

### Doğrusal regresyon ile cinsiyet tahmini

In [46]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split (fit returns the estimator),
# then predict the held-out rows.
regressor = LinearRegression().fit(x_train, y_train)
y_pred = regressor.predict(x_test)

In [47]:
# Show the predictions as a single row for easy comparison with the targets.
y_pred.transpose()

array([[ 0.98720204, -0.12036863,  0.05009703,  0.07137418,  0.72473935,
         0.64615044, -0.03567453,  0.32612171]])

In [48]:
# Show the held-out gender targets as a single row.
y_test.transpose()

array([[0., 0., 0., 0., 1., 0., 0., 0.]])

Hedef değişken: y = boy (boy, diğer sütunlardan tahmin edilir)

In [49]:
# Target for the second model: the column at position 3 (boy) as a
# single-column array. This rebinds `boy`, which earlier held a DataFrame.
boy = ulke_data_cinsiyet.iloc[:, [3]].values


In [50]:
# Feature frame = everything except the column at position 3 (boy):
# the three columns to its left plus all columns to its right.
sol, sag = ulke_data_cinsiyet.iloc[:, :3], ulke_data_cinsiyet.iloc[:, 4:]

data = pd.concat(objs=[sol, sag], axis="columns")
data.head(n=3)

Unnamed: 0,fr,tr,us,kilo,yas,cinsiyet
0,0.0,1.0,0.0,30,10,1.0
1,0.0,1.0,0.0,36,11,1.0
2,0.0,1.0,0.0,34,10,0.0


In [51]:
# Same 33% hold-out and seed as before, now with height as the target.
(x_train, x_test, y_train, y_test) = train_test_split(
    data,
    boy,
    test_size=0.33,
    random_state=0,
)


In [52]:
# Fit a linear regression for height on the training split (fit returns the
# estimator), then predict the held-out rows.
regressor1 = LinearRegression().fit(x_train, y_train)
y_pred1 = regressor1.predict(x_test)

In [53]:
# Show the predicted heights as a single row.
y_pred1.transpose()

array([[182.26638686, 152.87161474, 162.79386375, 158.30668577,
        130.82888952, 173.96138408, 150.12782663, 157.26898922]])

In [54]:
# Show the held-out heights as a single row.
y_test.transpose()

array([[164, 165, 167, 162, 125, 166, 155, 159]], dtype=int64)

### Backward Elimination

In [55]:
import statsmodels.api as sm

# Prepend an intercept column of ones to the feature matrix.
# The row count is taken from `data` rather than hard-coded as 22, and the
# int cast is dropped: appending to float features yields a float array anyway.
# NOTE(review): the ones column equals fr + tr + us, so X is rank-deficient
# (dummy-variable trap). The OLS cells below fit on `data` directly and do not
# use X; sm.OLS adds no intercept on its own, so there the three country
# dummies act as per-country intercepts.
X = np.append(arr=np.ones((len(data), 1)), values=data, axis=1)
X

array([[  1.,   0.,   1.,   0.,  30.,  10.,   1.],
       [  1.,   0.,   1.,   0.,  36.,  11.,   1.],
       [  1.,   0.,   1.,   0.,  34.,  10.,   0.],
       [  1.,   0.,   1.,   0.,  30.,   9.,   0.],
       [  1.,   0.,   1.,   0.,  38.,  12.,   1.],
       [  1.,   0.,   1.,   0.,  90.,  30.,   1.],
       [  1.,   0.,   1.,   0.,  80.,  25.,   1.],
       [  1.,   0.,   1.,   0.,  90.,  35.,   1.],
       [  1.,   0.,   1.,   0.,  60.,  22.,   0.],
       [  1.,   0.,   0.,   1., 105.,  33.,   1.],
       [  1.,   0.,   0.,   1.,  55.,  27.,   0.],
       [  1.,   0.,   0.,   1.,  50.,  44.,   0.],
       [  1.,   0.,   0.,   1.,  58.,  39.,   0.],
       [  1.,   0.,   0.,   1.,  59.,  41.,   0.],
       [  1.,   0.,   0.,   1.,  62.,  55.,   0.],
       [  1.,   1.,   0.,   0.,  70.,  47.,   1.],
       [  1.,   1.,   0.,   0.,  90.,  23.,   1.],
       [  1.,   1.,   0.,   0.,  80.,  27.,   1.],
       [  1.,   1.,   0.,   0.,  88.,  28.,   1.],
       [  1.,   1.,   0.,   0.,

In [56]:
# Start backward elimination with all six feature columns (positions 0..5).
X_l = data.iloc[:, 0:6].values
X_l

array([[  0.,   1.,   0.,  30.,  10.,   1.],
       [  0.,   1.,   0.,  36.,  11.,   1.],
       [  0.,   1.,   0.,  34.,  10.,   0.],
       [  0.,   1.,   0.,  30.,   9.,   0.],
       [  0.,   1.,   0.,  38.,  12.,   1.],
       [  0.,   1.,   0.,  90.,  30.,   1.],
       [  0.,   1.,   0.,  80.,  25.,   1.],
       [  0.,   1.,   0.,  90.,  35.,   1.],
       [  0.,   1.,   0.,  60.,  22.,   0.],
       [  0.,   0.,   1., 105.,  33.,   1.],
       [  0.,   0.,   1.,  55.,  27.,   0.],
       [  0.,   0.,   1.,  50.,  44.,   0.],
       [  0.,   0.,   1.,  58.,  39.,   0.],
       [  0.,   0.,   1.,  59.,  41.,   0.],
       [  0.,   0.,   1.,  62.,  55.,   0.],
       [  1.,   0.,   0.,  70.,  47.,   1.],
       [  1.,   0.,   0.,  90.,  23.,   1.],
       [  1.,   0.,   0.,  80.,  27.,   1.],
       [  1.,   0.,   0.,  88.,  28.,   1.],
       [  1.,   0.,   0.,  40.,  29.,   0.],
       [  1.,   0.,   0.,  66.,  32.,   0.],
       [  1.,   0.,   0.,  56.,  42.,   0.]])

In [57]:
# Regress height on the current feature set and show the OLS summary table.
model = sm.OLS(endog=boy, exog=X_l).fit()
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.885
Model:,OLS,Adj. R-squared:,0.849
Method:,Least Squares,F-statistic:,24.69
Date:,"Thu, 28 Jul 2022",Prob (F-statistic):,5.41e-07
Time:,13:31:18,Log-Likelihood:,-73.95
No. Observations:,22,AIC:,159.9
Df Residuals:,16,BIC:,166.4
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,114.0688,8.145,14.005,0.000,96.802,131.335
x2,108.3030,5.736,18.880,0.000,96.143,120.463
x3,104.4714,9.195,11.361,0.000,84.978,123.964
x4,0.9211,0.119,7.737,0.000,0.669,1.174
x5,0.0814,0.221,0.369,0.717,-0.386,0.549
x6,-10.5980,5.052,-2.098,0.052,-21.308,0.112

0,1,2,3
Omnibus:,1.031,Durbin-Watson:,2.759
Prob(Omnibus):,0.597,Jarque-Bera (JB):,0.624
Skew:,0.407,Prob(JB):,0.732
Kurtosis:,2.863,Cond. No.,524.0


In [58]:
# Drop the column at position 4 (yas), keeping positions 0-3 and 5, then refit.
X_l = data.iloc[:, [*range(4), 5]].values
model1 = sm.OLS(endog=boy, exog=X_l).fit()
model1.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.884
Model:,OLS,Adj. R-squared:,0.857
Method:,Least Squares,F-statistic:,32.47
Date:,"Thu, 28 Jul 2022",Prob (F-statistic):,9.32e-08
Time:,13:31:18,Log-Likelihood:,-74.043
No. Observations:,22,AIC:,158.1
Df Residuals:,17,BIC:,163.5
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,115.6583,6.734,17.175,0.000,101.451,129.866
x2,109.0786,5.200,20.978,0.000,98.108,120.049
x3,106.5445,7.090,15.026,0.000,91.585,121.504
x4,0.9405,0.104,9.029,0.000,0.721,1.160
x5,-11.1093,4.733,-2.347,0.031,-21.096,-1.123

0,1,2,3
Omnibus:,0.871,Durbin-Watson:,2.719
Prob(Omnibus):,0.647,Jarque-Bera (JB):,0.459
Skew:,0.351,Prob(JB):,0.795
Kurtosis:,2.91,Cond. No.,397.0


In [59]:
# Keep only positions 0..3 (the three country dummies and kilo), then refit.
# NOTE(review): the column removed here (cinsiyet, x5 in the previous summary)
# showed P>|t| = 0.031 there — confirm the intended significance threshold.
X_l = data.iloc[:, 0:4].values
model2 = sm.OLS(endog=boy, exog=X_l).fit()
model2.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.847
Model:,OLS,Adj. R-squared:,0.821
Method:,Least Squares,F-statistic:,33.16
Date:,"Thu, 28 Jul 2022",Prob (F-statistic):,1.52e-07
Time:,13:32:30,Log-Likelihood:,-77.131
No. Observations:,22,AIC:,162.3
Df Residuals:,18,BIC:,166.6
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,119.8136,7.265,16.491,0.000,104.550,135.077
x2,109.8084,5.804,18.919,0.000,97.615,122.002
x3,114.4212,6.984,16.382,0.000,99.747,129.095
x4,0.7904,0.092,8.595,0.000,0.597,0.984

0,1,2,3
Omnibus:,2.925,Durbin-Watson:,2.855
Prob(Omnibus):,0.232,Jarque-Bera (JB):,1.499
Skew:,0.605,Prob(JB):,0.473
Kurtosis:,3.416,Cond. No.,369.0
