## Kütüphaneler

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Veri Ön İşleme

### Veri yükleme

In [2]:
# Read the raw data set from disk and preview its first three rows.
veriler = pd.read_csv(filepath_or_buffer="../veri/veriler.csv")
veriler.head(n=3)

Unnamed: 0,ulke,boy,kilo,yas,cinsiyet
0,tr,130,30,10,e
1,tr,125,36,11,e
2,tr,135,34,10,k


### Veri ön işleme

In [3]:
# Select "boy" with a list of labels so the result stays a one-column DataFrame.
boy = veriler.loc[:, ["boy"]]
boy.head(n=3)

Unnamed: 0,boy
0,130
1,125
2,135


In [4]:
# Two-column DataFrame holding the "boy" and "kilo" columns.
boyKilo = veriler.loc[:, ["boy", "kilo"]]
boyKilo.head(n=3)

Unnamed: 0,boy,kilo
0,130,30
1,125,36
2,135,34


### Encoder

Ülke 

In [5]:
# First column (country codes) as a 2-D (n, 1) NumPy array; displayed transposed to keep the output short.
ulke = veriler.iloc[:, :1].values
ulke.transpose()

array([['tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'tr', 'us', 'us',
        'us', 'us', 'us', 'us', 'fr', 'fr', 'fr', 'fr', 'fr', 'fr', 'fr']],
      dtype=object)

In [6]:
from sklearn import preprocessing

# Map each country code to an integer label.
# The labels are placed in a fresh (n, 1) array instead of being written into
# the array previously obtained through `veriler...values`: that array may
# share memory with the DataFrame, so an in-place write can silently alter
# `veriler`, and it raises on a read-only array when pandas Copy-on-Write is on.
le = preprocessing.LabelEncoder()

# object dtype is kept so the array looks the same as before to later cells.
ulke = le.fit_transform(veriler.iloc[:, 0]).reshape(-1, 1).astype(object)
ulke.T


array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0]],
      dtype=object)

In [7]:
ohe = preprocessing.OneHotEncoder()
# One-hot encode the integer country labels.
# The labels are recomputed from `veriler` with the already-fitted `le` rather
# than read back from `ulke`, so re-running this cell yields the same matrix
# instead of one-hot encoding its own previous output.
ulke_labels = le.transform(veriler.iloc[:, 0]).reshape(-1, 1)
ulke = ohe.fit_transform(ulke_labels).toarray()
ulke

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

Cinsiyet

In [8]:
# Last column (gender codes) as a 2-D (n, 1) NumPy array; displayed transposed to keep the output short.
c = veriler.iloc[:, -1:].values
c.transpose()

array([['e', 'e', 'k', 'k', 'e', 'e', 'e', 'e', 'k', 'e', 'k', 'k', 'k',
        'k', 'k', 'e', 'e', 'e', 'e', 'k', 'k', 'k']], dtype=object)

In [9]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Map each gender code to an integer label.
# A fresh (n, 1) array is built instead of writing into the array previously
# obtained through `veriler...values`: that array may share memory with the
# DataFrame, so an in-place write can silently alter `veriler`, and it raises
# on a read-only array when pandas Copy-on-Write is on.
leCinsiyet = LabelEncoder()
# object dtype is kept so the array looks the same as before to later cells.
c = leCinsiyet.fit_transform(veriler.iloc[:, -1]).reshape(-1, 1).astype(object)
c.T

array([[0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1]],
      dtype=object)

In [10]:
oheCinsiyet = OneHotEncoder()
# One-hot encode the integer gender labels.
# The labels are recomputed from `veriler` with the already-fitted
# `leCinsiyet` rather than read back from `c`, so re-running this cell yields
# the same matrix instead of one-hot encoding its own previous output.
cinsiyet_labels = leCinsiyet.transform(veriler.iloc[:, -1]).reshape(-1, 1)
c = oheCinsiyet.fit_transform(cinsiyet_labels).toarray()
c

array([[1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.]])

### Numpy dizilerinin dataframe dönüştürülmesi

In [11]:
# Wrap the one-hot country matrix in a DataFrame.
# Row labels follow the array length and column names are taken from the
# fitted LabelEncoder (label j <-> le.classes_[j] <-> one-hot column j),
# so neither the row count nor the country list is hard-coded.
df_ulke = pd.DataFrame(data=ulke, index=range(len(ulke)), columns=list(le.classes_))
df_ulke.head(3)

Unnamed: 0,fr,tr,us
0,0.0,1.0,0.0
1,0.0,1.0,0.0
2,0.0,1.0,0.0


In [12]:
# Columns at positions 1-3 of `veriler` as a NumPy array, re-wrapped in a
# DataFrame labelled boy / kilo / yas.
data = veriler.iloc[:, 1:4].values
# The index is sized from the array itself rather than a hard-coded row count.
df_data = pd.DataFrame(data=data, index=range(len(data)), columns=["boy", "kilo", "yas"])
df_data.head(3)

Unnamed: 0,boy,kilo,yas
0,130,30,10
1,125,36,11
2,135,34,10


In [13]:
# Keep only the first one-hot column; the second column is its complement.
cinsiyet = c[:, 0:1]
# The index is sized from the array itself rather than a hard-coded row count.
df_cinsiyet = pd.DataFrame(data=cinsiyet, index=range(len(cinsiyet)), columns=["cinsiyet"])  # 1: male, 0: female
df_cinsiyet.head(3)

Unnamed: 0,cinsiyet
0,1.0
1,1.0
2,0.0


### Dataframe birleştirme işlemi

In [14]:
# Place the country dummies and the numeric measurements side by side.
ulke_data = pd.concat((df_ulke, df_data), axis="columns")
ulke_data.head(n=3)

Unnamed: 0,fr,tr,us,boy,kilo,yas
0,0.0,1.0,0.0,130,30,10
1,0.0,1.0,0.0,125,36,11
2,0.0,1.0,0.0,135,34,10


In [15]:
# Append the gender indicator as the last column of the combined frame.
ulke_data_cinsiyet = pd.concat((ulke_data, df_cinsiyet), axis="columns")
ulke_data_cinsiyet.head(n=3)

Unnamed: 0,fr,tr,us,boy,kilo,yas,cinsiyet
0,0.0,1.0,0.0,130,30,10,1.0
1,0.0,1.0,0.0,125,36,11,1.0
2,0.0,1.0,0.0,135,34,10,0.0


### Verilerin eğitim ve test için bölünmesi

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing.
# Features: country dummies + boy/kilo/yas; target: the gender indicator.
x_train, x_test, y_train, y_test = train_test_split(
    ulke_data,
    cinsiyet,
    test_size=0.33,
    random_state=0,
)

### Doğrusal regresyon modelinin eğitilmesi (cinsiyet tahmini)

In [17]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split (fit() returns the estimator
# itself), then predict the gender indicator for the held-out rows.
regressor = LinearRegression().fit(x_train, y_train)
y_pred = regressor.predict(x_test)

In [18]:
# Predictions shown as a single row for easy comparison with y_test.
y_pred.transpose()

array([[ 0.98720204, -0.12036863,  0.05009703,  0.07137418,  0.72473935,
         0.64615044, -0.03567453,  0.32612171]])

In [19]:
# True gender indicators of the held-out rows, shown as a single row.
y_test.transpose()

array([[0., 0., 0., 0., 1., 0., 0., 0.]])

y = boy

In [21]:
# New target: the column at position 3 ("boy") as an (n, 1) NumPy array.
# Note that this rebinds the name `boy` defined in an earlier cell.
boy = ulke_data_cinsiyet.iloc[:, [3]].values


In [22]:
# Build the feature frame by dropping the column at position 3 ("boy"):
# take everything to its left and everything to its right, then rejoin.
sol = ulke_data_cinsiyet.iloc[:, 0:3]
sag = ulke_data_cinsiyet.iloc[:, 4:]
data = pd.concat((sol, sag), axis="columns")
data.head(n=3)

Unnamed: 0,fr,tr,us,kilo,yas,cinsiyet
0,0.0,1.0,0.0,30,10,1.0
1,0.0,1.0,0.0,36,11,1.0
2,0.0,1.0,0.0,34,10,0.0


In [23]:
# Same 33% hold-out and seed as before, now with "boy" as the target.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    boy,
    test_size=0.33,
    random_state=0,
)


In [24]:
# Fit a second linear regression (fit() returns the estimator itself) that
# predicts "boy" from the remaining columns, then score the held-out rows.
regressor1 = LinearRegression().fit(x_train, y_train)
y_pred1 = regressor1.predict(x_test)

In [25]:
# Predicted "boy" values shown as a single row for easy comparison with y_test.
y_pred1.transpose()

array([[182.26638686, 152.87161474, 162.79386375, 158.30668577,
        130.82888952, 173.96138408, 150.12782663, 157.26898922]])

In [26]:
# Actual "boy" values of the held-out rows, shown as a single row.
y_test.transpose()

array([[164, 165, 167, 162, 125, 166, 155, 159]], dtype=int64)