# Linear Regression

---

$y = \alpha + \beta x$

$$
y_i = \beta_0 + \beta_1 x_i + \epsilon_i
$$

$\text{maaş} = \beta_0 + \beta_1 \cdot \text{deneyim} + \epsilon_i$

$y$ = bağımlı değişken

$x$ = bağımsız değişken

$\beta_0$ = bias, constant, intercept, y eksenini kesen nokta, sabit katsayı

$\beta_1$ = coefficient (coeff), eğim (slope)

---

$residual = y - y_{hat} = y - \hat{y}$

$y$ = doğru değer

$\hat{y}$ ($y_{hat}$) = tahmin edilen değer

---
## $RSS = \sum_{i=1}^{n} \big(y^{(i)}-h_\theta(x^{(i)}) \big)^2 $
## $MSE = \frac{1}{n} \sum_{i=1}^{n} \big(y^{(i)}-h_\theta(x^{(i)}) \big)^2 $
## $MSE = \frac{1}{n} \sum_{i=1}^{n} \big(y-\hat{y} \big)^2 $

---


In [None]:
# import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Load the experience (deneyim) / salary (maas) dataset; fields are ';'-separated.
df = pd.read_csv("../input/mldldatasets/linear-regression-dataset.csv",sep = ";")

# Scatter plot of the raw observations.
plt.scatter(df.deneyim,df.maas)
plt.xlabel("deneyim")
plt.ylabel("maas")
plt.show()

# Reshape both columns to (n_samples, 1) column vectors before fitting.
x = df.deneyim.values.reshape(-1,1)
y = df.maas.values.reshape(-1,1)

# Fit the simple linear regression model: maas = b0 + b1 * deneyim.
linear_reg = LinearRegression()
linear_reg.fit(x,y)

# The model's prediction at deneyim = 0 is, by definition, the intercept.
b0 = linear_reg.predict(np.array(0).reshape(-1,1))
print("b0: ",b0)

b0_ = linear_reg.intercept_
print("b0_: ",b0_)   # intercept: the point where the line crosses the y axis

b1 = linear_reg.coef_
print("b1: ",b1)   # slope

# Manual prediction for 11 years of experience: maas = b0 + b1 * deneyim.
# The fitted parameters are used directly instead of hand-copied constants
# (previously 1663 and 1138), so the result always agrees with the model
# and does not go stale when the dataset changes.
maas_yeni = np.ravel(b0_)[0] + np.ravel(b1)[0]*11
print(maas_yeni)

# Same prediction obtained through the model API, for comparison.
print(linear_reg.predict(np.array(11).reshape(-1,1)))

# Visualize the fitted line over 0..15 years of experience.
array = np.arange(16).reshape(-1,1)  # deneyim

y_pred = linear_reg.predict(array)  # maas
plt.scatter(x,y)
plt.plot(array, y_pred,color = "red")

# Prediction for deneyim = 100 (shown as the cell's output in a notebook).
linear_reg.predict(np.array(100).reshape(-1,1))

# Multiple Linear Regression

## $y = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \dots + \beta_n x_n + \epsilon$

$boy = a + b(kilo) + c(yaş) + d(ayakkabıNo) + \epsilon$


In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Read the ';'-separated multiple-regression dataset.
df = pd.read_csv(
    "../input/mldldatasets/multiple-linear-regression-dataset.csv",
    sep=";",
)

# Target: the "maas" column as an (n_samples, 1) column vector.
y = df["maas"].to_numpy().reshape(-1, 1)
# Features: the columns at positions 0 and 2 of the frame.
x = df.iloc[:, [0, 2]].to_numpy()

# %% fitting data
# fit() returns the estimator itself, so construction and fitting are chained.
multiple_linear_regression = LinearRegression().fit(x, y)

print("b0: ", multiple_linear_regression.intercept_)
print("b1,b2: ", multiple_linear_regression.coef_)

# predict
# Two query rows, each holding one value per feature column used above.
multiple_linear_regression.predict(np.array([[10, 35], [5, 35]]))

# Polynomial Linear Regression
## $\displaystyle f(x) = \beta_0 + \beta_1 \, x + \beta_2 \, x^2 + \beta_3 \, x^3 + \dots + \beta_n \, x^n $

In [None]:
# numpy is imported here as well: this cell uses np below, and without the
# import it only works if an earlier cell has already been executed.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Load the car price (araba_fiyat) / top speed (araba_max_hiz) dataset.
df = pd.read_csv("../input/mldldatasets/polynomial-regression.csv",sep = ";")

# Both columns are reshaped to (n_samples, 1) column vectors.
y = df.araba_max_hiz.values.reshape(-1,1)
x = df.araba_fiyat.values.reshape(-1,1)

# Baseline: plain linear fit, kept for comparison with the polynomial fit.
lr = LinearRegression()
lr.fit(x,y)
y_pred = lr.predict(x)

# NOTE(review): the label says 10 million TL while the input is 10000;
# presumably araba_fiyat is expressed in thousands of TL -- confirm.
print("10 milyon tl lik araba hizi tahmini: ",lr.predict(np.array(10000).reshape(-1,1)))


# Expand x into polynomial terms up to degree 2.
polynomial_regression = PolynomialFeatures(degree = 2)

x_polynomial = polynomial_regression.fit_transform(x)


# A linear model fitted on the polynomial features is a polynomial regression.
linear_regression2 = LinearRegression()
linear_regression2.fit(x_polynomial,y)

y_pred2 = linear_regression2.predict(x_polynomial)

# Plot the data together with both fitted curves.
plt.scatter(x,y)
plt.ylabel("araba_max_hiz")
plt.xlabel("araba_fiyat")
plt.plot(x,y_pred,color="red",label ="linear")
plt.plot(x,y_pred2,color= "green",label = "poly")
plt.legend()
plt.show()

# Decision Tree Regression

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# The file has no header row, so columns are addressed by position.
df = pd.read_csv("../input/mldldatasets/decision-tree-regression-dataset.csv",sep = ";",header = None)

# Column 0 is the feature and column 1 the target, both as column vectors.
x = df.iloc[:,0].values.reshape(-1,1)
y = df.iloc[:,1].values.reshape(-1,1)

#%%  decision tree regression
from sklearn.tree import DecisionTreeRegressor
tree_reg = DecisionTreeRegressor()   # random_state = 0 could be passed to fix the seed
tree_reg.fit(x,y)


# Print the single-point prediction; as a bare expression in the middle of
# the cell its result was computed and then discarded.
print(tree_reg.predict(np.array(5.5).reshape(-1,1)))

# Dense grid over the observed feature range for plotting the fitted curve.
# x.min()/x.max() return scalars; the builtin min()/max() applied to a 2-D
# array return 1-element arrays, which are not proper bounds for np.arange.
x_ = np.arange(x.min(),x.max(),0.01).reshape(-1,1)
y_pred = tree_reg.predict(x_)
# %% visualize
plt.scatter(x,y,color="red")
plt.plot(x_,y_pred,color = "green")
plt.xlabel("tribun level")
plt.ylabel("ucret")
plt.show()

# Random Forest Regression

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# The file has no header row, so columns are addressed by position.
df = pd.read_csv("../input/mldldatasets/random-forest-regression-dataset.csv",sep = ";",header = None)

# Column 0 is the feature and column 1 the target, both as column vectors.
x = df.iloc[:,0].values.reshape(-1,1)
y = df.iloc[:,1].values.reshape(-1,1)

# %%
from sklearn.ensemble import RandomForestRegressor
# 100 trees; the fixed random_state makes the fitted forest reproducible.
rf = RandomForestRegressor(n_estimators = 100, random_state = 42)
# The forest expects a 1-D target; passing the (n, 1) column vector makes
# scikit-learn emit a DataConversionWarning, so y is flattened for fit().
rf.fit(x,y.ravel())

print("7.8 seviyesinde fiyatın ne kadar olduğu: ",rf.predict(np.array(7.8).reshape(-1,1)))

# Dense grid over the observed feature range for plotting the fitted curve.
# x.min()/x.max() return scalars; the builtin min()/max() applied to a 2-D
# array return 1-element arrays, which are not proper bounds for np.arange.
x_ = np.arange(x.min(),x.max(),0.01).reshape(-1,1)
y_pred = rf.predict(x_)

# visualize
plt.scatter(x,y,color="red")
plt.plot(x_,y_pred,color="green")
plt.xlabel("tribun level")
plt.ylabel("ucret")
plt.show()

# Evaluation Regression Models

## R Square With Linear Regression

In [None]:
# import library
import pandas as pd
import matplotlib.pyplot as plt

# sklearn pieces used by this cell: the estimator and the R^2 metric.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Load the ';'-separated experience (deneyim) / salary (maas) dataset.
df = pd.read_csv(
    "../input/mldldatasets/linear-regression-dataset.csv",
    sep=";",
)

#%% linear regression

# Feature and target as (n_samples, 1) column vectors.
x = df["deneyim"].to_numpy().reshape(-1, 1)
y = df["maas"].to_numpy().reshape(-1, 1)

# fit() returns the estimator itself, so construction and fitting are chained.
linear_reg = LinearRegression().fit(x, y)

# In-sample predictions (maas) for every observed deneyim value.
y_pred = linear_reg.predict(x)

# Raw observations with the fitted line drawn on top.
plt.scatter(df["deneyim"], df["maas"])
plt.xlabel("deneyim")
plt.ylabel("maas")
plt.plot(x, y_pred, color="red")

#%%
# R^2 of the fitted line against the observed targets.
print("r_square score: ", r2_score(y_true=y, y_pred=y_pred))