In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

In [2]:
class MyLinearRegression:
    """Linear regression fitted by batch gradient descent on the mean squared error.

    The model is ``h = X . w`` with no implicit intercept: callers that want a
    bias term must add a column of ones to ``X`` themselves.

    Attributes:
        alpha: Learning rate (step size) of each gradient-descent update.
        iterations: Number of gradient-descent updates performed by ``fit``.
        w: Weight vector of shape ``(n_features,)``; ``None`` until ``fit`` runs.
    """

    def __init__(self, alpha = 0.001, iterations=10000):
        self.alpha = alpha
        self.iterations = iterations
        self.w = None  # populated by fit()

    def fit(self, X, y):
        """Learn the weights from training data and return them.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of shape ``(n_samples,)`` or ``(n_samples, 1)``.

        Returns:
            The fitted weight vector as a NumPy array (also stored in ``self.w``).

        Raises:
            ValueError: If ``X`` is not 2-D, there are no samples, or ``y``
                does not hold exactly one target per row of ``X``.
        """
        # Work on plain float arrays so lists, Series and DataFrames all behave the same
        # (values are matched to rows by position, not by pandas index label).
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got {X.ndim}-D")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")

        # An (n, 1) target would broadcast against the (n,) predictions into an
        # (n, n) matrix and corrupt the gradient, so flatten it first.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()
        if y.ndim != 1 or y.shape[0] != n_samples:
            raise ValueError(
                f"y must hold one target per sample: expected {n_samples} values, "
                f"got shape {y.shape}"
            )

        self.w = np.zeros(n_features)
        for _ in range(self.iterations):
            residual = X.dot(self.w) - y
            # Gradient of the (halved) mean squared error with respect to w.
            gradient = (1 / n_samples) * residual.dot(X)
            self.w = self.w - self.alpha * gradient
        return self.w

    def predict(self, X):
        """Return ``X . w`` for the fitted weights.

        Args:
            X: Object with the same column layout as the training data. Anything
                exposing ``.dot`` (ndarray, DataFrame) is used as is, so pandas
                inputs keep their index; other array-likes are converted first.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.w is None:
            raise RuntimeError("fit() must be called before predict()")
        if not hasattr(X, "dot"):
            X = np.asarray(X, dtype=float)
        return X.dot(self.w)

In [3]:
# Raw listings table; the path is relative to the notebook's working directory.
df1 = pd.read_csv(filepath_or_buffer='../5th_Lesson/data/turboaz.csv')

In [4]:
# currency_rates = CurrencyRates()
def apply_azn(col, usd_rate=1.7):
    """Convert one raw price cell to a numeric amount in AZN.

    The amount is read from the first whitespace-separated token of the text.
    Text containing ``'AZN'`` is taken as is; text containing ``'$'`` is
    multiplied by ``usd_rate``.

    Args:
        col: The cell value. Normally a string; numeric values are passed
            through unchanged so re-running the conversion on an already
            converted column is harmless.
        usd_rate: Fixed USD -> AZN rate used for dollar prices (a constant is
            used instead of a live currency lookup).

    Returns:
        The price in AZN as a float, or ``0.0`` when the cell is missing or
        carries no recognised currency marker.
    """
    if not isinstance(col, str):
        try:
            value = float(col)
        except (TypeError, ValueError):
            return 0.0
        # NaN is the only float that differs from itself; treat it as "no price".
        return 0.0 if value != value else value

    lst = col.split()
    if 'AZN' in col:
        return float(lst[0])
    if '$' in col:
        return float(lst[0]) * usd_rate
    return 0.0


In [5]:
def _parse_mileage(raw):
    """Turn a mileage string into an int by removing 'km' and all whitespace."""
    without_unit = raw.strip().replace('km', '')
    return int(''.join(without_unit.split()))


# Prices become numeric AZN amounts, mileage becomes a plain integer.
df1['Qiymet'] = df1['Qiymet'].apply(apply_azn)
df1['Yurush'] = df1['Yurush'].apply(_parse_mileage)

# Independent copy holding only the two features and the target used for modelling.
df = df1.loc[:, ['Buraxilish ili', 'Yurush', 'Qiymet']].copy()

In [6]:
# Preview the modelling frame: production year, mileage and price converted to AZN.
df

Unnamed: 0,Buraxilish ili,Yurush,Qiymet
0,1999,366000,12500.0
1,2014,102000,53550.0
2,2002,469700,11700.0
3,1998,556680,9700.0
4,2000,300000,12700.0
...,...,...,...
1323,1996,325000,8800.0
1324,1994,280000,6300.0
1325,1998,272000,10900.0
1326,2000,207000,11300.0


In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Z-score every modelling column in place: (value - mean) / std.
# NOTE(review): the statistics are computed on the whole frame, i.e. before the
# train/test split below, so the held-out rows contribute to the scaling.
for column in ('Buraxilish ili', 'Yurush', 'Qiymet'):
    centered = df[column] - df[column].mean()
    df[column] = centered / df[column].std()

feature_names = ['Buraxilish ili', 'Yurush']
X = df[feature_names]
y = df['Qiymet']

# 10% of the rows are held out; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Design matrix for the hand-written model: a leading column of ones carries the intercept.
x0 = np.ones(len(X_train))
inputs = np.column_stack((x0, X_train['Buraxilish ili'], X_train['Yurush']))
w = np.array([0] * 3)
outputs = y_train

# x00 = np.ones(len(X_test))
# X_test = pd.DataFrame([x00, X_test['Buraxilish ili'], X_test['Yurush']]).T

In [9]:
# Fit the hand-written gradient-descent model; the hyperparameters are spelled out
# here and equal the class defaults.
my_model = MyLinearRegression(alpha=0.001, iterations=10000)
final_coef = my_model.fit(X=inputs, y=outputs)

In [10]:
# Manual prediction on the held-out rows. The weights follow the column order of
# `inputs`: index 0 is the intercept, 1 the production year, 2 the mileage.
year_term = X_test['Buraxilish ili'] * final_coef[1]
mileage_term = X_test['Yurush'] * final_coef[2]
X_test['prediction'] = year_term + mileage_term + final_coef[0] * 1

In [11]:
# X_test['prediction'] = model.predict(X_test)

In [12]:
# Held-out features plus the hand-written model's prediction, still in z-score units.
X_test

Unnamed: 0,Buraxilish ili,Yurush,prediction
1201,-0.350117,0.669419,-0.353810
115,0.212783,-0.411624,0.207631
979,0.025150,-0.320428,0.045758
175,-0.913017,0.444646,-0.792634
63,-0.162484,0.715888,-0.204864
...,...,...,...
885,0.025150,0.317942,-0.014186
764,-0.537750,-0.378462,-0.408723
752,0.025150,0.351104,-0.017300
1270,-0.913017,0.666144,-0.813433


In [13]:
# Wrap the target Series in a one-column DataFrame so the 'Qiymet' column can be
# reassigned by name in the denormalization step.
y_test = pd.DataFrame(y_test)

In [14]:
def denormalize_price(col):
    """Undo z-score scaling of a price using the mean and std of ``df1['Qiymet']``.

    Accepts anything that supports ``* float`` and ``+ float`` and returns
    ``col * std + mean``.
    """
    price_std = df1['Qiymet'].std()
    price_mean = df1['Qiymet'].mean()
    return col * price_std + price_mean

def denormalize_year(col):
    """Undo z-score scaling of a production year using ``df1['Buraxilish ili']``.

    Returns ``col * std + mean`` computed from that column of ``df1``.
    """
    year_std = df1['Buraxilish ili'].std()
    year_mean = df1['Buraxilish ili'].mean()
    return col * year_std + year_mean

def denormalize_yurush(col):
    """Undo z-score scaling of a mileage value using ``df1['Yurush']``.

    Returns ``col * std + mean`` computed from that column of ``df1``.
    """
    mileage_std = df1['Yurush'].std()
    mileage_mean = df1['Yurush'].mean()
    return col * mileage_std + mileage_mean

In [15]:
# Convert the true and predicted prices from z-score units back to AZN.
# denormalize_price is plain arithmetic, so it is applied to each whole column at
# once; routing it through .apply would call it per row and recompute the df1
# mean/std every time.
# NOTE: not idempotent - running this cell twice rescales the already rescaled values.
y_test['Qiymet'] = denormalize_price(y_test['Qiymet'])
X_test['prediction'] = denormalize_price(X_test['prediction'])
X_test.columns
# X_test[1] = X_test[1].apply(denormalize_year)
# X_test[2] = X_test[2].apply(denormalize_yurush)

Index(['Buraxilish ili', 'Yurush', 'prediction'], dtype='object')

In [16]:
# Same frame after the prediction column has been converted back to AZN.
X_test

Unnamed: 0,Buraxilish ili,Yurush,prediction
1201,-0.350117,0.669419,10286.799031
115,0.212783,-0.411624,17950.708447
979,0.025150,-0.320428,15741.068754
175,-0.913017,0.444646,4296.673928
63,-0.162484,0.715888,12319.981878
...,...,...,...
885,0.025150,0.317942,14922.810569
764,-0.537750,-0.378462,9537.218864
752,0.025150,0.351104,14880.303650
1270,-0.913017,0.666144,4012.759591


# With scikit-learn's built-in LinearRegression

In [17]:
# Normalized training features, as passed to the scikit-learn model below.
X_train

Unnamed: 0,Buraxilish ili,Yurush
970,-0.913017,0.580337
963,-0.162484,0.715888
796,-0.537750,0.251618
405,2.652017,-1.472811
828,-0.350117,0.002902
...,...,...
1095,-0.162484,-0.323786
1130,-0.162484,0.557157
1294,-0.350117,0.056791
860,-0.537750,-0.494529


In [18]:
# Held-out features; the 'prediction' column comes from the hand-written model
# and must be dropped before the frame is handed to another estimator.
X_test

Unnamed: 0,Buraxilish ili,Yurush,prediction
1201,-0.350117,0.669419,10286.799031
115,0.212783,-0.411624,17950.708447
979,0.025150,-0.320428,15741.068754
175,-0.913017,0.444646,4296.673928
63,-0.162484,0.715888,12319.981878
...,...,...,...
885,0.025150,0.317942,14922.810569
764,-0.537750,-0.378462,9537.218864
752,0.025150,0.351104,14880.303650
1270,-0.913017,0.666144,4012.759591


In [19]:
# NOTE(review): repeats the previous display - nothing modifies X_test in between.
X_test

Unnamed: 0,Buraxilish ili,Yurush,prediction
1201,-0.350117,0.669419,10286.799031
115,0.212783,-0.411624,17950.708447
979,0.025150,-0.320428,15741.068754
175,-0.913017,0.444646,4296.673928
63,-0.162484,0.715888,12319.981878
...,...,...,...
885,0.025150,0.317942,14922.810569
764,-0.537750,-0.378462,9537.218864
752,0.025150,0.351104,14880.303650
1270,-0.913017,0.666144,4012.759591


In [20]:
from sklearn.linear_model import LinearRegression

# Reference fit with scikit-learn on the same split. The hand-made 'prediction'
# column is not a feature, so it is removed before predicting.
model = LinearRegression().fit(X_train, y_train)
model.predict(X_test.drop(columns='prediction'))

array([-3.52159257e-01,  2.06560775e-01,  4.41822406e-02, -7.95113094e-01,
       -2.01985245e-01, -5.45062652e-01, -8.00210603e-03, -1.38776036e-01,
       -3.58102350e-01, -4.25313584e-01,  1.44812624e-02,  2.99860047e+00,
       -9.67573497e-01,  2.50558494e+00, -1.41715999e-01, -1.41770431e-01,
       -9.61465700e-02,  1.99948657e+00, -3.31289193e-01, -2.49705215e-01,
        1.38211060e+00,  8.17783530e-04, -6.90223335e-01, -9.39415977e-02,
       -4.32272327e-02,  1.84677117e-01, -8.39739530e-01, -1.90225392e-01,
       -1.05701450e-01, -5.25217901e-01, -7.70650395e-01, -2.24715528e-01,
       -3.07769488e-01,  1.00585203e+00, -7.28946240e-01, -1.47867137e-01,
       -2.48970224e-01, -3.15119396e-01, -6.16356759e-01, -1.82140493e-01,
        4.49172313e-02, -3.48928973e-01, -9.41848819e-01, -3.98853915e-01,
       -7.15634949e-01, -7.62932992e-01, -4.10619648e-01, -4.00789881e-01,
       -5.35561468e-01, -2.01985245e-01, -4.76907733e-01, -2.31477443e-01,
       -2.92112714e-01, -

In [21]:
# Predict for the normalized features of row 1201. The model was fitted on a
# DataFrame, so the sample is passed with the same column names (a bare nested
# list makes scikit-learn warn about missing feature names).
model.predict(pd.DataFrame([[-0.350117, 0.669419]], columns=['Buraxilish ili', 'Yurush']))



array([-0.35215919])

## Prediction of the built-in library (scikit-learn), converted back to AZN

In [22]:
# Convert scikit-learn's normalized prediction for row 1201 back to AZN with the
# same helper used for the hand-written model.
denormalize_price(-0.35215919)

10309.33402899674

In [23]:
# 1201 is an index label taken from X_test, so look it up by label rather than by
# position; with the default RangeIndex both select the same row.
df1.loc[1201]

Sheher                                                           Bakı
Marka                                                        Mercedes
Model                                                           C 180
Buraxilish ili                                                   1998
Ban novu                                                    Universal
Reng                                                            Yaşıl
Muherrikin hecmi                                                1.8 L
Muherrikin gucu                                              122 a.g.
Yanacaq novu                                                   Benzin
Yurush                                                         360395
Suretler qutusu                                              Mexaniki
Oturucu                                                          Arxa
Yeni                                                             Xeyr
Qiymet                                                        11500.0
Extra Info          