# Auto Insurance in Sweden
SUMMARY

X = number of claims

Y = total payment for all the claims in thousands of Swedish Kronor for geographical zones in Sweden

**We try to implement Simple Linear Regression from scratch in Python and compare it to Sklearn!**

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Reading Data

In [None]:
# Load the insurance dataset from its CSV file into a DataFrame and preview
# the first rows.
df = pd.read_csv('../input/auto-insurance-in-sweden/swedish_insurance.csv')
df.head()

In [None]:
# Replace the terse column names with descriptive ones (X -> Claims,
# Y -> Payment); inplace=True modifies df directly.
df.rename(columns={"X": "Claims", "Y": "Payment"}, inplace = True)
df.head()

# Exploring Data

In [None]:
# Dataset dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# Column names, non-null counts and dtypes.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

In [None]:
# Pairwise correlation between the columns (Pearson by default).
df.corr()

In [None]:
# Visualise the correlation matrix; annot=True prints each coefficient in its
# cell and center=0 anchors the diverging colormap at zero.
sns.heatmap(df.corr(), center = 0, annot =True, cmap='coolwarm');

In [None]:
# Scatter plot of Payment against Claims to inspect the relationship visually.
plt.scatter(df.Claims, df.Payment);
plt.xlabel('Claims');
plt.ylabel('Payment');

# The correlation matrix and the scatter plot both indicate a linear relationship between Claims and Payment

In [None]:
# Split the data into the feature (first column) and the label (second
# column), converting each to a NumPy array.

X = df.iloc[:,0].values
y = df.iloc[:,1].values

# Building Everything from Scratch

# Mean Formula
![image.png](attachment:image.png)

In [None]:
def Mean(data):
    """Return the arithmetic mean of ``data``.

    ``data`` is any sized iterable of numbers (e.g. a list or a 1-D NumPy
    array): its elements are summed and the total is divided by ``len(data)``.
    """
    total = sum(data)
    count = len(data)
    return total / count

# Variance Formula
![image.png](attachment:image.png)

In [None]:
def Variance(data):
    """Return the mean of the squared deviations of ``data`` from its mean.

    Because the squared deviations are averaged with ``Mean``, the sum is
    divided by ``len(data)`` (population variance). ``data`` must support
    element-wise arithmetic, e.g. a NumPy array.
    """
    centre = Mean(data)
    squared_deviations = (data - centre) ** 2
    return Mean(squared_deviations)

# Covariance Formula
![image.png](attachment:image.png)

In [None]:
def Covariance(x, y):
    """Return the covariance of ``x`` and ``y``, normalised by ``len(x) - 1``.

    Both arguments must support element-wise arithmetic (e.g. NumPy arrays)
    and have matching lengths.
    """
    x_dev = x - Mean(x)
    y_dev = y - Mean(y)
    cross_sum = sum(x_dev * y_dev)
    return cross_sum / (len(x) - 1)

# Linear Regression Coefficients
![image.png](attachment:image.png)

In [None]:
def Coeff_Regression(x, y):
    """Fit ``y = b0 + b1 * x`` by ordinary least squares.

    Parameters
    ----------
    x, y : 1-D sequences of numbers with the same length (lists or NumPy
        arrays); they are converted to float arrays internally.

    Returns
    -------
    (b0, b1) : the intercept and the slope of the least-squares line.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    n = len(x)
    mean_x = sum(x) / n
    mean_y = sum(y) / n
    dx = x - mean_x
    dy = y - mean_y

    # The slope is sum(dx * dy) / sum(dx * dx). Numerator and denominator must
    # share the same normalisation: dividing a 1/(n - 1) covariance by a 1/n
    # variance would scale the slope by n / (n - 1). Using the raw sums on
    # both sides avoids that mismatch entirely.
    b1 = sum(dx * dy) / sum(dx * dx)
    b0 = mean_y - b1 * mean_x
    return b0, b1

# Mean Squared Error
![image.png](attachment:image.png)

In [None]:
def MSE(actual, predicted):
    """Return the mean squared error between ``actual`` and ``predicted``.

    Both arguments must support element-wise arithmetic (e.g. NumPy arrays).
    """
    residuals = actual - predicted
    return Mean(residuals ** 2)

In [None]:
# Fit the from-scratch regression and evaluate the fitted line at every X.
b0, b1 = Coeff_Regression(X, y)
y_pred = b1 * X + b0

In [None]:
# Overlay the fitted line from the from-scratch model on the raw data points.
plt.scatter(X,y);
plt.plot(X,y_pred);

In [None]:
# Training error of the from-scratch model.
MSE(y,y_pred)

# Using LinearRegression from Sklearn

In [None]:
# Fit the same model with scikit-learn for comparison.
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
# scikit-learn expects a 2-D feature matrix of shape (n_samples, n_features),
# so the 1-D feature vector is turned into a single column.
X = X.reshape(-1,1)
lin_reg.fit(X,y)

In [None]:
# Predict with the scikit-learn model and overlay its fitted line on the data.
y_pred2 = lin_reg.predict(X)
plt.scatter(X,y);
plt.plot(X,y_pred2);

In [None]:
# Training error of the scikit-learn model, measured with our own MSE.
MSE(y,y_pred2)

# Sklearn results are very close to our results

Next, we remove possible outliers (zones with 100 or more claims) and fit both models again:


In [None]:
# Keep only the rows with fewer than 100 claims, then rebuild the feature and
# label arrays from the filtered frame.
df = df.loc[df['Claims'] < 100]
X = df.iloc[:, 0].values
y = df.iloc[:, 1].values

# Refit the from-scratch model on the filtered data.
b0, b1 = Coeff_Regression(X, y)
y_pred = b1 * X + b0

# Show the filtered points together with the new fitted line.
plt.scatter(X, y);
plt.plot(X, y_pred);

In [None]:
# Training error of the from-scratch model on the filtered data.
MSE(y,y_pred)

In [None]:
# Refit the scikit-learn model on the filtered data for comparison.
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
# scikit-learn expects a 2-D feature matrix, so make X a single column.
X = X.reshape(-1,1)
lin_reg.fit(X,y)

# Predict on the filtered data and overlay the fitted line on the points.
y_pred2 = lin_reg.predict(X)
plt.scatter(X,y);
plt.plot(X,y_pred2);

In [None]:
# Training error of the scikit-learn model on the filtered data.
MSE(y,y_pred2)

# Again, the Sklearn results are very close to our results