### Importing Libraries

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
df = pd.read_csv(r"C:\Users\HP\Downloads\dataframe_.csv")

In [3]:
df.head()

Unnamed: 0,input,output
0,-122.740667,-130.572085
1,-121.531419,-129.938929
2,-134.917019,-130.141832
3,-120.605951,-125.760932
4,-129.894781,-112.785214


In [4]:
df.isnull().sum()

input     1
output    1
dtype: int64

In [5]:
#Removing null values
df.dropna(inplace= True)

### Detecting outliers,skewness.

In [6]:
# Check for outliers using the Z-score method
z = np.abs(stats.zscore(df))
print("Number of rows with outliers:", (z > 3).sum(axis=1).sum())

Number of rows with outliers: 0


In [7]:
df.skew(axis = 1, skipna = True)

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
1692   NaN
1693   NaN
1694   NaN
1695   NaN
1696   NaN
Length: 1696, dtype: float64

In [8]:
print(df['input'].skew())

-0.20318515325323347


- A skewness value between -0.5 and 0.5 indicates that the distribution is fairly symmetrical.


In [9]:
from sklearn.model_selection import train_test_split

In [10]:
x = df[['input']]
y = df[['output']]

In [11]:
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size = 0.8,random_state = 100)

## Data Preprocessing on Training Data

In [12]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

x_train_transformed = scaler.fit_transform(x_train)

print(x_train_transformed)

[[ 1.10691483]
 [ 1.13511019]
 [-0.81946317]
 ...
 [-1.09900795]
 [-1.13803776]
 [-1.44104103]]


## Data Preprocessing on Training Data

In [13]:
x_test_transformed = scaler.fit_transform(x_test)

## Model Training

### Applying LinearRegression

In [14]:
from sklearn.linear_model import LinearRegression

linear_model = LinearRegression()

linear_model.fit(x_train_transformed,y_train)

In [15]:
y_test_predict = linear_model.predict(x_test_transformed)

In [16]:
from sklearn import metrics

print('linear_Mean_Absolute_Error : ', metrics.mean_absolute_error(y_test,y_test_predict))

print('linear_Mean_Squared_Error : ', metrics.mean_squared_error(y_test,y_test_predict))

print('linear_Root_Mean_Squared_Error : ', np.sqrt(metrics.mean_squared_error(y_test,y_test_predict)))

linear_Mean_Absolute_Error :  39.15060840465052
linear_Mean_Squared_Error :  2734.16104459923
linear_Root_Mean_Squared_Error :  52.28920581342989


In [17]:
y_train_predict = linear_model.predict(x_train_transformed)

### Evaluation Metrics

In [18]:
from sklearn import metrics

print('Mean Absolute Error: ', metrics.mean_absolute_error(y_train,y_train_predict))

print('Mean Squared Error: ', metrics.mean_squared_error(y_train,y_train_predict))

print('Root Mean Squared Error: ', np.sqrt(metrics.mean_squared_error(y_train,y_train_predict)))

Mean Absolute Error:  43.34589761226847
Mean Squared Error:  3307.049349030765
Root Mean Squared Error:  57.50695044106203


- by comparring y_test_predict and y_train_predict, we can say that it is under fit model

In [19]:
metrics.median_absolute_error(y_test, y_test_predict)

31.102295802861295

In [20]:
print(metrics.r2_score(y_test, y_test_predict))

0.33148901701968114


In [21]:
r2 = metrics.r2_score(y_test, y_test_predict)
n = len(y_test)
k = x_test.shape[1]

r2_adj = 1 - (1-r2)*(n-1)/(n-k-1)

print(r2_adj)

0.3295111738747689


# Feature Engineering 

## Hyperparameter Tunning using Lasso,Ridge,ElasticNet

In [22]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet

### Lasso

In [24]:
reg_lasso = Lasso(alpha=1.0)
reg_lasso.fit(x_train, y_train)

In [25]:
y1_pred_lasso = reg_lasso.predict(x_test)

In [27]:
print('lasso_Mean_Absolute_Error: ', metrics.mean_absolute_error(y_test,y1_pred_lasso))

print('lasso_Mean_Squared_Error: ', metrics.mean_squared_error(y_test,y1_pred_lasso))

print('lasso_Root_Mean_Squared_Error: ', np.sqrt(metrics.mean_squared_error(y_test,y1_pred_lasso)))

lasso_Mean_Absolute_Error:  39.130407812374045
lasso_Mean_Squared_Error:  2731.2009330841656
lasso_Root_Mean_Squared_Error:  52.260892961029334


### Ridge

In [28]:
reg_ridge = Ridge(alpha=1.0)
reg_ridge.fit(x_train, y_train)

In [29]:
y1_pred_ridge = reg_ridge.predict(x_test)

In [31]:
print('ridge_Mean_Absolute_Error: ', metrics.mean_absolute_error(y_test,y1_pred_ridge))

print('ridge_Mean_Squared_Error: ', metrics.mean_squared_error(y_test,y1_pred_ridge))

print('ridge_Root_Mean_Squared_Error: ', np.sqrt(metrics.mean_squared_error(y_test,y1_pred_ridge)))

ridge_Mean_Absolute_Error:  39.12986972750126
ridge_Mean_Squared_Error:  2731.10043853104
ridge_Root_Mean_Squared_Error:  52.25993148226507


### ElasticNet

In [32]:
reg_elastic = ElasticNet(alpha=1.0)
reg_elastic.fit(x_train, y_train)

In [33]:
y1_pred_elastic = reg_elastic.predict(x_test)

In [34]:
print('elastic_Mean_Absolute_Error: ', metrics.mean_absolute_error(y_test,y1_pred_elastic))

print('elastic_Mean_Squared_Error: ', metrics.mean_squared_error(y_test,y1_pred_elastic))

print('elastic_Root_Mean_Squared_Error: ', np.sqrt(metrics.mean_squared_error(y_test,y1_pred_elastic)))

elastic_Mean_Absolute_Error:  39.130250621025645
elastic_Mean_Squared_Error:  2731.1715414413534
elastic_Root_Mean_Squared_Error:  52.260611759157136


### 2. Add new features to the data, such as the square of the input

In [35]:
df["input_squared"] = df["input"] ** 2

In [36]:
df.head()

Unnamed: 0,input,output,input_squared
0,-122.740667,-130.572085,15065.271434
1,-121.531419,-129.938929,14769.885901
2,-134.917019,-130.141832,18202.602016
3,-120.605951,-125.760932,14545.795441
4,-129.894781,-112.785214,16872.654053


In [37]:
x2 = df[['input','input_squared']]
y2 = df['output']

In [38]:
x2_train,x2_test,y2_train,y2_test = train_test_split(x2,y2,train_size = 0.8,random_state = 100)

In [39]:
x2_train_transformed = scaler.fit_transform(x2_train)

print(x2_train_transformed)

[[ 1.10691483  0.28739829]
 [ 1.13511019  0.35859468]
 [-0.81946317 -0.39248554]
 ...
 [-1.09900795  0.19416012]
 [-1.13803776  0.28988529]
 [-1.44104103  1.14821119]]


In [40]:
x2_test_transformed = scaler.fit_transform(x2_test)

In [41]:
linear_model.fit(x2_train_transformed,y2_train)

In [42]:
y2_test_predict = linear_model.predict(x2_test_transformed)

In [43]:
print('squ_Mean_Absolute_Error: ', metrics.mean_absolute_error(y2_test,y2_test_predict))

print('squ_Mean_Squared_Error: ', metrics.mean_squared_error(y2_test,y2_test_predict))

print('squ_Root_Mean_Squared_Error: ', np.sqrt(metrics.mean_squared_error(y2_test,y2_test_predict)))

squ_Mean_Absolute_Error:  30.429102774707477
squ_Mean_Squared_Error:  1657.6656696834523
squ_Root_Mean_Squared_Error:  40.71444055471538


## Hyperparameter Tunning using Lasso,Ridge,ElasticNet for squared inputs

### Laso

In [44]:
x2_reg_lasso = Lasso(alpha=1.0)
x2_reg_lasso.fit(x2_train, y2_train)

In [45]:
y2_pred_lasso = x2_reg_lasso.predict(x2_test)

In [46]:
print('sqr_lasso_Mean_Absolute_Error: ', metrics.mean_absolute_error(y2_test,y2_pred_lasso))

print('sqr_lasso_Mean_Squared_Error: ', metrics.mean_squared_error(y2_test,y2_pred_lasso))

print('sqr_lasso_Root_Mean_Squared_Error: ', np.sqrt(metrics.mean_squared_error(y2_test,y2_pred_lasso)))

sqr_lasso_Mean_Absolute_Error:  30.53760714577344
sqr_lasso_Mean_Squared_Error:  1663.6820422206936
sqr_lasso_Root_Mean_Squared_Error:  40.78825863187461


In [47]:
data = {
    'Linear Regression' : ['Normal', 'Polynomial', 'Lasso', 'Ridge', 'Elastic Net','squared_data_linear_model'],
    'Mean Absolute Error' : [43.34589761226847,50.033170380372965, 49.52513727674608, 49.524617416433195,  49.52488350010858, 30.429102774707477],
    'Mean_Squared_Error' : [3307.049349030765,4287.343532087805,4268.448992272407,4268.372774181582,4268.41173127329,1657.6656696834523],
    'Root_Mean_Squared_Error' : [57.50695044106203,65.47780946311357,65.3333681381299,65.33278483412124,65.33308297695196,40.71444055471538]}
Data = pd.DataFrame(data)

In [48]:
Data

Unnamed: 0,Linear Regression,Mean Absolute Error,Mean_Squared_Error,Root_Mean_Squared_Error
0,Normal,43.345898,3307.049349,57.50695
1,Polynomial,50.03317,4287.343532,65.477809
2,Lasso,49.525137,4268.448992,65.333368
3,Ridge,49.524617,4268.372774,65.332785
4,Elastic Net,49.524884,4268.411731,65.333083
5,squared_data_linear_model,30.429103,1657.66567,40.714441


- By comparring all the metric values, by squaring the feature we can get the 