In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
cols = ['population', 'profit']
df = pd.read_csv('datasets/profits.data', names=cols)
df.head()

Unnamed: 0,population,profit
0,6.1101,17.592
1,5.5277,9.1302
2,8.5186,13.662
3,7.0032,11.854
4,5.8598,6.8233


In [3]:
print('Dataset shape: ')
df.shape

Dataset shape: 


(97, 2)

In [4]:
print('Dataset info: ')
df.info()

Dataset info: 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97 entries, 0 to 96
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   population  97 non-null     float64
 1   profit      97 non-null     float64
dtypes: float64(2)
memory usage: 1.6 KB


In [5]:
print('Checking for any null values: ')
df.isnull().any()

Checking for any null values: 


population    False
profit        False
dtype: bool

In [6]:
print('Checking for unique values in each column')
df.nunique()

Checking for unique values in each column


population    97
profit        97
dtype: int64

In [7]:
X = df.iloc[:,0:1].values
y = df.iloc[:,  1].values

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [9]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)

LinearRegression()

In [10]:
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [11]:
from sklearn.metrics import mean_absolute_error, r2_score

rmse = np.sqrt(mean_absolute_error(y_train, y_train_pred))
r2   = r2_score(y_train, y_train_pred)
print('Training accuracy: ')
print('RMSE is      {:.2f}'.format(rmse))
print('R2 score is: {:.2f}'.format(r2))
print('----------------------')

Training accuracy: 
RMSE is      1.47
R2 score is: 0.73
----------------------


In [12]:
rmse = np.sqrt(mean_absolute_error(y_test, y_test_pred))
r2   = r2_score(y_test, y_test_pred)
print('Testing accuracy: ')
print('RMSE is      {:.2f}'.format(rmse))
print('R2 score is: {:.2f}'.format(r2))

Testing accuracy: 
RMSE is      1.53
R2 score is: 0.59


In [13]:
accuracy = model.score(X_test, y_test)
print('Model accuracy: {:.2f}'.format(accuracy))

Model accuracy: 0.59
