<a href="https://colab.research.google.com/github/nauanova02/assignment/blob/master/regression_IRIS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regression on the Iris dataset

### Importing libraries

In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sn
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

### Loading the dataset and preparing it

In [None]:
# Fetch the Iris data bundled with scikit-learn.
iris = load_iris()

# Keep the measurements and the integer species codes in two separate frames:
# the measurement columns are named after the dataset's feature names, and the
# codes live in a single 'species' column.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
target_df = pd.DataFrame(iris.target, columns=['species'])

In [None]:
def converter(specie):
    """Translate an Iris species code into its species name.

    Parameters
    ----------
    specie : int
        Species code; one of 0, 1 or 2.

    Returns
    -------
    str
        'setosa' for 0, 'versicolor' for 1, 'virginica' for 2.

    Raises
    ------
    ValueError
        If `specie` is not one of the three known codes. A catch-all
        `else` would silently label any unexpected value as 'virginica',
        so unknown codes are rejected explicitly instead.
    """
    if specie == 0:
        return 'setosa'
    if specie == 1:
        return 'versicolor'
    if specie == 2:
        return 'virginica'
    raise ValueError(f"unknown iris species code: {specie!r}")
# Attach a human-readable species name next to each integer species code.
species_names = target_df['species'].map(converter)
target_df['species_name'] = species_names

# Place the measurement columns and the two target columns side by side.
final_df = pd.concat(objs=[iris_df, target_df], axis='columns')

### Exploring the DataFrame

In [None]:
# Print the column names, dtypes and non-null counts of the combined frame
# (a quick check for missing values and unexpected types).
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   species            150 non-null    int64  
 5   species_name       150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [None]:
# Summary statistics for the numeric columns.
# NOTE: the integer `species` column is numeric and so is summarised too, but it
# is a class code - its mean/std are not meaningful.
final_df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


In [None]:
# Preview the first 10 rows to confirm that measurements, species code and
# species name line up as expected.
final_df.head(10)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species,species_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa
5,5.4,3.9,1.7,0.4,0,setosa
6,4.6,3.4,1.4,0.3,0,setosa
7,5.0,3.4,1.5,0.2,0,setosa
8,4.4,2.9,1.4,0.2,0,setosa
9,4.9,3.1,1.5,0.1,0,setosa


### Linear Regression Model

In [None]:
# Variables
# Variables
# Target: sepal length. Predictors: the remaining measurements plus the species.
# `species` is a nominal class code (0/1/2), so it is one-hot encoded instead of
# being passed to the linear model as a number - a numeric code would force the
# setosa->versicolor and versicolor->virginica effects to be equal.
# `drop_first=True` drops one dummy level so the dummies are not perfectly
# collinear with the model's intercept; `dtype=float` keeps X fully numeric.
X = pd.get_dummies(
    final_df.drop(labels=['sepal length (cm)', 'species_name'], axis=1),
    columns=['species'],
    drop_first=True,
    dtype=float,
)
y = final_df['sepal length (cm)']

# Splitting the Dataset
# 35% of the rows are held out for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.35, random_state=101)

In [None]:
# Instantiating LinearRegression() Model and fitting the model
# Instantiating LinearRegression() Model and fitting the model
model = LinearRegression().fit(X_train, y_train)

# Making Predictions
pred = model.predict(X_test)

# Evaluating Model's Performance
# The MSE is computed once and reused for the RMSE.
mae = mean_absolute_error(y_test, pred)
mse = mean_squared_error(y_test, pred)
print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
# The value printed here is sqrt(MSE), i.e. the *Root Mean Squared* Error
# (label corrected from "Mean Root Squared Error").
print('Root Mean Squared Error:', np.sqrt(mse))

Mean Absolute Error: 0.2547540025709068
Mean Squared Error: 0.09813154872776475
Mean Root Squared Error: 0.3132595548866223


### Logistic Regression

In [None]:
# Variables
# Variables
# Features are every column except the two target columns; the label is the
# integer species code.
target_columns = ['species', 'species_name']
A = final_df.drop(columns=target_columns)
b = final_df['species']

# Splitting the Dataset
# Same 65/35 split and seed as used for the linear-regression model.
A_train, A_test, b_train, b_test = tts(
    A,
    b,
    test_size=0.35,
    random_state=101,
)

In [None]:
# Fit a logistic-regression classifier on the training split.
# `multi_class` is intentionally not passed: 'auto' is already the default, and
# the parameter is deprecated in recent scikit-learn releases.
model_log = LogisticRegression(random_state=0, solver="lbfgs").fit(A_train, b_train)

# Making Predictions
pred = model_log.predict(A_test)

# Evaluating Model's Performance
# The species codes are nominal labels, so distance-based regression metrics
# (MAE / MSE / RMSE) depend on the arbitrary 0/1/2 numbering and are not
# meaningful here. Report classification metrics instead.
n_wrong = int((pred != b_test).sum())
print('Accuracy:', model_log.score(A_test, b_test))
print('Misclassified samples:', n_wrong, 'of', len(b_test))

Mean Absolute Error: 0.018867924528301886
Mean Squared Error: 0.018867924528301886
Mean Root Squared Error: 0.13736056394868904


In [None]:
# Confusion matrix of the true test labels (b_test) against the predictions (pred).
# NOTE(review): with the (y_true, y_pred) argument order used here, rows should be
# the true classes and columns the predicted ones - confirm against the
# scikit-learn documentation. Off-diagonal entries are misclassifications.
confusion_matrix(b_test, pred)

array([[16,  0,  0],
       [ 0, 22,  1],
       [ 0,  0, 14]])