# Logistic Regression 1

### Install libraries

In [1]:
# ! pip install seaborn

### Import libraries

In [2]:
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

### Load data

In [3]:
# List available data sets
print(sns.get_dataset_names())

['anagrams', 'anscombe', 'attention', 'brain_networks', 'car_crashes', 'diamonds', 'dots', 'dowjones', 'exercise', 'flights', 'fmri', 'geyser', 'glue', 'healthexp', 'iris', 'mpg', 'penguins', 'planets', 'seaice', 'taxis', 'tips', 'titanic']


In [4]:
# Load iris data set
data = sns.load_dataset("iris")
data.shape

(150, 5)

### Examine data

In [5]:
# Review data
data.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [6]:
# List targets
data['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

### Prepare data

In [7]:
# Prepare data set for training
X = data.iloc[:, :-1]     # feature values - all columns except last one
y = data.iloc[:, -1]      # target values - last column of data frame

In [8]:
# Split data into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Train model

In [9]:
# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


##### Look closely at the error message above

In [10]:
# Train the model again
model = LogisticRegression(max_iter = 200)
model.fit(X_train, y_train)

In [11]:
# Similar to linear regression, logistic regression calculates intercept and coefficient values
print('intercept:', model.intercept_[0])
print('coefficents:', model.coef_)

intercept: 9.032471716709502
coefficents: [[-0.39723742  0.9607765  -2.37393998 -1.0032536 ]
 [ 0.51286881 -0.2534895  -0.21550291 -0.76905844]
 [-0.11563139 -0.70728699  2.58944289  1.77231205]]


### Test model

In [12]:
# Test the model
predictions = model.predict(X_test)

# Print predictions
print(predictions)

['versicolor' 'setosa' 'virginica' 'versicolor' 'versicolor' 'setosa'
 'versicolor' 'virginica' 'versicolor' 'versicolor' 'virginica' 'setosa'
 'setosa' 'setosa' 'setosa' 'versicolor' 'virginica' 'versicolor'
 'versicolor' 'virginica' 'setosa' 'virginica' 'setosa' 'virginica'
 'virginica' 'virginica' 'virginica' 'virginica' 'setosa' 'setosa']


### Model evaluation

In [13]:
# Check precision, recall and f1-score
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



### Model accuracy

In [14]:
# Check model accuracy
accuracy = model.score(X_test, y_test)
print("accuracy =", round((accuracy * 100), 2), "%")

accuracy = 100.0 %
