# Logistic Regression for binary classification 

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# -----------------------------
# 1. Dataset   You can read csv file also
# -----------------------------


df = pd.read_csv("C:/Users/dell/Downloads/DATA SETS/Play Tennis.csv")
df.head()

Unnamed: 0,Day,Outlook,Temprature,Humidity,Wind,Play_Tennis
0,D1,Sunny,Hot,High,Weak,No
1,D2,Sunny,Hot,High,Strong,No
2,D3,Overcast,Hot,High,Weak,Yes
3,D4,Rain,Mild,High,Weak,Yes
4,D5,Rain,Cool,Normal,Weak,Yes


In [3]:
# -----------------------------
# 2. Preprocessing
# -----------------------------
X = df.drop(columns=['Day','Play_Tennis'])   # features
y = df['Play_Tennis']                        # target


In [4]:
# One-hot encode categorical features
X = pd.get_dummies(X, drop_first=True)
X


Unnamed: 0,Outlook_Rain,Outlook_Sunny,Temprature_Hot,Temprature_Mild,Humidity_Normal,Wind_Weak
0,False,True,True,False,False,True
1,False,True,True,False,False,False
2,False,False,True,False,False,True
3,True,False,False,True,False,True
4,True,False,False,False,True,True
5,True,False,False,False,True,False
6,False,False,False,False,True,False
7,False,True,False,True,False,True
8,False,True,False,False,True,True
9,True,False,False,True,True,True


In [5]:
# Encode target variable (Yes=1, No=0)
le = LabelEncoder()
y = le.fit_transform(y)
y



array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0])

In [6]:
# Split into Train/Test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state=42,stratify=y)


In [7]:
# -----------------------------
# 3. Logistic Regression Model
# -----------------------------
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# Predictions
y_pred = log_reg.predict(X_test)
y_prob = log_reg.predict_proba(X_test)

In [8]:
# -----------------------------
# 4. Evaluation
# -----------------------------
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))



Accuracy: 0.6

Confusion Matrix:
 [[0 2]
 [0 3]]


In [9]:
print("\nClassification Report:\n", 
      metrics.classification_report(y_test, y_pred, target_names=["No", "Yes"]))



Classification Report:
               precision    recall  f1-score   support

          No       0.00      0.00      0.00         2
         Yes       0.60      1.00      0.75         3

    accuracy                           0.60         5
   macro avg       0.30      0.50      0.38         5
weighted avg       0.36      0.60      0.45         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Logistic Regression for multi class classification 

In [2]:
import pandas as pd
fish = pd.read_csv('C:/Users/dell/Downloads/DATA SETS/dataset_Fish.csv')
fish.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [3]:
fish['Species'].unique()

array(['Bream', 'Roach', 'Whitefish', 'Parkki', 'Perch', 'Pike', 'Smelt'],
      dtype=object)

In [4]:
fish.isnull().sum()

Species    0
Weight     0
Length1    0
Length2    0
Length3    0
Height     0
Width      0
dtype: int64

In [5]:
X = fish.iloc[:, 1:]
y = fish.loc[:, 'Species']

In [6]:
X.head()

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width
0,242.0,23.2,25.4,30.0,11.52,4.02
1,290.0,24.0,26.3,31.2,12.48,4.3056
2,340.0,23.9,26.5,31.1,12.3778,4.6961
3,363.0,26.3,29.0,33.5,12.73,4.4555
4,430.0,26.5,29.0,34.0,12.444,5.134


In [7]:
y.head()

0    Bream
1    Bream
2    Bream
3    Bream
4    Bream
Name: Species, dtype: object

### Scaling the input features using MinMaxScaler

In [8]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)


In [9]:
X_scaled[0:5]

array([[0.14666667, 0.30485437, 0.30909091, 0.35810811, 0.56833405,
        0.41897835],
       [0.17575758, 0.32038835, 0.32545455, 0.37837838, 0.62405535,
        0.45923545],
       [0.20606061, 0.3184466 , 0.32909091, 0.37668919, 0.61812335,
        0.51427887],
       [0.22      , 0.36504854, 0.37454545, 0.41722973, 0.63856611,
        0.48036479],
       [0.26060606, 0.36893204, 0.37454545, 0.42567568, 0.6219658 ,
        0.57600361]])

### Label Encoding the target variable using LabelEncoder

In [10]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5])

### Splitting into train and test datasets using train_test_split

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X_scaled, y, test_size=0.2, random_state=42)

### Model Building and training

In [12]:
from sklearn.linear_model import LogisticRegression
logReg = LogisticRegression()
# training the model
logReg.fit(X_train, y_train)

### Predicting the output

In [13]:
y_pred = logReg.predict(X_test)

### Confusion Matrix ( You can also plot simple confusion matrix)

In [18]:
from sklearn import metrics
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", metrics.classification_report(y_test, y_pred))



Accuracy: 0.875

Confusion Matrix:
 [[10  0  0  0  0  0  0]
 [ 0  1  0  0  0  0  0]
 [ 0  0  9  0  0  0  0]
 [ 0  0  0  3  0  0  0]
 [ 0  0  1  0  0  0  0]
 [ 0  0  0  0  0  5  0]
 [ 0  0  3  0  0  0  0]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         1
           2       0.69      1.00      0.82         9
           3       1.00      1.00      1.00         3
           4       0.00      0.00      0.00         1
           5       1.00      1.00      1.00         5
           6       0.00      0.00      0.00         3

    accuracy                           0.88        32
   macro avg       0.67      0.71      0.69        32
weighted avg       0.79      0.88      0.82        32



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Using SVM model


In [19]:
from sklearn.svm import SVC

# -----------------------------
# 4. SVM Model
# -----------------------------
svm_clf = SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42)
svm_clf.fit(X_train, y_train)



In [20]:
# -----------------------------
# 5. Predictions & Evaluation
# -----------------------------
y_pred = svm_clf.predict(X_test)





In [21]:
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", metrics.classification_report(y_test, y_pred))

Accuracy: 0.875

Confusion Matrix:
 [[10  0  0  0  0  0  0]
 [ 0  1  0  0  0  0  0]
 [ 0  0  9  0  0  0  0]
 [ 0  0  0  3  0  0  0]
 [ 0  0  1  0  0  0  0]
 [ 0  0  0  0  0  5  0]
 [ 0  0  3  0  0  0  0]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         1
           2       0.69      1.00      0.82         9
           3       1.00      1.00      1.00         3
           4       0.00      0.00      0.00         1
           5       1.00      1.00      1.00         5
           6       0.00      0.00      0.00         3

    accuracy                           0.88        32
   macro avg       0.67      0.71      0.69        32
weighted avg       0.79      0.88      0.82        32



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
