In [None]:
# Logistic Regression 

import pandas as pd   
import matplotlib.pyplot as plt     
import numpy as np      
from sklearn.model_selection import train_test_split        
from sklearn.preprocessing import StandardScaler    
from sklearn.linear_model import LogisticRegression

data = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_10/diabetes.csv')
X = data.drop(columns='Outcome')
y = data['Outcome']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

ss = StandardScaler()
X_train_scaled = ss.fit_transform(X_train)
X_test_scaled = ss.transform(X_test)

clf = LogisticRegression()
clf.fit(X_train_scaled, y_train)
y_pred = clf.predict(X_test_scaled)

from sklearn.metrics import accuracy_score, precision_score, recall_score

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print('Accuracy = ', accuracy)
print('Precision', precision)
print('Recall', recall)

In [6]:
# Decision Trees 
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

iris = load_iris()
X = iris.data
y = iris.target

pd.DataFrame(X).isnull().sum()

scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_standardized, y, test_size=0.2, random_state=42)

clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = ((accuracy_score(y_test, y_pred))*100 )

print(f"Confusion Matrix",conf_matrix)
print(f"Accuracy: ",accuracy,"%")


Confusion Matrix [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Accuracy:  100.0 %


In [None]:
# Model Evaluation Using ROC_AU
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt

titanic = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_10/titanic.csv')
print(titanic.isnull().sum())

titanic['Age'].fillna(titanic['Age'].median(), inplace=True)
titanic.dropna(inplace=True)
print(titanic.isnull().sum())

features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
X = titanic[features]
y = titanic['Survived']

X = pd.get_dummies(X, columns=['Pclass', 'Sex', 'Embarked'])

scaler = StandardScaler()
numerical_features = ['Age', 'SibSp', 'Parch', 'Fare']
X[numerical_features] = scaler.fit_transform(X[numerical_features])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)

y_proba = model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_proba)
fpr, tpr, _ = roc_curve(y_test, y_proba)

print(f"ROC-AUC:",roc_auc)




In [46]:
# Using recall, precision and F1_score in Decision tree Classifier 

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score

spam_data = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_11/spam_ham_dataset.csv')

print(spam_data.keys())
print(spam_data.isnull().sum())

X = spam_data.drop(columns=['label'])
y = spam_data['label']

categorical_features = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=categorical_features)

scaler = StandardScaler()
features = X.select_dtypes(include=['int64']).columns
X[features] = scaler.fit_transform(X[features])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
precision = precision_score(y_test, y_pred, pos_label='spam')
recall = recall_score(y_test, y_pred, pos_label='spam')
f1 = f1_score(y_test, y_pred, pos_label='spam')

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Index(['Unnamed: 0', 'label', 'text', 'label_num'], dtype='object')
Unnamed: 0    0
label         0
text          0
label_num     0
dtype: int64
Precision: 1.00
Recall: 1.00
F1-Score: 1.00


In [84]:
# I dropped the order Accuracy Column because the order was unresolvable.
# logistic regression to predict customer satisfaction and evaluate the model using accuracy and confusion matrix.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

customer = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_11/Customer-survey-data.csv')

customer['Overall Delivery Experience (Rating)'].fillna(customer['Overall Delivery Experience (Rating)'].median(), inplace=True)
customer.dropna(inplace=True)


features = ['Customer ID','Food Quality (Rating)','Speed of Delivery (Rating)']
X = customer[features]
y = customer['Overall Delivery Experience (Rating)']


scaler = StandardScaler()
X[features] = scaler.fit_transform(X[features])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)*100
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:\n",accuracy,"%")
print("Confusion Matrix:\n",conf_matrix)


Accuracy:
 43.40885684860968 %
Confusion Matrix:
 [[127   3  27   0  87]
 [ 10   6 272   0 105]
 [ 38  11 321   0 124]
 [  0  15  58   0 155]
 [ 18  39 137   0 389]]


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  customer['Overall Delivery Experience (Rating)'].fillna(customer['Overall Delivery Experience (Rating)'].median(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[features] = scaler.fit_transform(X[features])
