In [None]:
# Predicting Employee Attrition Using Logistic Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score

# Load the HR attrition dataset and take a quick look at it.
# NOTE(review): absolute, machine-specific path — point it at your local copy.
ATTRITION_CSV = 'C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_12/WA_Fn-UseC_-HR-Employee-Attrition.csv'
data = pd.read_csv(ATTRITION_CSV)
print(data.head(3))
print(data.isnull().sum())

# Separate the target column ('Attrition') from the predictors.
X = data.drop(columns=['Attrition'])
y = data['Attrition']

categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# One-hot encode the categorical predictors.
X = pd.get_dummies(X, columns=categorical_cols)
# Cast numeric predictors to float up front so the scaled (float) values can be
# written back without silently changing an integer column's dtype.
X = X.astype({col: 'float64' for col in numerical_cols})

# Split BEFORE scaling. The split itself is unchanged (same random_state and
# test_size); only the scaler statistics now come from the training rows alone,
# so no information about the test set leaks into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.copy()
X_test = X_test.copy()

scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# max_iter is raised above the default so the solver has room to converge on
# the one-hot-expanded feature set.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# The target is a string label; 'Yes' is treated as the positive class.
precision = precision_score(y_test, y_pred, pos_label='Yes')
recall = recall_score(y_test, y_pred, pos_label='Yes')
f1 = f1_score(y_test, y_pred, pos_label='Yes')

print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

In [None]:
# Classifying Credit Card Fraud Using Decision Trees

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt           

# Load the credit-card transactions dataset.
# NOTE(review): absolute, machine-specific path — point it at your local copy.
CREDITCARD_CSV = 'C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_10/BTW_Week5_Datasets/Extracted files/credit_card/creditcard.csv'
data = pd.read_csv(CREDITCARD_CSV)
# A bare expression in the middle of a cell is not displayed, so print the preview.
print(data.head(3).round(3))

# 'Class' is the target; every other column is used as a predictor.
X = data.drop(columns=['Class'])
y = data['Class']

# Split first (same random_state/test_size as before, so the same rows end up
# in each partition), then fit the scaler on the training rows only so the test
# set does not influence the standardisation statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fix the seed so the fitted tree (and therefore every metric below) is
# reproducible across runs.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
# Column 1 of predict_proba corresponds to clf.classes_[1].
y_proba = clf.predict_proba(X_test)[:, 1]

conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred) * 100
roc_auc = roc_auc_score(y_test, y_proba)
fpr, tpr, _ = roc_curve(y_test, y_proba)

print("Confusion Matrix:\n", conf_matrix)
print("Accuracy:", accuracy, "%")
print("ROC-AUC:", roc_auc)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
# FPR and TPR are rates in [0, 1]; the previous limits (2.0 / 2.5) squeezed the
# curve into the lower-left corner of the figure.
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc='lower right')
plt.show()




In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Logistic regression on the heart-disease dataset, evaluated with accuracy and ROC-AUC.
# NOTE(review): absolute, machine-specific path — point it at your local copy.
HEART_CSV = 'C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_10/heart_disease_data.csv'
data = pd.read_csv(HEART_CSV)

# NOTE(review): 'cholesterol_level' is used as the classification target and the
# ROC code below needs it to be binary — confirm this is the intended label column.
X = data.drop(columns=['cholesterol_level'])
y = data['cholesterol_level']

categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# One-hot encode the categorical predictors.
X = pd.get_dummies(X, columns=categorical_cols)
# Cast numeric predictors to float so scaled values can be written back without
# changing an integer column's dtype behind the scenes.
X = X.astype({col: 'float64' for col in numerical_cols})

# Split first (same rows as before: identical random_state/test_size), then fit
# the scaler on the training partition only to avoid test-set leakage.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.copy()
X_test = X_test.copy()

scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
# Column 1 of predict_proba holds the probability of model.classes_[1].
y_proba = model.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred) * 100
roc_auc = roc_auc_score(y_test, y_proba)
# Take the positive label from the fitted model instead of hard-coding 'Yes', so
# it always matches the class whose probability is in y_proba. For a 'No'/'Yes'
# target this is still 'Yes'.
fpr, tpr, _ = roc_curve(y_test, y_proba, pos_label=model.classes_[1])

print("Accuracy:", accuracy, "%")
print("ROC-AUC:", roc_auc)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
# Without legend() the label passed to plot() above is never rendered.
plt.legend(loc='lower right')
plt.show()


In [None]:
# Classifying Emails as Spam Using Decision Trees

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import precision_score, recall_score, f1_score

# Load the e-mail dataset and preview it.
# NOTE(review): absolute, machine-specific path — point it at your local copy.
MAIL_CSV = 'C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_12/mail_data.csv'
data = pd.read_csv(MAIL_CSV)
print(data.head(10))

# .copy() makes data_1 an independent frame, so the column assignment below does
# not write into a view of `data` (which raises SettingWithCopyWarning).
data_1 = data.dropna().copy()

# Label encoding used throughout this cell: spam -> 0, ham -> 1.
data_1['Category'] = data_1['Category'].map({'spam': 0, 'ham': 1})

X = data_1['Message']
y = data_1['Category']

# Split the raw text first, then fit TF-IDF on the training messages only, so
# the vocabulary and IDF weights are not influenced by the test messages.
X_train_text, X_test_text, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer(min_df=1, stop_words='english', lowercase=True)
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Fixed seed so the fitted tree and the metrics are reproducible.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
# Spam is encoded as 0, so pos_label=0 makes these scores describe spam
# detection. The default (pos_label=1) would report them for the ham class.
precision = precision_score(y_test, y_pred, pos_label=0)
recall = recall_score(y_test, y_pred, pos_label=0)
f1 = f1_score(y_test, y_pred, pos_label=0)

print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)

# Classify a single hand-written message with the fitted vectorizer and tree.
input_your = [" Free entry in 2 a wkly comp to win FA Cup fina"]
input_data_features = vectorizer.transform(input_your)

prediction = clf.predict(input_data_features)
print(prediction)

# predict() returns an array; test its single element rather than the array.
if prediction[0] == 1:
    print('Ham')
else:
    print('Spam')

In [None]:
# Predicting Customer Satisfaction Using Logistic Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix


# Load the customer survey data.
# NOTE(review): absolute, machine-specific path — point it at your local copy.
SURVEY_CSV = 'C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_11/Customer-survey-data.csv'
data = pd.read_csv(SURVEY_CSV)
# A bare expression in the middle of a cell is not displayed, so print the preview.
print(data.head())

target_col = 'Overall Delivery Experience (Rating)'

# Fill missing target ratings with the column mean. Assigning the result back
# (instead of a chained `inplace=True` on a column selection) is guaranteed to
# update `data` and avoids the pandas chained-assignment warning.
data[target_col] = data[target_col].fillna(data[target_col].mean())
# Drop rows that still have a missing value in any column. This runs before
# 'Order Accuracy' is removed, so that column still takes part in the filter.
data = data.dropna()
data = data.drop(columns=['Order Accuracy'])

# NOTE(review): 'Customer ID' looks like an identifier rather than a predictive
# signal; it is kept because the interactive prompt below feeds it to the model.
features = ['Customer ID', 'Food Quality (Rating)', 'Speed of Delivery (Rating)']
X = data[features].copy()
y = data[target_col]

# Bucket the rating into 5 ordinal classes. include_lowest=True keeps a rating
# of exactly 0 in the first bucket instead of turning it into NaN.
bins = [0, 2, 4, 6, 8, 10]
labels = [1, 2, 3, 4, 5]
y = pd.cut(y, bins=bins, labels=labels, include_lowest=True)

# Ratings outside the bin range have no class; drop them so the classifier is
# never fitted on NaN labels, and use plain integer class labels.
in_range = y.notna()
X = X.loc[in_range]
y = y.loc[in_range].astype(int)

# Split first (same random_state/test_size), then fit the scaler on the training
# rows only so the test rows do not influence the scaling statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=features, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=features, index=X_test.index)

model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred) * 100
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy, "%")
print("Confusion Matrix:\n", conf_matrix)

# Interactive single-customer prediction.
# NOTE: input() blocks a non-interactive "Run All".
customer_id = int(input("Enter Your Id : "))
food_quality = float(input("Enter food Quality : "))
speed_of_delivery = float(input("Rate the Speed of Delivery: "))
input_data = pd.DataFrame({
    'Customer ID': [customer_id],
    'Food Quality (Rating)': [food_quality],
    'Speed of Delivery (Rating)': [speed_of_delivery]
})

# Apply the scaler fitted on the training data, keeping the training column order.
input_scaled = pd.DataFrame(scaler.transform(input_data[features]), columns=features)
prediction = model.predict(input_scaled)

# The threshold is compared against the binned class label (1-5), not the raw
# rating. NOTE(review): confirm that class >= 3 is the intended cut-off.
satisfaction_threshold = 3
# predict() returns an array; compare its single element rather than the array.
is_satisfied = prediction[0] >= satisfaction_threshold

if is_satisfied:
    print("The Customer Is Satisfied.")
else:
    print("The Customer Is Not  Satisfied.")