In [None]:
# 1. Classifying Loan Status Using Decision Trees

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt           

# Load the loan data set (path is specific to the author's machine).
data = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_13/Loan_1.csv')

# Impute missing joint incomes with the column mean. The result is assigned
# back instead of calling fillna(inplace=True) on the selected column, because
# the in-place form on a column selection does not update the parent frame
# under pandas Copy-on-Write.
data['annual_income_joint'] = data['annual_income_joint'].fillna(
    data['annual_income_joint'].mean()
)
# Order preserved from the original cell: rows with a null in any remaining
# column (including `debt_to_income`) are removed first, then the
# `debt_to_income` column itself is discarded.
data = data.dropna()
data = data.drop(columns='debt_to_income')

# NOTE(review): the cell title says "Loan Status", but the label used here is
# `issue_month` -- confirm this is the intended target column.
X = data.drop(columns=['issue_month'])
y = data['issue_month']

categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# One-hot encode before splitting so train and test share the same columns.
X = pd.get_dummies(X, columns=categorical_cols)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows;
# fitting on the full frame before the split leaks test-set statistics.
# Explicit copies avoid writing into frames derived from X.
X_train = X_train.copy()
X_test = X_test.copy()
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Seed the tree as well as the split so repeated runs give the same scores.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
proba = model.predict_proba(X_test)  # one column per entry of model.classes_

accuracy = accuracy_score(y_test, y_pred) * 100
print("Accuracy:", accuracy, "%")

# roc_curve only supports binary targets, so the multi-class case is handled
# one-vs-rest (one curve per class). The positive label is taken from the
# fitted model rather than hard-coded.
classes = model.classes_
roc_curves = []  # (legend label, fpr, tpr, colour or None for the default cycle)
if len(classes) > 2:
    # Multi-class case
    roc_auc = roc_auc_score(y_test, proba, multi_class='ovr', labels=classes)
    for class_idx, class_label in enumerate(classes):
        fpr, tpr, _ = roc_curve((y_test == class_label).astype(int), proba[:, class_idx])
        roc_curves.append((f"{class_label} vs rest", fpr, tpr, None))
else:
    y_proba = proba[:, 1]
    roc_auc = roc_auc_score(y_test, y_proba)
    fpr, tpr, _ = roc_curve(y_test, y_proba, pos_label=classes[1])
    roc_curves.append((str(classes[1]), fpr, tpr, 'darkorange'))

print("ROC AUC Score:", roc_auc)

plt.figure()
for curve_label, fpr, tpr, curve_color in roc_curves:
    plt.plot(fpr, tpr, color=curve_color, lw=2, label=curve_label)
# Diagonal reference line for a chance-level classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve (decision tree)')
plt.legend(loc='lower right')
plt.show()



In [None]:
# Predicting Hospital Readmission Using Logistic Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# Load the hospital data set (path is specific to the author's machine).
data = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_13/hospital_readmissions.csv')
print(data.dtypes)
# Author's note carried over from the original cell: data.isnull().sum()
# reported no null values, so no imputation is done here.

# NOTE(review): the cell title says "Hospital Readmission", but the label used
# here is `time_in_hospital` -- confirm this is the intended target column.
X = data.drop(columns=['time_in_hospital'])
y = data['time_in_hospital']

categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# One-hot encode before splitting so train and test share the same columns.
X = pd.get_dummies(X, columns=categorical_cols)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows;
# fitting on the full frame before the split leaks test-set statistics.
# Explicit copies avoid writing into frames derived from X.
X_train = X_train.copy()
X_test = X_test.copy()
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Weighted averaging: per-class scores are combined in proportion to each
# class's support in y_test.
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)



In [None]:
# Classifying Digit Images Using Decision Trees

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load the digit images (path is specific to the author's machine).
mnist = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_13/mnist_train.csv')

# Every column except `label` is used as a feature.
X = mnist.drop(columns=['label'])
y = mnist['label'].astype(int)

# Rescale by 255 -- presumably 8-bit pixel intensities; verify against the file.
X = X / 255.0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the tree as well as the split: an unseeded DecisionTreeClassifier can
# produce a different tree (and different scores) on every run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix : \n ", cm)


In [None]:
# Predicting Loan Approval Using Logistic Regression

import pandas as pd
import matplotlib.pyplot as plt
# accuracy_score is used below; the original cell did not import it and only
# ran when an earlier cell had already brought it into the kernel namespace.
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression


# Load the loan data set (path is specific to the author's machine).
df = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Project [Loan Default Rates]_4/Copy of loan.csv')
print(df.isnull().sum())

# Impute missing credit history with the column mean. The result is assigned
# back instead of calling fillna(inplace=True) on the selected column, because
# the in-place form on a column selection does not update the parent frame
# under pandas Copy-on-Write.
df['Credit_History'] = df['Credit_History'].fillna(df['Credit_History'].mean())
# Remove every row that still has a null in any other column.
df = df.dropna()

# NOTE(review): the cell title says "Loan Approval", but the label used here
# is `Loan_Amount_Term` -- confirm this is the intended target column.
X = df.drop(columns=['Loan_Amount_Term'])
y = df['Loan_Amount_Term']

categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# One-hot encode before splitting so train and test share the same columns.
X = pd.get_dummies(X, columns=categorical_cols)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows;
# fitting on the full frame before the split leaks test-set statistics.
# Explicit copies avoid writing into frames derived from X.
X_train = X_train.copy()
X_test = X_test.copy()
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: \n {accuracy:.2f}")

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix : \n ", cm)



In [88]:
# Classifying Wine Quality Using Decision Trees
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
import matplotlib.pyplot as plt

# Load the wine-quality data set (path is specific to the author's machine).
df = pd.read_csv('C:/Users/abdul/OneDrive/Desktop/ByteWise_ML/Day_10/wine_quality.csv')
print(df.isnull().sum())
print(df.head(3))

# Besides the target, also drop `Id`: the printed column listing shows it is
# part of the frame, and it is presumably a row identifier rather than a
# property of the wine (verify against the data source). Leaving it in lets
# the tree split on an arbitrary row number.
X = df.drop(columns=['quality', 'Id'])
y = df['quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the tree as well as the split so repeated runs give the same accuracy.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# The original cell also computed predict_proba(X_test)[:, 1] but never used
# the result, so that computation has been removed.
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")



fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
Id                      0
dtype: int64
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   

   alcohol  quality  Id  
0      9.4   