# Implementation of KNN

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:

iris = load_iris()
X = iris.data
y = iris.target



In [4]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)



In [5]:

clf = KNeighborsClassifier(n_neighbors=4)



In [6]:

clf.fit(X_train, y_train)



In [7]:
y_pred = clf.predict(X_test)

In [8]:

print("Predictions for the first 5 samples:", clf.predict(X_test[:5]))



Predictions for the first 5 samples: [1 0 2 1 1]


In [9]:

print("Class probabilities for the first 5 samples:", clf.predict_proba(X_test[:5]))


Class probabilities for the first 5 samples: [[0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]]


In [10]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of the model:", accuracy)

Accuracy of the model: 1.0


In [11]:
accuracy_alternative = clf.score(X_test, y_test)
print("Accuracy of the model (using score method):", accuracy_alternative)

Accuracy of the model (using score method): 1.0


# Implementation of Linear Regression

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [13]:
df = pd.read_csv('/content/drive/MyDrive/Concept of AI -- week 7/loan_data.csv')
df.head()


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


In [14]:
df.drop(columns=['person_gender','person_education','person_home_ownership','loan_intent','previous_loan_defaults_on_file'],inplace = True)

In [15]:
#Handling Missing Data
df.isnull().sum()
df.dropna(inplace=True)

In [16]:
X = df.drop('loan_status', axis=1)
y = df['loan_status']

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [18]:
model = LinearRegression()
model.fit(X_train, y_train)



In [19]:
y_pred = model.predict(X_test)



In [20]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")


Mean Squared Error: 0.1298256670597265
R-squared: 0.25153353178045035


# Implementation of Logistic Regression

In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd

In [22]:
df = pd.read_csv('/content/drive/MyDrive/Concept of AI -- week 7/healthcare-dataset-stroke-data.csv')
df.head()

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


In [23]:
df.columns


Index(['id', 'gender', 'age', 'hypertension', 'heart_disease', 'ever_married',
       'work_type', 'Residence_type', 'avg_glucose_level', 'bmi',
       'smoking_status', 'stroke'],
      dtype='object')

In [24]:
#Handling Missing Data
df.isnull().sum()
df.dropna(inplace=True)

In [25]:
df.drop(columns=['gender','ever_married','work_type','Residence_type','smoking_status'],inplace = True)

In [26]:
#drop all the columns having string
X = df.drop('stroke', axis=1)
y = df['stroke']



In [27]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [28]:

model = LogisticRegression(max_iter=10000) # Increased max_iter to ensure convergence
model.fit(X_train, y_train)



In [29]:

y_pred = model.predict(X_test)



In [30]:

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [31]:
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{class_report}")

Accuracy: 0.9460285132382892
Confusion Matrix:
[[929   0]
 [ 53   0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       929
           1       0.00      0.00      0.00        53

    accuracy                           0.95       982
   macro avg       0.47      0.50      0.49       982
weighted avg       0.89      0.95      0.92       982

