In [None]:
df="Rasel_Parvez",'221-15-5432'
df

('Rasel_Parvez', '221-15-5432')

In [None]:
import pandas as pd
df = pd.read_csv('/content/Sample_Data_AI_Lab_Final.csv')
df
print("Dataset Overview:")
print(df.head())
print(df.info())

Dataset Overview:
   MultipleLines  InternetService  OnlineSecurity  OnlineBackup  \
0              1                0               3             3   
1              0                2               0             1   
2              1                2               0             0   
3              0                1               0             0   
4              0                1               1             0   

   DeviceProtection  TechSupport  StreamingTV  StreamingMovies  \
0                 3            3            3                3   
1                 1            0            1                0   
2                 0            0            0                0   
3                 0            0            0                1   
4                 1            0            0                0   

   MonthlyCharges TotalCharges        Contract  
0           24.10      1734.65        Two year  
1           88.15       3973.2  Month-to-month  
2           74.95      2869.85  Mon

In [None]:
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
print("\nMissing Values:")
print(df.isnull().sum())
df['TotalCharges'].fillna(df['TotalCharges'].median(), inplace=True)
print("\nMissing Values After Imputation:")
print(df.isnull().sum())


Missing Values:
MultipleLines        0
InternetService      0
OnlineSecurity       0
OnlineBackup         0
DeviceProtection     0
TechSupport          0
StreamingTV          0
StreamingMovies      0
MonthlyCharges       0
TotalCharges        10
Contract             0
dtype: int64

Missing Values After Imputation:
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
MonthlyCharges      0
TotalCharges        0
Contract            0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(df['TotalCharges'].median(), inplace=True)


In [None]:
from sklearn.preprocessing import LabelEncoder
df = pd.get_dummies(df, columns=['Contract'], drop_first=True)
label_encoder = LabelEncoder()
df['InternetService'] = label_encoder.fit_transform(df['InternetService'])
print("\nDataset After Encoding:")
print(df.head())


Dataset After Encoding:
   MultipleLines  InternetService  OnlineSecurity  OnlineBackup  \
0              1                0               3             3   
1              0                2               0             1   
2              1                2               0             0   
3              0                1               0             0   
4              0                1               1             0   

   DeviceProtection  TechSupport  StreamingTV  StreamingMovies  \
0                 3            3            3                3   
1                 1            0            1                0   
2                 0            0            0                0   
3                 0            0            0                1   
4                 1            0            0                0   

   MonthlyCharges  TotalCharges  Contract_One year  Contract_Two year  
0           24.10       1734.65              False               True  
1           88.15       3973.20

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
numerical_features = ['MonthlyCharges', 'TotalCharges']
df[numerical_features] = scaler.fit_transform(df[numerical_features])
print("\nDataset After Scaling:")
print(df[numerical_features].head())


Dataset After Scaling:
   MonthlyCharges  TotalCharges
0        0.058209      0.197998
1        0.695522      0.456312
2        0.564179      0.328993
3        0.374627      0.025352
4        0.350249      0.011620


In [None]:
from sklearn.model_selection import train_test_split
df['Target'] = (df['MonthlyCharges'] > 0.5).astype(int)
X = df.drop(columns=['Target'])
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print("\nTrain-Test Split:")
print("Training Set Size:", X_train.shape)
print("Testing Set Size:", X_test.shape)


Train-Test Split:
Training Set Size: (4190, 12)
Testing Set Size: (1796, 12)


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print("\nKNN Classification Report:")
print(classification_report(y_test, y_pred_knn))


KNN Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       868
           1       0.99      0.99      0.99       928

    accuracy                           0.99      1796
   macro avg       0.99      0.99      0.99      1796
weighted avg       0.99      0.99      0.99      1796



In [None]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("\nRandom Forest Classification Report:")
print(classification_report(y_test, y_pred_rf))


Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       868
           1       1.00      1.00      1.00       928

    accuracy                           1.00      1796
   macro avg       1.00      1.00      1.00      1796
weighted avg       1.00      1.00      1.00      1796



In [None]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)
print("\nNaïve Bayes Classification Report:")
print(classification_report(y_test, y_pred_nb))


Naïve Bayes Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.78      0.87       868
           1       0.83      1.00      0.91       928

    accuracy                           0.89      1796
   macro avg       0.91      0.89      0.89      1796
weighted avg       0.91      0.89      0.89      1796



In [None]:
results = {
    "KNN": accuracy_score(y_test, y_pred_knn),
    "Random Forest": accuracy_score(y_test, y_pred_rf),
    "Naive Bayes": accuracy_score(y_test, y_pred_nb),
}

print("\nComparative Analysis:")
for model, accuracy in results.items():
    print(f"{model}: Accuracy = {accuracy:.2f}")


Comparative Analysis:
KNN: Accuracy = 0.99
Random Forest: Accuracy = 1.00
Naive Bayes: Accuracy = 0.89
