In [None]:
#p6
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Step 1: Load dataset
# Raw string literal: the Windows path contains backslashes (\C, \D, \P) that a
# normal string literal would parse as (invalid) escape sequences.
file_path = r"N:\CS2225 DS\Datasets\Program6_diabetes.csv"
try:
    df = pd.read_csv(file_path, encoding='latin1')
    print("Dataset Preview:\n", df.head())
except FileNotFoundError:
    print(f"Error: File '{file_path}' not found. Please check the file path.")
    # Raise SystemExit directly: the exit() helper is meant for interactive
    # shells and is not a dependable way to halt a program or notebook cell.
    raise SystemExit(1)
except pd.errors.EmptyDataError:
    print(f"Error: File '{file_path}' is empty or invalid.")
    raise SystemExit(1)
except Exception as e:
    # Any other read/parse failure: report it and stop with a non-zero status.
    print(f"Error reading CSV file: {e}")
    raise SystemExit(1)


# Step 2: Check if required columns exist
# 'Glucose' is the single predictor and 'Outcome' the label used below.
required_columns = ['Glucose', 'Outcome']
if not all(col in df.columns for col in required_columns):
    print(f"Error: Required columns {required_columns} not found. Available columns: {list(df.columns)}")
    # Raise SystemExit directly: the exit() helper is meant for interactive
    # shells and is not a dependable way to halt a program or notebook cell.
    raise SystemExit(1)


# Step 3: Prepare data
try:
    X = df[['Glucose']]  # double brackets keep X a one-column DataFrame
    y = df['Outcome']    # label column as a Series
except KeyError as e:
    print(f"Error accessing columns: {e}")
    # Raise SystemExit directly: the exit() helper is meant for interactive
    # shells and is not a dependable way to halt a program or notebook cell.
    raise SystemExit(1)


# Step 4: Train/Test split
# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle seed.
try:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
except Exception as e:
    print(f"Train/test split failed: {e}")
    # Raise SystemExit directly: the exit() helper is meant for interactive
    # shells and is not a dependable way to halt a program or notebook cell.
    raise SystemExit(1)


# Step 5: Model training
# Fit a logistic-regression classifier (default settings) on the training rows.
try:
    model = LogisticRegression()
    model.fit(X_train, y_train)
except Exception as e:
    print(f"Model training failed: {e}")
    # Raise SystemExit directly: the exit() helper is meant for interactive
    # shells and is not a dependable way to halt a program or notebook cell.
    raise SystemExit(1)


# Step 6: Prediction and Evaluation
# Predict on the held-out rows and report the fraction predicted correctly.
try:
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
except Exception as e:
    print(f"Prediction or evaluation failed: {e}")
    # Raise SystemExit directly: the exit() helper is meant for interactive
    # shells and is not a dependable way to halt a program or notebook cell.
    raise SystemExit(1)


# Step 7: Predict for a new value
try:
    # The model was fitted on a DataFrame with a 'Glucose' column, so pass the
    # new sample as a DataFrame with the same column name; a bare nested list
    # carries no feature names and makes scikit-learn emit a mismatch warning.
    new_data = pd.DataFrame({'Glucose': [120]})
    predicted_result = model.predict(new_data)
    print("Predicted Outcome for Glucose level 120 mg/dL:", "Diabetic" if predicted_result[0] == 1 else "Non-diabetic")
except Exception as e:
    print(f"Prediction for new value failed: {e}")
    # Raise SystemExit directly: the exit() helper is meant for interactive
    # shells and is not a dependable way to halt a program or notebook cell.
    raise SystemExit(1)


Dataset Preview:
    Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  
Accuracy: 0.7857142857142857
Predicted Outcome for Glucose level 120 mg/dL: Non-diabetic




In [None]:
#ex6
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the dataset.
# Raw string literal: the Windows path contains backslashes (\C, \D, \P) that a
# normal string literal would parse as (invalid) escape sequences.
file_path = r"N:\CS2225 DS\Datasets\Program6_diabetes.csv"
df = pd.read_csv(file_path, encoding='latin1')
print("Dataset Preview:\n", df.head())

# The label is the "Outcome" column; every other column is used as a feature.
y = df["Outcome"]
X = df.drop(columns="Outcome")

# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit a logistic-regression classifier, allowing up to 500 solver iterations.
model = LogisticRegression(max_iter=500).fit(X_train, y_train)


# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\nModel Evaluation:")
print("Accuracy:", accuracy)
# Print the confusion matrix and the per-class report, each under its heading.
for heading, metric in (
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))


# Predict for one new patient.
# The model was fitted on a DataFrame, so wrap the sample in a one-row DataFrame
# labelled with the training columns (X.columns); a bare nested list carries no
# feature names and makes scikit-learn emit a feature-name mismatch warning.
# The values are positional and must follow the order of X.columns.
new_patient = pd.DataFrame([[2, 120, 70, 20, 80, 28.0, 0.5, 35]], columns=X.columns)

prediction = model.predict(new_patient)[0]
# Column 1 of predict_proba is the probability of the class labelled 1,
# which the print below reports as "Diabetic".
probability = model.predict_proba(new_patient)[0][1]

print("\nPrediction for New Patient:")
print("Predicted Outcome:", "Diabetic" if prediction == 1 else "Non-diabetic")
print(f"Probability of being Diabetic: {probability:.2f}")


Dataset Preview:
    Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  

Model Evaluation:
Accuracy: 0.8246753246753247

Confusion Matrix:
 [[98  9]
 [18 29]]

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.92      0.88       107
           1       0.76      0.62      0.68        47

    accur

