In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np

data=pd.read_csv("water_potability.csv")

# Handle missing values (a simple strategy like median imputation)
for col in ['ph', 'Sulfate', 'Trihalomethanes']:
    data[col].fillna(data[col].median(), inplace=True)

print(data.head())

# Define features (x) and target (y)
# Exclude 'Potability' as it is the target variable
x = data.drop('Potability', axis=1)
y = data["Potability"]

print(x.head())
print(y.head())

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=2)
lr = LogisticRegression(max_iter=5000)
lr.fit(x_train,y_train)
y_lr_pred=lr.predict(x_test)
print("y_test shape:", y_test.shape)
print("y_lr_pred shape:", y_lr_pred.shape)
accuracy_lr=accuracy_score(y_test,y_lr_pred)
print(accuracy_lr)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].median(), inplace=True)


         ph    Hardness        Solids  Chloramines     Sulfate  Conductivity  \
0  7.036752  204.890455  20791.318981     7.300212  368.516441    564.308654   
1  3.716080  129.422921  18630.057858     6.635246  333.073546    592.885359   
2  8.099124  224.236259  19909.541732     9.275884  333.073546    418.606213   
3  8.316766  214.373394  22018.417441     8.059332  356.886136    363.266516   
4  9.092223  181.101509  17978.986339     6.546600  310.135738    398.410813   

   Organic_carbon  Trihalomethanes  Turbidity  Potability  
0       10.379783        86.990970   2.963135           0  
1       15.180013        56.329076   4.500656           0  
2       16.868637        66.420093   3.055934           0  
3       18.436524       100.341674   4.628771           0  
4       11.558279        31.997993   4.075075           0  
         ph    Hardness        Solids  Chloramines     Sulfate  Conductivity  \
0  7.036752  204.890455  20791.318981     7.300212  368.516441    564.308654   