In [123]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression,LinearRegression,Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report,mean_absolute_error,mean_squared_error,r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [88]:
data={
    'Age':[22,25,47,52,46,56,23,30],
    'Income':[2500,2700,6500,7000,6200,7500,2600,4000],
     'Visit Time':[5,7,3,2,4,1,6,5],
     'Purchase':[1,1,0,0,0,0,1,1]   #Purchase=1, Not Purchase=0

}

In [89]:
df=pd.DataFrame(data)
df

Unnamed: 0,Age,Income,Visit Time,Purchase
0,22,2500,5,1
1,25,2700,7,1
2,47,6500,3,0
3,52,7000,2,0
4,46,6200,4,0
5,56,7500,1,0
6,23,2600,6,1
7,30,4000,5,1


In [90]:
X=df.drop('Purchase',axis=1)
y=df['Purchase']

In [91]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)


In [92]:
pipe=Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('model', LogisticRegression())
                         ])
pipe

In [93]:
pipe.fit(X_train,y_train)

In [94]:
y_pred=pipe.predict(X_test)
print('Actual Data ',y_test.values)
print('Predicted Data ',y_pred)


Actual Data  [1 0]
Predicted Data  [1 0]


In [95]:
#Evaluate
accuracy=accuracy_score(y_test,y_pred)
print('Accuracy:',accuracy)

Accuracy: 1.0


In [96]:
conf_matrix=confusion_matrix(y_test,y_pred)
print('Confusion Matrix:\n',conf_matrix)

Confusion Matrix:
 [[1 0]
 [0 1]]


In [97]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [98]:
data={
    "Hours Studied":[1,2,3,4,5,6,8,10],
    "Attendence":[20,30,40,60,65,70,80,90],
    "Extra Classes":['No','Yes','No','Yes','Yes','Yes','NO','Yes'],
    "Marks":[30,40,35,55,65,90,90,85]
}



In [99]:
df=pd.DataFrame(data)
df

Unnamed: 0,Hours Studied,Attendence,Extra Classes,Marks
0,1,20,No,30
1,2,30,Yes,40
2,3,40,No,35
3,4,60,Yes,55
4,5,65,Yes,65
5,6,70,Yes,90
6,8,80,NO,90
7,10,90,Yes,85


In [100]:
X=df.drop('Marks',axis=1)
y=df['Marks']

In [101]:
num_features=["Hours Studied","Attendence"]

In [102]:
cat_features=["Extra Classes"]

In [103]:
num_transformer=Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='mean')),
    ('scaler',StandardScaler())
])

In [104]:
cat_transformer=Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='most_frequent')),
    ('encoder',OneHotEncoder(handle_unknown='ignore',sparse_output=False))
])


In [105]:
preprocessor=ColumnTransformer(transformers=[
    ('num',num_transformer,num_features),
    ('cat',cat_transformer,cat_features)
])

In [106]:
pipe=Pipeline(steps=[
    ('preprocessor',preprocessor),
    ('model',LogisticRegression())
])
pipe

In [107]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [108]:
pipe.fit(X_train,y_train)

In [109]:
y_pred=pipe.predict(X_test)
print('Actual Data ',y_test.values)
print('Predicted Data ',y_pred)

Actual Data  [40 90]
Predicted Data  [30 65]


In [110]:
mean_absolute_error=np.mean(np.abs(y_pred-y_test))
print('Mean Absolute Error:',mean_absolute_error)


Mean Absolute Error: 17.5


In [111]:
r2_score=pipe.score(X_test,y_test)
print('R2 Score:',r2_score)

R2 Score: 0.0


In [116]:
models={
    'Logistic Regression':LogisticRegression(),
    'Decision Tree':DecisionTreeRegressor(max_depth=5,random_state=42),
    'Random Forest':RandomForestRegressor(),
    'Ridge':Ridge(alpha=1.0),

}


In [128]:
results=[]
for name,model in models.items():
  pipeline=Pipeline(steps=[
      ('preprocessor',preprocessor),
      ('model',model)
  ])
  pipeline.fit(X_train,y_train)
  y_pred=pipeline.predict(X_test)
  r2_score=pipeline.score(X_test,y_test)
  results.append(
      {  "Model":name,
          "R2 Score":r2_score,
          "Mean Absolute Error":mean_absolute_error(y_test,y_pred),
          "Mean Squared Error":mean_squared_error(y_test,y_pred),
          "Root Mean Squared Error":np.sqrt(mean_squared_error(y_test,y_pred))

      }

      )

for i in results:
  print(i)





{'Model': 'Logistic Regression', 'R2 Score': 0.0, 'Mean Absolute Error': 17.5, 'Mean Squared Error': 362.5, 'Root Mean Squared Error': np.float64(19.03943276465977)}
{'Model': 'Decision Tree', 'R2 Score': 0.42000000000000004, 'Mean Absolute Error': 17.5, 'Mean Squared Error': 362.5, 'Root Mean Squared Error': np.float64(19.03943276465977)}
{'Model': 'Random Forest', 'R2 Score': 0.563352, 'Mean Absolute Error': 13.55, 'Mean Squared Error': 272.90500000000003, 'Root Mean Squared Error': np.float64(16.51983656093486)}
{'Model': 'Ridge', 'R2 Score': 0.5754447459747468, 'Mean Absolute Error': 11.786509528557843, 'Mean Squared Error': 265.34703376578324, 'Root Mean Squared Error': np.float64(16.289476166095188)}
