In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the toy COVID dataset from a local CSV and preview its first three rows.
# NOTE(review): this is an absolute Windows path, so the cell only runs on a
# machine that has the file at exactly this location.
df = pd.read_csv(filepath_or_buffer="D:\\DataSet\\covid_toy.csv")
df.head(n=3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [3]:
from sklearn.model_selection import train_test_split

# The target is the `has_covid` column; every remaining column is a predictor.
y = df["has_covid"]
x = df.drop(columns="has_covid")

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
from sklearn.preprocessing import FunctionTransformer

# Module-level registries of column names, filled by func_dtype() and read
# later when the ColumnTransformer is assembled.
numerical = []
categorical = []

def func_dtype(x_train):
    """Record which columns of ``x_train`` are numeric and which are categorical.

    The names of the int64/float64 columns are stored in the module-level list
    ``numerical`` and the names of the object-dtype columns in ``categorical``.
    Both lists are overwritten in place on every call, so calling the function
    repeatedly never accumulates duplicate column names.

    Parameters
    ----------
    x_train : pandas.DataFrame
        Feature frame whose column dtypes are inspected.

    Returns
    -------
    pandas.DataFrame
        ``x_train`` itself, untouched. This function is wrapped in a
        ``FunctionTransformer`` below, and a transformer is expected to hand
        data back rather than ``None``.
    """
    # Slice assignment mutates the existing list objects, so any reference
    # already held to `numerical` / `categorical` sees the refreshed content.
    numerical[:] = x_train.select_dtypes(include=["int64", "float64"]).columns.tolist()
    categorical[:] = x_train.select_dtypes(include="object").columns.tolist()
    return x_train

# Wrap the helper so it can be driven through the scikit-learn transformer API.
dtype_filter = FunctionTransformer(func_dtype)

# Run the helper on the training features purely for its side effect of
# filling `numerical` and `categorical`. The return value is deliberately not
# bound to `x`: doing so replaced the feature frame built earlier with whatever
# the helper returns (None when the helper has no return statement).
dtype_filter.transform(x_train)

print("numerical feature : ",numerical)
print("categorical feature : ",categorical)

numerical feature :  ['age', 'fever']
categorical feature :  ['gender', 'cough', 'city']


In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [6]:
# NOTE: step names are reproduced exactly as written (including their
# spelling) because they become the parameter keys of the fitted pipeline.

# Numeric columns: fill missing values with the column mean, then standardise.
numeric_transformer = Pipeline([
    ("impute", SimpleImputer(strategy="mean")),
    ("sclaer", StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent value, then
# one-hot encode. handle_unknown="ignore" keeps transform from raising on
# categories that were not seen during fit.
categorical_transformer = Pipeline([
    ("impute", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# Route each group of columns (collected in `numerical` / `categorical`)
# through its own sub-pipeline.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numerical),
    ("cat", categorical_transformer, categorical),
])

# Full model: preprocessing followed by logistic regression.
clf = Pipeline([
    ("perprocessor", preprocessor),
    ("classifier", LogisticRegression()),
])

# Pipeline.fit returns the pipeline itself, so training and prediction chain.
y_pred = clf.fit(x_train, y_train).predict(x_test)

# Fraction of held-out rows classified correctly.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

0.45

In [7]:
# Read the insurance dataset from a local CSV and preview its first three rows.
# `df` is rebound here, replacing the frame loaded earlier in the notebook.
# NOTE(review): absolute Windows path — only valid where this file exists.
df = pd.read_csv(filepath_or_buffer="D:\\DataSet\\insurance.csv")
df.head(n=3)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462


In [8]:
# The target is the `charges` column; every remaining column is a predictor.
y = df["charges"]
x = df.drop(columns="charges")

# 80/20 train/test split with a fixed seed so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
from sklearn.preprocessing import FunctionTransformer

# Fresh column-name registries for this dataset; filled by func_dtype() and
# read later when the ColumnTransformer is assembled.
numerical = []
categorical = []

# Same helper as the one defined earlier in this notebook; it is redefined
# here so that this section can be run on its own.
def func_dtype(x_train):
    """Record which columns of ``x_train`` are numeric and which are categorical.

    The names of the int64/float64 columns are stored in the module-level list
    ``numerical`` and the names of the object-dtype columns in ``categorical``.
    Both lists are overwritten in place on every call, so calling the function
    repeatedly never accumulates duplicate column names.

    Parameters
    ----------
    x_train : pandas.DataFrame
        Feature frame whose column dtypes are inspected.

    Returns
    -------
    pandas.DataFrame
        ``x_train`` itself, untouched. This function is wrapped in a
        ``FunctionTransformer`` below, and a transformer is expected to hand
        data back rather than ``None``.
    """
    # Slice assignment mutates the existing list objects instead of appending,
    # which is what makes repeated calls safe.
    numerical[:] = x_train.select_dtypes(include=["int64", "float64"]).columns.tolist()
    categorical[:] = x_train.select_dtypes(include="object").columns.tolist()
    return x_train

# Wrap the helper so it can be driven through the scikit-learn transformer API.
dtype_filter = FunctionTransformer(func_dtype)

# Called only for its side effect of filling `numerical` and `categorical`.
# The result is intentionally not assigned to `x`, which would overwrite the
# feature frame with the helper's return value (None when the helper has no
# return statement).
dtype_filter.transform(x_train)

print("numerical feature : ",numerical)
print("categorical feature : ",categorical)

numerical feature :  ['age', 'bmi', 'children']
categorical feature :  ['sex', 'smoker', 'region']


In [10]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [11]:
# NOTE: step names are reproduced exactly as written (including their
# spelling) because they become the parameter keys of the fitted pipeline.

# Numeric branch: mean imputation followed by standard scaling.
numeric_transformer = Pipeline([
    ("impute", SimpleImputer(strategy="mean")),
    ("sclaer", StandardScaler()),
])

# Categorical branch: most-frequent imputation followed by one-hot encoding;
# handle_unknown="ignore" avoids an error on categories unseen during fit.
categorical_transformer = Pipeline([
    ("impute", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# Send the columns listed in `numerical` / `categorical` to their branch.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numerical),
    ("cat", categorical_transformer, categorical),
])

# Full model: preprocessing followed by ordinary least-squares regression.
# The final step keeps the name "classifier" although it holds a regressor.
clf = Pipeline([
    ("perprocessor", preprocessor),
    ("classifier", LinearRegression()),
])

# Pipeline.fit returns the pipeline itself, so training and prediction chain.
y_pred = clf.fit(x_train, y_train).predict(x_test)

# Coefficient of determination on the held-out rows.
score = r2_score(y_true=y_test, y_pred=y_pred)
score

0.7835929767120722

In [12]:
import pandas as pd
import numpy as np

In [13]:
# Read the tips dataset from a local CSV and preview its first three rows.
# `df` is rebound here, replacing the frame loaded earlier in the notebook.
# NOTE(review): absolute Windows path — only valid where this file exists.
df = pd.read_csv(filepath_or_buffer="D:\\DataSet\\tips.csv")
df.head(n=3)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3


In [14]:
from sklearn.model_selection import train_test_split

# The target is the `tip` column; every remaining column is a predictor.
y = df["tip"]
x = df.drop(columns="tip")

# 80/20 train/test split with a fixed seed so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Module-level registries of column names, filled by dtype_filter() and read
# later when the ColumnTransformer is assembled. The spelling `categoical` is
# kept because the pipeline cell further down refers to the list by that name.
numerical = []
categoical = []

def dtype_filter(data):
    """Record which columns of ``data`` are numeric and which are categorical.

    The names of the int64/float64 columns are stored in the module-level list
    ``numerical`` and the names of the object-dtype columns in ``categoical``.
    Both lists are overwritten in place on every call, so calling the function
    more than once never leaves duplicate column names behind.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature frame whose column dtypes are inspected.

    Returns
    -------
    None
        The function works only through the two module-level lists.
    """
    # Slice assignment replaces the list contents instead of appending to them.
    numerical[:] = data.select_dtypes(include=["int64", "float64"]).columns.tolist()
    categoical[:] = data.select_dtypes(include=["object"]).columns.tolist()

# Fill the column-name lists from the training features, then report both.
dtype_filter(x_train)
for _label, _cols in (
    ("Numerical dtype : ", numerical),
    ("Categoical dtype : ", categoical),
):
    print(_label, _cols)

Numerical dtype :  ['total_bill', 'size']
Categoical dtype :  ['sex', 'smoker', 'day', 'time']


In [16]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [17]:
# Numeric branch: mean imputation followed by standard scaling.
numerical_feature = Pipeline([
    ("impute", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
])

# Categorical branch: most-frequent imputation followed by one-hot encoding.
# drop="first" omits one indicator column per feature, and sparse_output=False
# asks the encoder for a dense array.
categoical_feature = Pipeline([
    ("impute", SimpleImputer(strategy="most_frequent")),
    ("encode", OneHotEncoder(drop="first", sparse_output=False)),
])

# Send the columns listed in `numerical` / `categoical` to their branch.
preprocessor = ColumnTransformer([
    ("num", numerical_feature, numerical),
    ("cat", categoical_feature, categoical),
])

# Full model: preprocessing followed by ordinary least-squares regression.
# The final step keeps the name "classifier" although it holds a regressor.
clf = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", LinearRegression()),
])

# Pipeline.fit returns the pipeline itself, so training and prediction chain.
y_pred = clf.fit(x_train, y_train).predict(x_test)

# Coefficient of determination on the held-out rows.
score = r2_score(y_true=y_test, y_pred=y_pred)
score

0.4373018194348246