In [2]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset as pandas objects.
# NOTE: fetch_california_housing downloads the data when it is not cached locally,
# so this cell needs network access on first run.
housing = fetch_california_housing(as_frame=True)

# Define the numeric features and the "categorical" features.
# NOTE(review): the California housing columns appear to all be continuous; the
# second group is one-hot encoded here only to exercise the categorical branch
# of the ColumnTransformer - confirm this is intended for the demo.
numeric_features = housing.feature_names[:2]
categorical_features = housing.feature_names[2:]

# Define the preprocessing pipelines for each feature group.
# Each entry in `steps` must be a (name, estimator) tuple.
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Use ColumnTransformer to route each column group to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

# Define the full pipeline: preprocessing followed by a linear regression model.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', LinearRegression())])

# Convert the data and target to pandas objects.
x = pd.DataFrame(housing.data, columns=housing.feature_names)
y = pd.Series(housing.target)

# Fit the pipeline to the data.
pipeline.fit(x, y)

# Predict on the first ten rows as a smoke test of the fitted pipeline.
x_new = x.iloc[:10]
y_pred = pipeline.predict(x_new)
print(y_pred)

HTTPError: HTTP Error 403: Forbidden

# Covid Toy

In [4]:
# Read the covid toy dataset from its local CSV file.
df = pd.read_csv(filepath_or_buffer="C:\\Users\\user\\Datasets\\covid_toy.csv")

In [5]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [13]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

In [14]:
# Separate the target column from the features and hold out 20% of the rows
# for evaluation; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns='has_covid'),
    df['has_covid'],
    test_size=0.2,
    random_state=42,
)


In [15]:
# Column groups routed to the numeric and categorical preprocessing branches.
numeric_features = ['age', 'fever']
categorical_features = ['gender', 'city']

In [16]:
# Numeric branch: fill missing values with the column mean, then standardise.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode (categories unseen during fit are ignored at transform time).
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group to its branch.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Full model: preprocessing followed by logistic regression.
clf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
])

# Fit on the training split, then predict labels for the held-out rows.
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

In [17]:
from sklearn.metrics import accuracy_score

In [18]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.65

# Social Network Ads

In [19]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.compose import ColumnTransformer

In [20]:
# Read the Social Network Ads dataset from its local CSV file.
df = pd.read_csv(filepath_or_buffer="C:\\Users\\user\\Datasets\\Social_Network_Ads.csv")

In [21]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [22]:
# Discard the identifier and gender columns, then separate the features from
# the 'Purchased' target.
df = df.drop(columns=['User ID', 'Gender'])
y = df['Purchased']
x = df.drop(columns=['Purchased'])

In [23]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [24]:
# Standardise the features, project them onto two principal components, then
# classify with a seeded random forest.
# The final step is named 'classifier' (previously misspelled 'calssifier');
# the step name is the key used by pipe.named_steps and by parameter names
# such as 'classifier__n_estimators'.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=2)),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42)),
])

In [25]:
# Show the pipeline object as the cell output.
pipe

In [26]:
# Fit every step of the pipeline on the training split.
pipe.fit(X=x_train, y=y_train)

In [27]:
# Predict labels for the held-out rows.
y_pred = pipe.predict(X=x_test)

In [28]:
# Accuracy on the held-out rows.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(acc)

0.8875


# Tips

In [31]:
# Read the tips dataset from its local CSV file.
df = pd.read_csv(filepath_or_buffer="C:\\Users\\user\\Datasets\\tips.csv")

In [32]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [33]:
# 'total_bill' is the regression target; every other column is a candidate feature.
y = df['total_bill']
x = df.drop(columns=['total_bill'])

In [34]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [35]:
# Column groups routed to the numeric and categorical preprocessing branches.
categorical_features = ['sex', 'smoker', 'day', 'time']
numeric_features = ['tip', 'size']

In [36]:
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression 
# Numeric branch: fill missing values with the column mean, then standardise.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode (categories unseen during fit are ignored at transform time).
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group to its branch.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Full model: preprocessing followed by a linear regression.
# The final step keeps the name 'classifier' even though it holds a regressor.
clf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LinearRegression()),
])

# Fit on the training split, then predict total_bill for the held-out rows.
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

In [37]:
from sklearn.metrics import r2_score

In [38]:
# Coefficient of determination of the predictions on the held-out rows.
r2_score(y_true=y_test, y_pred=y_pred)

0.6240808714290969