In [1]:
# scikit-learn pipeline example:
# logistic regression model with StandardScaler preprocessing


from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np

# Sample data: three samples with two features each, plus one label per sample
X = np.array([[1, 2], [2, 3], [3, 4]])
y = np.array([0, 1, 0])

# Define pipeline: features are standardised first, then handed to the classifier
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression()),
])

# Train on the sample data, then predict on those same samples
predictions = pipeline.fit(X, y).predict(X)
print("Predictions:", predictions)


Predictions: [0 0 0]


In [3]:
# PyTorch data pipeline example:
# a custom Dataset served in mini-batches by a DataLoader

import torch
from torch.utils.data import DataLoader, Dataset

# Custom Dataset
class CustomDataset(Dataset):
    """Map-style dataset that serves items straight from an indexable container."""

    def __init__(self, data):
        # Keep a reference to the caller's container; nothing is copied here.
        self.data = data

    def __len__(self):
        """Return the number of items, as reported by ``len`` of the container."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the item stored at ``index`` in the wrapped container."""
        return self.data[index]

# Sample data and pipeline
data = torch.tensor([[1.0], [2.0], [3.0]])

# shuffle=True draws a random sample order; give the loader its own seeded
# generator so the printed batches are the same on every fresh run of this cell.
shuffle_rng = torch.Generator().manual_seed(0)
data_loader = DataLoader(
    CustomDataset(data),
    batch_size=2,  # three samples -> one batch of two, then one leftover sample
    shuffle=True,
    generator=shuffle_rng,
)

for batch in data_loader:
    print("Batch:", batch)


Batch: tensor([[1.],
        [3.]])
Batch: tensor([[2.]])


In [None]:
# TensorFlow tf.data pipeline example

%pip install tensorflow
import tensorflow as tf

# Sample data
data = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4])

# Define pipeline
pipeline = data.map(lambda x: x * 2).batch(2)
for batch in pipeline:
    print("Batch:", batch.numpy())


Collecting tensorflow
  Using cached tensorflow-2.18.0-cp311-cp311-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Using cached tensorflow_intel-2.18.0-cp311-cp311-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached flatbuffers-24.12.23-py2.py3-none-any.whl.metadata (876 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from

In [None]:
# Dask DataFrame pipeline example

import dask.dataframe as dd
import pandas as pd

# Wrap a small pandas frame in a single-partition Dask DataFrame
source_frame = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df = dd.from_pandas(source_frame, npartitions=1)

# Keep the rows whose 'A' value exceeds 1; compute() evaluates the filter
rows_above_one = df['A'] > 1
pipeline = df[rows_above_one].compute()
print(pipeline)


   A  B
1  2  5
2  3  6


In [12]:
%pip install spacy
!python -m spacy download en_core_web_sm

import spacy

# Load spaCy pipeline
nlp = spacy.load("en_core_web_sm")

# Process text
doc = nlp("This is an NLP pipeline demonstration.")
for token in doc:
    print(f"Token: {token.text}, POS: {token.pos_}")



Collecting spacy
  Using cached spacy-3.8.3-cp311-cp311-win_amd64.whl.metadata (27 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Using cached spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Using cached spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Using cached murmurhash-1.0.11-cp311-cp311-win_amd64.whl.metadata (2.0 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Using cached cymem-2.0.10-cp311-cp311-win_amd64.whl.metadata (8.6 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Using cached preshed-3.0.9-cp311-cp311-win_amd64.whl.metadata (2.2 kB)
Collecting thinc<8.4.0,>=8.3.0 (from spacy)
  Downloading thinc-8.3.3-cp311-cp311-win_amd64.whl.metadata (15 kB)
Collecting wasabi<1.2.0,>=0.9.1 (from spacy)
  Using cached wasabi-1.1.3-py3-none-any.whl.metadata (28 kB)
Collecting srsly<3.0.0,>=2.4.3 (from spacy)
  Using cached srsly-2.5.0-cp311-cp311-w