# Utilizing Model pools in Pytorch

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from torch.optim.lr_scheduler import StepLR

# Define your dataset here
# For the sake of example, let's assume we are using CIFAR10
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_dataset_1 = Subset(train_dataset,range(1, len(train_dataset), 10))
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_dataset_1 = Subset(test_dataset,range(1, len(test_dataset), 10))

train_loader = DataLoader(train_dataset_1, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset_1, batch_size=100, shuffle=False)

# Define the model pool
model_pool = {
    'resnet18': models.resnet18(pretrained=True),
    'vgg16': models.vgg16(pretrained=True)
}

# Modify the final layer of each model to fit the CIFAR10 dataset
num_classes = 10
model_pool['resnet18'].fc = nn.Linear(model_pool['resnet18'].fc.in_features, num_classes)
model_pool['vgg16'].classifier[6] = nn.Linear(model_pool['vgg16'].classifier[6].in_features, num_classes)

# Function to train and evaluate a model
def train_and_evaluate(model, train_loader, test_loader, epochs=1):
    #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

    # Training loop
    for epoch in range(epochs):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            #data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        scheduler.step()

    # Evaluation
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            #data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = correct / len(test_loader.dataset)
    return test_accuracy


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:08<00:00, 21244345.46it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/sebasmos/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 104MB/s] 
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /Users/sebasmos/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:05<00:00, 101MB/s]  


In [2]:
# Train and evaluate each model in the pool
model_performance = {}
for model_name, model in model_pool.items():
    print(f"Training and evaluating {model_name}")
    accuracy = train_and_evaluate(model, train_loader, test_loader)
    model_performance[model_name] = accuracy
    print(f"Model: {model_name}, Accuracy: {accuracy}")

# Find the best performing model
best_model_name = max(model_performance, key=model_performance.get)
print(f"Best model: {best_model_name} with accuracy: {model_performance[best_model_name]}")

Training and evaluating resnet18
Model: resnet18, Accuracy: 0.548
Training and evaluating vgg16


# Transfer Learning

In [None]:
import torch
import torchvision.models as models
import torch.nn as nn
# Load a pre-trained model
model = models.resnet50(pretrained=True)
# Freeze all layers in the model
for param in model.parameters():
  param.requires_grad = False

# Replace the final layer with a new one for our specific task
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
# num_classes is the number of your new classes
# Fine-tuning the model
# Assume the use of a dataloader 'train_loader'
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()
num_epochs = 1
for epoch in range(num_epochs): # num_epochs is your desired number of epochs
  for inputs, labels in train_loader:
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 122MB/s]


# Feature extraction

In [None]:
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Load a pre-trained model and set it to evaluation mode
model = models.resnet50(pretrained=True)
model.eval()
# Define a transformation for the input data
transform = transforms.Compose([ transforms.Resize(256),
                                transforms.CenterCrop(224),
                                 transforms.ToTensor() ])

# Load your dataset
# Assume the use of a dataloader 'test_loader'
# Extract features
features = []
with torch.no_grad():
  for inputs, _ in test_loader:
    outputs = model(inputs)
    features.extend(outputs)

# 'features' now contains the extracted features from the dataset



# Ensemble Learning

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression


# Load a sample dataset
data = load_iris()
X, y = data.data, data.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize pre-trained models (these should be replaced with your actual pre-trained models)
model1 = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model2 = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
model3 = LogisticRegression(max_iter=200, random_state=42).fit(X_train, y_train)

# Creating an ensemble using Voting Classifier
ensemble_model = VotingClassifier(estimators=[ ('model1', model1),
 ('model2', model2),
  ('model3', model3) ], voting='hard')
# Fit ensemble model on training data and evaluate
ensemble_model.fit(X_train, y_train)
ensemble_predictions = ensemble_model.predict(X_test)
print(f"Ensemble Model Accuracy: {accuracy_score(y_test, ensemble_predictions)}")

Ensemble Model Accuracy: 1.0


# Cross Domain Application

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import joblib

retail_df = pd.read_csv('/content/retail_sales_dataset.csv')
retail_df = retail_df.drop(['Gender','Date', 'Customer ID', 'Transaction ID'], axis=1)
retail_df['Product Category'] = pd.Categorical(retail_df['Product Category'])
retail_df['Product Category Codes'] = retail_df['Product Category'].cat.codes

In [None]:
features = retail_df.drop(['Total Amount','Product Category'] , axis=1)
scaler = StandardScaler()
retail_features_scaled = scaler.fit_transform(features)
target = retail_df['Total Amount']
X_train, X_test, y_train, y_test = train_test_split(retail_features_scaled, target, test_size=0.3, random_state=0)


In [None]:
#Function for creating and predicting values
def model_predict(model_name,X_train,y_train):
  regressor = model_name

  # Fit the Algorithm
  regressor.fit(X_train, y_train)

  # Predicting on the test set
  y_pred_test = regressor.predict(X_test)

  return y_pred_test


In [None]:
#Creating instance of model
retail_model=LinearRegression()

#Predicting on test set
y_pred_test_lr=model_predict(model_name=retail_model,X_train=X_train,y_train=y_train)

retail_mse = mean_squared_error(y_test, y_pred_test_lr)

print(f"Mean Squared Error in Reatil Sales: {retail_mse}")

joblib.dump(retail_model, "retail_model.joblib")

Mean Squared Error in Reatil Sales: 44620.781249803535


['retail_model.joblib']

In [None]:
online_df = pd.read_csv('/content/OnlineCustomerSalesData.csv')
online_df = online_df.drop(['Customer_id', 'Gender','Purchase_DATE', 'Purchase_VALUE','Browser', 'Newsletter', 'Voucher'] , axis=1)

In [None]:
online_df

Unnamed: 0,Age,Revenue_Total,N_Purchases,Pay_Method,Time_Spent
0,53,45.3,2,1,885
1,18,36.2,3,2,656
2,52,10.6,1,0,761
3,29,54.1,5,1,906
4,21,56.9,1,1,605
...,...,...,...,...,...
65791,30,10.9,4,1,894
65792,33,29.3,1,0,722
65793,50,25.4,5,3,424
65794,56,29.2,1,3,731


In [None]:
online_marketplace_features = online_df.drop( 'Revenue_Total', axis=1)
online_marketplace_target = online_df['Revenue_Total']

In [None]:
from sklearn.preprocessing import StandardScaler

# Load the pre-trained retail sales model
model = joblib.load('/content/retail_model.joblib')
# Preprocess online marketplace data
scaler = StandardScaler()
online_marketplace_features_scaled = scaler.fit_transform(online_marketplace_features)
# Apply the model to the target domain
predicted_sales = model.predict(online_marketplace_features_scaled)
# Calculate the model's performance
mse = mean_squared_error(online_marketplace_target, predicted_sales)
print(f"Mean Squared Error in Online Marketplace: {mse}")

joblib.dump(model, "online_sales_model.joblib")

Mean Squared Error in Online Marketplace: 457470.9124484482


['online_sales_model.joblib']

# Resource Optimization

In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input
ffrom tensorflow.keras.utils import load_img, img_to_array
import numpy as np
# Load a pre-trained VGG16 model
model = VGG16(weights='imagenet', include_top=False)
# Function to prepare an image for the model
def prepare_image(file_path):
  img = load_img(file_path,
                 target_size=(224, 224))
  img_array = img_to_array(img)
  img_array_expanded = np.expand_dims(img_array, axis=0)
  return preprocess_input(img_array_expanded)
# Prepare and predict on a new image
test_image = prepare_image('/content/dataset/HappyFish.jpg')
features = model.predict(test_image)
# Use features for classification or further processing

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
import time
# Example: Timing the feature extraction
start_time = time.time()
features = model.predict(test_image)
end_time = time.time()
print(f"Feature extraction took {end_time - start_time} seconds.")

Feature extraction took 0.6298120021820068 seconds.


# Concept Similarity

In [None]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Subset
from torch.nn.functional import cosine_similarity

# Load pre-trained models
model_names = ['resnet18', 'alexnet', 'vgg16']
models = {name: getattr(models, name)(pretrained=True) for name in model_names}
for model in models.values():
    model.eval()  # Set model to evaluation mode

# Define a transform to normalize the data
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to fit models' input size
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR10 dataset
dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
dataset_1 = Subset(dataset,range(1, len(dataset), 10))
loader = DataLoader(dataset_1, batch_size=10, shuffle=True)

def extract_features(model, loader):
    with torch.no_grad():
        for images, _ in loader:
            return model(images).flatten(1)  # Flatten the features

# Extract features from the first batch
features = {name: extract_features(model, loader) for name, model in models.items()}

# Calculate cosine similarity
similarity_threshold = 0.5
similar_models = {}
for name1, features1 in features.items():
    for name2, features2 in features.items():
        if name1 != name2:
            similarity = cosine_similarity(features1, features2).mean().item()

            if similarity > similarity_threshold:
                similar_models[(name1, name2)] = similarity


print("Similar Models based on Feature Cosine Similarity:")
for model_pair, sim in similar_models.items():
    print(f"{model_pair}: {sim}")




Files already downloaded and verified
Similar Models based on Feature Cosine Similarity:
('resnet18', 'vgg16'): 0.5362259149551392
('alexnet', 'vgg16'): 0.6577829122543335
('vgg16', 'resnet18'): 0.5362259149551392
('vgg16', 'alexnet'): 0.6577829122543335
