# Chapter 3 Overview of Transfer Learning Algorithms

In this exercise we write code to `establish a baseline` framework for `transfer learning` practice

and `introduce` some `datasets`.

## 1. Imports

In [67]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt

from IPython.display import display

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

## 2. Data Preparation

Just like the `ImageNet` dataset for computer vision, `Transfer Learning` has some standard datasets and benchmarks.
* `Object Recognition` datasets, such as Office-31 and Office-Home
* `Handwritten digits`, such as MNIST, USPS, and SVHN
* `Sentiment analysis` datasets, such as Amazon Review dataset, 20Newsgroup, and Reuters-21578
* `Face Recognition` datasets such as CMU-PIE
* `Human activity recognition` datasets such as DSADS and Opportunity

### Download and unzip

In [3]:
!mkdir data
!wget -P data https://wjdcloud.blob.core.windows.net/dataset/OFFICE31.zip
!wget -P data https://wjdcloud.blob.core.windows.net/dataset/office31_resnet50.zip

mkdir: data: File exists
--2024-07-25 17:22:38--  https://wjdcloud.blob.core.windows.net/dataset/OFFICE31.zip
Resolving wjdcloud.blob.core.windows.net (wjdcloud.blob.core.windows.net)... 20.60.131.4
Connecting to wjdcloud.blob.core.windows.net (wjdcloud.blob.core.windows.net)|20.60.131.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 79552513 (76M) [application/zip]
Saving to: ‘data/OFFICE31.zip’


2024-07-25 17:24:40 (648 KB/s) - ‘data/OFFICE31.zip’ saved [79552513/79552513]

--2024-07-25 17:24:40--  https://wjdcloud.blob.core.windows.net/dataset/office31_resnet50.zip
Resolving wjdcloud.blob.core.windows.net (wjdcloud.blob.core.windows.net)... 20.60.131.4
Connecting to wjdcloud.blob.core.windows.net (wjdcloud.blob.core.windows.net)|20.60.131.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 227388034 (217M) [application/zip]
Saving to: ‘data/office31_resnet50.zip’


2024-07-25 17:30:13 (669 KB/s) - ‘data/office31_resnet50.zip’ saved

In [7]:
# unzipping the data
!unzip -qn ./data/office31_resnet50.zip -d data
!unzip -qn ./data/OFFICE31.zip -d data

### Data Wrangling

In [108]:
def wrangling(data_path: str, domain: str, prefix: str = 'amazon'):
    """Load one pre-extracted feature file and split it into features and labels.

    Reads ``{data_path}/resnet50_feature/{prefix}_{domain}.csv`` (the file has no
    header row) and treats the last column as the class label and every other
    column as a feature.

    Parameters
    ----------
    data_path : str
        Directory that contains the ``resnet50_feature`` folder.
    domain : str
        Second part of the file name, e.g. ``'amazon'`` or ``'webcam'``.
    prefix : str, default 'amazon'
        First part of the file name. The default reproduces the previously
        hard-coded value.
        NOTE(review): presumably the domain the feature extractor was trained
        on -- confirm against the dataset documentation.

    Returns
    -------
    X : pandas.DataFrame
        All columns except the last one.
    y : pandas.Series
        The last column, named ``'target'``.
    """
    target = 'target'

    # load extracted features - resnet50_features
    frame = pd.read_csv(f'{data_path}/resnet50_feature/{prefix}_{domain}.csv', header=None)

    # With header=None the columns are numbered 0..n-1, so the label is the last
    # one (column 2048 for the 2048-dimensional features used in this notebook).
    frame = frame.rename(columns={frame.columns[-1]: target})

    # split features and target from the csv
    y = frame[target]
    X = frame.drop(columns=[target])

    print(f'{domain} X shape {X.shape} y shape {y.shape}')

    return X, y

In [109]:
# Source data comes from the 'amazon' file, target data from the 'webcam' file
Xs, Ys = wrangling(data_path='data', domain='amazon')
Xt, Yt = wrangling(data_path='data', domain='webcam')

# Preview the first rows of every piece, each preceded by its caption
display(
    'Source X', Xs.head(),
    'Source y', Ys.head(),
    'Target X', Xt.head(),
    'Target y', Yt.head(),
)

amazon X shape (2817, 2048) y shape (2817,)
webcam X shape (795, 2048) y shape (795,)


'Source X'

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,0.011726,1.928495,0.467546,0.073322,0.740273,0.092805,0.056363,0.031336,1.440191,0.041238,...,1.62932,0.019851,0.0,0.19785,0.275725,0.004216,0.302958,0.031908,0.175205,0.166139
1,0.002589,0.240411,0.358171,0.040898,0.052421,0.040592,0.140565,0.017696,0.269039,0.028463,...,0.016501,0.191592,0.051515,0.067583,0.289404,0.100091,0.266336,0.601984,0.795638,0.693593
2,0.019037,0.509267,1.413801,0.295167,0.425771,0.01507,0.196558,0.181648,0.198146,0.30703,...,0.755519,0.074087,0.373561,0.315758,0.062195,0.190882,0.083827,0.996922,0.47772,0.262765
3,1.341362,1.312891,1.043079,0.46227,0.43225,0.897018,0.050082,0.076174,0.34028,0.471088,...,1.343223,0.479982,1.210341,0.015797,0.0797,0.091558,0.430091,0.027529,0.037952,0.602081
4,0.002672,0.191623,0.425395,0.331961,0.09871,0.0,0.037058,0.123914,0.034198,0.25124,...,0.278524,0.002588,0.696498,0.349591,0.11259,0.789248,0.424191,0.249623,0.563543,0.271393


'Source y'

0    23.0
1    16.0
2     6.0
3     7.0
4     0.0
Name: target, dtype: float64

'Target X'

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,0.472762,0.642904,0.37155,1.37717,1.413957,1.556663,0.12538,1.239406,0.253532,0.547048,...,0.795987,0.14838,1.766209,0.250389,0.042226,0.025835,0.473243,0.288611,0.000968,0.088618
1,0.710252,1.619362,0.20768,0.179621,0.658437,1.949497,0.587555,0.085866,1.931049,1.423679,...,0.124418,0.959986,0.402001,1.130188,0.076225,0.199461,0.47482,1.330238,1.516034,0.027432
2,0.024314,0.039557,0.038186,0.117979,1.789194,0.713017,1.18533,0.471633,1.547264,0.528375,...,0.362608,0.011839,0.700429,0.017442,0.610215,0.1695,0.172165,0.028137,0.334309,0.379488
3,0.606116,0.06635,0.05197,1.033019,0.210776,0.558731,0.383807,2.590502,0.247573,0.079687,...,0.661523,0.366647,0.814646,0.736966,0.083871,0.019002,0.270441,0.001266,0.440576,0.100392
4,0.100082,0.714086,0.744,0.580675,1.019657,0.952223,0.562736,1.38477,0.666057,0.710087,...,0.657762,0.568982,0.172749,0.330045,0.116303,0.090555,0.306064,0.241835,0.10591,0.10708


'Target y'

0    22.0
1    26.0
2     8.0
3    29.0
4    15.0
Name: target, dtype: float64

### Create Model - KNN classifier

In [110]:
def KNNModel(k_neighbors, X_source=None, y_source=None, X_target=None, y_target=None):
    """Fit a k-nearest-neighbours classifier on source data and score it on target data.

    Parameters
    ----------
    k_neighbors : int
        Number of neighbours passed to ``KNeighborsClassifier``.
    X_source, y_source : optional
        Features and labels used for fitting. When omitted, the notebook-level
        ``Xs`` / ``Ys`` are used, which is what this function always did before.
    X_target, y_target : optional
        Features and labels used for evaluation. When omitted, the notebook-level
        ``Xt`` / ``Yt`` are used.

    Returns
    -------
    float
        Accuracy of the predictions on the target data (also shown via ``display``).
    """
    # Explicit `is None` checks: truth-testing a DataFrame/Series raises ValueError.
    X_source = Xs if X_source is None else X_source
    y_source = Ys if y_source is None else y_source
    X_target = Xt if X_target is None else X_target
    y_target = Yt if y_target is None else y_target

    model = KNeighborsClassifier(n_neighbors=k_neighbors)
    model.fit(X_source, y_source)

    y_pred = model.predict(X_target)

    acc = accuracy_score(y_true=y_target, y_pred=y_pred)
    display(f'Accuracy of the model with {k_neighbors} neighbors is {acc}')

    # Returned so callers can collect or compare results across settings
    return acc

# Trailing ';' keeps the cell output limited to the display() line above
KNNModel(1);

'Accuracy of the model with 1 neighbors is 0.7459119496855345'